Diffstat (limited to 'kernel/srcu.c')
 -rw-r--r--  kernel/srcu.c  92
 1 files changed, 56 insertions, 36 deletions
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 43f1d61e513e..b6b9ea2eb51c 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -249,26 +249,12 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  */
 #define SYNCHRONIZE_SRCU_READER_DELAY 5
 
-/*
- * Flip the readers' index by incrementing ->completed, then wait
- * until there are no more readers using the counters referenced by
- * the old index value. (Recall that the index is the bottom bit
- * of ->completed.)
- *
- * Of course, it is possible that a reader might be delayed for the
- * full duration of flip_idx_and_wait() between fetching the
- * index and incrementing its counter. This possibility is handled
- * by __synchronize_srcu() invoking flip_idx_and_wait() twice.
- */
-static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
+static void wait_idx(struct srcu_struct *sp, int idx, bool expedited)
 {
-	int idx;
 	int trycount = 0;
 
-	idx = sp->completed++ & 0x1;
-
 	/*
-	 * If a reader fetches the index before the above increment,
+	 * If a reader fetches the index before the ->completed increment,
 	 * but increments its counter after srcu_readers_active_idx_check()
 	 * sums it, then smp_mb() D will pair with __srcu_read_lock()'s
 	 * smp_mb() B to ensure that the SRCU read-side critical section
@@ -298,17 +284,38 @@ static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
 	 * sees srcu_read_unlock()'s counter decrement, then any
 	 * of the current task's subsequent code will happen after
 	 * that SRCU read-side critical section.
+	 *
+	 * It also ensures the order between the above waiting and
+	 * the next flipping.
 	 */
 	smp_mb(); /* E */
 }
 
 /*
+ * Flip the readers' index by incrementing ->completed, then wait
+ * until there are no more readers using the counters referenced by
+ * the old index value. (Recall that the index is the bottom bit
+ * of ->completed.)
+ *
+ * Of course, it is possible that a reader might be delayed for the
+ * full duration of flip_idx_and_wait() between fetching the
+ * index and incrementing its counter. This possibility is handled
+ * by the next __synchronize_srcu() invoking wait_idx() for such readers
+ * before starting a new grace period.
+ */
+static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
+{
+	int idx;
+
+	idx = sp->completed++ & 0x1;
+	wait_idx(sp, idx, expedited);
+}
+
+/*
  * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
  */
 static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
 {
-	int idx = 0;
-
 	rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
 			   !lock_is_held(&rcu_bh_lock_map) &&
 			   !lock_is_held(&rcu_lock_map) &&
@@ -318,27 +325,40 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
 	mutex_lock(&sp->mutex);
 
 	/*
-	 * If there were no helpers, then we need to do two flips of
-	 * the index. The first flip is required if there are any
-	 * outstanding SRCU readers even if there are no new readers
-	 * running concurrently with the first counter flip.
+	 * Suppose that during the previous grace period, a reader
+	 * picked up the old value of the index, but did not increment
+	 * its counter until after the previous instance of
+	 * __synchronize_srcu() did the counter summation and recheck.
+	 * That previous grace period was OK because the reader did
+	 * not start until after the grace period started, so the grace
+	 * period was not obligated to wait for that reader.
+	 *
+	 * However, the current SRCU grace period does have to wait for
+	 * that reader. This is handled by invoking wait_idx() on the
+	 * non-active set of counters (hence sp->completed - 1). Once
+	 * wait_idx() returns, we know that all readers that picked up
+	 * the old value of ->completed and that already incremented their
+	 * counter will have completed.
 	 *
-	 * The second flip is required when a new reader picks up
-	 * the old value of the index, but does not increment its
-	 * counter until after its counters is summed/rechecked by
-	 * srcu_readers_active_idx_check(). In this case, the current SRCU
-	 * grace period would be OK because the SRCU read-side critical
-	 * section started after this SRCU grace period started, so the
-	 * grace period is not required to wait for the reader.
+	 * But what about readers that picked up the old value of
+	 * ->completed, but -still- have not managed to increment their
+	 * counter? We do not need to wait for those readers, because
+	 * they will have started their SRCU read-side critical section
+	 * after the current grace period starts.
 	 *
-	 * However, the next SRCU grace period would be waiting for the
-	 * other set of counters to go to zero, and therefore would not
-	 * wait for the reader, which would be very bad. To avoid this
-	 * bad scenario, we flip and wait twice, clearing out both sets
-	 * of counters.
+	 * Because it is unlikely that readers will be preempted between
+	 * fetching ->completed and incrementing their counter, wait_idx()
+	 * will normally not need to wait.
 	 */
-	for (; idx < 2; idx++)
-		flip_idx_and_wait(sp, expedited);
+	wait_idx(sp, (sp->completed - 1) & 0x1, expedited);
+
+	/*
+	 * Now that wait_idx() has waited for the really old readers,
+	 * invoke flip_idx_and_wait() to flip the counter and wait
+	 * for current SRCU readers.
+	 */
+	flip_idx_and_wait(sp, expedited);
+
 	mutex_unlock(&sp->mutex);
 }
 
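For readers following the new comments, here is a small stand-alone model of the control flow this patch establishes. It is only a sketch: the model_* names below are hypothetical simplifications (a single global counter per index instead of SRCU's per-CPU counters, and none of the smp_mb() B/D/E pairings), but the grace-period sequence mirrors the patched __synchronize_srcu(): first drain the inactive counter set left over from readers that sampled the pre-flip value of ->completed, then flip the index and wait for current readers.

/*
 * Hypothetical model of the reworked SRCU grace-period path -- not
 * the kernel implementation. Real SRCU uses per-CPU counters and
 * the smp_mb() pairings (B, D, E) referenced in the patch.
 */
#include <stdio.h>

struct model_srcu {
	unsigned long completed;	/* bottom bit selects the active counter set */
	int nr_readers[2];		/* simplified: one global counter per index */
};

/* Reader entry: sample the active index, then bump that counter. */
static int model_read_lock(struct model_srcu *sp)
{
	int idx = sp->completed & 0x1;

	sp->nr_readers[idx]++;
	return idx;
}

static void model_read_unlock(struct model_srcu *sp, int idx)
{
	sp->nr_readers[idx]--;
}

/* Model of wait_idx(): wait until the chosen counter set drains. */
static void model_wait_idx(struct model_srcu *sp, int idx)
{
	while (sp->nr_readers[idx] != 0)
		;	/* the real code rechecks, sleeps, or expedites here */
}

/* Model of flip_idx_and_wait(): flip ->completed, then drain the old set. */
static void model_flip_idx_and_wait(struct model_srcu *sp)
{
	int idx = sp->completed++ & 0x1;

	model_wait_idx(sp, idx);
}

/*
 * Model of the patched __synchronize_srcu(): drain the inactive set
 * first (readers that sampled the pre-flip index during the previous
 * grace period), then do the single flip-and-wait.
 */
static void model_synchronize_srcu(struct model_srcu *sp)
{
	model_wait_idx(sp, (sp->completed - 1) & 0x1);
	model_flip_idx_and_wait(sp);
}

int main(void)
{
	struct model_srcu sp = { .completed = 0 };
	int idx;

	idx = model_read_lock(&sp);	/* reader on the current index */
	model_read_unlock(&sp, idx);	/* finishes before the grace period */
	model_synchronize_srcu(&sp);	/* both counter sets drained in turn */
	printf("completed=%lu readers={%d,%d}\n",
	       sp.completed, sp.nr_readers[0], sp.nr_readers[1]);
	return 0;
}

With ->completed starting at 0, model_synchronize_srcu() waits on index 1 (the inactive set) and only then increments ->completed and waits on index 0, matching the wait_idx(sp, (sp->completed - 1) & 0x1, expedited) followed by flip_idx_and_wait(sp, expedited) sequence added in the last hunk.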