author		Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2017-04-12 18:16:50 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2017-06-08 11:25:23 -0400
commit		881ec9d209d5371c21db89ca1bb19afd3fcadab3
tree		04f275f3a297386ecc5a148f97dc4235d162c742 /kernel/rcu/srcutree.c
parent		17ed2b6c3ad9f80174c32cc19d86a15396abc196
srcu: Eliminate possibility of destructive counter overflow
Earlier versions of Tree SRCU were subject to a counter overflow bug that
could theoretically result in too-short grace periods. This commit
eliminates this problem by adding an update-side memory barrier.
The short explanation is that if the updater sums the unlock counts
too late to see a given __srcu_read_unlock() increment, that CPU's
next __srcu_read_lock() must see the new value of ->srcu_idx, thus
incrementing the other bank of counters. This eliminates the possibility
of destructive counter overflow as long as the srcu_read_lock() nesting
level does not exceed floor(ULONG_MAX/NR_CPUS/2), which should be an
eminently reasonable nesting limit, especially on 64-bit systems.
Reported-by: Lance Roy <ldr709@gmail.com>
Suggested-by: Lance Roy <ldr709@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
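For context on the reasoning above: the barrier added by this patch (labeled E below) pairs with the reader-side barriers B and C. The following is a simplified sketch of the reader-side paths as they look in this era of kernel/rcu/srcutree.c, reproduced here for orientation rather than taken from this patch; consult the tree at this commit for the authoritative code.

/*
 * Reader-side sketch (not part of this patch): barrier E in srcu_flip()
 * pairs with B and C below.  If the updater's scan of the unlock counters
 * misses this reader's increment, then B/C together with E guarantee that
 * the reader's next load of ->srcu_idx returns the flipped value, steering
 * it to the other bank of counters.
 */
int __srcu_read_lock(struct srcu_struct *sp)
{
	int idx;

	idx = READ_ONCE(sp->srcu_idx) & 0x1;
	this_cpu_inc(sp->sda->srcu_lock_count[idx]);
	smp_mb(); /* B */  /* Avoid leaking the critical section. */
	return idx;
}

void __srcu_read_unlock(struct srcu_struct *sp, int idx)
{
	smp_mb(); /* C */  /* Avoid leaking the critical section. */
	this_cpu_inc(sp->sda->srcu_unlock_count[idx]);
}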
Diffstat (limited to 'kernel/rcu/srcutree.c')
-rw-r--r--	kernel/rcu/srcutree.c	33
1 file changed, 24 insertions(+), 9 deletions(-)
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 157654fa436a..fceca84df6b0 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -275,15 +275,20 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
 	 * not mean that there are no more readers, as one could have read
 	 * the current index but not have incremented the lock counter yet.
 	 *
-	 * Possible bug: There is no guarantee that there haven't been
-	 * ULONG_MAX increments of ->srcu_lock_count[] since the unlocks were
-	 * counted, meaning that this could return true even if there are
-	 * still active readers.  Since there are no memory barriers around
-	 * srcu_flip(), the CPU is not required to increment ->srcu_idx
-	 * before running srcu_readers_unlock_idx(), which means that there
-	 * could be an arbitrarily large number of critical sections that
-	 * execute after srcu_readers_unlock_idx() but use the old value
-	 * of ->srcu_idx.
+	 * So suppose that the updater is preempted here for so long
+	 * that more than ULONG_MAX non-nested readers come and go in
+	 * the meantime.  It turns out that this cannot result in overflow
+	 * because if a reader modifies its unlock count after we read it
+	 * above, then that reader's next load of ->srcu_idx is guaranteed
+	 * to get the new value, which will cause it to operate on the
+	 * other bank of counters, where it cannot contribute to the
+	 * overflow of these counters.  This means that there is a maximum
+	 * of 2*NR_CPUS increments, which cannot overflow given current
+	 * systems, especially not on 64-bit systems.
+	 *
+	 * OK, how about nesting?  This does impose a limit on nesting
+	 * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient,
+	 * especially on 64-bit systems.
 	 */
 	return srcu_readers_lock_idx(sp, idx) == unlocks;
 }
@@ -671,6 +676,16 @@ static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
  */
 static void srcu_flip(struct srcu_struct *sp)
 {
+	/*
+	 * Ensure that if this updater saw a given reader's increment
+	 * from __srcu_read_lock(), that reader was using an old value
+	 * of ->srcu_idx.  Also ensure that if a given reader sees the
+	 * new value of ->srcu_idx, this updater's earlier scans cannot
+	 * have seen that reader's increments (which is OK, because this
+	 * grace period need not wait on that reader).
+	 */
+	smp_mb(); /* E */  /* Pairs with B and C. */
+
 	WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1);
 
 	/*
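As a rough illustration of the nesting limit mentioned in the new comment (example values chosen here, not taken from the commit): with NR_CPUS = 4096 on a 64-bit system, floor(ULONG_MAX/NR_CPUS/2) = floor((2^64 - 1)/8192) = 2^51 - 1, roughly 2.25 * 10^15 nested srcu_read_lock() calls per reader; even a 32-bit system with NR_CPUS = 64 allows floor((2^32 - 1)/128) = 2^25 - 1, roughly 3.4 * 10^7 levels, far beyond any realistic nesting depth.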