diff options
author | Paul E. McKenney <paul.mckenney@linaro.org> | 2012-10-11 15:30:37 -0400 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2012-11-08 14:50:12 -0500 |
commit | 1924bcb0259711eea98491a7942d1ffbf677e114 (patch) | |
tree | b5c98e8b210c60468cf1264b9839a3078869b5d1 /kernel/rcutree.c | |
parent | 7b2e6011f150c42235c4a541d20cf6891afe878a (diff) |
rcu: Avoid counter wrap in synchronize_sched_expedited()
There is a counter scheme similar to ticket locking that
synchronize_sched_expedited() uses to service multiple concurrent
callers with the same expedited grace period. Upon entry, a
sync_sched_expedited_started variable is atomically incremented,
and upon completion of a expedited grace period a separate
sync_sched_expedited_done variable is atomically incremented.
However, if a synchronize_sched_expedited() is delayed while
in try_stop_cpus(), concurrent invocations will increment the
sync_sched_expedited_started counter, which will eventually overflow.
If the original synchronize_sched_expedited() resumes execution just
as the counter overflows, a concurrent invocation could incorrectly
conclude that an expedited grace period elapsed in zero time, which
would be bad. One could rely on counter size to prevent this from
happening in practice, but the goal is to formally validate this
code, so it needs to be fixed anyway.
This commit therefore checks the gap between the two counters before
incrementing sync_sched_expedited_started, and if the gap is too
large, does a normal grace period instead. Overflow is thus only
possible if there are more than about 3.5 billion threads on 32-bit
systems, which can be excluded until such time as task_struct fits
into a single byte and 4G/4G patches are accepted into mainline.
It is also easy to encode this limitation into mechanical theorem
provers.
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r-- | kernel/rcutree.c | 62 |
1 files changed, 44 insertions, 18 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 89148860e2ba..678905555ca4 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -2249,8 +2249,8 @@ void synchronize_rcu_bh(void) | |||
2249 | } | 2249 | } |
2250 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | 2250 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); |
2251 | 2251 | ||
2252 | static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0); | 2252 | static atomic_long_t sync_sched_expedited_started = ATOMIC_LONG_INIT(0); |
2253 | static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0); | 2253 | static atomic_long_t sync_sched_expedited_done = ATOMIC_LONG_INIT(0); |
2254 | 2254 | ||
2255 | static int synchronize_sched_expedited_cpu_stop(void *data) | 2255 | static int synchronize_sched_expedited_cpu_stop(void *data) |
2256 | { | 2256 | { |
@@ -2308,10 +2308,30 @@ static int synchronize_sched_expedited_cpu_stop(void *data) | |||
2308 | */ | 2308 | */ |
2309 | void synchronize_sched_expedited(void) | 2309 | void synchronize_sched_expedited(void) |
2310 | { | 2310 | { |
2311 | int firstsnap, s, snap, trycount = 0; | 2311 | long firstsnap, s, snap; |
2312 | int trycount = 0; | ||
2312 | 2313 | ||
2313 | /* Note that atomic_inc_return() implies full memory barrier. */ | 2314 | /* |
2314 | firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started); | 2315 | * If we are in danger of counter wrap, just do synchronize_sched(). |
2316 | * By allowing sync_sched_expedited_started to advance no more than | ||
2317 | * ULONG_MAX/8 ahead of sync_sched_expedited_done, we are ensuring | ||
2318 | * that more than 3.5 billion CPUs would be required to force a | ||
2319 | * counter wrap on a 32-bit system. Quite a few more CPUs would of | ||
2320 | * course be required on a 64-bit system. | ||
2321 | */ | ||
2322 | if (ULONG_CMP_GE((ulong)atomic_read(&sync_sched_expedited_started), | ||
2323 | (ulong)atomic_read(&sync_sched_expedited_done) + | ||
2324 | ULONG_MAX / 8)) { | ||
2325 | synchronize_sched(); | ||
2326 | return; | ||
2327 | } | ||
2328 | |||
2329 | /* | ||
2330 | * Take a ticket. Note that atomic_inc_return() implies a | ||
2331 | * full memory barrier. | ||
2332 | */ | ||
2333 | snap = atomic_long_inc_return(&sync_sched_expedited_started); | ||
2334 | firstsnap = snap; | ||
2315 | get_online_cpus(); | 2335 | get_online_cpus(); |
2316 | WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id())); | 2336 | WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id())); |
2317 | 2337 | ||
@@ -2324,6 +2344,13 @@ void synchronize_sched_expedited(void) | |||
2324 | NULL) == -EAGAIN) { | 2344 | NULL) == -EAGAIN) { |
2325 | put_online_cpus(); | 2345 | put_online_cpus(); |
2326 | 2346 | ||
2347 | /* Check to see if someone else did our work for us. */ | ||
2348 | s = atomic_long_read(&sync_sched_expedited_done); | ||
2349 | if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) { | ||
2350 | smp_mb(); /* ensure test happens before caller kfree */ | ||
2351 | return; | ||
2352 | } | ||
2353 | |||
2327 | /* No joy, try again later. Or just synchronize_sched(). */ | 2354 | /* No joy, try again later. Or just synchronize_sched(). */ |
2328 | if (trycount++ < 10) { | 2355 | if (trycount++ < 10) { |
2329 | udelay(trycount * num_online_cpus()); | 2356 | udelay(trycount * num_online_cpus()); |
@@ -2332,23 +2359,22 @@ void synchronize_sched_expedited(void) | |||
2332 | return; | 2359 | return; |
2333 | } | 2360 | } |
2334 | 2361 | ||
2335 | /* Check to see if someone else did our work for us. */ | 2362 | /* Recheck to see if someone else did our work for us. */ |
2336 | s = atomic_read(&sync_sched_expedited_done); | 2363 | s = atomic_long_read(&sync_sched_expedited_done); |
2337 | if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) { | 2364 | if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) { |
2338 | smp_mb(); /* ensure test happens before caller kfree */ | 2365 | smp_mb(); /* ensure test happens before caller kfree */ |
2339 | return; | 2366 | return; |
2340 | } | 2367 | } |
2341 | 2368 | ||
2342 | /* | 2369 | /* |
2343 | * Refetching sync_sched_expedited_started allows later | 2370 | * Refetching sync_sched_expedited_started allows later |
2344 | * callers to piggyback on our grace period. We subtract | 2371 | * callers to piggyback on our grace period. We retry |
2345 | * 1 to get the same token that the last incrementer got. | 2372 | * after they started, so our grace period works for them, |
2346 | * We retry after they started, so our grace period works | 2373 | * and they started after our first try, so their grace |
2347 | * for them, and they started after our first try, so their | 2374 | * period works for us. |
2348 | * grace period works for us. | ||
2349 | */ | 2375 | */ |
2350 | get_online_cpus(); | 2376 | get_online_cpus(); |
2351 | snap = atomic_read(&sync_sched_expedited_started); | 2377 | snap = atomic_long_read(&sync_sched_expedited_started); |
2352 | smp_mb(); /* ensure read is before try_stop_cpus(). */ | 2378 | smp_mb(); /* ensure read is before try_stop_cpus(). */ |
2353 | } | 2379 | } |
2354 | 2380 | ||
@@ -2356,15 +2382,15 @@ void synchronize_sched_expedited(void) | |||
2356 | * Everyone up to our most recent fetch is covered by our grace | 2382 | * Everyone up to our most recent fetch is covered by our grace |
2357 | * period. Update the counter, but only if our work is still | 2383 | * period. Update the counter, but only if our work is still |
2358 | * relevant -- which it won't be if someone who started later | 2384 | * relevant -- which it won't be if someone who started later |
2359 | * than we did beat us to the punch. | 2385 | * than we did already did their update. |
2360 | */ | 2386 | */ |
2361 | do { | 2387 | do { |
2362 | s = atomic_read(&sync_sched_expedited_done); | 2388 | s = atomic_long_read(&sync_sched_expedited_done); |
2363 | if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) { | 2389 | if (ULONG_CMP_GE((ulong)s, (ulong)snap)) { |
2364 | smp_mb(); /* ensure test happens before caller kfree */ | 2390 | smp_mb(); /* ensure test happens before caller kfree */ |
2365 | break; | 2391 | break; |
2366 | } | 2392 | } |
2367 | } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s); | 2393 | } while (atomic_long_cmpxchg(&sync_sched_expedited_done, s, snap) != s); |
2368 | 2394 | ||
2369 | put_online_cpus(); | 2395 | put_online_cpus(); |
2370 | } | 2396 | } |