author		Paul E. McKenney <paul.mckenney@linaro.org>	2012-10-11 15:30:37 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2012-11-08 14:50:12 -0500
commit		1924bcb0259711eea98491a7942d1ffbf677e114 (patch)
tree		b5c98e8b210c60468cf1264b9839a3078869b5d1 /kernel/rcutree.c
parent		7b2e6011f150c42235c4a541d20cf6891afe878a (diff)
rcu: Avoid counter wrap in synchronize_sched_expedited()
There is a counter scheme similar to ticket locking that synchronize_sched_expedited() uses to service multiple concurrent callers with the same expedited grace period. Upon entry, the sync_sched_expedited_started variable is atomically incremented, and upon completion of an expedited grace period a separate sync_sched_expedited_done variable is atomically incremented.

However, if a synchronize_sched_expedited() caller is delayed while in try_stop_cpus(), concurrent invocations will increment the sync_sched_expedited_started counter, which will eventually overflow. If the original synchronize_sched_expedited() resumes execution just as the counter overflows, a concurrent invocation could incorrectly conclude that an expedited grace period elapsed in zero time, which would be bad. One could rely on counter size to prevent this from happening in practice, but the goal is to formally validate this code, so it needs to be fixed anyway.

This commit therefore checks the gap between the two counters before incrementing sync_sched_expedited_started, and if the gap is too large, does a normal grace period instead. Overflow is thus possible only if there are more than about 3.5 billion threads on 32-bit systems, which can be excluded until such time as task_struct fits into a single byte and 4G/4G patches are accepted into mainline. It is also easy to encode this limitation into mechanical theorem provers.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
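As a rough illustration of the wrap-safe comparison this patch relies on, here is a minimal standalone C sketch. It is not part of the commit: the user-space wrapper and the counter values are invented for illustration, and the ULONG_CMP_GE() definition is intended to mirror the kernel macro of the same name.

/* Minimal user-space sketch of the wrap-safe gap check.  Assumes
 * kernel-style ULONG_CMP_GE() semantics; counter values are made up. */
#include <limits.h>
#include <stdio.h>

/* True if a >= b under modular (wrap-around) unsigned arithmetic. */
#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

int main(void)
{
	unsigned long done = ULONG_MAX - 2;	/* completions so far (hypothetical) */
	unsigned long started = done + 5;	/* tickets handed out; has wrapped past 0 */

	/* Despite the wrap, "started" is still seen as ahead of "done". */
	printf("started ahead of done? %d\n", ULONG_CMP_GE(started, done));

	/* The patch's guard: if the gap ever reaches ULONG_MAX/8, refuse a
	 * new expedited ticket and fall back to a normal grace period. */
	if (ULONG_CMP_GE(started, done + ULONG_MAX / 8))
		printf("gap too large: use a normal grace period\n");
	else
		printf("gap acceptable: take an expedited-GP ticket\n");
	return 0;
}

The modular comparison is only trustworthy while the gap between the two counters stays well below half of the counter's range; the new ULONG_MAX/8 bound on how far sync_sched_expedited_started may run ahead of sync_sched_expedited_done, together with the switch from atomic_t to atomic_long_t in the diff below, is what keeps it in that range.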
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--	kernel/rcutree.c | 62
1 file changed, 44 insertions(+), 18 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 89148860e2ba..678905555ca4 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2249,8 +2249,8 @@ void synchronize_rcu_bh(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
 
-static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
-static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
+static atomic_long_t sync_sched_expedited_started = ATOMIC_LONG_INIT(0);
+static atomic_long_t sync_sched_expedited_done = ATOMIC_LONG_INIT(0);
 
 static int synchronize_sched_expedited_cpu_stop(void *data)
 {
@@ -2308,10 +2308,30 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
  */
 void synchronize_sched_expedited(void)
 {
-	int firstsnap, s, snap, trycount = 0;
+	long firstsnap, s, snap;
+	int trycount = 0;
 
-	/* Note that atomic_inc_return() implies full memory barrier. */
-	firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
+	/*
+	 * If we are in danger of counter wrap, just do synchronize_sched().
+	 * By allowing sync_sched_expedited_started to advance no more than
+	 * ULONG_MAX/8 ahead of sync_sched_expedited_done, we are ensuring
+	 * that more than 3.5 billion CPUs would be required to force a
+	 * counter wrap on a 32-bit system.  Quite a few more CPUs would of
+	 * course be required on a 64-bit system.
+	 */
+	if (ULONG_CMP_GE((ulong)atomic_read(&sync_sched_expedited_started),
+			 (ulong)atomic_read(&sync_sched_expedited_done) +
+			 ULONG_MAX / 8)) {
+		synchronize_sched();
+		return;
+	}
+
+	/*
+	 * Take a ticket.  Note that atomic_inc_return() implies a
+	 * full memory barrier.
+	 */
+	snap = atomic_long_inc_return(&sync_sched_expedited_started);
+	firstsnap = snap;
 	get_online_cpus();
 	WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
 
@@ -2324,6 +2344,13 @@ void synchronize_sched_expedited(void)
 			     NULL) == -EAGAIN) {
 		put_online_cpus();
 
+		/* Check to see if someone else did our work for us. */
+		s = atomic_long_read(&sync_sched_expedited_done);
+		if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
+			smp_mb(); /* ensure test happens before caller kfree */
+			return;
+		}
+
 		/* No joy, try again later.  Or just synchronize_sched(). */
 		if (trycount++ < 10) {
 			udelay(trycount * num_online_cpus());
@@ -2332,23 +2359,22 @@ void synchronize_sched_expedited(void)
 			return;
 		}
 
-		/* Check to see if someone else did our work for us. */
-		s = atomic_read(&sync_sched_expedited_done);
-		if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
+		/* Recheck to see if someone else did our work for us. */
+		s = atomic_long_read(&sync_sched_expedited_done);
+		if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
 			smp_mb(); /* ensure test happens before caller kfree */
 			return;
 		}
 
 		/*
 		 * Refetching sync_sched_expedited_started allows later
-		 * callers to piggyback on our grace period.  We subtract
-		 * 1 to get the same token that the last incrementer got.
-		 * We retry after they started, so our grace period works
-		 * for them, and they started after our first try, so their
-		 * grace period works for us.
+		 * callers to piggyback on our grace period.  We retry
+		 * after they started, so our grace period works for them,
+		 * and they started after our first try, so their grace
+		 * period works for us.
 		 */
 		get_online_cpus();
-		snap = atomic_read(&sync_sched_expedited_started);
+		snap = atomic_long_read(&sync_sched_expedited_started);
 		smp_mb(); /* ensure read is before try_stop_cpus(). */
 	}
 
@@ -2356,15 +2382,15 @@ void synchronize_sched_expedited(void)
 	 * Everyone up to our most recent fetch is covered by our grace
 	 * period.  Update the counter, but only if our work is still
 	 * relevant -- which it won't be if someone who started later
-	 * than we did beat us to the punch.
+	 * than we did already did their update.
 	 */
 	do {
-		s = atomic_read(&sync_sched_expedited_done);
-		if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
+		s = atomic_long_read(&sync_sched_expedited_done);
+		if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
 			smp_mb(); /* ensure test happens before caller kfree */
 			break;
 		}
-	} while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
+	} while (atomic_long_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
 
 	put_online_cpus();
 }