diff options
author | Paul E. McKenney <paul.mckenney@linaro.org> | 2012-05-29 17:56:46 -0400 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2012-07-02 15:33:23 -0400 |
commit | cf3a9c4842b1e097dbe0854933c471d43dd24f69 (patch) | |
tree | 5b2737cade79c491ba46cd642c01cc165118d723 | |
parent | cfed0a85dad921c683e9c0d25b072bcc5745ede0 (diff) |
rcu: Increase rcu_barrier() concurrency
The traditional rcu_barrier() implementation has serialized all requests,
regardless of RCU flavor, and has not coalesced concurrent requests.
In the past, this has been good and sufficient.
However, systems are getting larger and use of rcu_barrier() has been
increasing. This commit therefore introduces a counter-based scheme
that allows _rcu_barrier() calls for the same flavor of RCU to take
advantage of each other's work.
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-rw-r--r-- | kernel/rcutree.c | 36 | ||||
-rw-r--r-- | kernel/rcutree.h | 2 |
2 files changed, 37 insertions, 1 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 44a8fda9be86..6bb5d562253f 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -2294,13 +2294,41 @@ static void _rcu_barrier(struct rcu_state *rsp) | |||
2294 | unsigned long flags; | 2294 | unsigned long flags; |
2295 | struct rcu_data *rdp; | 2295 | struct rcu_data *rdp; |
2296 | struct rcu_data rd; | 2296 | struct rcu_data rd; |
2297 | unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); | ||
2298 | unsigned long snap_done; | ||
2297 | 2299 | ||
2298 | init_rcu_head_on_stack(&rd.barrier_head); | 2300 | init_rcu_head_on_stack(&rd.barrier_head); |
2299 | 2301 | ||
2300 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ | 2302 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ |
2301 | mutex_lock(&rsp->barrier_mutex); | 2303 | mutex_lock(&rsp->barrier_mutex); |
2302 | 2304 | ||
2303 | smp_mb(); /* Prevent any prior operations from leaking in. */ | 2305 | /* |
2306 | * Ensure that all prior references, including to ->n_barrier_done, | ||
2307 | * are ordered before the _rcu_barrier() machinery. | ||
2308 | */ | ||
2309 | smp_mb(); /* See above block comment. */ | ||
2310 | |||
2311 | /* | ||
2312 | * Recheck ->n_barrier_done to see if others did our work for us. | ||
2313 | * This means checking ->n_barrier_done for an even-to-odd-to-even | ||
2314 | * transition. The "if" expression below therefore rounds the old | ||
2315 | * value up to the next even number and adds two before comparing. | ||
2316 | */ | ||
2317 | snap_done = ACCESS_ONCE(rsp->n_barrier_done); | ||
2318 | if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) { | ||
2319 | smp_mb(); /* caller's subsequent code after above check. */ | ||
2320 | mutex_unlock(&rsp->barrier_mutex); | ||
2321 | return; | ||
2322 | } | ||
2323 | |||
2324 | /* | ||
2325 | * Increment ->n_barrier_done to avoid duplicate work. Use | ||
2326 | * ACCESS_ONCE() to prevent the compiler from speculating | ||
2327 | * the increment to precede the early-exit check. | ||
2328 | */ | ||
2329 | ACCESS_ONCE(rsp->n_barrier_done)++; | ||
2330 | WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1); | ||
2331 | smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */ | ||
2304 | 2332 | ||
2305 | /* | 2333 | /* |
2306 | * Initialize the count to one rather than to zero in order to | 2334 | * Initialize the count to one rather than to zero in order to |
@@ -2371,6 +2399,12 @@ static void _rcu_barrier(struct rcu_state *rsp) | |||
2371 | if (atomic_dec_and_test(&rsp->barrier_cpu_count)) | 2399 | if (atomic_dec_and_test(&rsp->barrier_cpu_count)) |
2372 | complete(&rsp->barrier_completion); | 2400 | complete(&rsp->barrier_completion); |
2373 | 2401 | ||
2402 | /* Increment ->n_barrier_done to prevent duplicate work. */ | ||
2403 | smp_mb(); /* Keep increment after above mechanism. */ | ||
2404 | ACCESS_ONCE(rsp->n_barrier_done)++; | ||
2405 | WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0); | ||
2406 | smp_mb(); /* Keep increment before caller's subsequent code. */ | ||
2407 | |||
2374 | /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ | 2408 | /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ |
2375 | wait_for_completion(&rsp->barrier_completion); | 2409 | wait_for_completion(&rsp->barrier_completion); |
2376 | 2410 | ||
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7641aec3e59c..be10286ad380 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -403,6 +403,8 @@ struct rcu_state { | |||
403 | struct mutex barrier_mutex; /* Guards barrier fields. */ | 403 | struct mutex barrier_mutex; /* Guards barrier fields. */ |
404 | atomic_t barrier_cpu_count; /* # CPUs waiting on. */ | 404 | atomic_t barrier_cpu_count; /* # CPUs waiting on. */ |
405 | struct completion barrier_completion; /* Wake at barrier end. */ | 405 | struct completion barrier_completion; /* Wake at barrier end. */ |
406 | unsigned long n_barrier_done; /* ++ at start and end of */ | ||
407 | /* _rcu_barrier(). */ | ||
406 | raw_spinlock_t fqslock; /* Only one task forcing */ | 408 | raw_spinlock_t fqslock; /* Only one task forcing */ |
407 | /* quiescent states. */ | 409 | /* quiescent states. */ |
408 | unsigned long jiffies_force_qs; /* Time at which to invoke */ | 410 | unsigned long jiffies_force_qs; /* Time at which to invoke */ |