author		Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2015-06-29 20:06:39 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2015-07-17 17:59:00 -0400
commit		2cd6ffafec066118365f6d7eb7a42ea16c1f032c (patch)
tree		39656499f5a78c4b61528904e3464c2403a0b83b /kernel/rcu/tree.c
parent		704dd435ac7eaefa89fcd82fd2876b8330e00ff3 (diff)
rcu: Extend expedited funnel locking to rcu_data structure
The strictly rcu_node-based funnel-locking scheme works well in many
cases, but systems with CONFIG_RCU_FANOUT_LEAF=64 won't necessarily get
all that much concurrency: every CPU covered by a given leaf rcu_node
enters the funnel through that leaf's single mutex. This commit therefore
extends the funnel locking into the per-CPU rcu_data structure, providing
concurrency equal to the number of CPUs.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
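For readers who want the shape of the new locking pattern outside of diff context, here is a minimal userspace sketch of the extended funnel lock. It is not the kernel code: struct node, struct cpu_data, gp_done(), and completed_gp are hypothetical stand-ins for rcu_node, rcu_data, rcu_exp_gp_seq_done(), and the expedited sequence counter, and pthread mutexes stand in for the kernel's exp_funnel_mutex fields. The control flow mirrors exp_funnel_lock() after this patch: contend first on the per-CPU mutex, then climb the node tree, dropping the lower-level mutex once the next level is held, and bail out whenever the grace period being waited for has already completed.

/*
 * Minimal userspace sketch of the extended funnel-locking scheme.
 * All names are hypothetical stand-ins; see kernel/rcu/tree.c for
 * the real implementation.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
	struct node *parent;		/* NULL at the root of the tree */
	pthread_mutex_t funnel;		/* per-node funnel mutex */
};

struct cpu_data {
	struct node *mynode;		/* leaf node covering this CPU */
	pthread_mutex_t funnel;		/* new: per-CPU funnel mutex */
};

static atomic_ulong completed_gp;	/* hypothetical grace-period counter */

/* Has the grace period the caller is waiting for already completed? */
static bool gp_done(unsigned long s)
{
	return atomic_load(&completed_gp) >= s;
}

/*
 * Return with the root node's funnel mutex held, or NULL (holding
 * nothing) if grace period 's' was completed by someone else.
 */
static struct node *funnel_lock(struct cpu_data *cdp, unsigned long s)
{
	struct node *np;
	struct node *held = NULL;	/* node-level mutex currently held, if any */

	if (gp_done(s))
		return NULL;			/* done before taking any lock */
	pthread_mutex_lock(&cdp->funnel);	/* per-CPU level comes first */

	for (np = cdp->mynode; np != NULL; np = np->parent) {
		if (gp_done(s)) {		/* completed while we waited */
			pthread_mutex_unlock(held ? &held->funnel : &cdp->funnel);
			return NULL;
		}
		pthread_mutex_lock(&np->funnel);	/* climb one level */
		pthread_mutex_unlock(held ? &held->funnel : &cdp->funnel);
		held = np;			/* now holding only this level */
	}
	if (gp_done(s)) {			/* final check at the root */
		pthread_mutex_unlock(held ? &held->funnel : &cdp->funnel);
		return NULL;
	}
	return held;				/* root funnel mutex is held */
}

The design point, restating the commit message: the first level of the funnel now has one mutex per CPU rather than one per leaf rcu_node, so the number of tasks that can enter the funnel concurrently scales with the number of CPUs rather than with the number of leaf nodes.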
Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--	kernel/rcu/tree.c	19
1 file changed, 16 insertions, 3 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a905d3ba8673..e45097fc39fa 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3312,11 +3312,14 @@ static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
 
 /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
 static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
+			       struct rcu_data *rdp,
 			       atomic_long_t *stat, unsigned long s)
 {
 	if (rcu_exp_gp_seq_done(rsp, s)) {
 		if (rnp)
 			mutex_unlock(&rnp->exp_funnel_mutex);
+		else if (rdp)
+			mutex_unlock(&rdp->exp_funnel_mutex);
 		/* Ensure test happens before caller kfree(). */
 		smp_mb__before_atomic(); /* ^^^ */
 		atomic_long_inc(stat);
@@ -3332,6 +3335,7 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
  */
 static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 {
+	struct rcu_data *rdp;
 	struct rcu_node *rnp0;
 	struct rcu_node *rnp1 = NULL;
 
@@ -3343,16 +3347,24 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 	 * can be inexact, as it is just promoting locality and is not
 	 * strictly needed for correctness.
 	 */
-	rnp0 = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
+	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
+	if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s))
+		return NULL;
+	mutex_lock(&rdp->exp_funnel_mutex);
+	rnp0 = rdp->mynode;
 	for (; rnp0 != NULL; rnp0 = rnp0->parent) {
-		if (sync_exp_work_done(rsp, rnp1, &rsp->expedited_workdone1, s))
+		if (sync_exp_work_done(rsp, rnp1, rdp,
+				       &rsp->expedited_workdone2, s))
 			return NULL;
 		mutex_lock(&rnp0->exp_funnel_mutex);
 		if (rnp1)
 			mutex_unlock(&rnp1->exp_funnel_mutex);
+		else
+			mutex_unlock(&rdp->exp_funnel_mutex);
 		rnp1 = rnp0;
 	}
-	if (sync_exp_work_done(rsp, rnp1, &rsp->expedited_workdone2, s))
+	if (sync_exp_work_done(rsp, rnp1, rdp,
+			       &rsp->expedited_workdone3, s))
 		return NULL;
 	return rnp1;
 }
@@ -3733,6 +3745,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
 	rdp->cpu = cpu;
 	rdp->rsp = rsp;
+	mutex_init(&rdp->exp_funnel_mutex);
 	rcu_boot_init_nocb_percpu_data(rdp);
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }