author    Paul E. McKenney <paulmck@linux.vnet.ibm.com>    2015-06-29 20:06:39 -0400
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>    2015-07-17 17:59:00 -0400
commit    2cd6ffafec066118365f6d7eb7a42ea16c1f032c (patch)
tree      39656499f5a78c4b61528904e3464c2403a0b83b /kernel/rcu
parent    704dd435ac7eaefa89fcd82fd2876b8330e00ff3 (diff)
rcu: Extend expedited funnel locking to rcu_data structure
The strictly rcu_node based funnel-locking scheme works well in many cases, but systems with CONFIG_RCU_FANOUT_LEAF=64 won't necessarily get all that much concurrency: with up to 64 CPUs sharing each leaf rcu_node structure, all of those CPUs contend for the same leaf-level exp_funnel_mutex. This commit therefore extends the funnel locking into the per-CPU rcu_data structure, providing concurrency equal to the number of CPUs.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
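For orientation before the hunks below, here is a condensed sketch of the shape exp_funnel_lock() takes after this patch, with the expedited_workdone* statistics bookkeeping stripped out; it illustrates the funnel-locking walk rather than reproducing the literal code in the diff:

/*
 * Condensed sketch of the patched exp_funnel_lock(); the workdone
 * statistics counters are omitted for clarity.
 */
static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
{
        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
        struct rcu_node *rnp0;
        struct rcu_node *rnp1 = NULL;

        /* First level of the funnel: one mutex per CPU. */
        if (rcu_exp_gp_seq_done(rsp, s))
                return NULL;            /* Someone else already did our work. */
        mutex_lock(&rdp->exp_funnel_mutex);

        /*
         * Walk up the rcu_node tree, acquiring each level's mutex and then
         * releasing the level below it, re-checking for completion at each
         * step so waiters can bail out as soon as the work is done.
         */
        for (rnp0 = rdp->mynode; rnp0 != NULL; rnp0 = rnp0->parent) {
                if (rcu_exp_gp_seq_done(rsp, s)) {
                        if (rnp1)
                                mutex_unlock(&rnp1->exp_funnel_mutex);
                        else
                                mutex_unlock(&rdp->exp_funnel_mutex);
                        return NULL;
                }
                mutex_lock(&rnp0->exp_funnel_mutex);
                if (rnp1)
                        mutex_unlock(&rnp1->exp_funnel_mutex);
                else
                        mutex_unlock(&rdp->exp_funnel_mutex);
                rnp1 = rnp0;
        }
        /* rnp1 is now the root rcu_node; caller holds its exp_funnel_mutex. */
        return rnp1;
}

The real sync_exp_work_done() additionally bumps a per-stage statistics counter and issues smp_mb__before_atomic() to order the done-check before the caller's kfree(); those details are elided in the sketch above but appear in the hunks below.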
Diffstat (limited to 'kernel/rcu')
-rw-r--r--  kernel/rcu/tree.c        19
-rw-r--r--  kernel/rcu/tree.h         4
-rw-r--r--  kernel/rcu/tree_trace.c   3
3 files changed, 21 insertions, 5 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a905d3ba8673..e45097fc39fa 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3312,11 +3312,14 @@ static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
 
 /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
 static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
+                               struct rcu_data *rdp,
                                atomic_long_t *stat, unsigned long s)
 {
         if (rcu_exp_gp_seq_done(rsp, s)) {
                 if (rnp)
                         mutex_unlock(&rnp->exp_funnel_mutex);
+                else if (rdp)
+                        mutex_unlock(&rdp->exp_funnel_mutex);
                 /* Ensure test happens before caller kfree(). */
                 smp_mb__before_atomic(); /* ^^^ */
                 atomic_long_inc(stat);
@@ -3332,6 +3335,7 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
  */
 static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 {
+        struct rcu_data *rdp;
         struct rcu_node *rnp0;
         struct rcu_node *rnp1 = NULL;
 
@@ -3343,16 +3347,24 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
          * can be inexact, as it is just promoting locality and is not
          * strictly needed for correctness.
          */
-        rnp0 = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
+        rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
+        if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s))
+                return NULL;
+        mutex_lock(&rdp->exp_funnel_mutex);
+        rnp0 = rdp->mynode;
         for (; rnp0 != NULL; rnp0 = rnp0->parent) {
-                if (sync_exp_work_done(rsp, rnp1, &rsp->expedited_workdone1, s))
+                if (sync_exp_work_done(rsp, rnp1, rdp,
+                                       &rsp->expedited_workdone2, s))
                         return NULL;
                 mutex_lock(&rnp0->exp_funnel_mutex);
                 if (rnp1)
                         mutex_unlock(&rnp1->exp_funnel_mutex);
+                else
+                        mutex_unlock(&rdp->exp_funnel_mutex);
                 rnp1 = rnp0;
         }
-        if (sync_exp_work_done(rsp, rnp1, &rsp->expedited_workdone2, s))
+        if (sync_exp_work_done(rsp, rnp1, rdp,
+                               &rsp->expedited_workdone3, s))
                 return NULL;
         return rnp1;
 }
@@ -3733,6 +3745,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
         WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
         rdp->cpu = cpu;
         rdp->rsp = rsp;
+        mutex_init(&rdp->exp_funnel_mutex);
         rcu_boot_init_nocb_percpu_data(rdp);
         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 5c1042d9c310..efee84ce1e08 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -364,11 +364,12 @@ struct rcu_data {
         unsigned long n_rp_nocb_defer_wakeup;
         unsigned long n_rp_need_nothing;
 
-        /* 6) _rcu_barrier() and OOM callbacks. */
+        /* 6) _rcu_barrier(), OOM callbacks, and expediting. */
         struct rcu_head barrier_head;
 #ifdef CONFIG_RCU_FAST_NO_HZ
         struct rcu_head oom_head;
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+        struct mutex exp_funnel_mutex;
 
         /* 7) Callback offloading. */
 #ifdef CONFIG_RCU_NOCB_CPU
@@ -494,6 +495,7 @@ struct rcu_state {
         atomic_long_t expedited_tryfail;        /* # acquisition failures. */
         atomic_long_t expedited_workdone1;      /* # done by others #1. */
         atomic_long_t expedited_workdone2;      /* # done by others #2. */
+        atomic_long_t expedited_workdone3;      /* # done by others #3. */
         atomic_long_t expedited_normal;         /* # fallbacks to normal. */
         atomic_t expedited_need_qs;             /* # CPUs left to check in. */
         wait_queue_head_t expedited_wq;         /* Wait for check-ins. */
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index d9982a2ce305..ec62369f1b02 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,11 +185,12 @@ static int show_rcuexp(struct seq_file *m, void *v)
 {
         struct rcu_state *rsp = (struct rcu_state *)m->private;
 
-        seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu n=%lu enq=%d sc=%lu\n",
+        seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
                    rsp->expedited_sequence,
                    atomic_long_read(&rsp->expedited_tryfail),
                    atomic_long_read(&rsp->expedited_workdone1),
                    atomic_long_read(&rsp->expedited_workdone2),
+                   atomic_long_read(&rsp->expedited_workdone3),
                    atomic_long_read(&rsp->expedited_normal),
                    atomic_read(&rsp->expedited_need_qs),
                    rsp->expedited_sequence / 2);
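With expedited_workdone3 wired into show_rcuexp(), the per-flavor rcuexp line in RCU's debugfs tracing output (CONFIG_RCU_TRACE) gains a wd3 field. The counter values in the sample below are made up purely to illustrate the new format:

t=4176 tf=0 wd1=12 wd2=67 wd3=3 n=5 enq=0 sc=2088

Here t is rsp->expedited_sequence, tf counts acquisition failures, wd1/wd2/wd3 count grace periods found to have been completed by others at the successive stages of the funnel walk, n counts fallbacks to normal grace periods, enq is the number of CPUs still to check in, and sc is expedited_sequence / 2.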