author	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2016-01-30 20:23:19 -0500
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2016-03-31 16:34:07 -0400
commit	e2fd9d35847d1936398d44c4df68dceb3d7f64e7 (patch)
tree	1893e200e6f5e87065b38fedfc9f74d19a4079b7
parent	4f41530245c7fd4837152e264d120d05ae940eb0 (diff)
rcu: Remove expedited GP funnel-lock bypass
Commit #cdacbe1f91264 ("rcu: Add fastpath bypassing funnel locking")
turns out to be a pessimization at high load because it forces a tree
full of tasks to wait for an expedited grace period that they probably
do not need.  This commit therefore removes this optimization.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
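The shape of the removed bypass is easy to see in miniature.  Below is a
user-space sketch of such a trylock fastpath (illustrative only; the
names and pthread scaffolding are mine, not kernel API): the trylock is
cheap while expedited grace periods are rare, but once the root lock is
busy, every loser is herded into the funnel.

/* Illustrative sketch of the removed bypass pattern; not kernel API. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t root_lock = PTHREAD_MUTEX_INITIALIZER;

int main(void)
{
	/*
	 * Fastpath: opportunistically grab the root lock and skip the
	 * funnel entirely.  Cheap while expedited grace periods are
	 * rare...
	 */
	if (pthread_mutex_trylock(&root_lock) == 0) {
		printf("fastpath: root held, funnel skipped\n");
		pthread_mutex_unlock(&root_lock);
	} else {
		/*
		 * ...but at high load, every task landing here waits
		 * in the funnel for an expedited grace period it may
		 * not actually need -- the pessimization this commit
		 * removes.
		 */
		printf("slowpath: enter the funnel-lock walk\n");
	}
	return 0;
}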
-rw-r--r--	Documentation/RCU/trace.txt	10
-rw-r--r--	kernel/rcu/tree.c	19
-rw-r--r--	kernel/rcu/tree.h	1
-rw-r--r--	kernel/rcu/tree_trace.c	7
4 files changed, 8 insertions, 29 deletions
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index ec6998b1b6d0..00a3a38b375a 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -237,17 +237,17 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
 
 The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
 
-s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
+s=21872 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
 
 These fields are as follows:
 
 o	"s" is the sequence number, with an odd number indicating that
 	an expedited grace period is in progress.
 
-o	"wd0", "wd1", "wd2", and "wd3" are the number of times that an
-	attempt to start an expedited grace period found that someone
-	else had completed an expedited grace period that satisfies the
-	attempted request.  "Our work is done."
+o	"wd1", "wd2", and "wd3" are the number of times that an attempt
+	to start an expedited grace period found that someone else had
+	completed an expedited grace period that satisfies the attempted
+	request.  "Our work is done."
 
 o	"n" is number of times that a concurrent CPU-hotplug operation
 	forced a fallback to a normal grace period.
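The "s"/"sc" relationship described above follows from the low-order
bit of the sequence number doubling as an in-progress flag.  A minimal
sketch of that convention (illustrative, not the kernel's sequence-
counter implementation):

/* Sketch of the sequence-number convention; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

static unsigned long s;                          /* the "s" field */

static bool exp_in_progress(void) { return s & 1; }
static void exp_start(void)       { s++; /* even -> odd  */ }
static void exp_end(void)         { s++; /* odd  -> even */ }

int main(void)
{
	exp_start();
	printf("s=%lu in_progress=%d\n", s, exp_in_progress());
	exp_end();
	printf("s=%lu sc=%lu\n", s, s / 2);      /* "sc" as in rcuexp */
	return 0;
}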
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 524026fd9dd7..62e73e0a929f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3617,25 +3617,6 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 	struct rcu_node *rnp1 = NULL;
 
 	/*
-	 * First try directly acquiring the root lock in order to reduce
-	 * latency in the common case where expedited grace periods are
-	 * rare.  We check mutex_is_locked() to avoid pathological levels of
-	 * memory contention on ->exp_funnel_mutex in the heavy-load case.
-	 */
-	rnp0 = rcu_get_root(rsp);
-	if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
-		if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
-			trace_rcu_exp_funnel_lock(rsp->name, rnp0->level,
-						  rnp0->grplo, rnp0->grphi,
-						  TPS("acq"));
-			if (sync_exp_work_done(rsp, rnp0, NULL,
-					       &rdp->expedited_workdone0, s))
-				return NULL;
-			return rnp0;
-		}
-	}
-
-	/*
 	 * Each pass through the following loop works its way
 	 * up the rcu_node tree, returning if others have done the
 	 * work or otherwise falls through holding the root rnp's
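With the bypass gone, every requester takes the funnel walk that the
surviving comment describes.  A self-contained sketch of that walk
(again user-space and illustrative; a real rcu_node tree has per-level
fanout and memory-ordering requirements this toy ignores):

/* Illustrative funnel-lock walk; names are mine, not kernel API. */
#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

struct fnode {
	pthread_mutex_t lock;
	struct fnode *parent;            /* NULL at the root */
};

static struct fnode root = { PTHREAD_MUTEX_INITIALIZER, NULL };
static struct fnode leaf = { PTHREAD_MUTEX_INITIALIZER, &root };

static atomic_ulong completed;           /* expedited GPs completed */

/*
 * Walk leaf-to-root, holding at most one lock at a time, so at most
 * one waiter per subtree contends at the next level up.  Return NULL
 * if someone else completed the grace period we need (the "wd" events
 * counted in the trace output); otherwise return with the root held.
 */
static struct fnode *funnel_lock(struct fnode *start, unsigned long need)
{
	struct fnode *np, *held = NULL;

	for (np = start; np; np = np->parent) {
		pthread_mutex_lock(&np->lock);
		if (held)
			pthread_mutex_unlock(&held->lock);
		held = np;
		if (atomic_load(&completed) >= need) {
			pthread_mutex_unlock(&held->lock);
			return NULL;     /* our work is done */
		}
	}
	return held;                     /* root held; caller does the GP */
}

int main(void)
{
	struct fnode *rnp = funnel_lock(&leaf, 1);

	if (rnp) {
		atomic_fetch_add(&completed, 1);   /* "do" the GP */
		pthread_mutex_unlock(&rnp->lock);
	}
	return 0;
}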
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index df668c0f9e64..ac9a7b0c36ae 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -388,7 +388,6 @@ struct rcu_data {
 	struct rcu_head oom_head;
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 	struct mutex exp_funnel_mutex;
-	atomic_long_t expedited_workdone0;	/* # done by others #0. */
 	atomic_long_t expedited_workdone1;	/* # done by others #1. */
 	atomic_long_t expedited_workdone2;	/* # done by others #2. */
 	atomic_long_t expedited_workdone3;	/* # done by others #3. */
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 1088e64f01ad..d149c412a4e5 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,17 +185,16 @@ static int show_rcuexp(struct seq_file *m, void *v)
 	int cpu;
 	struct rcu_state *rsp = (struct rcu_state *)m->private;
 	struct rcu_data *rdp;
-	unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
+	unsigned long s1 = 0, s2 = 0, s3 = 0;
 
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(rsp->rda, cpu);
-		s0 += atomic_long_read(&rdp->expedited_workdone0);
 		s1 += atomic_long_read(&rdp->expedited_workdone1);
 		s2 += atomic_long_read(&rdp->expedited_workdone2);
 		s3 += atomic_long_read(&rdp->expedited_workdone3);
 	}
-	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
-		   rsp->expedited_sequence, s0, s1, s2, s3,
+	seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+		   rsp->expedited_sequence, s1, s2, s3,
 		   atomic_long_read(&rsp->expedited_normal),
 		   atomic_read(&rsp->expedited_need_qs),
 		   rsp->expedited_sequence / 2);