diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2015-10-01 13:26:24 -0400 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2015-12-04 15:26:52 -0500 |
commit | df5bd5144a80a9f6c3807383b11f735dae9caf9d (patch) | |
tree | fce44d0970a70446c5fd6b50f2e764db0efb8e56 /kernel/rcu/tree_trace.c | |
parent | 1307f2148719cc9e9d12f5fa7d5b3b61ec5aef72 (diff) |
rcu: Reduce expedited GP memory contention via per-CPU variables
Currently, the piggybacked-work checks carried out by sync_exp_work_done()
atomically increment a small set of variables (the ->expedited_workdone0,
->expedited_workdone1, ->expedited_workdone2, ->expedited_workdone3
fields in the rcu_state structure), which will form a memory-contention
bottleneck given a sufficiently large number of CPUs concurrently invoking
either synchronize_rcu_expedited() or synchronize_sched_expedited().
This commit therefore moves these four fields to the per-CPU rcu_data
structure, eliminating the memory contention. The show_rcuexp() function
also changes to sum up each field in the rcu_data structures.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel/rcu/tree_trace.c')
-rw-r--r-- | kernel/rcu/tree_trace.c | 18 |
1 files changed, 12 insertions, 6 deletions
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index 8efaba870d96..d43649450ea4 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c | |||
@@ -183,14 +183,20 @@ static const struct file_operations rcudata_fops = { | |||
183 | 183 | ||
184 | static int show_rcuexp(struct seq_file *m, void *v) | 184 | static int show_rcuexp(struct seq_file *m, void *v) |
185 | { | 185 | { |
186 | int cpu; | ||
186 | struct rcu_state *rsp = (struct rcu_state *)m->private; | 187 | struct rcu_state *rsp = (struct rcu_state *)m->private; |
187 | 188 | struct rcu_data *rdp; | |
189 | unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0; | ||
190 | |||
191 | for_each_possible_cpu(cpu) { | ||
192 | rdp = per_cpu_ptr(rsp->rda, cpu); | ||
193 | s0 += atomic_long_read(&rdp->expedited_workdone0); | ||
194 | s1 += atomic_long_read(&rdp->expedited_workdone1); | ||
195 | s2 += atomic_long_read(&rdp->expedited_workdone2); | ||
196 | s3 += atomic_long_read(&rdp->expedited_workdone3); | ||
197 | } | ||
188 | seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n", | 198 | seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n", |
189 | rsp->expedited_sequence, | 199 | rsp->expedited_sequence, s0, s1, s2, s3, |
190 | atomic_long_read(&rsp->expedited_workdone0), | ||
191 | atomic_long_read(&rsp->expedited_workdone1), | ||
192 | atomic_long_read(&rsp->expedited_workdone2), | ||
193 | atomic_long_read(&rsp->expedited_workdone3), | ||
194 | atomic_long_read(&rsp->expedited_normal), | 200 | atomic_long_read(&rsp->expedited_normal), |
195 | atomic_read(&rsp->expedited_need_qs), | 201 | atomic_read(&rsp->expedited_need_qs), |
196 | rsp->expedited_sequence / 2); | 202 | rsp->expedited_sequence / 2); |