author     Joel Fernandes (Google) <joel@joelfernandes.org>    2018-07-30 18:24:22 -0400
committer  Steven Rostedt (VMware) <rostedt@goodmis.org>       2018-07-30 19:13:03 -0400
commit     e6753f23d961d601dbae50a2fc2a3975c9715b14
tree       54adb707018682f6b6f49a42273ca88012a829b0 /kernel/tracepoint.c
parent     01f38497c6f6525f57eb445887b9ed1867dbd05c
tracepoint: Make rcuidle tracepoint callers use SRCU
In recent tests with IRQ on/off tracepoints, a large performance
overhead of ~10% was noticed when running hackbench. This was root
caused to the calls to rcu_irq_enter_irqson() and rcu_irq_exit_irqson()
from the tracepoint code. Following a long discussion on the list [1]
about this, we concluded that SRCU is a better alternative for use
during RCU idle. Although it does involve extra barriers, it is lighter
than the sched-RCU version, which has to make additional RCU calls to
notify RCU about entering an RCU section from the idle path.
In this patch, we change the underlying implementation of the
trace_*_rcuidle API to use SRCU. This has been shown to improve
performance a lot for the high-frequency irq enable/disable
tracepoints.
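For illustration, the shape of the change on the caller side is roughly
the following; the real code is the __DO_TRACE() macro in
include/linux/tracepoint.h, and the snippet below is a simplified
sketch, not the exact implementation:

	/* Old rcuidle path: momentarily tell RCU we are not idle. */
	rcu_irq_enter_irqson();
	/* ... dereference tp->funcs and call the probes ... */
	rcu_irq_exit_irqson();

	/* New rcuidle path: take an SRCU read-side lock instead. */
	idx = srcu_read_lock_notrace(&tracepoint_srcu);
	/* ... dereference tp->funcs and call the probes ... */
	srcu_read_unlock_notrace(&tracepoint_srcu, idx);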
Test: Tested idle and preempt/irq tracepoints.
Here are some performance numbers:
Running the following 30 times on a single-core x86 QEMU instance
with 1GB of memory:
hackbench -g 4 -f 2 -l 3000
Completion times in seconds. CONFIG_PROVE_LOCKING=y.
No patches (without this series)
Mean: 3.048
Median: 3.025
Std Dev: 0.064
With lockdep using irq tracepoints with the RCU implementation:
Mean: 3.451 (-11.66%)
Median: 3.447 (-12.22%)
Std Dev: 0.049
With lockdep using irq tracepoints with the SRCU implementation (this series):
Mean: 3.020 (I would consider the improvement against the "without
this series" case as just noise).
Median: 3.013
Std Dev: 0.033
[1] https://patchwork.kernel.org/patch/10344297/
[remove rcu_read_lock_sched_notrace as it is the equivalent of
preempt_disable_notrace and is unnecessary to call in tracepoint code]
Link: http://lkml.kernel.org/r/20180730222423.196630-3-joel@joelfernandes.org
Cleaned-up-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
[ Simplified WARN_ON_ONCE() ]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Diffstat (limited to 'kernel/tracepoint.c')
-rw-r--r--   kernel/tracepoint.c   16
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 6dc6356c3327..955148d91b74 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -31,6 +31,9 @@
 extern struct tracepoint * const __start___tracepoints_ptrs[];
 extern struct tracepoint * const __stop___tracepoints_ptrs[];
 
+DEFINE_SRCU(tracepoint_srcu);
+EXPORT_SYMBOL_GPL(tracepoint_srcu);
+
 /* Set to 1 to enable tracepoint debug output */
 static const int tracepoint_debug;
 
@@ -67,16 +70,27 @@ static inline void *allocate_probes(int count)
 	return p == NULL ? NULL : p->probes;
 }
 
-static void rcu_free_old_probes(struct rcu_head *head)
+static void srcu_free_old_probes(struct rcu_head *head)
 {
 	kfree(container_of(head, struct tp_probes, rcu));
 }
 
+static void rcu_free_old_probes(struct rcu_head *head)
+{
+	call_srcu(&tracepoint_srcu, head, srcu_free_old_probes);
+}
+
 static inline void release_probes(struct tracepoint_func *old)
 {
 	if (old) {
 		struct tp_probes *tp_probes = container_of(old,
 			struct tp_probes, probes[0]);
+		/*
+		 * Tracepoint probes are protected by both sched RCU and SRCU,
+		 * by calling the SRCU callback in the sched RCU callback we
+		 * cover both cases. So let us chain the SRCU and sched RCU
+		 * callbacks to wait for both grace periods.
+		 */
 		call_rcu_sched(&tp_probes->rcu, rcu_free_old_probes);
 	}
 }
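The chained callbacks in release_probes() are the asynchronous
equivalent of waiting for a sched-RCU grace period and then an SRCU
grace period before freeing the old probe array. A blocking version of
the same guarantee would look roughly like this (illustrative sketch
only; the real code stays asynchronous via call_rcu_sched() and
call_srcu()):

	/* Illustration only: synchronous form of the same guarantee. */
	synchronize_sched();                 /* wait for sched-RCU readers */
	synchronize_srcu(&tracepoint_srcu);  /* then wait for SRCU readers */
	kfree(tp_probes);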