diff options
author | Steven Rostedt <srostedt@redhat.com> | 2009-10-13 16:33:50 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-10-14 02:13:53 -0400 |
commit | 194ec34184869f0de1cf255c924fc5299e1b3d27 (patch) | |
tree | 7eb411e56f381b65bcafd0aa750f6f6705f3e451 /arch | |
parent | 1bac0497ef9af8d933860672223e38bd6ac4934a (diff) |
function-graph/x86: Replace unbalanced ret with jmp
The function graph tracer replaces the return address with a hook
to trace the exit of the function call. This hook will finish by
returning to the real location the function should return to.
But the current implementation uses a ret to jump to the real
return location. This causes an imbalance between calls and ret.
That is, the original function does a call, the ret goes to the
handler and then the handler does a ret without a matching call.
Although the function graph tracer itself still breaks the branch
predictor by replacing the original ret, using a second ret and
causing an imbalance breaks the predictor even more.
This patch replaces the ret with a jmp to keep the calls and ret
balanced. I tested this on one box and it showed a 1.7% increase in
performance. Another box only showed a small 0.3% increase. But no
box that I tested this on showed a decrease in performance by
making this change.
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20091013203425.042034383@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/kernel/entry_32.S | 7 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 6 |
2 files changed, 5 insertions, 8 deletions
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d607c6..7d52e9da5e0c 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -1185,17 +1185,14 @@ END(ftrace_graph_caller) | |||
1185 | 1185 | ||
1186 | .globl return_to_handler | 1186 | .globl return_to_handler |
1187 | return_to_handler: | 1187 | return_to_handler: |
1188 | pushl $0 | ||
1189 | pushl %eax | 1188 | pushl %eax |
1190 | pushl %ecx | ||
1191 | pushl %edx | 1189 | pushl %edx |
1192 | movl %ebp, %eax | 1190 | movl %ebp, %eax |
1193 | call ftrace_return_to_handler | 1191 | call ftrace_return_to_handler |
1194 | movl %eax, 0xc(%esp) | 1192 | movl %eax, %ecx |
1195 | popl %edx | 1193 | popl %edx |
1196 | popl %ecx | ||
1197 | popl %eax | 1194 | popl %eax |
1198 | ret | 1195 | jmp *%ecx |
1199 | #endif | 1196 | #endif |
1200 | 1197 | ||
1201 | .section .rodata,"a" | 1198 | .section .rodata,"a" |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b5c061f8f358..bd5bbddddf91 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -155,11 +155,11 @@ GLOBAL(return_to_handler) | |||
155 | 155 | ||
156 | call ftrace_return_to_handler | 156 | call ftrace_return_to_handler |
157 | 157 | ||
158 | movq %rax, 16(%rsp) | 158 | movq %rax, %rdi |
159 | movq 8(%rsp), %rdx | 159 | movq 8(%rsp), %rdx |
160 | movq (%rsp), %rax | 160 | movq (%rsp), %rax |
161 | addq $16, %rsp | 161 | addq $24, %rsp |
162 | retq | 162 | jmp *%rdi |
163 | #endif | 163 | #endif |
164 | 164 | ||
165 | 165 | ||