author		Heiko Carstens <heiko.carstens@de.ibm.com>	2014-09-03 07:26:23 -0400
committer	Martin Schwidefsky <schwidefsky@de.ibm.com>	2014-09-09 02:53:30 -0400
commit		3d1e220d08c6a00ffa83d39030b8162f66665b2b
tree		4529f0d568ef53d296476a640d26ae0128bcbacf /arch/s390
parent		ea2f47699082b971769be8b8f38c08b49219f471
s390/ftrace: optimize mcount code
Reduce the number of executed instructions within the mcount block if
function tracing is enabled. We achieve that by using a non-standard
C function call ABI. Since the called function is also written in
assembler this is not a problem.

This also allows us to replace the unconditional store at the beginning
of the mcount block with a larl instruction, which doesn't touch memory.

In theory we could also patch the first instruction of the mcount block
to enable and disable function tracing. However this would break
kprobes. This could be fixed by implementing the "kprobes_on_ftrace"
feature; however keeping the odd jprobes working does not seem possible
without a lot of code churn. Therefore keep the code easy and simply
accept one wasted 1-cycle "larl" instruction per function prologue.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
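For orientation, a minimal sketch (not part of this commit) of the generic
callback that the rewritten mcount block ultimately reaches: under the
non-standard convention ftrace_caller receives the return address in %r0,
and it then issues an ordinary C ABI call. This assumes the generic
ftrace_func_t type from <linux/ftrace.h>; the name ftrace_func below is
purely illustrative:

	/* Sketch: generic ftrace callback prototype; ftrace_caller invokes
	 * it with the standard s390 C ABI, arguments in %r2-%r5. */
	void ftrace_func(unsigned long ip, unsigned long parent_ip,
			 struct ftrace_ops *op, struct pt_regs *regs);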
Diffstat (limited to 'arch/s390')
-rw-r--r--	arch/s390/include/asm/ftrace.h	2
-rw-r--r--	arch/s390/kernel/ftrace.c	57
-rw-r--r--	arch/s390/kernel/mcount64.S	30
3 files changed, 55 insertions(+), 34 deletions(-)
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 1759d73fb95b..d419362dc231 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -19,7 +19,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 #endif /* __ASSEMBLY__ */
 
 #ifdef CONFIG_64BIT
-#define MCOUNT_INSN_SIZE	12
+#define MCOUNT_INSN_SIZE	18
 #else
 #define MCOUNT_INSN_SIZE	22
 #endif
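The new value 18 follows from the instruction lengths in the rewritten
block; a quick cross-check as a sketch (constants as defined in the
ftrace.c hunk below, lengths per the s390 instruction formats):

	/* Sketch: byte layout of the patched 64-bit mcount block.
	 *   larl  %r0,.+24   6 bytes   offset  0  (never re-patched)
	 *   lg/jg            6 bytes   offset  6  <- rec->ip
	 *   br    %r1        2 bytes   offset 12
	 *   brcl  0,0        6 bytes   offset 14
	 *   brc   0,0        4 bytes   offset 20
	 */
	#define MCOUNT_INSN_SIZE	(6 + 2 + 6 + 4)		/* 18: patched from rec->ip on */
	#define MCOUNT_BLOCK_SIZE	(6 + MCOUNT_INSN_SIZE)	/* 24: the whole block */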
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index f908e42e11c4..fcb009d3edde 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -1,7 +1,7 @@
 /*
  * Dynamic function tracer architecture backend.
  *
- * Copyright IBM Corp. 2009
+ * Copyright IBM Corp. 2009,2014
  *
  * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
  *	      Martin Schwidefsky <schwidefsky@de.ibm.com>
@@ -17,6 +17,7 @@
 #include <asm/asm-offsets.h>
 #include "entry.h"
 
+void mcount_replace_code(void);
 void ftrace_disable_code(void);
 void ftrace_enable_insn(void);
 
@@ -24,38 +25,50 @@ void ftrace_enable_insn(void);
 /*
  * The 64-bit mcount code looks like this:
  *	stg	%r14,8(%r15)		# offset 0
- * >	larl	%r1,<&counter>		# offset 6
- * >	brasl	%r14,_mcount		# offset 12
+ *	larl	%r1,<&counter>		# offset 6
+ *	brasl	%r14,_mcount		# offset 12
  *	lg	%r14,8(%r15)		# offset 18
- * Total length is 24 bytes. The middle two instructions of the mcount
- * block get overwritten by ftrace_make_nop / ftrace_make_call.
+ * Total length is 24 bytes. The complete mcount block initially gets replaced
+ * by ftrace_make_nop. Subsequent calls to ftrace_make_call / ftrace_make_nop
+ * only patch the jg/lg instruction within the block.
+ * Note: we do not patch the first instruction to an unconditional branch,
+ * since that would break kprobes/jprobes. It is easier to leave the larl
+ * instruction in and only modify the second instruction.
  * The 64-bit enabled ftrace code block looks like this:
- *	stg	%r14,8(%r15)		# offset 0
+ *	larl	%r0,.+24		# offset 0
  * >	lg	%r1,__LC_FTRACE_FUNC	# offset 6
- * >	lgr	%r0,%r0			# offset 12
- * >	basr	%r14,%r1		# offset 16
- *	lg	%r14,8(%15)		# offset 18
- * The return points of the mcount/ftrace function have the same offset 18.
- * The 64-bit disable ftrace code block looks like this:
- *	stg	%r14,8(%r15)		# offset 0
+ *	br	%r1			# offset 12
+ *	brcl	0,0			# offset 14
+ *	brc	0,0			# offset 20
+ * The ftrace function gets called with a non-standard C function call ABI
+ * where r0 contains the return address. It is also expected that the called
+ * function only clobbers r0 and r1, but restores r2-r15.
+ * The return point of the ftrace function has offset 24, so execution
+ * continues behind the mcount block.
+ *	larl	%r0,.+24		# offset 0
  * >	jg	.+18			# offset 6
- * >	lgr	%r0,%r0			# offset 12
- * >	basr	%r14,%r1		# offset 16
- *	lg	%r14,8(%15)		# offset 18
+ *	br	%r1			# offset 12
+ *	brcl	0,0			# offset 14
+ *	brc	0,0			# offset 20
  * The jg instruction branches to offset 24 to skip as many instructions
  * as possible.
  */
 asm(
 	"	.align	4\n"
+	"mcount_replace_code:\n"
+	"	larl	%r0,0f\n"
 	"ftrace_disable_code:\n"
 	"	jg	0f\n"
-	"	lgr	%r0,%r0\n"
-	"	basr	%r14,%r1\n"
+	"	br	%r1\n"
+	"	brcl	0,0\n"
+	"	brc	0,0\n"
 	"0:\n"
 	"	.align	4\n"
 	"ftrace_enable_insn:\n"
 	"	lg	%r1,"__stringify(__LC_FTRACE_FUNC)"\n");
 
+#define MCOUNT_BLOCK_SIZE	24
+#define MCOUNT_INSN_OFFSET	6
 #define FTRACE_INSN_SIZE	6
 
 #else /* CONFIG_64BIT */
@@ -116,6 +129,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 		    unsigned long addr)
 {
+#ifdef CONFIG_64BIT
+	/* Initial replacement of the whole mcount block */
+	if (addr == MCOUNT_ADDR) {
+		if (probe_kernel_write((void *) rec->ip - MCOUNT_INSN_OFFSET,
+				       mcount_replace_code,
+				       MCOUNT_BLOCK_SIZE))
+			return -EPERM;
+		return 0;
+	}
+#endif
 	if (probe_kernel_write((void *) rec->ip, ftrace_disable_code,
 			       MCOUNT_INSN_SIZE))
 		return -EPERM;
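For context, the enable side is untouched by this hunk: as the updated
block comment says, ftrace_make_call keeps rewriting only the 6-byte jg/lg
slot at rec->ip. A sketch of that pre-existing counterpart, mirroring the
disable path above:

	/* Sketch: enable path, patching only offset 6 of the block */
	if (probe_kernel_write((void *) rec->ip, ftrace_enable_insn,
			       FTRACE_INSN_SIZE))
		return -EPERM;
	return 0;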
diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S
index 8cf976f83a10..07abe8d464d4 100644
--- a/arch/s390/kernel/mcount64.S
+++ b/arch/s390/kernel/mcount64.S
@@ -16,7 +16,6 @@ ENTRY(ftrace_stub)
 	br	%r14
 
 #define STACK_FRAME_SIZE  (STACK_FRAME_OVERHEAD + __PT_SIZE)
-#define STACK_PARENT_IP   (STACK_FRAME_SIZE + 8)
 #define STACK_PTREGS	  (STACK_FRAME_OVERHEAD)
 #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
 #define STACK_PTREGS_PSW  (STACK_PTREGS + __PT_PSW)
@@ -31,40 +30,39 @@ ENTRY(ftrace_caller)
 	aghi	%r15,-STACK_FRAME_SIZE
 	stg	%r1,__SF_BACKCHAIN(%r15)
 	stg	%r1,(STACK_PTREGS_GPRS+15*8)(%r15)
-	stmg	%r0,%r13,STACK_PTREGS_GPRS(%r15)
-	stg	%r14,(STACK_PTREGS_PSW+8)(%r15)
+	stg	%r0,(STACK_PTREGS_PSW+8)(%r15)
+	stmg	%r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
-	aghik	%r2,%r14,-MCOUNT_INSN_SIZE
+	aghik	%r2,%r0,-MCOUNT_INSN_SIZE
 	lgrl	%r4,function_trace_op
-	lgrl	%r14,ftrace_trace_function
+	lgrl	%r1,ftrace_trace_function
 #else
-	lgr	%r2,%r14
+	lgr	%r2,%r0
 	aghi	%r2,-MCOUNT_INSN_SIZE
 	larl	%r4,function_trace_op
 	lg	%r4,0(%r4)
-	larl	%r14,ftrace_trace_function
-	lg	%r14,0(%r14)
+	larl	%r1,ftrace_trace_function
+	lg	%r1,0(%r1)
 #endif
-	lg	%r3,STACK_PARENT_IP(%r15)
+	lgr	%r3,%r14
 	la	%r5,STACK_PTREGS(%r15)
-	basr	%r14,%r14
+	basr	%r14,%r1
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 # The j instruction gets runtime patched to a nop instruction.
 # See ftrace_enable_ftrace_graph_caller. The patched instruction is:
 # j	.+4
 ENTRY(ftrace_graph_caller)
 	j	ftrace_graph_caller_end
-	lg	%r2,STACK_PARENT_IP(%r15)
+	lg	%r2,(STACK_PTREGS_GPRS+14*8)(%r15)
 	lg	%r3,(STACK_PTREGS_PSW+8)(%r15)
 	brasl	%r14,prepare_ftrace_return
-	stg	%r2,STACK_PARENT_IP(%r15)
+	stg	%r2,(STACK_PTREGS_GPRS+14*8)(%r15)
 ftrace_graph_caller_end:
 	.globl	ftrace_graph_caller_end
 #endif
-	lmg	%r0,%r13,STACK_PTREGS_GPRS(%r15)
-	lg	%r14,(STACK_PTREGS_PSW+8)(%r15)
-	aghi	%r15,STACK_FRAME_SIZE
-	br	%r14
+	lg	%r1,(STACK_PTREGS_PSW+8)(%r15)
+	lmg	%r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
+	br	%r1
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
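Taken together, the new ftrace_caller converts the non-standard entry state
(%r0 = return address into the mcount block, %r14 = return address of the
traced function) into a regular C ABI call. A hedged summary of the register
marshalling, assuming the generic ftrace_func_t signature:

	/* Sketch: the C-level call ftrace_caller performs, with
	 *   %r2 = ip        = %r0 - MCOUNT_INSN_SIZE  (the traced function)
	 *   %r3 = parent_ip = %r14                    (its caller)
	 *   %r4 = op        = function_trace_op
	 *   %r5 = regs      = pt_regs area on the ftrace stack frame
	 * followed by basr %r14,%r1 with %r1 = ftrace_trace_function. */
	ftrace_trace_function(ip, parent_ip, function_trace_op, regs);

On return, the lg/lmg/br epilogue restores %r2-%r15 from the pt_regs area,
which is why the convention only allows the called function to clobber %r0
and %r1.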