diff options
author | Torsten Duwe <duwe@suse.de> | 2016-03-02 23:26:59 -0500 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2016-03-06 22:53:55 -0500 |
commit | 153086644fd1fb07fb3af84d9f11542a19b1e8b6 (patch) | |
tree | 83f3ffb83f6bee07a72562de83db23f1dd0c7a3a | |
parent | 9a7841ae8d6ce9b7a7cf879c9968fcf4c9545563 (diff) |
powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI
The gcc switch -mprofile-kernel defines a new ABI for calling _mcount()
very early in the function with minimal overhead.
Although mprofile-kernel has been available since GCC 3.4, there were
bugs which were only fixed recently. Currently it is known to work in
GCC 4.9, 5 and 6.
Additionally there are two possible code sequences generated by the
flag, the first uses mflr/std/bl and the second is optimised to omit the
std. Currently only gcc 6 has the optimised sequence. This patch
supports both sequences.
Initial work started by Vojtech Pavlik, used with permission.
Key changes:
- rework _mcount() to work for both the old and new ABIs.
- implement new versions of ftrace_caller() and ftrace_graph_caller()
which deal with the new ABI.
- updates to __ftrace_make_nop() to recognise the new mcount calling
sequence.
- updates to __ftrace_make_call() to recognise the nop'ed sequence.
- implement ftrace_modify_call().
- updates to the module loader to suppress the toc save in the module
stub when calling mcount with the new ABI.
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Torsten Duwe <duwe@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r-- | arch/powerpc/include/asm/code-patching.h | 21 | ||||
-rw-r--r-- | arch/powerpc/include/asm/ftrace.h | 5 | ||||
-rw-r--r-- | arch/powerpc/kernel/entry_64.S | 166 | ||||
-rw-r--r-- | arch/powerpc/kernel/ftrace.c | 103 | ||||
-rw-r--r-- | arch/powerpc/kernel/module_64.c | 49 |
5 files changed, 324 insertions, 20 deletions
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 840a5509b3f1..994c60a857ce 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h | |||
@@ -99,4 +99,25 @@ static inline unsigned long ppc_global_function_entry(void *func) | |||
99 | #endif | 99 | #endif |
100 | } | 100 | } |
101 | 101 | ||
102 | #ifdef CONFIG_PPC64 | ||
103 | /* | ||
104 | * Some instruction encodings commonly used in dynamic ftracing | ||
105 | * and function live patching. | ||
106 | */ | ||
107 | |||
108 | /* This must match the definition of STK_GOT in <asm/ppc_asm.h> */ | ||
109 | #if defined(_CALL_ELF) && _CALL_ELF == 2 | ||
110 | #define R2_STACK_OFFSET 24 | ||
111 | #else | ||
112 | #define R2_STACK_OFFSET 40 | ||
113 | #endif | ||
114 | |||
115 | #define PPC_INST_LD_TOC (PPC_INST_LD | ___PPC_RT(__REG_R2) | \ | ||
116 | ___PPC_RA(__REG_R1) | R2_STACK_OFFSET) | ||
117 | |||
118 | /* usually preceded by a mflr r0 */ | ||
119 | #define PPC_INST_STD_LR (PPC_INST_STD | ___PPC_RS(__REG_R0) | \ | ||
120 | ___PPC_RA(__REG_R1) | PPC_LR_STKOFF) | ||
121 | #endif /* CONFIG_PPC64 */ | ||
122 | |||
102 | #endif /* _ASM_POWERPC_CODE_PATCHING_H */ | 123 | #endif /* _ASM_POWERPC_CODE_PATCHING_H */ |
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index ef89b1465573..50ca7585abe2 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h | |||
@@ -46,6 +46,8 @@ | |||
46 | extern void _mcount(void); | 46 | extern void _mcount(void); |
47 | 47 | ||
48 | #ifdef CONFIG_DYNAMIC_FTRACE | 48 | #ifdef CONFIG_DYNAMIC_FTRACE |
49 | # define FTRACE_ADDR ((unsigned long)ftrace_caller) | ||
50 | # define FTRACE_REGS_ADDR FTRACE_ADDR | ||
49 | static inline unsigned long ftrace_call_adjust(unsigned long addr) | 51 | static inline unsigned long ftrace_call_adjust(unsigned long addr) |
50 | { | 52 | { |
51 | /* reloction of mcount call site is the same as the address */ | 53 | /* reloction of mcount call site is the same as the address */ |
@@ -58,6 +60,9 @@ struct dyn_arch_ftrace { | |||
58 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 60 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
59 | #endif /* __ASSEMBLY__ */ | 61 | #endif /* __ASSEMBLY__ */ |
60 | 62 | ||
63 | #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS | ||
64 | #define ARCH_SUPPORTS_FTRACE_OPS 1 | ||
65 | #endif | ||
61 | #endif | 66 | #endif |
62 | 67 | ||
63 | #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__) | 68 | #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__) |
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 0d525ce3717f..ec7f8aada697 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S | |||
@@ -1143,8 +1143,12 @@ _GLOBAL(enter_prom) | |||
1143 | #ifdef CONFIG_DYNAMIC_FTRACE | 1143 | #ifdef CONFIG_DYNAMIC_FTRACE |
1144 | _GLOBAL(mcount) | 1144 | _GLOBAL(mcount) |
1145 | _GLOBAL(_mcount) | 1145 | _GLOBAL(_mcount) |
1146 | blr | 1146 | mflr r12 |
1147 | mtctr r12 | ||
1148 | mtlr r0 | ||
1149 | bctr | ||
1147 | 1150 | ||
1151 | #ifndef CC_USING_MPROFILE_KERNEL | ||
1148 | _GLOBAL_TOC(ftrace_caller) | 1152 | _GLOBAL_TOC(ftrace_caller) |
1149 | /* Taken from output of objdump from lib64/glibc */ | 1153 | /* Taken from output of objdump from lib64/glibc */ |
1150 | mflr r3 | 1154 | mflr r3 |
@@ -1166,6 +1170,115 @@ _GLOBAL(ftrace_graph_stub) | |||
1166 | ld r0, 128(r1) | 1170 | ld r0, 128(r1) |
1167 | mtlr r0 | 1171 | mtlr r0 |
1168 | addi r1, r1, 112 | 1172 | addi r1, r1, 112 |
1173 | |||
1174 | #else /* CC_USING_MPROFILE_KERNEL */ | ||
1175 | /* | ||
1176 | * | ||
1177 | * ftrace_caller() is the function that replaces _mcount() when ftrace is | ||
1178 | * active. | ||
1179 | * | ||
1180 | * We arrive here after a function A calls function B, and we are the trace | ||
1181 | * function for B. When we enter r1 points to A's stack frame, B has not yet | ||
1182 | * had a chance to allocate one yet. | ||
1183 | * | ||
1184 | * Additionally r2 may point either to the TOC for A, or B, depending on | ||
1185 | * whether B did a TOC setup sequence before calling us. | ||
1186 | * | ||
1187 | * On entry the LR points back to the _mcount() call site, and r0 holds the | ||
1188 | * saved LR as it was on entry to B, ie. the original return address at the | ||
1189 | * call site in A. | ||
1190 | * | ||
1191 | * Our job is to save the register state into a struct pt_regs (on the stack) | ||
1192 | * and then arrange for the ftrace function to be called. | ||
1193 | */ | ||
1194 | _GLOBAL(ftrace_caller) | ||
1195 | /* Save the original return address in A's stack frame */ | ||
1196 | std r0,LRSAVE(r1) | ||
1197 | |||
1198 | /* Create our stack frame + pt_regs */ | ||
1199 | stdu r1,-SWITCH_FRAME_SIZE(r1) | ||
1200 | |||
1201 | /* Save all gprs to pt_regs */ | ||
1202 | SAVE_8GPRS(0,r1) | ||
1203 | SAVE_8GPRS(8,r1) | ||
1204 | SAVE_8GPRS(16,r1) | ||
1205 | SAVE_8GPRS(24,r1) | ||
1206 | |||
1207 | /* Load special regs for save below */ | ||
1208 | mfmsr r8 | ||
1209 | mfctr r9 | ||
1210 | mfxer r10 | ||
1211 | mfcr r11 | ||
1212 | |||
1213 | /* Get the _mcount() call site out of LR */ | ||
1214 | mflr r7 | ||
1215 | /* Save it as pt_regs->nip & pt_regs->link */ | ||
1216 | std r7, _NIP(r1) | ||
1217 | std r7, _LINK(r1) | ||
1218 | |||
1219 | /* Save callee's TOC in the ABI compliant location */ | ||
1220 | std r2, 24(r1) | ||
1221 | ld r2,PACATOC(r13) /* get kernel TOC in r2 */ | ||
1222 | |||
1223 | addis r3,r2,function_trace_op@toc@ha | ||
1224 | addi r3,r3,function_trace_op@toc@l | ||
1225 | ld r5,0(r3) | ||
1226 | |||
1227 | /* Calculate ip from nip-4 into r3 for call below */ | ||
1228 | subi r3, r7, MCOUNT_INSN_SIZE | ||
1229 | |||
1230 | /* Put the original return address in r4 as parent_ip */ | ||
1231 | mr r4, r0 | ||
1232 | |||
1233 | /* Save special regs */ | ||
1234 | std r8, _MSR(r1) | ||
1235 | std r9, _CTR(r1) | ||
1236 | std r10, _XER(r1) | ||
1237 | std r11, _CCR(r1) | ||
1238 | |||
1239 | /* Load &pt_regs in r6 for call below */ | ||
1240 | addi r6, r1 ,STACK_FRAME_OVERHEAD | ||
1241 | |||
1242 | /* ftrace_call(r3, r4, r5, r6) */ | ||
1243 | .globl ftrace_call | ||
1244 | ftrace_call: | ||
1245 | bl ftrace_stub | ||
1246 | nop | ||
1247 | |||
1248 | /* Load ctr with the possibly modified NIP */ | ||
1249 | ld r3, _NIP(r1) | ||
1250 | mtctr r3 | ||
1251 | |||
1252 | /* Restore gprs */ | ||
1253 | REST_8GPRS(0,r1) | ||
1254 | REST_8GPRS(8,r1) | ||
1255 | REST_8GPRS(16,r1) | ||
1256 | REST_8GPRS(24,r1) | ||
1257 | |||
1258 | /* Restore callee's TOC */ | ||
1259 | ld r2, 24(r1) | ||
1260 | |||
1261 | /* Pop our stack frame */ | ||
1262 | addi r1, r1, SWITCH_FRAME_SIZE | ||
1263 | |||
1264 | /* Restore original LR for return to B */ | ||
1265 | ld r0, LRSAVE(r1) | ||
1266 | mtlr r0 | ||
1267 | |||
1268 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
1269 | stdu r1, -112(r1) | ||
1270 | .globl ftrace_graph_call | ||
1271 | ftrace_graph_call: | ||
1272 | b ftrace_graph_stub | ||
1273 | _GLOBAL(ftrace_graph_stub) | ||
1274 | addi r1, r1, 112 | ||
1275 | #endif | ||
1276 | |||
1277 | ld r0,LRSAVE(r1) /* restore callee's lr at _mcount site */ | ||
1278 | mtlr r0 | ||
1279 | bctr /* jump after _mcount site */ | ||
1280 | #endif /* CC_USING_MPROFILE_KERNEL */ | ||
1281 | |||
1169 | _GLOBAL(ftrace_stub) | 1282 | _GLOBAL(ftrace_stub) |
1170 | blr | 1283 | blr |
1171 | #else | 1284 | #else |
@@ -1198,6 +1311,7 @@ _GLOBAL(ftrace_stub) | |||
1198 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 1311 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
1199 | 1312 | ||
1200 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 1313 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1314 | #ifndef CC_USING_MPROFILE_KERNEL | ||
1201 | _GLOBAL(ftrace_graph_caller) | 1315 | _GLOBAL(ftrace_graph_caller) |
1202 | /* load r4 with local address */ | 1316 | /* load r4 with local address */ |
1203 | ld r4, 128(r1) | 1317 | ld r4, 128(r1) |
@@ -1222,6 +1336,56 @@ _GLOBAL(ftrace_graph_caller) | |||
1222 | addi r1, r1, 112 | 1336 | addi r1, r1, 112 |
1223 | blr | 1337 | blr |
1224 | 1338 | ||
1339 | #else /* CC_USING_MPROFILE_KERNEL */ | ||
1340 | _GLOBAL(ftrace_graph_caller) | ||
1341 | /* with -mprofile-kernel, parameter regs are still alive at _mcount */ | ||
1342 | std r10, 104(r1) | ||
1343 | std r9, 96(r1) | ||
1344 | std r8, 88(r1) | ||
1345 | std r7, 80(r1) | ||
1346 | std r6, 72(r1) | ||
1347 | std r5, 64(r1) | ||
1348 | std r4, 56(r1) | ||
1349 | std r3, 48(r1) | ||
1350 | |||
1351 | /* Save callee's TOC in the ABI compliant location */ | ||
1352 | std r2, 24(r1) | ||
1353 | ld r2, PACATOC(r13) /* get kernel TOC in r2 */ | ||
1354 | |||
1355 | mfctr r4 /* ftrace_caller has moved local addr here */ | ||
1356 | std r4, 40(r1) | ||
1357 | mflr r3 /* ftrace_caller has restored LR from stack */ | ||
1358 | subi r4, r4, MCOUNT_INSN_SIZE | ||
1359 | |||
1360 | bl prepare_ftrace_return | ||
1361 | nop | ||
1362 | |||
1363 | /* | ||
1364 | * prepare_ftrace_return gives us the address we divert to. | ||
1365 | * Change the LR to this. | ||
1366 | */ | ||
1367 | mtlr r3 | ||
1368 | |||
1369 | ld r0, 40(r1) | ||
1370 | mtctr r0 | ||
1371 | ld r10, 104(r1) | ||
1372 | ld r9, 96(r1) | ||
1373 | ld r8, 88(r1) | ||
1374 | ld r7, 80(r1) | ||
1375 | ld r6, 72(r1) | ||
1376 | ld r5, 64(r1) | ||
1377 | ld r4, 56(r1) | ||
1378 | ld r3, 48(r1) | ||
1379 | |||
1380 | /* Restore callee's TOC */ | ||
1381 | ld r2, 24(r1) | ||
1382 | |||
1383 | addi r1, r1, 112 | ||
1384 | mflr r0 | ||
1385 | std r0, LRSAVE(r1) | ||
1386 | bctr | ||
1387 | #endif /* CC_USING_MPROFILE_KERNEL */ | ||
1388 | |||
1225 | _GLOBAL(return_to_handler) | 1389 | _GLOBAL(return_to_handler) |
1226 | /* need to save return values */ | 1390 | /* need to save return values */ |
1227 | std r4, -32(r1) | 1391 | std r4, -32(r1) |
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 62899fbae703..9dac18dabd03 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c | |||
@@ -61,8 +61,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) | |||
61 | return -EFAULT; | 61 | return -EFAULT; |
62 | 62 | ||
63 | /* Make sure it is what we expect it to be */ | 63 | /* Make sure it is what we expect it to be */ |
64 | if (replaced != old) | 64 | if (replaced != old) { |
65 | pr_err("%p: replaced (%#x) != old (%#x)", | ||
66 | (void *)ip, replaced, old); | ||
65 | return -EINVAL; | 67 | return -EINVAL; |
68 | } | ||
66 | 69 | ||
67 | /* replace the text with the new text */ | 70 | /* replace the text with the new text */ |
68 | if (patch_instruction((unsigned int *)ip, new)) | 71 | if (patch_instruction((unsigned int *)ip, new)) |
@@ -108,11 +111,13 @@ __ftrace_make_nop(struct module *mod, | |||
108 | { | 111 | { |
109 | unsigned long entry, ptr, tramp; | 112 | unsigned long entry, ptr, tramp; |
110 | unsigned long ip = rec->ip; | 113 | unsigned long ip = rec->ip; |
111 | unsigned int op; | 114 | unsigned int op, pop; |
112 | 115 | ||
113 | /* read where this goes */ | 116 | /* read where this goes */ |
114 | if (probe_kernel_read(&op, (void *)ip, sizeof(int))) | 117 | if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { |
118 | pr_err("Fetching opcode failed.\n"); | ||
115 | return -EFAULT; | 119 | return -EFAULT; |
120 | } | ||
116 | 121 | ||
117 | /* Make sure that that this is still a 24bit jump */ | 122 | /* Make sure that that this is still a 24bit jump */ |
118 | if (!is_bl_op(op)) { | 123 | if (!is_bl_op(op)) { |
@@ -152,10 +157,42 @@ __ftrace_make_nop(struct module *mod, | |||
152 | * | 157 | * |
153 | * Use a b +8 to jump over the load. | 158 | * Use a b +8 to jump over the load. |
154 | */ | 159 | */ |
155 | op = 0x48000008; /* b +8 */ | ||
156 | 160 | ||
157 | if (patch_instruction((unsigned int *)ip, op)) | 161 | pop = PPC_INST_BRANCH | 8; /* b +8 */ |
162 | |||
163 | /* | ||
164 | * Check what is in the next instruction. We can see ld r2,40(r1), but | ||
165 | * on first pass after boot we will see mflr r0. | ||
166 | */ | ||
167 | if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) { | ||
168 | pr_err("Fetching op failed.\n"); | ||
169 | return -EFAULT; | ||
170 | } | ||
171 | |||
172 | if (op != PPC_INST_LD_TOC) { | ||
173 | unsigned int inst; | ||
174 | |||
175 | if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) { | ||
176 | pr_err("Fetching instruction at %lx failed.\n", ip - 4); | ||
177 | return -EFAULT; | ||
178 | } | ||
179 | |||
180 | /* We expect either a mlfr r0, or a std r0, LRSAVE(r1) */ | ||
181 | if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) { | ||
182 | pr_err("Unexpected instructions around bl _mcount\n" | ||
183 | "when enabling dynamic ftrace!\t" | ||
184 | "(%08x,bl,%08x)\n", inst, op); | ||
185 | return -EINVAL; | ||
186 | } | ||
187 | |||
188 | /* When using -mkernel_profile there is no load to jump over */ | ||
189 | pop = PPC_INST_NOP; | ||
190 | } | ||
191 | |||
192 | if (patch_instruction((unsigned int *)ip, pop)) { | ||
193 | pr_err("Patching NOP failed.\n"); | ||
158 | return -EPERM; | 194 | return -EPERM; |
195 | } | ||
159 | 196 | ||
160 | return 0; | 197 | return 0; |
161 | } | 198 | } |
@@ -281,16 +318,15 @@ int ftrace_make_nop(struct module *mod, | |||
281 | 318 | ||
282 | #ifdef CONFIG_MODULES | 319 | #ifdef CONFIG_MODULES |
283 | #ifdef CONFIG_PPC64 | 320 | #ifdef CONFIG_PPC64 |
321 | /* | ||
322 | * Examine the existing instructions for __ftrace_make_call. | ||
323 | * They should effectively be a NOP, and follow formal constraints, | ||
324 | * depending on the ABI. Return false if they don't. | ||
325 | */ | ||
326 | #ifndef CC_USING_MPROFILE_KERNEL | ||
284 | static int | 327 | static int |
285 | __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | 328 | expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) |
286 | { | 329 | { |
287 | unsigned int op[2]; | ||
288 | void *ip = (void *)rec->ip; | ||
289 | |||
290 | /* read where this goes */ | ||
291 | if (probe_kernel_read(op, ip, sizeof(op))) | ||
292 | return -EFAULT; | ||
293 | |||
294 | /* | 330 | /* |
295 | * We expect to see: | 331 | * We expect to see: |
296 | * | 332 | * |
@@ -300,8 +336,34 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | |||
300 | * The load offset is different depending on the ABI. For simplicity | 336 | * The load offset is different depending on the ABI. For simplicity |
301 | * just mask it out when doing the compare. | 337 | * just mask it out when doing the compare. |
302 | */ | 338 | */ |
303 | if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) { | 339 | if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000)) |
304 | pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]); | 340 | return 0; |
341 | return 1; | ||
342 | } | ||
343 | #else | ||
344 | static int | ||
345 | expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) | ||
346 | { | ||
347 | /* look for patched "NOP" on ppc64 with -mprofile-kernel */ | ||
348 | if (op0 != PPC_INST_NOP) | ||
349 | return 0; | ||
350 | return 1; | ||
351 | } | ||
352 | #endif | ||
353 | |||
354 | static int | ||
355 | __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | ||
356 | { | ||
357 | unsigned int op[2]; | ||
358 | void *ip = (void *)rec->ip; | ||
359 | |||
360 | /* read where this goes */ | ||
361 | if (probe_kernel_read(op, ip, sizeof(op))) | ||
362 | return -EFAULT; | ||
363 | |||
364 | if (!expected_nop_sequence(ip, op[0], op[1])) { | ||
365 | pr_err("Unexpected call sequence at %p: %x %x\n", | ||
366 | ip, op[0], op[1]); | ||
305 | return -EINVAL; | 367 | return -EINVAL; |
306 | } | 368 | } |
307 | 369 | ||
@@ -324,7 +386,16 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | |||
324 | 386 | ||
325 | return 0; | 387 | return 0; |
326 | } | 388 | } |
327 | #else | 389 | |
390 | #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS | ||
391 | int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, | ||
392 | unsigned long addr) | ||
393 | { | ||
394 | return ftrace_make_call(rec, addr); | ||
395 | } | ||
396 | #endif | ||
397 | |||
398 | #else /* !CONFIG_PPC64: */ | ||
328 | static int | 399 | static int |
329 | __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | 400 | __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) |
330 | { | 401 | { |
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 76c0963572f5..848b47499a27 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c | |||
@@ -42,7 +42,6 @@ | |||
42 | --RR. */ | 42 | --RR. */ |
43 | 43 | ||
44 | #if defined(_CALL_ELF) && _CALL_ELF == 2 | 44 | #if defined(_CALL_ELF) && _CALL_ELF == 2 |
45 | #define R2_STACK_OFFSET 24 | ||
46 | 45 | ||
47 | /* An address is simply the address of the function. */ | 46 | /* An address is simply the address of the function. */ |
48 | typedef unsigned long func_desc_t; | 47 | typedef unsigned long func_desc_t; |
@@ -74,7 +73,6 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) | |||
74 | return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); | 73 | return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); |
75 | } | 74 | } |
76 | #else | 75 | #else |
77 | #define R2_STACK_OFFSET 40 | ||
78 | 76 | ||
79 | /* An address is address of the OPD entry, which contains address of fn. */ | 77 | /* An address is address of the OPD entry, which contains address of fn. */ |
80 | typedef struct ppc64_opd_entry func_desc_t; | 78 | typedef struct ppc64_opd_entry func_desc_t; |
@@ -451,17 +449,60 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, | |||
451 | return (unsigned long)&stubs[i]; | 449 | return (unsigned long)&stubs[i]; |
452 | } | 450 | } |
453 | 451 | ||
452 | #ifdef CC_USING_MPROFILE_KERNEL | ||
453 | static bool is_early_mcount_callsite(u32 *instruction) | ||
454 | { | ||
455 | /* | ||
456 | * Check if this is one of the -mprofile-kernel sequences. | ||
457 | */ | ||
458 | if (instruction[-1] == PPC_INST_STD_LR && | ||
459 | instruction[-2] == PPC_INST_MFLR) | ||
460 | return true; | ||
461 | |||
462 | if (instruction[-1] == PPC_INST_MFLR) | ||
463 | return true; | ||
464 | |||
465 | return false; | ||
466 | } | ||
467 | |||
468 | /* | ||
469 | * In case of _mcount calls, do not save the current callee's TOC (in r2) into | ||
470 | * the original caller's stack frame. If we did we would clobber the saved TOC | ||
471 | * value of the original caller. | ||
472 | */ | ||
473 | static void squash_toc_save_inst(const char *name, unsigned long addr) | ||
474 | { | ||
475 | struct ppc64_stub_entry *stub = (struct ppc64_stub_entry *)addr; | ||
476 | |||
477 | /* Only for calls to _mcount */ | ||
478 | if (strcmp("_mcount", name) != 0) | ||
479 | return; | ||
480 | |||
481 | stub->jump[2] = PPC_INST_NOP; | ||
482 | } | ||
483 | #else | ||
484 | static void squash_toc_save_inst(const char *name, unsigned long addr) { } | ||
485 | |||
486 | /* without -mprofile-kernel, mcount calls are never early */ | ||
487 | static bool is_early_mcount_callsite(u32 *instruction) | ||
488 | { | ||
489 | return false; | ||
490 | } | ||
491 | #endif | ||
492 | |||
454 | /* We expect a noop next: if it is, replace it with instruction to | 493 | /* We expect a noop next: if it is, replace it with instruction to |
455 | restore r2. */ | 494 | restore r2. */ |
456 | static int restore_r2(u32 *instruction, struct module *me) | 495 | static int restore_r2(u32 *instruction, struct module *me) |
457 | { | 496 | { |
458 | if (*instruction != PPC_INST_NOP) { | 497 | if (*instruction != PPC_INST_NOP) { |
498 | if (is_early_mcount_callsite(instruction - 1)) | ||
499 | return 1; | ||
459 | pr_err("%s: Expect noop after relocate, got %08x\n", | 500 | pr_err("%s: Expect noop after relocate, got %08x\n", |
460 | me->name, *instruction); | 501 | me->name, *instruction); |
461 | return 0; | 502 | return 0; |
462 | } | 503 | } |
463 | /* ld r2,R2_STACK_OFFSET(r1) */ | 504 | /* ld r2,R2_STACK_OFFSET(r1) */ |
464 | *instruction = 0xe8410000 | R2_STACK_OFFSET; | 505 | *instruction = PPC_INST_LD_TOC; |
465 | return 1; | 506 | return 1; |
466 | } | 507 | } |
467 | 508 | ||
@@ -586,6 +627,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
586 | return -ENOENT; | 627 | return -ENOENT; |
587 | if (!restore_r2((u32 *)location + 1, me)) | 628 | if (!restore_r2((u32 *)location + 1, me)) |
588 | return -ENOEXEC; | 629 | return -ENOEXEC; |
630 | |||
631 | squash_toc_save_inst(strtab + sym->st_name, value); | ||
589 | } else | 632 | } else |
590 | value += local_entry_offset(sym); | 633 | value += local_entry_offset(sym); |
591 | 634 | ||