Diffstat (limited to 'arch/x86/kernel/kprobes.c')
-rw-r--r--   arch/x86/kernel/kprobes.c | 664
1 file changed, 76 insertions, 588 deletions
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7da647d8b64..e213fc8408d 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -30,16 +30,15 @@ | |||
30 | * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi | 30 | * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi |
31 | * <prasanna@in.ibm.com> added function-return probes. | 31 | * <prasanna@in.ibm.com> added function-return probes. |
32 | * 2005-May Rusty Lynch <rusty.lynch@intel.com> | 32 | * 2005-May Rusty Lynch <rusty.lynch@intel.com> |
33 | * Added function return probes functionality | 33 | * Added function return probes functionality |
34 | * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added | 34 | * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added |
35 | * kprobe-booster and kretprobe-booster for i386. | 35 | * kprobe-booster and kretprobe-booster for i386. |
36 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster | 36 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster |
37 | * and kretprobe-booster for x86-64 | 37 | * and kretprobe-booster for x86-64 |
38 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven | 38 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven |
39 | * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> | 39 | * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> |
40 | * unified x86 kprobes code. | 40 | * unified x86 kprobes code. |
41 | */ | 41 | */ |
42 | |||
43 | #include <linux/kprobes.h> | 42 | #include <linux/kprobes.h> |
44 | #include <linux/ptrace.h> | 43 | #include <linux/ptrace.h> |
45 | #include <linux/string.h> | 44 | #include <linux/string.h> |
@@ -59,6 +58,8 @@ | |||
59 | #include <asm/insn.h> | 58 | #include <asm/insn.h> |
60 | #include <asm/debugreg.h> | 59 | #include <asm/debugreg.h> |
61 | 60 | ||
61 | #include "kprobes-common.h" | ||
62 | |||
62 | void jprobe_return_end(void); | 63 | void jprobe_return_end(void); |
63 | 64 | ||
64 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | 65 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; |
@@ -108,6 +109,7 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { | |||
108 | doesn't switch kernel stack.*/ | 109 | doesn't switch kernel stack.*/ |
109 | {NULL, NULL} /* Terminator */ | 110 | {NULL, NULL} /* Terminator */ |
110 | }; | 111 | }; |
112 | |||
111 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); | 113 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); |
112 | 114 | ||
113 | static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) | 115 | static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) |
@@ -123,11 +125,17 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) | |||
123 | } | 125 | } |
124 | 126 | ||
125 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ | 127 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ |
126 | static void __kprobes synthesize_reljump(void *from, void *to) | 128 | void __kprobes synthesize_reljump(void *from, void *to) |
127 | { | 129 | { |
128 | __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); | 130 | __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); |
129 | } | 131 | } |
130 | 132 | ||
133 | /* Insert a call instruction at address 'from', which calls address 'to'.*/ | ||
134 | void __kprobes synthesize_relcall(void *from, void *to) | ||
135 | { | ||
136 | __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); | ||
137 | } | ||
138 | |||
131 | /* | 139 | /* |
132 | * Skip the prefixes of the instruction. | 140 | * Skip the prefixes of the instruction. |
133 | */ | 141 | */ |
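Both helpers above (synthesize_reljump() and the newly non-static synthesize_relcall()) wrap __synthesize_relative_insn(), which writes a 5-byte x86 instruction: one opcode byte followed by a signed 32-bit displacement counted from the end of the instruction. As a rough illustration only (this struct is not part of the patch), the bytes placed at 'from' look like:

/* Illustrative layout only, not from the patch. */
struct rel_insn {
	u8  op;     /* RELATIVEJUMP_OPCODE (0xe9, jmp rel32) or RELATIVECALL_OPCODE (0xe8, call rel32) */
	s32 raddr;  /* (s32)((long)to - ((long)from + 5)) */
} __packed;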
@@ -151,7 +159,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) | |||
151 | * Returns non-zero if opcode is boostable. | 159 | * Returns non-zero if opcode is boostable. |
152 | * RIP relative instructions are adjusted at copying time in 64 bits mode | 160 | * RIP relative instructions are adjusted at copying time in 64 bits mode |
153 | */ | 161 | */ |
154 | static int __kprobes can_boost(kprobe_opcode_t *opcodes) | 162 | int __kprobes can_boost(kprobe_opcode_t *opcodes) |
155 | { | 163 | { |
156 | kprobe_opcode_t opcode; | 164 | kprobe_opcode_t opcode; |
157 | kprobe_opcode_t *orig_opcodes = opcodes; | 165 | kprobe_opcode_t *orig_opcodes = opcodes; |
@@ -207,13 +215,15 @@ retry: | |||
207 | } | 215 | } |
208 | } | 216 | } |
209 | 217 | ||
210 | /* Recover the probed instruction at addr for further analysis. */ | 218 | static unsigned long |
211 | static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | 219 | __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) |
212 | { | 220 | { |
213 | struct kprobe *kp; | 221 | struct kprobe *kp; |
222 | |||
214 | kp = get_kprobe((void *)addr); | 223 | kp = get_kprobe((void *)addr); |
224 | /* There is no probe, return original address */ | ||
215 | if (!kp) | 225 | if (!kp) |
216 | return -EINVAL; | 226 | return addr; |
217 | 227 | ||
218 | /* | 228 | /* |
219 | * Basically, kp->ainsn.insn has an original instruction. | 229 | * Basically, kp->ainsn.insn has an original instruction. |
@@ -230,14 +240,29 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | |||
230 | */ | 240 | */ |
231 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | 241 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); |
232 | buf[0] = kp->opcode; | 242 | buf[0] = kp->opcode; |
233 | return 0; | 243 | return (unsigned long)buf; |
244 | } | ||
245 | |||
246 | /* | ||
247 | * Recover the probed instruction at addr for further analysis. | ||
248 | * Caller must lock kprobes by kprobe_mutex, or disable preemption | ||
249 | * for preventing to release referencing kprobes. | ||
250 | */ | ||
251 | unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | ||
252 | { | ||
253 | unsigned long __addr; | ||
254 | |||
255 | __addr = __recover_optprobed_insn(buf, addr); | ||
256 | if (__addr != addr) | ||
257 | return __addr; | ||
258 | |||
259 | return __recover_probed_insn(buf, addr); | ||
234 | } | 260 | } |
235 | 261 | ||
236 | /* Check if paddr is at an instruction boundary */ | 262 | /* Check if paddr is at an instruction boundary */ |
237 | static int __kprobes can_probe(unsigned long paddr) | 263 | static int __kprobes can_probe(unsigned long paddr) |
238 | { | 264 | { |
239 | int ret; | 265 | unsigned long addr, __addr, offset = 0; |
240 | unsigned long addr, offset = 0; | ||
241 | struct insn insn; | 266 | struct insn insn; |
242 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 267 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
243 | 268 | ||
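The recovery helper introduced above is used the same way throughout this patch (see can_probe() and __copy_instruction() below): decode from the recovered bytes so that another kprobe's int3 or an optimized jump does not confuse the instruction decoder, and give up if a breakpoint kprobes does not own is still present. A minimal caller sketch, assuming the surrounding kernel headers; decode_probed_insn() is a hypothetical name, not part of the patch:

#include <linux/kprobes.h>
#include <linux/preempt.h>
#include <asm/insn.h>

/* Hypothetical helper, for illustration only. */
static int decode_probed_insn(unsigned long addr, struct insn *insn)
{
	kprobe_opcode_t buf[MAX_INSN_SIZE];
	unsigned long recovered;

	/* recover_probed_instruction() needs kprobe_mutex or preemption disabled. */
	preempt_disable();
	recovered = recover_probed_instruction(buf, addr);
	kernel_insn_init(insn, (void *)recovered);
	insn_get_length(insn);	/* also decodes the opcode bytes */
	preempt_enable();

	/* A breakpoint planted by another subsystem cannot be recovered. */
	if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
		return -EINVAL;
	return 0;
}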
@@ -247,26 +272,24 @@ static int __kprobes can_probe(unsigned long paddr) | |||
247 | /* Decode instructions */ | 272 | /* Decode instructions */ |
248 | addr = paddr - offset; | 273 | addr = paddr - offset; |
249 | while (addr < paddr) { | 274 | while (addr < paddr) { |
250 | kernel_insn_init(&insn, (void *)addr); | ||
251 | insn_get_opcode(&insn); | ||
252 | |||
253 | /* | 275 | /* |
254 | * Check if the instruction has been modified by another | 276 | * Check if the instruction has been modified by another |
255 | * kprobe, in which case we replace the breakpoint by the | 277 | * kprobe, in which case we replace the breakpoint by the |
256 | * original instruction in our buffer. | 278 | * original instruction in our buffer. |
279 | * Also, jump optimization will change the breakpoint to | ||
280 | * relative-jump. Since the relative-jump itself is | ||
281 | * normally used, we just go through if there is no kprobe. | ||
257 | */ | 282 | */ |
258 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | 283 | __addr = recover_probed_instruction(buf, addr); |
259 | ret = recover_probed_instruction(buf, addr); | 284 | kernel_insn_init(&insn, (void *)__addr); |
260 | if (ret) | ||
261 | /* | ||
262 | * Another debugging subsystem might insert | ||
263 | * this breakpoint. In that case, we can't | ||
264 | * recover it. | ||
265 | */ | ||
266 | return 0; | ||
267 | kernel_insn_init(&insn, buf); | ||
268 | } | ||
269 | insn_get_length(&insn); | 285 | insn_get_length(&insn); |
286 | |||
287 | /* | ||
288 | * Another debugging subsystem might insert this breakpoint. | ||
289 | * In that case, we can't recover it. | ||
290 | */ | ||
291 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) | ||
292 | return 0; | ||
270 | addr += insn.length; | 293 | addr += insn.length; |
271 | } | 294 | } |
272 | 295 | ||
@@ -299,24 +322,16 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
299 | * If not, return null. | 322 | * If not, return null. |
300 | * Only applicable to 64-bit x86. | 323 | * Only applicable to 64-bit x86. |
301 | */ | 324 | */ |
302 | static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) | 325 | int __kprobes __copy_instruction(u8 *dest, u8 *src) |
303 | { | 326 | { |
304 | struct insn insn; | 327 | struct insn insn; |
305 | int ret; | ||
306 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 328 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
307 | 329 | ||
308 | kernel_insn_init(&insn, src); | 330 | kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src)); |
309 | if (recover) { | ||
310 | insn_get_opcode(&insn); | ||
311 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
312 | ret = recover_probed_instruction(buf, | ||
313 | (unsigned long)src); | ||
314 | if (ret) | ||
315 | return 0; | ||
316 | kernel_insn_init(&insn, buf); | ||
317 | } | ||
318 | } | ||
319 | insn_get_length(&insn); | 331 | insn_get_length(&insn); |
332 | /* Another subsystem puts a breakpoint, failed to recover */ | ||
333 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) | ||
334 | return 0; | ||
320 | memcpy(dest, insn.kaddr, insn.length); | 335 | memcpy(dest, insn.kaddr, insn.length); |
321 | 336 | ||
322 | #ifdef CONFIG_X86_64 | 337 | #ifdef CONFIG_X86_64 |
@@ -337,8 +352,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) | |||
337 | * extension of the original signed 32-bit displacement would | 352 | * extension of the original signed 32-bit displacement would |
338 | * have given. | 353 | * have given. |
339 | */ | 354 | */ |
340 | newdisp = (u8 *) src + (s64) insn.displacement.value - | 355 | newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest; |
341 | (u8 *) dest; | ||
342 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ | 356 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ |
343 | disp = (u8 *) dest + insn_offset_displacement(&insn); | 357 | disp = (u8 *) dest + insn_offset_displacement(&insn); |
344 | *(s32 *) disp = (s32) newdisp; | 358 | *(s32 *) disp = (s32) newdisp; |
@@ -349,18 +363,20 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) | |||
349 | 363 | ||
350 | static void __kprobes arch_copy_kprobe(struct kprobe *p) | 364 | static void __kprobes arch_copy_kprobe(struct kprobe *p) |
351 | { | 365 | { |
366 | /* Copy an instruction with recovering if other optprobe modifies it.*/ | ||
367 | __copy_instruction(p->ainsn.insn, p->addr); | ||
368 | |||
352 | /* | 369 | /* |
353 | * Copy an instruction without recovering int3, because it will be | 370 | * __copy_instruction can modify the displacement of the instruction, |
354 | * put by another subsystem. | 371 | * but it doesn't affect boostable check. |
355 | */ | 372 | */ |
356 | __copy_instruction(p->ainsn.insn, p->addr, 0); | 373 | if (can_boost(p->ainsn.insn)) |
357 | |||
358 | if (can_boost(p->addr)) | ||
359 | p->ainsn.boostable = 0; | 374 | p->ainsn.boostable = 0; |
360 | else | 375 | else |
361 | p->ainsn.boostable = -1; | 376 | p->ainsn.boostable = -1; |
362 | 377 | ||
363 | p->opcode = *p->addr; | 378 | /* Also, displacement change doesn't affect the first byte */ |
379 | p->opcode = p->ainsn.insn[0]; | ||
364 | } | 380 | } |
365 | 381 | ||
366 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 382 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
@@ -442,8 +458,8 @@ static void __kprobes restore_btf(void) | |||
442 | } | 458 | } |
443 | } | 459 | } |
444 | 460 | ||
445 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 461 | void __kprobes |
446 | struct pt_regs *regs) | 462 | arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) |
447 | { | 463 | { |
448 | unsigned long *sara = stack_addr(regs); | 464 | unsigned long *sara = stack_addr(regs); |
449 | 465 | ||
@@ -453,16 +469,8 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | |||
453 | *sara = (unsigned long) &kretprobe_trampoline; | 469 | *sara = (unsigned long) &kretprobe_trampoline; |
454 | } | 470 | } |
455 | 471 | ||
456 | #ifdef CONFIG_OPTPROBES | 472 | static void __kprobes |
457 | static int __kprobes setup_detour_execution(struct kprobe *p, | 473 | setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) |
458 | struct pt_regs *regs, | ||
459 | int reenter); | ||
460 | #else | ||
461 | #define setup_detour_execution(p, regs, reenter) (0) | ||
462 | #endif | ||
463 | |||
464 | static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, | ||
465 | struct kprobe_ctlblk *kcb, int reenter) | ||
466 | { | 474 | { |
467 | if (setup_detour_execution(p, regs, reenter)) | 475 | if (setup_detour_execution(p, regs, reenter)) |
468 | return; | 476 | return; |
@@ -504,8 +512,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, | |||
504 | * within the handler. We save the original kprobes variables and just single | 512 | * within the handler. We save the original kprobes variables and just single |
505 | * step on the instruction of the new probe without calling any user handlers. | 513 | * step on the instruction of the new probe without calling any user handlers. |
506 | */ | 514 | */ |
507 | static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | 515 | static int __kprobes |
508 | struct kprobe_ctlblk *kcb) | 516 | reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) |
509 | { | 517 | { |
510 | switch (kcb->kprobe_status) { | 518 | switch (kcb->kprobe_status) { |
511 | case KPROBE_HIT_SSDONE: | 519 | case KPROBE_HIT_SSDONE: |
@@ -600,69 +608,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
600 | return 0; | 608 | return 0; |
601 | } | 609 | } |
602 | 610 | ||
603 | #ifdef CONFIG_X86_64 | ||
604 | #define SAVE_REGS_STRING \ | ||
605 | /* Skip cs, ip, orig_ax. */ \ | ||
606 | " subq $24, %rsp\n" \ | ||
607 | " pushq %rdi\n" \ | ||
608 | " pushq %rsi\n" \ | ||
609 | " pushq %rdx\n" \ | ||
610 | " pushq %rcx\n" \ | ||
611 | " pushq %rax\n" \ | ||
612 | " pushq %r8\n" \ | ||
613 | " pushq %r9\n" \ | ||
614 | " pushq %r10\n" \ | ||
615 | " pushq %r11\n" \ | ||
616 | " pushq %rbx\n" \ | ||
617 | " pushq %rbp\n" \ | ||
618 | " pushq %r12\n" \ | ||
619 | " pushq %r13\n" \ | ||
620 | " pushq %r14\n" \ | ||
621 | " pushq %r15\n" | ||
622 | #define RESTORE_REGS_STRING \ | ||
623 | " popq %r15\n" \ | ||
624 | " popq %r14\n" \ | ||
625 | " popq %r13\n" \ | ||
626 | " popq %r12\n" \ | ||
627 | " popq %rbp\n" \ | ||
628 | " popq %rbx\n" \ | ||
629 | " popq %r11\n" \ | ||
630 | " popq %r10\n" \ | ||
631 | " popq %r9\n" \ | ||
632 | " popq %r8\n" \ | ||
633 | " popq %rax\n" \ | ||
634 | " popq %rcx\n" \ | ||
635 | " popq %rdx\n" \ | ||
636 | " popq %rsi\n" \ | ||
637 | " popq %rdi\n" \ | ||
638 | /* Skip orig_ax, ip, cs */ \ | ||
639 | " addq $24, %rsp\n" | ||
640 | #else | ||
641 | #define SAVE_REGS_STRING \ | ||
642 | /* Skip cs, ip, orig_ax and gs. */ \ | ||
643 | " subl $16, %esp\n" \ | ||
644 | " pushl %fs\n" \ | ||
645 | " pushl %es\n" \ | ||
646 | " pushl %ds\n" \ | ||
647 | " pushl %eax\n" \ | ||
648 | " pushl %ebp\n" \ | ||
649 | " pushl %edi\n" \ | ||
650 | " pushl %esi\n" \ | ||
651 | " pushl %edx\n" \ | ||
652 | " pushl %ecx\n" \ | ||
653 | " pushl %ebx\n" | ||
654 | #define RESTORE_REGS_STRING \ | ||
655 | " popl %ebx\n" \ | ||
656 | " popl %ecx\n" \ | ||
657 | " popl %edx\n" \ | ||
658 | " popl %esi\n" \ | ||
659 | " popl %edi\n" \ | ||
660 | " popl %ebp\n" \ | ||
661 | " popl %eax\n" \ | ||
662 | /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ | ||
663 | " addl $24, %esp\n" | ||
664 | #endif | ||
665 | |||
666 | /* | 611 | /* |
667 | * When a retprobed function returns, this code saves registers and | 612 | * When a retprobed function returns, this code saves registers and |
668 | * calls trampoline_handler() runs, which calls the kretprobe's handler. | 613 | * calls trampoline_handler() runs, which calls the kretprobe's handler. |
@@ -816,8 +761,8 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
816 | * jump instruction after the copied instruction, that jumps to the next | 761 | * jump instruction after the copied instruction, that jumps to the next |
817 | * instruction after the probepoint. | 762 | * instruction after the probepoint. |
818 | */ | 763 | */ |
819 | static void __kprobes resume_execution(struct kprobe *p, | 764 | static void __kprobes |
820 | struct pt_regs *regs, struct kprobe_ctlblk *kcb) | 765 | resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) |
821 | { | 766 | { |
822 | unsigned long *tos = stack_addr(regs); | 767 | unsigned long *tos = stack_addr(regs); |
823 | unsigned long copy_ip = (unsigned long)p->ainsn.insn; | 768 | unsigned long copy_ip = (unsigned long)p->ainsn.insn; |
@@ -996,8 +941,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) | |||
996 | /* | 941 | /* |
997 | * Wrapper routine for handling exceptions. | 942 | * Wrapper routine for handling exceptions. |
998 | */ | 943 | */ |
999 | int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | 944 | int __kprobes |
1000 | unsigned long val, void *data) | 945 | kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) |
1001 | { | 946 | { |
1002 | struct die_args *args = data; | 947 | struct die_args *args = data; |
1003 | int ret = NOTIFY_DONE; | 948 | int ret = NOTIFY_DONE; |
@@ -1107,466 +1052,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | |||
1107 | return 0; | 1052 | return 0; |
1108 | } | 1053 | } |
1109 | 1054 | ||
1110 | |||
1111 | #ifdef CONFIG_OPTPROBES | ||
1112 | |||
1113 | /* Insert a call instruction at address 'from', which calls address 'to'.*/ | ||
1114 | static void __kprobes synthesize_relcall(void *from, void *to) | ||
1115 | { | ||
1116 | __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); | ||
1117 | } | ||
1118 | |||
1119 | /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ | ||
1120 | static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, | ||
1121 | unsigned long val) | ||
1122 | { | ||
1123 | #ifdef CONFIG_X86_64 | ||
1124 | *addr++ = 0x48; | ||
1125 | *addr++ = 0xbf; | ||
1126 | #else | ||
1127 | *addr++ = 0xb8; | ||
1128 | #endif | ||
1129 | *(unsigned long *)addr = val; | ||
1130 | } | ||
1131 | |||
1132 | static void __used __kprobes kprobes_optinsn_template_holder(void) | ||
1133 | { | ||
1134 | asm volatile ( | ||
1135 | ".global optprobe_template_entry\n" | ||
1136 | "optprobe_template_entry: \n" | ||
1137 | #ifdef CONFIG_X86_64 | ||
1138 | /* We don't bother saving the ss register */ | ||
1139 | " pushq %rsp\n" | ||
1140 | " pushfq\n" | ||
1141 | SAVE_REGS_STRING | ||
1142 | " movq %rsp, %rsi\n" | ||
1143 | ".global optprobe_template_val\n" | ||
1144 | "optprobe_template_val: \n" | ||
1145 | ASM_NOP5 | ||
1146 | ASM_NOP5 | ||
1147 | ".global optprobe_template_call\n" | ||
1148 | "optprobe_template_call: \n" | ||
1149 | ASM_NOP5 | ||
1150 | /* Move flags to rsp */ | ||
1151 | " movq 144(%rsp), %rdx\n" | ||
1152 | " movq %rdx, 152(%rsp)\n" | ||
1153 | RESTORE_REGS_STRING | ||
1154 | /* Skip flags entry */ | ||
1155 | " addq $8, %rsp\n" | ||
1156 | " popfq\n" | ||
1157 | #else /* CONFIG_X86_32 */ | ||
1158 | " pushf\n" | ||
1159 | SAVE_REGS_STRING | ||
1160 | " movl %esp, %edx\n" | ||
1161 | ".global optprobe_template_val\n" | ||
1162 | "optprobe_template_val: \n" | ||
1163 | ASM_NOP5 | ||
1164 | ".global optprobe_template_call\n" | ||
1165 | "optprobe_template_call: \n" | ||
1166 | ASM_NOP5 | ||
1167 | RESTORE_REGS_STRING | ||
1168 | " addl $4, %esp\n" /* skip cs */ | ||
1169 | " popf\n" | ||
1170 | #endif | ||
1171 | ".global optprobe_template_end\n" | ||
1172 | "optprobe_template_end: \n"); | ||
1173 | } | ||
1174 | |||
1175 | #define TMPL_MOVE_IDX \ | ||
1176 | ((long)&optprobe_template_val - (long)&optprobe_template_entry) | ||
1177 | #define TMPL_CALL_IDX \ | ||
1178 | ((long)&optprobe_template_call - (long)&optprobe_template_entry) | ||
1179 | #define TMPL_END_IDX \ | ||
1180 | ((long)&optprobe_template_end - (long)&optprobe_template_entry) | ||
1181 | |||
1182 | #define INT3_SIZE sizeof(kprobe_opcode_t) | ||
1183 | |||
1184 | /* Optimized kprobe call back function: called from optinsn */ | ||
1185 | static void __kprobes optimized_callback(struct optimized_kprobe *op, | ||
1186 | struct pt_regs *regs) | ||
1187 | { | ||
1188 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
1189 | unsigned long flags; | ||
1190 | |||
1191 | /* This is possible if op is under delayed unoptimizing */ | ||
1192 | if (kprobe_disabled(&op->kp)) | ||
1193 | return; | ||
1194 | |||
1195 | local_irq_save(flags); | ||
1196 | if (kprobe_running()) { | ||
1197 | kprobes_inc_nmissed_count(&op->kp); | ||
1198 | } else { | ||
1199 | /* Save skipped registers */ | ||
1200 | #ifdef CONFIG_X86_64 | ||
1201 | regs->cs = __KERNEL_CS; | ||
1202 | #else | ||
1203 | regs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
1204 | regs->gs = 0; | ||
1205 | #endif | ||
1206 | regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; | ||
1207 | regs->orig_ax = ~0UL; | ||
1208 | |||
1209 | __this_cpu_write(current_kprobe, &op->kp); | ||
1210 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
1211 | opt_pre_handler(&op->kp, regs); | ||
1212 | __this_cpu_write(current_kprobe, NULL); | ||
1213 | } | ||
1214 | local_irq_restore(flags); | ||
1215 | } | ||
1216 | |||
1217 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | ||
1218 | { | ||
1219 | int len = 0, ret; | ||
1220 | |||
1221 | while (len < RELATIVEJUMP_SIZE) { | ||
1222 | ret = __copy_instruction(dest + len, src + len, 1); | ||
1223 | if (!ret || !can_boost(dest + len)) | ||
1224 | return -EINVAL; | ||
1225 | len += ret; | ||
1226 | } | ||
1227 | /* Check whether the address range is reserved */ | ||
1228 | if (ftrace_text_reserved(src, src + len - 1) || | ||
1229 | alternatives_text_reserved(src, src + len - 1) || | ||
1230 | jump_label_text_reserved(src, src + len - 1)) | ||
1231 | return -EBUSY; | ||
1232 | |||
1233 | return len; | ||
1234 | } | ||
1235 | |||
1236 | /* Check whether insn is indirect jump */ | ||
1237 | static int __kprobes insn_is_indirect_jump(struct insn *insn) | ||
1238 | { | ||
1239 | return ((insn->opcode.bytes[0] == 0xff && | ||
1240 | (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ | ||
1241 | insn->opcode.bytes[0] == 0xea); /* Segment based jump */ | ||
1242 | } | ||
1243 | |||
1244 | /* Check whether insn jumps into specified address range */ | ||
1245 | static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) | ||
1246 | { | ||
1247 | unsigned long target = 0; | ||
1248 | |||
1249 | switch (insn->opcode.bytes[0]) { | ||
1250 | case 0xe0: /* loopne */ | ||
1251 | case 0xe1: /* loope */ | ||
1252 | case 0xe2: /* loop */ | ||
1253 | case 0xe3: /* jcxz */ | ||
1254 | case 0xe9: /* near relative jump */ | ||
1255 | case 0xeb: /* short relative jump */ | ||
1256 | break; | ||
1257 | case 0x0f: | ||
1258 | if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ | ||
1259 | break; | ||
1260 | return 0; | ||
1261 | default: | ||
1262 | if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ | ||
1263 | break; | ||
1264 | return 0; | ||
1265 | } | ||
1266 | target = (unsigned long)insn->next_byte + insn->immediate.value; | ||
1267 | |||
1268 | return (start <= target && target <= start + len); | ||
1269 | } | ||
1270 | |||
1271 | /* Decode whole function to ensure any instructions don't jump into target */ | ||
1272 | static int __kprobes can_optimize(unsigned long paddr) | ||
1273 | { | ||
1274 | int ret; | ||
1275 | unsigned long addr, size = 0, offset = 0; | ||
1276 | struct insn insn; | ||
1277 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
1278 | |||
1279 | /* Lookup symbol including addr */ | ||
1280 | if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) | ||
1281 | return 0; | ||
1282 | |||
1283 | /* | ||
1284 | * Do not optimize in the entry code due to the unstable | ||
1285 | * stack handling. | ||
1286 | */ | ||
1287 | if ((paddr >= (unsigned long )__entry_text_start) && | ||
1288 | (paddr < (unsigned long )__entry_text_end)) | ||
1289 | return 0; | ||
1290 | |||
1291 | /* Check there is enough space for a relative jump. */ | ||
1292 | if (size - offset < RELATIVEJUMP_SIZE) | ||
1293 | return 0; | ||
1294 | |||
1295 | /* Decode instructions */ | ||
1296 | addr = paddr - offset; | ||
1297 | while (addr < paddr - offset + size) { /* Decode until function end */ | ||
1298 | if (search_exception_tables(addr)) | ||
1299 | /* | ||
1300 | * Since some fixup code will jumps into this function, | ||
1301 | * we can't optimize kprobe in this function. | ||
1302 | */ | ||
1303 | return 0; | ||
1304 | kernel_insn_init(&insn, (void *)addr); | ||
1305 | insn_get_opcode(&insn); | ||
1306 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
1307 | ret = recover_probed_instruction(buf, addr); | ||
1308 | if (ret) | ||
1309 | return 0; | ||
1310 | kernel_insn_init(&insn, buf); | ||
1311 | } | ||
1312 | insn_get_length(&insn); | ||
1313 | /* Recover address */ | ||
1314 | insn.kaddr = (void *)addr; | ||
1315 | insn.next_byte = (void *)(addr + insn.length); | ||
1316 | /* Check any instructions don't jump into target */ | ||
1317 | if (insn_is_indirect_jump(&insn) || | ||
1318 | insn_jump_into_range(&insn, paddr + INT3_SIZE, | ||
1319 | RELATIVE_ADDR_SIZE)) | ||
1320 | return 0; | ||
1321 | addr += insn.length; | ||
1322 | } | ||
1323 | |||
1324 | return 1; | ||
1325 | } | ||
1326 | |||
1327 | /* Check optimized_kprobe can actually be optimized. */ | ||
1328 | int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) | ||
1329 | { | ||
1330 | int i; | ||
1331 | struct kprobe *p; | ||
1332 | |||
1333 | for (i = 1; i < op->optinsn.size; i++) { | ||
1334 | p = get_kprobe(op->kp.addr + i); | ||
1335 | if (p && !kprobe_disabled(p)) | ||
1336 | return -EEXIST; | ||
1337 | } | ||
1338 | |||
1339 | return 0; | ||
1340 | } | ||
1341 | |||
1342 | /* Check the addr is within the optimized instructions. */ | ||
1343 | int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op, | ||
1344 | unsigned long addr) | ||
1345 | { | ||
1346 | return ((unsigned long)op->kp.addr <= addr && | ||
1347 | (unsigned long)op->kp.addr + op->optinsn.size > addr); | ||
1348 | } | ||
1349 | |||
1350 | /* Free optimized instruction slot */ | ||
1351 | static __kprobes | ||
1352 | void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) | ||
1353 | { | ||
1354 | if (op->optinsn.insn) { | ||
1355 | free_optinsn_slot(op->optinsn.insn, dirty); | ||
1356 | op->optinsn.insn = NULL; | ||
1357 | op->optinsn.size = 0; | ||
1358 | } | ||
1359 | } | ||
1360 | |||
1361 | void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) | ||
1362 | { | ||
1363 | __arch_remove_optimized_kprobe(op, 1); | ||
1364 | } | ||
1365 | |||
1366 | /* | ||
1367 | * Copy replacing target instructions | ||
1368 | * Target instructions MUST be relocatable (checked inside) | ||
1369 | */ | ||
1370 | int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | ||
1371 | { | ||
1372 | u8 *buf; | ||
1373 | int ret; | ||
1374 | long rel; | ||
1375 | |||
1376 | if (!can_optimize((unsigned long)op->kp.addr)) | ||
1377 | return -EILSEQ; | ||
1378 | |||
1379 | op->optinsn.insn = get_optinsn_slot(); | ||
1380 | if (!op->optinsn.insn) | ||
1381 | return -ENOMEM; | ||
1382 | |||
1383 | /* | ||
1384 | * Verify if the address gap is in 2GB range, because this uses | ||
1385 | * a relative jump. | ||
1386 | */ | ||
1387 | rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; | ||
1388 | if (abs(rel) > 0x7fffffff) | ||
1389 | return -ERANGE; | ||
1390 | |||
1391 | buf = (u8 *)op->optinsn.insn; | ||
1392 | |||
1393 | /* Copy instructions into the out-of-line buffer */ | ||
1394 | ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); | ||
1395 | if (ret < 0) { | ||
1396 | __arch_remove_optimized_kprobe(op, 0); | ||
1397 | return ret; | ||
1398 | } | ||
1399 | op->optinsn.size = ret; | ||
1400 | |||
1401 | /* Copy arch-dep-instance from template */ | ||
1402 | memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); | ||
1403 | |||
1404 | /* Set probe information */ | ||
1405 | synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); | ||
1406 | |||
1407 | /* Set probe function call */ | ||
1408 | synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); | ||
1409 | |||
1410 | /* Set returning jmp instruction at the tail of out-of-line buffer */ | ||
1411 | synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, | ||
1412 | (u8 *)op->kp.addr + op->optinsn.size); | ||
1413 | |||
1414 | flush_icache_range((unsigned long) buf, | ||
1415 | (unsigned long) buf + TMPL_END_IDX + | ||
1416 | op->optinsn.size + RELATIVEJUMP_SIZE); | ||
1417 | return 0; | ||
1418 | } | ||
1419 | |||
1420 | #define MAX_OPTIMIZE_PROBES 256 | ||
1421 | static struct text_poke_param *jump_poke_params; | ||
1422 | static struct jump_poke_buffer { | ||
1423 | u8 buf[RELATIVEJUMP_SIZE]; | ||
1424 | } *jump_poke_bufs; | ||
1425 | |||
1426 | static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, | ||
1427 | u8 *insn_buf, | ||
1428 | struct optimized_kprobe *op) | ||
1429 | { | ||
1430 | s32 rel = (s32)((long)op->optinsn.insn - | ||
1431 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | ||
1432 | |||
1433 | /* Backup instructions which will be replaced by jump address */ | ||
1434 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, | ||
1435 | RELATIVE_ADDR_SIZE); | ||
1436 | |||
1437 | insn_buf[0] = RELATIVEJUMP_OPCODE; | ||
1438 | *(s32 *)(&insn_buf[1]) = rel; | ||
1439 | |||
1440 | tprm->addr = op->kp.addr; | ||
1441 | tprm->opcode = insn_buf; | ||
1442 | tprm->len = RELATIVEJUMP_SIZE; | ||
1443 | } | ||
1444 | |||
1445 | /* | ||
1446 | * Replace breakpoints (int3) with relative jumps. | ||
1447 | * Caller must call with locking kprobe_mutex and text_mutex. | ||
1448 | */ | ||
1449 | void __kprobes arch_optimize_kprobes(struct list_head *oplist) | ||
1450 | { | ||
1451 | struct optimized_kprobe *op, *tmp; | ||
1452 | int c = 0; | ||
1453 | |||
1454 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
1455 | WARN_ON(kprobe_disabled(&op->kp)); | ||
1456 | /* Setup param */ | ||
1457 | setup_optimize_kprobe(&jump_poke_params[c], | ||
1458 | jump_poke_bufs[c].buf, op); | ||
1459 | list_del_init(&op->list); | ||
1460 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
1461 | break; | ||
1462 | } | ||
1463 | |||
1464 | /* | ||
1465 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
1466 | * However, since kprobes itself also doesn't support NMI/MCE | ||
1467 | * code probing, it's not a problem. | ||
1468 | */ | ||
1469 | text_poke_smp_batch(jump_poke_params, c); | ||
1470 | } | ||
1471 | |||
1472 | static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, | ||
1473 | u8 *insn_buf, | ||
1474 | struct optimized_kprobe *op) | ||
1475 | { | ||
1476 | /* Set int3 to first byte for kprobes */ | ||
1477 | insn_buf[0] = BREAKPOINT_INSTRUCTION; | ||
1478 | memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
1479 | |||
1480 | tprm->addr = op->kp.addr; | ||
1481 | tprm->opcode = insn_buf; | ||
1482 | tprm->len = RELATIVEJUMP_SIZE; | ||
1483 | } | ||
1484 | |||
1485 | /* | ||
1486 | * Recover original instructions and breakpoints from relative jumps. | ||
1487 | * Caller must call with locking kprobe_mutex. | ||
1488 | */ | ||
1489 | extern void arch_unoptimize_kprobes(struct list_head *oplist, | ||
1490 | struct list_head *done_list) | ||
1491 | { | ||
1492 | struct optimized_kprobe *op, *tmp; | ||
1493 | int c = 0; | ||
1494 | |||
1495 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
1496 | /* Setup param */ | ||
1497 | setup_unoptimize_kprobe(&jump_poke_params[c], | ||
1498 | jump_poke_bufs[c].buf, op); | ||
1499 | list_move(&op->list, done_list); | ||
1500 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
1501 | break; | ||
1502 | } | ||
1503 | |||
1504 | /* | ||
1505 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
1506 | * However, since kprobes itself also doesn't support NMI/MCE | ||
1507 | * code probing, it's not a problem. | ||
1508 | */ | ||
1509 | text_poke_smp_batch(jump_poke_params, c); | ||
1510 | } | ||
1511 | |||
1512 | /* Replace a relative jump with a breakpoint (int3). */ | ||
1513 | void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) | ||
1514 | { | ||
1515 | u8 buf[RELATIVEJUMP_SIZE]; | ||
1516 | |||
1517 | /* Set int3 to first byte for kprobes */ | ||
1518 | buf[0] = BREAKPOINT_INSTRUCTION; | ||
1519 | memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
1520 | text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); | ||
1521 | } | ||
1522 | |||
1523 | static int __kprobes setup_detour_execution(struct kprobe *p, | ||
1524 | struct pt_regs *regs, | ||
1525 | int reenter) | ||
1526 | { | ||
1527 | struct optimized_kprobe *op; | ||
1528 | |||
1529 | if (p->flags & KPROBE_FLAG_OPTIMIZED) { | ||
1530 | /* This kprobe is really able to run optimized path. */ | ||
1531 | op = container_of(p, struct optimized_kprobe, kp); | ||
1532 | /* Detour through copied instructions */ | ||
1533 | regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; | ||
1534 | if (!reenter) | ||
1535 | reset_current_kprobe(); | ||
1536 | preempt_enable_no_resched(); | ||
1537 | return 1; | ||
1538 | } | ||
1539 | return 0; | ||
1540 | } | ||
1541 | |||
1542 | static int __kprobes init_poke_params(void) | ||
1543 | { | ||
1544 | /* Allocate code buffer and parameter array */ | ||
1545 | jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * | ||
1546 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
1547 | if (!jump_poke_bufs) | ||
1548 | return -ENOMEM; | ||
1549 | |||
1550 | jump_poke_params = kmalloc(sizeof(struct text_poke_param) * | ||
1551 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
1552 | if (!jump_poke_params) { | ||
1553 | kfree(jump_poke_bufs); | ||
1554 | jump_poke_bufs = NULL; | ||
1555 | return -ENOMEM; | ||
1556 | } | ||
1557 | |||
1558 | return 0; | ||
1559 | } | ||
1560 | #else /* !CONFIG_OPTPROBES */ | ||
1561 | static int __kprobes init_poke_params(void) | ||
1562 | { | ||
1563 | return 0; | ||
1564 | } | ||
1565 | #endif | ||
1566 | |||
1567 | int __init arch_init_kprobes(void) | 1055 | int __init arch_init_kprobes(void) |
1568 | { | 1056 | { |
1569 | return init_poke_params(); | 1057 | return arch_init_optprobes(); |
1570 | } | 1058 | } |
1571 | 1059 | ||
1572 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) | 1060 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) |