Diffstat (limited to 'arch/x86/kernel/kprobes.c')

 arch/x86/kernel/kprobes.c | 888 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 631 insertions(+), 257 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7b5169d2b000..1658efdfb4e5 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -48,31 +48,23 @@
 #include <linux/preempt.h>
 #include <linux/module.h>
 #include <linux/kdebug.h>
+#include <linux/kallsyms.h>
+#include <linux/ftrace.h>
 
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/alternative.h>
+#include <asm/insn.h>
+#include <asm/debugreg.h>
 
 void jprobe_return_end(void);
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
-#ifdef CONFIG_X86_64
-#define stack_addr(regs) ((unsigned long *)regs->sp)
-#else
-/*
- * "&regs->sp" looks wrong, but it's correct for x86_32.  x86_32 CPUs
- * don't save the ss and esp registers if the CPU is already in kernel
- * mode when it traps.  So for kprobes, regs->sp and regs->ss are not
- * the [nonexistent] saved stack pointer and ss register, but rather
- * the top 8 bytes of the pre-int3 stack.  So &regs->sp happens to
- * point to the top of the pre-int3 stack.
- */
-#define stack_addr(regs) ((unsigned long *)&regs->sp)
-#endif
+#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs))
 
 #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
 	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
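The W() macro in the context above packs sixteen 0/1 flags per opcode row, two rows per 32-bit word, so a 256-entry opcode attribute table fits in eight u32s. As an illustration only (not part of the patch), the lookup that can_boost() performs with test_bit() on such a table is equivalent to:

	/* Sketch: read one opcode's flag from a table built with W().
	 * The table has 8 words; opcode n's bit lives at word n/32,
	 * bit position n%32. */
	static int opcode_attr(const u32 *table, u8 opcode)
	{
		return (table[opcode >> 5] >> (opcode & 0x1f)) & 1;
	}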
@@ -106,50 +98,6 @@ static const u32 twobyte_is_boostable[256 / 32] = {
 	/*      -----------------------------------------------         */
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
 };
-static const u32 onebyte_has_modrm[256 / 32] = {
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-	/*      -----------------------------------------------         */
-	W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
-	W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
-	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
-	W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
-	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
-	W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
-	W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
-	W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
-	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
-	W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
-	W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
-	W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
-	W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
-	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
-	W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
-	W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1)   /* f0 */
-	/*      -----------------------------------------------         */
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-};
-static const u32 twobyte_has_modrm[256 / 32] = {
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-	/*      -----------------------------------------------         */
-	W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
-	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
-	W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
-	W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
-	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
-	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
-	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
-	W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
-	W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
-	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
-	W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
-	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
-	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
-	W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
-	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
-	W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* ff */
-	/*      -----------------------------------------------         */
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-};
 #undef W
 
 struct kretprobe_blackpoint kretprobe_blacklist[] = {
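The hand-maintained ModRM tables can be deleted because the in-kernel x86 instruction decoder (<asm/insn.h>, included above) parses prefixes, opcode, ModRM and displacement itself. A minimal sketch of the decoder calls the new code relies on (the same API used by can_probe() later in this patch):

	/* Sketch: instruction length via the in-kernel x86 decoder,
	 * replacing the manual prefix skipping and ModRM table walk. */
	static int insn_len_at(void *kaddr)
	{
		struct insn insn;

		kernel_insn_init(&insn, kaddr);
		insn_get_length(&insn);	/* decodes prefixes/opcode/ModRM/disp */
		return insn.length;
	}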
@@ -159,16 +107,22 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
 };
 const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
 
-/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-static void __kprobes set_jmp_op(void *from, void *to)
+static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
 {
-	struct __arch_jmp_op {
-		char op;
+	struct __arch_relative_insn {
+		u8 op;
 		s32 raddr;
-	} __attribute__((packed)) * jop;
-	jop = (struct __arch_jmp_op *)from;
-	jop->raddr = (s32)((long)(to) - ((long)(from) + 5));
-	jop->op = RELATIVEJUMP_INSTRUCTION;
+	} __attribute__((packed)) *insn;
+
+	insn = (struct __arch_relative_insn *)from;
+	insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
+	insn->op = op;
+}
+
+/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
+static void __kprobes synthesize_reljump(void *from, void *to)
+{
+	__synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
 }
 
 /*
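__synthesize_relative_insn() emits a 5-byte opcode-plus-rel32 instruction; the displacement is measured from the end of the instruction, hence the "+ 5". A worked example, assuming RELATIVEJUMP_OPCODE is 0xe9 (near jmp rel32):

	/*
	 * Sketch: synthesize_reljump(from = 0xffffffff81000000,
	 *                            to   = 0xffffffff81000100)
	 * writes the bytes:  e9 fb 00 00 00
	 * because:           raddr = to - (from + 5) = 0x100 - 5 = 0xfb
	 * The CPU adds raddr to the next instruction's address (from + 5),
	 * which lands exactly on 'to'.
	 */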
@@ -244,6 +198,75 @@ retry:
 	}
 }
 
+/* Recover the probed instruction at addr for further analysis. */
+static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+{
+	struct kprobe *kp;
+	kp = get_kprobe((void *)addr);
+	if (!kp)
+		return -EINVAL;
+
+	/*
+	 * Basically, kp->ainsn.insn has an original instruction.
+	 * However, RIP-relative instruction can not do single-stepping
+	 * at different place, __copy_instruction() tweaks the displacement of
+	 * that instruction. In that case, we can't recover the instruction
+	 * from the kp->ainsn.insn.
+	 *
+	 * On the other hand, kp->opcode has a copy of the first byte of
+	 * the probed instruction, which is overwritten by int3. And
+	 * the instruction at kp->addr is not modified by kprobes except
+	 * for the first byte, we can recover the original instruction
+	 * from it and kp->opcode.
+	 */
+	memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	buf[0] = kp->opcode;
+	return 0;
+}
+
+/* Dummy buffers for kallsyms_lookup */
+static char __dummy_buf[KSYM_NAME_LEN];
+
+/* Check if paddr is at an instruction boundary */
+static int __kprobes can_probe(unsigned long paddr)
+{
+	int ret;
+	unsigned long addr, offset = 0;
+	struct insn insn;
+	kprobe_opcode_t buf[MAX_INSN_SIZE];
+
+	if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
+		return 0;
+
+	/* Decode instructions */
+	addr = paddr - offset;
+	while (addr < paddr) {
+		kernel_insn_init(&insn, (void *)addr);
+		insn_get_opcode(&insn);
+
+		/*
+		 * Check if the instruction has been modified by another
+		 * kprobe, in which case we replace the breakpoint by the
+		 * original instruction in our buffer.
+		 */
+		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+			ret = recover_probed_instruction(buf, addr);
+			if (ret)
+				/*
+				 * Another debugging subsystem might insert
+				 * this breakpoint. In that case, we can't
+				 * recover it.
+				 */
+				return 0;
+			kernel_insn_init(&insn, buf);
+		}
+		insn_get_length(&insn);
+		addr += insn.length;
+	}
+
+	return (addr == paddr);
+}
+
 /*
  * Returns non-zero if opcode modifies the interrupt flag.
  */
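can_probe() works because x86 decoding only self-synchronizes from a known boundary: starting at the symbol and stepping by insn.length can land exactly on paddr only if paddr really is an instruction boundary. For illustration, with hypothetical instruction lengths:

	/*
	 * Sketch: a function at f decoding as insns of length 5, 3, 2, ...
	 * visits f, f+5, f+8, f+10, ...  A probe at f+9 is mid-instruction:
	 * the walk steps from f+8 straight to f+10, the final
	 * (addr == paddr) test fails, and arch_prepare_kprobe() below
	 * rejects the registration with -EILSEQ.
	 */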
@@ -268,86 +291,67 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 }
 
 /*
- * Adjust the displacement if the instruction uses the %rip-relative
- * addressing mode.
+ * Copy an instruction and adjust the displacement if the instruction
+ * uses the %rip-relative addressing mode.
  * If it does, Return the address of the 32-bit displacement word.
  * If not, return null.
  * Only applicable to 64-bit x86.
  */
-static void __kprobes fix_riprel(struct kprobe *p)
+static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
 {
-#ifdef CONFIG_X86_64
-	u8 *insn = p->ainsn.insn;
-	s64 disp;
-	int need_modrm;
-
-	/* Skip legacy instruction prefixes.  */
-	while (1) {
-		switch (*insn) {
-		case 0x66:
-		case 0x67:
-		case 0x2e:
-		case 0x3e:
-		case 0x26:
-		case 0x64:
-		case 0x65:
-		case 0x36:
-		case 0xf0:
-		case 0xf3:
-		case 0xf2:
-			++insn;
-			continue;
+	struct insn insn;
+	int ret;
+	kprobe_opcode_t buf[MAX_INSN_SIZE];
+
+	kernel_insn_init(&insn, src);
+	if (recover) {
+		insn_get_opcode(&insn);
+		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+			ret = recover_probed_instruction(buf,
+							 (unsigned long)src);
+			if (ret)
+				return 0;
+			kernel_insn_init(&insn, buf);
 		}
-		break;
 	}
+	insn_get_length(&insn);
+	memcpy(dest, insn.kaddr, insn.length);
 
-	/* Skip REX instruction prefix.  */
-	if (is_REX_prefix(insn))
-		++insn;
-
-	if (*insn == 0x0f) {
-		/* Two-byte opcode.  */
-		++insn;
-		need_modrm = test_bit(*insn,
-				      (unsigned long *)twobyte_has_modrm);
-	} else
-		/* One-byte opcode.  */
-		need_modrm = test_bit(*insn,
-				      (unsigned long *)onebyte_has_modrm);
-
-	if (need_modrm) {
-		u8 modrm = *++insn;
-		if ((modrm & 0xc7) == 0x05) {
-			/* %rip+disp32 addressing mode */
-			/* Displacement follows ModRM byte.  */
-			++insn;
-			/*
-			 * The copied instruction uses the %rip-relative
-			 * addressing mode.  Adjust the displacement for the
-			 * difference between the original location of this
-			 * instruction and the location of the copy that will
-			 * actually be run.  The tricky bit here is making sure
-			 * that the sign extension happens correctly in this
-			 * calculation, since we need a signed 32-bit result to
-			 * be sign-extended to 64 bits when it's added to the
-			 * %rip value and yield the same 64-bit result that the
-			 * sign-extension of the original signed 32-bit
-			 * displacement would have given.
-			 */
-			disp = (u8 *) p->addr + *((s32 *) insn) -
-			       (u8 *) p->ainsn.insn;
-			BUG_ON((s64) (s32) disp != disp); /* Sanity check.  */
-			*(s32 *)insn = (s32) disp;
-		}
+#ifdef CONFIG_X86_64
+	if (insn_rip_relative(&insn)) {
+		s64 newdisp;
+		u8 *disp;
+		kernel_insn_init(&insn, dest);
+		insn_get_displacement(&insn);
+		/*
+		 * The copied instruction uses the %rip-relative addressing
+		 * mode.  Adjust the displacement for the difference between
+		 * the original location of this instruction and the location
+		 * of the copy that will actually be run.  The tricky bit here
+		 * is making sure that the sign extension happens correctly in
+		 * this calculation, since we need a signed 32-bit result to
+		 * be sign-extended to 64 bits when it's added to the %rip
+		 * value and yield the same 64-bit result that the sign-
+		 * extension of the original signed 32-bit displacement would
+		 * have given.
+		 */
+		newdisp = (u8 *) src + (s64) insn.displacement.value -
+			  (u8 *) dest;
+		BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check.  */
+		disp = (u8 *) dest + insn_offset_displacement(&insn);
+		*(s32 *) disp = (s32) newdisp;
 	}
 #endif
+	return insn.length;
 }
 
 static void __kprobes arch_copy_kprobe(struct kprobe *p)
 {
-	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
-
-	fix_riprel(p);
+	/*
+	 * Copy an instruction without recovering int3, because it will be
+	 * put by another subsystem.
+	 */
+	__copy_instruction(p->ainsn.insn, p->addr, 0);
 
 	if (can_boost(p->addr))
 		p->ainsn.boostable = 0;
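The displacement rebase in __copy_instruction() keeps the absolute target of a %rip-relative operand unchanged when the instruction runs from the out-of-line slot. With made-up addresses:

	/*
	 * Sketch: an insn at src references rip+0x1000 and is copied to an
	 * out-of-line slot at dest.  The absolute target must not move, so:
	 *
	 *   newdisp = disp + (src - dest)
	 *
	 * e.g. src = 0xffffffff81000000, dest = 0xffffffffa0000000:
	 *   newdisp = 0x1000 + (src - dest) = 0x1000 - 0x1f000000
	 *           = -0x1efff000
	 * which still fits in an s32, so the BUG_ON() sanity check passes
	 * and the copy, executed from dest, reads the same absolute address.
	 */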
@@ -359,6 +363,11 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
 
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
+	if (alternatives_text_reserved(p->addr, p->addr))
+		return -EINVAL;
+
+	if (!can_probe((unsigned long)p->addr))
+		return -EILSEQ;
 	/* insn: must be on special executable page on x86. */
 	p->ainsn.insn = get_insn_slot();
 	if (!p->ainsn.insn)
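With these checks, a bad registration now fails up front instead of corrupting the instruction stream when the int3 is written. A minimal usage sketch (the probe target and offset are hypothetical):

	#include <linux/kprobes.h>

	static struct kprobe kp = {
		.symbol_name	= "do_fork",	/* hypothetical target */
		.offset		= 1,	/* mid-instruction on most builds */
	};

	/* register_kprobe(&kp) now returns -EILSEQ from can_probe() when
	 * symbol+offset is not an instruction boundary, and -EINVAL when
	 * the bytes belong to an alternatives-patched region. */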
@@ -423,18 +432,6 @@ static void __kprobes restore_btf(void)
 		update_debugctlmsr(current->thread.debugctlmsr);
 }
 
-static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
-{
-	clear_btf();
-	regs->flags |= X86_EFLAGS_TF;
-	regs->flags &= ~X86_EFLAGS_IF;
-	/* single step inline if the instruction is an int3 */
-	if (p->opcode == BREAKPOINT_INSTRUCTION)
-		regs->ip = (unsigned long)p->addr;
-	else
-		regs->ip = (unsigned long)p->ainsn.insn;
-}
-
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
@@ -446,20 +443,50 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 	*sara = (unsigned long) &kretprobe_trampoline;
 }
 
+#ifdef CONFIG_OPTPROBES
+static int __kprobes setup_detour_execution(struct kprobe *p,
+					    struct pt_regs *regs,
+					    int reenter);
+#else
+#define setup_detour_execution(p, regs, reenter) (0)
+#endif
+
 static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
-				       struct kprobe_ctlblk *kcb)
+				       struct kprobe_ctlblk *kcb, int reenter)
 {
-#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER)
+	if (setup_detour_execution(p, regs, reenter))
+		return;
+
+#if !defined(CONFIG_PREEMPT)
 	if (p->ainsn.boostable == 1 && !p->post_handler) {
 		/* Boost up -- we can execute copied instructions directly */
-		reset_current_kprobe();
+		if (!reenter)
+			reset_current_kprobe();
+		/*
+		 * Reentering boosted probe doesn't reset current_kprobe,
+		 * nor set current_kprobe, because it doesn't use single
+		 * stepping.
+		 */
 		regs->ip = (unsigned long)p->ainsn.insn;
 		preempt_enable_no_resched();
 		return;
 	}
 #endif
-	prepare_singlestep(p, regs);
-	kcb->kprobe_status = KPROBE_HIT_SS;
+	if (reenter) {
+		save_previous_kprobe(kcb);
+		set_current_kprobe(p, regs, kcb);
+		kcb->kprobe_status = KPROBE_REENTER;
+	} else
+		kcb->kprobe_status = KPROBE_HIT_SS;
+	/* Prepare real single stepping */
+	clear_btf();
+	regs->flags |= X86_EFLAGS_TF;
+	regs->flags &= ~X86_EFLAGS_IF;
+	/* single step inline if the instruction is an int3 */
+	if (p->opcode == BREAKPOINT_INSTRUCTION)
+		regs->ip = (unsigned long)p->addr;
+	else
+		regs->ip = (unsigned long)p->ainsn.insn;
 }
 
 /*
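The "boost" fast path above works because the out-of-line slot is left in a shape that can be executed straight through. Roughly (a sketch, not literal patch content):

	/*
	 * Boosted slot layout (p->ainsn.insn):
	 *
	 *   [ copied original instruction ]   <= MAX_INSN_SIZE bytes
	 *   [ e9 rel32 ]                      jmp back to p->addr + length
	 *
	 * With no post_handler pending, setup_singlestep() can point
	 * regs->ip at the slot and skip the single-step #DB round trip.
	 */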
@@ -472,37 +499,21 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
 {
 	switch (kcb->kprobe_status) {
 	case KPROBE_HIT_SSDONE:
-#ifdef CONFIG_X86_64
-		/* TODO: Provide re-entrancy from post_kprobes_handler() and
-		 * avoid exception stack corruption while single-stepping on
-		 * the instruction of the new probe.
-		 */
-		arch_disarm_kprobe(p);
-		regs->ip = (unsigned long)p->addr;
-		reset_current_kprobe();
-		preempt_enable_no_resched();
-		break;
-#endif
 	case KPROBE_HIT_ACTIVE:
-		save_previous_kprobe(kcb);
-		set_current_kprobe(p, regs, kcb);
 		kprobes_inc_nmissed_count(p);
-		prepare_singlestep(p, regs);
-		kcb->kprobe_status = KPROBE_REENTER;
+		setup_singlestep(p, regs, kcb, 1);
 		break;
 	case KPROBE_HIT_SS:
-		if (p == kprobe_running()) {
-			regs->flags &= ~X86_EFLAGS_TF;
-			regs->flags |= kcb->kprobe_saved_flags;
-			return 0;
-		} else {
-			/* A probe has been hit in the codepath leading up
-			 * to, or just after, single-stepping of a probed
-			 * instruction. This entire codepath should strictly
-			 * reside in .kprobes.text section. Raise a warning
-			 * to highlight this peculiar case.
-			 */
-		}
+		/* A probe has been hit in the codepath leading up to, or just
+		 * after, single-stepping of a probed instruction. This entire
+		 * codepath should strictly reside in .kprobes.text section.
+		 * Raise a BUG or we'll continue in an endless reentering loop
+		 * and eventually a stack overflow.
+		 */
+		printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
+		       p->addr);
+		dump_kprobe(p);
+		BUG();
 	default:
 		/* impossible cases */
 		WARN_ON(1);
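The KPROBE_HIT_SS case is now fatal by design: it means code reachable while single-stepping was itself probed, which would recurse forever. Kprobes' own helpers avoid this by living in .kprobes.text via the __kprobes attribute; for illustration:

	/* Sketch: __kprobes places a function in .kprobes.text, which
	 * register_kprobe() refuses to probe, so helpers on the kprobe
	 * fast path can never trigger this reentry BUG(). */
	static int __kprobes helper_on_kprobe_path(void)
	{
		return 0;
	}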
@@ -514,7 +525,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
 
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
- * remain disabled thorough out this function.
+ * remain disabled throughout this function.
  */
 static int __kprobes kprobe_handler(struct pt_regs *regs)
 {
@@ -523,20 +534,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	struct kprobe_ctlblk *kcb;
 
 	addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
-	if (*addr != BREAKPOINT_INSTRUCTION) {
-		/*
-		 * The breakpoint instruction was removed right
-		 * after we hit it. Another cpu has removed
-		 * either a probepoint or a debugger breakpoint
-		 * at this address. In either case, no further
-		 * handling of this interrupt is appropriate.
-		 * Back up over the (now missing) int3 and run
-		 * the original instruction.
-		 */
-		regs->ip = (unsigned long)addr;
-		return 1;
-	}
-
 	/*
 	 * We don't want to be preempted for the entire
 	 * duration of kprobe processing. We conditionally
@@ -565,13 +562,26 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 			 * more here.
 			 */
 			if (!p->pre_handler || !p->pre_handler(p, regs))
-				setup_singlestep(p, regs, kcb);
+				setup_singlestep(p, regs, kcb, 0);
 			return 1;
 		}
+	} else if (*addr != BREAKPOINT_INSTRUCTION) {
+		/*
+		 * The breakpoint instruction was removed right
+		 * after we hit it. Another cpu has removed
+		 * either a probepoint or a debugger breakpoint
+		 * at this address. In either case, no further
+		 * handling of this interrupt is appropriate.
+		 * Back up over the (now missing) int3 and run
+		 * the original instruction.
+		 */
+		regs->ip = (unsigned long)addr;
+		preempt_enable_no_resched();
+		return 1;
 	} else if (kprobe_running()) {
 		p = __get_cpu_var(current_kprobe);
 		if (p->break_handler && p->break_handler(p, regs)) {
-			setup_singlestep(p, regs, kcb);
+			setup_singlestep(p, regs, kcb, 0);
 			return 1;
 		}
 	} /* else: not a kprobe fault; let the kernel handle it */
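Note the vanished-breakpoint check now runs after this function's preempt_disable(), which is why its early return gained a preempt_enable_no_resched(); the invariant preserved is:

	/*
	 * Sketch of the balance the relocated check must keep:
	 *
	 *	preempt_disable();
	 *	...
	 *	if (breakpoint_already_removed) {
	 *		regs->ip = (unsigned long)addr;	/* re-run the insn */
	 *		preempt_enable_no_resched();	/* undo the disable */
	 *		return 1;
	 *	}
	 */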
@@ -580,6 +590,69 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	return 0;
 }
 
+#ifdef CONFIG_X86_64
+#define SAVE_REGS_STRING			\
+	/* Skip cs, ip, orig_ax. */		\
+	"	subq $24, %rsp\n"		\
+	"	pushq %rdi\n"			\
+	"	pushq %rsi\n"			\
+	"	pushq %rdx\n"			\
+	"	pushq %rcx\n"			\
+	"	pushq %rax\n"			\
+	"	pushq %r8\n"			\
+	"	pushq %r9\n"			\
+	"	pushq %r10\n"			\
+	"	pushq %r11\n"			\
+	"	pushq %rbx\n"			\
+	"	pushq %rbp\n"			\
+	"	pushq %r12\n"			\
+	"	pushq %r13\n"			\
+	"	pushq %r14\n"			\
+	"	pushq %r15\n"
+#define RESTORE_REGS_STRING			\
+	"	popq %r15\n"			\
+	"	popq %r14\n"			\
+	"	popq %r13\n"			\
+	"	popq %r12\n"			\
+	"	popq %rbp\n"			\
+	"	popq %rbx\n"			\
+	"	popq %r11\n"			\
+	"	popq %r10\n"			\
+	"	popq %r9\n"			\
+	"	popq %r8\n"			\
+	"	popq %rax\n"			\
+	"	popq %rcx\n"			\
+	"	popq %rdx\n"			\
+	"	popq %rsi\n"			\
+	"	popq %rdi\n"			\
+	/* Skip orig_ax, ip, cs */		\
+	"	addq $24, %rsp\n"
+#else
+#define SAVE_REGS_STRING			\
+	/* Skip cs, ip, orig_ax and gs. */	\
+	"	subl $16, %esp\n"		\
+	"	pushl %fs\n"			\
+	"	pushl %ds\n"			\
+	"	pushl %es\n"			\
+	"	pushl %eax\n"			\
+	"	pushl %ebp\n"			\
+	"	pushl %edi\n"			\
+	"	pushl %esi\n"			\
+	"	pushl %edx\n"			\
+	"	pushl %ecx\n"			\
+	"	pushl %ebx\n"
+#define RESTORE_REGS_STRING			\
+	"	popl %ebx\n"			\
+	"	popl %ecx\n"			\
+	"	popl %edx\n"			\
+	"	popl %esi\n"			\
+	"	popl %edi\n"			\
+	"	popl %ebp\n"			\
+	"	popl %eax\n"			\
+	/* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
+	"	addl $24, %esp\n"
+#endif
+
 /*
  * When a retprobed function returns, this code saves registers and
  * calls trampoline_handler() runs, which calls the kretprobe's handler.
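SAVE_REGS_STRING lays out a struct pt_regs image on the stack: on x86-64 the 15 pushq's plus the 24 skipped bytes (orig_ax, ip, cs) under the caller's pushfq/pushq %rsp reproduce the pt_regs field order, which is where the magic offsets in the trampolines below come from. A compile-time sketch of that correspondence (x86-64 only, assuming the usual pt_regs layout of this era):

	#include <linux/kernel.h>
	#include <linux/stddef.h>
	#include <asm/ptrace.h>

	/* Sketch: "movq 144(%rsp), %rdx" and "movq %rax, 152(%rsp)" in
	 * the trampolines are just pt_regs field offsets. */
	static inline void check_trampoline_offsets(void)
	{
		BUILD_BUG_ON(offsetof(struct pt_regs, flags) != 144);
		BUILD_BUG_ON(offsetof(struct pt_regs, sp) != 152);
	}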
@@ -593,65 +666,16 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
 			/* We don't bother saving the ss register */
 			"	pushq %rsp\n"
 			"	pushfq\n"
-			/*
-			 * Skip cs, ip, orig_ax.
-			 * trampoline_handler() will plug in these values
-			 */
-			"	subq $24, %rsp\n"
-			"	pushq %rdi\n"
-			"	pushq %rsi\n"
-			"	pushq %rdx\n"
-			"	pushq %rcx\n"
-			"	pushq %rax\n"
-			"	pushq %r8\n"
-			"	pushq %r9\n"
-			"	pushq %r10\n"
-			"	pushq %r11\n"
-			"	pushq %rbx\n"
-			"	pushq %rbp\n"
-			"	pushq %r12\n"
-			"	pushq %r13\n"
-			"	pushq %r14\n"
-			"	pushq %r15\n"
+			SAVE_REGS_STRING
 			"	movq %rsp, %rdi\n"
 			"	call trampoline_handler\n"
 			/* Replace saved sp with true return address. */
 			"	movq %rax, 152(%rsp)\n"
-			"	popq %r15\n"
-			"	popq %r14\n"
-			"	popq %r13\n"
-			"	popq %r12\n"
-			"	popq %rbp\n"
-			"	popq %rbx\n"
-			"	popq %r11\n"
-			"	popq %r10\n"
-			"	popq %r9\n"
-			"	popq %r8\n"
-			"	popq %rax\n"
-			"	popq %rcx\n"
-			"	popq %rdx\n"
-			"	popq %rsi\n"
-			"	popq %rdi\n"
-			/* Skip orig_ax, ip, cs */
-			"	addq $24, %rsp\n"
+			RESTORE_REGS_STRING
 			"	popfq\n"
 #else
 			"	pushf\n"
-			/*
-			 * Skip cs, ip, orig_ax and gs.
-			 * trampoline_handler() will plug in these values
-			 */
-			"	subl $16, %esp\n"
-			"	pushl %fs\n"
-			"	pushl %es\n"
-			"	pushl %ds\n"
-			"	pushl %eax\n"
-			"	pushl %ebp\n"
-			"	pushl %edi\n"
-			"	pushl %esi\n"
-			"	pushl %edx\n"
-			"	pushl %ecx\n"
-			"	pushl %ebx\n"
+			SAVE_REGS_STRING
 			"	movl %esp, %eax\n"
 			"	call trampoline_handler\n"
 			/* Move flags to cs */
@@ -659,15 +683,7 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
 			"	movl %edx, 52(%esp)\n"
 			/* Replace saved flags with true return address. */
 			"	movl %eax, 56(%esp)\n"
-			"	popl %ebx\n"
-			"	popl %ecx\n"
-			"	popl %edx\n"
-			"	popl %esi\n"
-			"	popl %edi\n"
-			"	popl %ebp\n"
-			"	popl %eax\n"
-			/* Skip ds, es, fs, gs, orig_ax and ip */
-			"	addl $24, %esp\n"
+			RESTORE_REGS_STRING
 			"	popf\n"
 #endif
 			"	ret\n");
@@ -835,8 +851,8 @@ static void __kprobes resume_execution(struct kprobe *p,
 		 * These instructions can be executed directly if it
 		 * jumps back to correct address.
 		 */
-		set_jmp_op((void *)regs->ip,
-			   (void *)orig_ip + (regs->ip - copy_ip));
+		synthesize_reljump((void *)regs->ip,
+				   (void *)orig_ip + (regs->ip - copy_ip));
 		p->ainsn.boostable = 1;
 	} else {
 		p->ainsn.boostable = -1;
@@ -851,7 +867,7 @@ no_change:
 
 /*
  * Interrupts are disabled on entry as trap1 is an interrupt gate and they
- * remain disabled thoroughout this function.
+ * remain disabled throughout this function.
  */
 static int __kprobes post_kprobe_handler(struct pt_regs *regs)
 {
@@ -967,8 +983,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 		ret = NOTIFY_STOP;
 		break;
 	case DIE_DEBUG:
-		if (post_kprobe_handler(args->regs))
+		if (post_kprobe_handler(args->regs)) {
+			/*
+			 * Reset the BS bit in dr6 (pointed by args->err) to
+			 * denote completion of processing
+			 */
+			(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
 			ret = NOTIFY_STOP;
+		}
 		break;
 	case DIE_GPF:
 		/*
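DIE_DEBUG notifiers receive the dr6 value by reference, smuggled through args->err as a pointer (hence the ERR_PTR() cast above). DR_STEP from <asm/debugreg.h> is the single-step (BS) bit; clearing it tells the rest of the #DB machinery this trap has been consumed:

	/* Sketch of the DIE_DEBUG convention used above: */
	unsigned long *dr6_p = (unsigned long *)ERR_PTR(args->err);

	*dr6_p &= ~DR_STEP;	/* DR_STEP == 0x4000: clear dr6.BS (bit 14) */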
@@ -1057,6 +1079,358 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
+
+#ifdef CONFIG_OPTPROBES
+
+/* Insert a call instruction at address 'from', which calls address 'to'.*/
+static void __kprobes synthesize_relcall(void *from, void *to)
+{
+	__synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
+}
+
+/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
+static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
+					  unsigned long val)
+{
+#ifdef CONFIG_X86_64
+	*addr++ = 0x48;
+	*addr++ = 0xbf;
+#else
+	*addr++ = 0xb8;
+#endif
+	*(unsigned long *)addr = val;
+}
+
+void __kprobes kprobes_optinsn_template_holder(void)
+{
+	asm volatile (
+			".global optprobe_template_entry\n"
+			"optprobe_template_entry: \n"
+#ifdef CONFIG_X86_64
+			/* We don't bother saving the ss register */
+			"	pushq %rsp\n"
+			"	pushfq\n"
+			SAVE_REGS_STRING
+			"	movq %rsp, %rsi\n"
+			".global optprobe_template_val\n"
+			"optprobe_template_val: \n"
+			ASM_NOP5
+			ASM_NOP5
+			".global optprobe_template_call\n"
+			"optprobe_template_call: \n"
+			ASM_NOP5
+			/* Move flags to rsp */
+			"	movq 144(%rsp), %rdx\n"
+			"	movq %rdx, 152(%rsp)\n"
+			RESTORE_REGS_STRING
+			/* Skip flags entry */
+			"	addq $8, %rsp\n"
+			"	popfq\n"
+#else /* CONFIG_X86_32 */
+			"	pushf\n"
+			SAVE_REGS_STRING
+			"	movl %esp, %edx\n"
+			".global optprobe_template_val\n"
+			"optprobe_template_val: \n"
+			ASM_NOP5
+			".global optprobe_template_call\n"
+			"optprobe_template_call: \n"
+			ASM_NOP5
+			RESTORE_REGS_STRING
+			"	addl $4, %esp\n"	/* skip cs */
+			"	popf\n"
+#endif
+			".global optprobe_template_end\n"
+			"optprobe_template_end: \n");
+}
+
+#define TMPL_MOVE_IDX \
+	((long)&optprobe_template_val - (long)&optprobe_template_entry)
+#define TMPL_CALL_IDX \
+	((long)&optprobe_template_call - (long)&optprobe_template_entry)
+#define TMPL_END_IDX \
+	((long)&optprobe_template_end - (long)&optprobe_template_entry)
+
+#define INT3_SIZE sizeof(kprobe_opcode_t)
+
+/* Optimized kprobe call back function: called from optinsn */
+static void __kprobes optimized_callback(struct optimized_kprobe *op,
+					 struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	preempt_disable();
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(&op->kp);
+	} else {
+		/* Save skipped registers */
+#ifdef CONFIG_X86_64
+		regs->cs = __KERNEL_CS;
+#else
+		regs->cs = __KERNEL_CS | get_kernel_rpl();
+		regs->gs = 0;
+#endif
+		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
+		regs->orig_ax = ~0UL;
+
+		__get_cpu_var(current_kprobe) = &op->kp;
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		opt_pre_handler(&op->kp, regs);
+		__get_cpu_var(current_kprobe) = NULL;
+	}
+	preempt_enable_no_resched();
+}
+
+static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
+{
+	int len = 0, ret;
+
+	while (len < RELATIVEJUMP_SIZE) {
+		ret = __copy_instruction(dest + len, src + len, 1);
+		if (!ret || !can_boost(dest + len))
+			return -EINVAL;
+		len += ret;
+	}
+	/* Check whether the address range is reserved */
+	if (ftrace_text_reserved(src, src + len - 1) ||
+	    alternatives_text_reserved(src, src + len - 1))
+		return -EBUSY;
+
+	return len;
+}
+
+/* Check whether insn is indirect jump */
+static int __kprobes insn_is_indirect_jump(struct insn *insn)
+{
+	return ((insn->opcode.bytes[0] == 0xff &&
+		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
+		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
+}
+
+/* Check whether insn jumps into specified address range */
+static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
+{
+	unsigned long target = 0;
+
+	switch (insn->opcode.bytes[0]) {
+	case 0xe0:	/* loopne */
+	case 0xe1:	/* loope */
+	case 0xe2:	/* loop */
+	case 0xe3:	/* jcxz */
+	case 0xe9:	/* near relative jump */
+	case 0xeb:	/* short relative jump */
+		break;
+	case 0x0f:
+		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
+			break;
+		return 0;
+	default:
+		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
+			break;
+		return 0;
+	}
+	target = (unsigned long)insn->next_byte + insn->immediate.value;
+
+	return (start <= target && target <= start + len);
+}
+
+/* Decode whole function to ensure any instructions don't jump into target */
+static int __kprobes can_optimize(unsigned long paddr)
+{
+	int ret;
+	unsigned long addr, size = 0, offset = 0;
+	struct insn insn;
+	kprobe_opcode_t buf[MAX_INSN_SIZE];
+	/* Dummy buffers for lookup_symbol_attrs */
+	static char __dummy_buf[KSYM_NAME_LEN];
+
+	/* Lookup symbol including addr */
+	if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf))
+		return 0;
+
+	/* Check there is enough space for a relative jump. */
+	if (size - offset < RELATIVEJUMP_SIZE)
+		return 0;
+
+	/* Decode instructions */
+	addr = paddr - offset;
+	while (addr < paddr - offset + size) { /* Decode until function end */
+		if (search_exception_tables(addr))
+			/*
+			 * Since some fixup code will jumps into this function,
+			 * we can't optimize kprobe in this function.
+			 */
+			return 0;
+		kernel_insn_init(&insn, (void *)addr);
+		insn_get_opcode(&insn);
+		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+			ret = recover_probed_instruction(buf, addr);
+			if (ret)
+				return 0;
+			kernel_insn_init(&insn, buf);
+		}
+		insn_get_length(&insn);
+		/* Recover address */
+		insn.kaddr = (void *)addr;
+		insn.next_byte = (void *)(addr + insn.length);
+		/* Check any instructions don't jump into target */
+		if (insn_is_indirect_jump(&insn) ||
+		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
+					 RELATIVE_ADDR_SIZE))
+			return 0;
+		addr += insn.length;
+	}
+
+	return 1;
+}
+
+/* Check optimized_kprobe can actually be optimized. */
+int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
+{
+	int i;
+	struct kprobe *p;
+
+	for (i = 1; i < op->optinsn.size; i++) {
+		p = get_kprobe(op->kp.addr + i);
+		if (p && !kprobe_disabled(p))
+			return -EEXIST;
+	}
+
+	return 0;
+}
+
+/* Check the addr is within the optimized instructions. */
+int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
+					   unsigned long addr)
+{
+	return ((unsigned long)op->kp.addr <= addr &&
+		(unsigned long)op->kp.addr + op->optinsn.size > addr);
+}
+
+/* Free optimized instruction slot */
+static __kprobes
+void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
+{
+	if (op->optinsn.insn) {
+		free_optinsn_slot(op->optinsn.insn, dirty);
+		op->optinsn.insn = NULL;
+		op->optinsn.size = 0;
+	}
+}
+
+void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+	__arch_remove_optimized_kprobe(op, 1);
+}
+
+/*
+ * Copy replacing target instructions
+ * Target instructions MUST be relocatable (checked inside)
+ */
+int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
+{
+	u8 *buf;
+	int ret;
+	long rel;
+
+	if (!can_optimize((unsigned long)op->kp.addr))
+		return -EILSEQ;
+
+	op->optinsn.insn = get_optinsn_slot();
+	if (!op->optinsn.insn)
+		return -ENOMEM;
+
+	/*
+	 * Verify if the address gap is in 2GB range, because this uses
+	 * a relative jump.
+	 */
+	rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+	if (abs(rel) > 0x7fffffff)
+		return -ERANGE;
+
+	buf = (u8 *)op->optinsn.insn;
+
+	/* Copy instructions into the out-of-line buffer */
+	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
+	if (ret < 0) {
+		__arch_remove_optimized_kprobe(op, 0);
+		return ret;
+	}
+	op->optinsn.size = ret;
+
+	/* Copy arch-dep-instance from template */
+	memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
+
+	/* Set probe information */
+	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
+
+	/* Set probe function call */
+	synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
+
+	/* Set returning jmp instruction at the tail of out-of-line buffer */
+	synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
+			   (u8 *)op->kp.addr + op->optinsn.size);
+
+	flush_icache_range((unsigned long) buf,
+			   (unsigned long) buf + TMPL_END_IDX +
+			   op->optinsn.size + RELATIVEJUMP_SIZE);
+	return 0;
+}
+
+/* Replace a breakpoint (int3) with a relative jump.  */
+int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
+{
+	unsigned char jmp_code[RELATIVEJUMP_SIZE];
+	s32 rel = (s32)((long)op->optinsn.insn -
+			((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
+	/* Backup instructions which will be replaced by jump address */
+	memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
+	       RELATIVE_ADDR_SIZE);
+
+	jmp_code[0] = RELATIVEJUMP_OPCODE;
+	*(s32 *)(&jmp_code[1]) = rel;
+
+	/*
+	 * text_poke_smp doesn't support NMI/MCE code modifying.
+	 * However, since kprobes itself also doesn't support NMI/MCE
+	 * code probing, it's not a problem.
+	 */
+	text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
+	return 0;
+}
+
+/* Replace a relative jump with a breakpoint (int3).  */
+void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+	u8 buf[RELATIVEJUMP_SIZE];
+
+	/* Set int3 to first byte for kprobes */
+	buf[0] = BREAKPOINT_INSTRUCTION;
+	memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+	text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
+}
+
+static int __kprobes setup_detour_execution(struct kprobe *p,
+					    struct pt_regs *regs,
+					    int reenter)
+{
+	struct optimized_kprobe *op;
+
+	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
+		/* This kprobe is really able to run optimized path. */
+		op = container_of(p, struct optimized_kprobe, kp);
+		/* Detour through copied instructions */
+		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
+		if (!reenter)
+			reset_current_kprobe();
+		preempt_enable_no_resched();
+		return 1;
+	}
+	return 0;
+}
+#endif
+
 int __init arch_init_kprobes(void)
 {
 	return 0;
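Taken together, arch_prepare_optimized_kprobe() builds the detour buffer and arch_optimize_kprobe() patches the probe site into a single relative jump; RELATIVEJUMP_SIZE is 5 (one opcode byte plus a 4-byte displacement, RELATIVE_ADDR_SIZE), which is also why the slot must sit within +/-2GB of the probe. A layout sketch of the pieces assembled above:

	/*
	 * op->optinsn.insn, the out-of-line detour buffer:
	 *
	 *   +0               template entry: save registers
	 *   +TMPL_MOVE_IDX   mov $op, %rdi (x86-64) / %eax (x86-32)
	 *   +TMPL_CALL_IDX   call optimized_callback
	 *                    ...restore registers
	 *   +TMPL_END_IDX    copied instructions (op->optinsn.size bytes)
	 *   tail             e9 rel32: jmp back to kp.addr + optinsn.size
	 *
	 * arch_optimize_kprobe() rewrites the probe site itself into
	 * "e9 rel32" targeting the buffer start, while
	 * setup_detour_execution() (the int3 path, used during the
	 * optimize/unoptimize transitions) enters at +TMPL_END_IDX,
	 * running only the copied instructions plus the return jump.
	 */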