diff options
Diffstat (limited to 'arch/x86/kernel/kprobes.c')
-rw-r--r-- | arch/x86/kernel/kprobes.c | 243 |
1 files changed, 114 insertions, 129 deletions
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b000..3fe86d706a14 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -48,12 +48,15 @@ | |||
48 | #include <linux/preempt.h> | 48 | #include <linux/preempt.h> |
49 | #include <linux/module.h> | 49 | #include <linux/module.h> |
50 | #include <linux/kdebug.h> | 50 | #include <linux/kdebug.h> |
51 | #include <linux/kallsyms.h> | ||
51 | 52 | ||
52 | #include <asm/cacheflush.h> | 53 | #include <asm/cacheflush.h> |
53 | #include <asm/desc.h> | 54 | #include <asm/desc.h> |
54 | #include <asm/pgtable.h> | 55 | #include <asm/pgtable.h> |
55 | #include <asm/uaccess.h> | 56 | #include <asm/uaccess.h> |
56 | #include <asm/alternative.h> | 57 | #include <asm/alternative.h> |
58 | #include <asm/insn.h> | ||
59 | #include <asm/debugreg.h> | ||
57 | 60 | ||
58 | void jprobe_return_end(void); | 61 | void jprobe_return_end(void); |
59 | 62 | ||
@@ -106,50 +109,6 @@ static const u32 twobyte_is_boostable[256 / 32] = { | |||
106 | /* ----------------------------------------------- */ | 109 | /* ----------------------------------------------- */ |
107 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 110 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
108 | }; | 111 | }; |
109 | static const u32 onebyte_has_modrm[256 / 32] = { | ||
110 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
111 | /* ----------------------------------------------- */ | ||
112 | W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */ | ||
113 | W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */ | ||
114 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */ | ||
115 | W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */ | ||
116 | W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ | ||
117 | W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ | ||
118 | W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */ | ||
119 | W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */ | ||
120 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
121 | W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */ | ||
122 | W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */ | ||
123 | W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */ | ||
124 | W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */ | ||
125 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
126 | W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */ | ||
127 | W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */ | ||
128 | /* ----------------------------------------------- */ | ||
129 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
130 | }; | ||
131 | static const u32 twobyte_has_modrm[256 / 32] = { | ||
132 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
133 | /* ----------------------------------------------- */ | ||
134 | W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */ | ||
135 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */ | ||
136 | W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */ | ||
137 | W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */ | ||
138 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */ | ||
139 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */ | ||
140 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */ | ||
141 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */ | ||
142 | W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */ | ||
143 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */ | ||
144 | W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */ | ||
145 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */ | ||
146 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */ | ||
147 | W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */ | ||
148 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */ | ||
149 | W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */ | ||
150 | /* ----------------------------------------------- */ | ||
151 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
152 | }; | ||
153 | #undef W | 112 | #undef W |
154 | 113 | ||
155 | struct kretprobe_blackpoint kretprobe_blacklist[] = { | 114 | struct kretprobe_blackpoint kretprobe_blacklist[] = { |
@@ -244,6 +203,75 @@ retry: | |||
244 | } | 203 | } |
245 | } | 204 | } |
246 | 205 | ||
206 | /* Recover the probed instruction at addr for further analysis. */ | ||
207 | static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | ||
208 | { | ||
209 | struct kprobe *kp; | ||
210 | kp = get_kprobe((void *)addr); | ||
211 | if (!kp) | ||
212 | return -EINVAL; | ||
213 | |||
214 | /* | ||
215 | * Basically, kp->ainsn.insn holds a copy of the original instruction. | ||
216 | * However, a RIP-relative instruction cannot be single-stepped | ||
217 | * at a different address, so fix_riprel() tweaks the displacement of | ||
218 | * that copy. In that case, we can't recover the instruction | ||
219 | * from kp->ainsn.insn. | ||
220 | * | ||
221 | * On the other hand, kp->opcode has a copy of the first byte of | ||
222 | * the probed instruction, which is overwritten by int3. Since | ||
223 | * the instruction at kp->addr is not modified by kprobes except | ||
224 | * for the first byte, we can recover the original instruction | ||
225 | * from it and kp->opcode. | ||
226 | */ | ||
227 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | ||
228 | buf[0] = kp->opcode; | ||
229 | return 0; | ||
230 | } | ||
231 | |||
232 | /* Dummy buffer for kallsyms_lookup() */ | ||
233 | static char __dummy_buf[KSYM_NAME_LEN]; | ||
234 | |||
235 | /* Check if paddr is at an instruction boundary */ | ||
236 | static int __kprobes can_probe(unsigned long paddr) | ||
237 | { | ||
238 | int ret; | ||
239 | unsigned long addr, offset = 0; | ||
240 | struct insn insn; | ||
241 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
242 | |||
243 | if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) | ||
244 | return 0; | ||
245 | |||
246 | /* Decode instructions */ | ||
247 | addr = paddr - offset; | ||
248 | while (addr < paddr) { | ||
249 | kernel_insn_init(&insn, (void *)addr); | ||
250 | insn_get_opcode(&insn); | ||
251 | |||
252 | /* | ||
253 | * Check if the instruction has been modified by another | ||
254 | * kprobe, in which case we replace the breakpoint by the | ||
255 | * original instruction in our buffer. | ||
256 | */ | ||
257 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
258 | ret = recover_probed_instruction(buf, addr); | ||
259 | if (ret) | ||
260 | /* | ||
261 | * Another debugging subsystem might have inserted | ||
262 | * this breakpoint. In that case, we can't | ||
263 | * recover the original instruction. | ||
264 | */ | ||
265 | return 0; | ||
266 | kernel_insn_init(&insn, buf); | ||
267 | } | ||
268 | insn_get_length(&insn); | ||
269 | addr += insn.length; | ||
270 | } | ||
271 | |||
272 | return (addr == paddr); | ||
273 | } | ||
274 | |||
247 | /* | 275 | /* |
248 | * Returns non-zero if opcode modifies the interrupt flag. | 276 | * Returns non-zero if opcode modifies the interrupt flag. |
249 | */ | 277 | */ |
@@ -277,68 +305,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
277 | static void __kprobes fix_riprel(struct kprobe *p) | 305 | static void __kprobes fix_riprel(struct kprobe *p) |
278 | { | 306 | { |
279 | #ifdef CONFIG_X86_64 | 307 | #ifdef CONFIG_X86_64 |
280 | u8 *insn = p->ainsn.insn; | 308 | struct insn insn; |
281 | s64 disp; | 309 | kernel_insn_init(&insn, p->ainsn.insn); |
282 | int need_modrm; | ||
283 | |||
284 | /* Skip legacy instruction prefixes. */ | ||
285 | while (1) { | ||
286 | switch (*insn) { | ||
287 | case 0x66: | ||
288 | case 0x67: | ||
289 | case 0x2e: | ||
290 | case 0x3e: | ||
291 | case 0x26: | ||
292 | case 0x64: | ||
293 | case 0x65: | ||
294 | case 0x36: | ||
295 | case 0xf0: | ||
296 | case 0xf3: | ||
297 | case 0xf2: | ||
298 | ++insn; | ||
299 | continue; | ||
300 | } | ||
301 | break; | ||
302 | } | ||
303 | 310 | ||
304 | /* Skip REX instruction prefix. */ | 311 | if (insn_rip_relative(&insn)) { |
305 | if (is_REX_prefix(insn)) | 312 | s64 newdisp; |
306 | ++insn; | 313 | u8 *disp; |
307 | 314 | insn_get_displacement(&insn); | |
308 | if (*insn == 0x0f) { | 315 | /* |
309 | /* Two-byte opcode. */ | 316 | * The copied instruction uses the %rip-relative addressing |
310 | ++insn; | 317 | * mode. Adjust the displacement for the difference between |
311 | need_modrm = test_bit(*insn, | 318 | * the original location of this instruction and the location |
312 | (unsigned long *)twobyte_has_modrm); | 319 | * of the copy that will actually be run. The tricky bit here |
313 | } else | 320 | * is making sure that the sign extension happens correctly in |
314 | /* One-byte opcode. */ | 321 | * this calculation, since we need a signed 32-bit result to |
315 | need_modrm = test_bit(*insn, | 322 | * be sign-extended to 64 bits when it's added to the %rip |
316 | (unsigned long *)onebyte_has_modrm); | 323 | * value and yield the same 64-bit result that the sign- |
317 | 324 | * extension of the original signed 32-bit displacement would | |
318 | if (need_modrm) { | 325 | * have given. |
319 | u8 modrm = *++insn; | 326 | */ |
320 | if ((modrm & 0xc7) == 0x05) { | 327 | newdisp = (u8 *) p->addr + (s64) insn.displacement.value - |
321 | /* %rip+disp32 addressing mode */ | 328 | (u8 *) p->ainsn.insn; |
322 | /* Displacement follows ModRM byte. */ | 329 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ |
323 | ++insn; | 330 | disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); |
324 | /* | 331 | *(s32 *) disp = (s32) newdisp; |
325 | * The copied instruction uses the %rip-relative | ||
326 | * addressing mode. Adjust the displacement for the | ||
327 | * difference between the original location of this | ||
328 | * instruction and the location of the copy that will | ||
329 | * actually be run. The tricky bit here is making sure | ||
330 | * that the sign extension happens correctly in this | ||
331 | * calculation, since we need a signed 32-bit result to | ||
332 | * be sign-extended to 64 bits when it's added to the | ||
333 | * %rip value and yield the same 64-bit result that the | ||
334 | * sign-extension of the original signed 32-bit | ||
335 | * displacement would have given. | ||
336 | */ | ||
337 | disp = (u8 *) p->addr + *((s32 *) insn) - | ||
338 | (u8 *) p->ainsn.insn; | ||
339 | BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ | ||
340 | *(s32 *)insn = (s32) disp; | ||
341 | } | ||
342 | } | 332 | } |
343 | #endif | 333 | #endif |
344 | } | 334 | } |
@@ -359,6 +349,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) | |||
359 | 349 | ||
360 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 350 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
361 | { | 351 | { |
352 | if (!can_probe((unsigned long)p->addr)) | ||
353 | return -EILSEQ; | ||
362 | /* insn: must be on special executable page on x86. */ | 354 | /* insn: must be on special executable page on x86. */ |
363 | p->ainsn.insn = get_insn_slot(); | 355 | p->ainsn.insn = get_insn_slot(); |
364 | if (!p->ainsn.insn) | 356 | if (!p->ainsn.insn) |
@@ -472,17 +464,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
472 | { | 464 | { |
473 | switch (kcb->kprobe_status) { | 465 | switch (kcb->kprobe_status) { |
474 | case KPROBE_HIT_SSDONE: | 466 | case KPROBE_HIT_SSDONE: |
475 | #ifdef CONFIG_X86_64 | ||
476 | /* TODO: Provide re-entrancy from post_kprobes_handler() and | ||
477 | * avoid exception stack corruption while single-stepping on | ||
478 | * the instruction of the new probe. | ||
479 | */ | ||
480 | arch_disarm_kprobe(p); | ||
481 | regs->ip = (unsigned long)p->addr; | ||
482 | reset_current_kprobe(); | ||
483 | preempt_enable_no_resched(); | ||
484 | break; | ||
485 | #endif | ||
486 | case KPROBE_HIT_ACTIVE: | 467 | case KPROBE_HIT_ACTIVE: |
487 | save_previous_kprobe(kcb); | 468 | save_previous_kprobe(kcb); |
488 | set_current_kprobe(p, regs, kcb); | 469 | set_current_kprobe(p, regs, kcb); |
@@ -491,18 +472,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
491 | kcb->kprobe_status = KPROBE_REENTER; | 472 | kcb->kprobe_status = KPROBE_REENTER; |
492 | break; | 473 | break; |
493 | case KPROBE_HIT_SS: | 474 | case KPROBE_HIT_SS: |
494 | if (p == kprobe_running()) { | 475 | /* A probe has been hit in the codepath leading up to, or just |
495 | regs->flags &= ~X86_EFLAGS_TF; | 476 | * after, single-stepping of a probed instruction. This entire |
496 | regs->flags |= kcb->kprobe_saved_flags; | 477 | * codepath should strictly reside in .kprobes.text section. |
497 | return 0; | 478 | * Raise a BUG or we'll continue in an endless reentering loop |
498 | } else { | 479 | * and eventually a stack overflow. |
499 | /* A probe has been hit in the codepath leading up | 480 | */ |
500 | * to, or just after, single-stepping of a probed | 481 | printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", |
501 | * instruction. This entire codepath should strictly | 482 | p->addr); |
502 | * reside in .kprobes.text section. Raise a warning | 483 | dump_kprobe(p); |
503 | * to highlight this peculiar case. | 484 | BUG(); |
504 | */ | ||
505 | } | ||
506 | default: | 485 | default: |
507 | /* impossible cases */ | 486 | /* impossible cases */ |
508 | WARN_ON(1); | 487 | WARN_ON(1); |
@@ -967,8 +946,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | |||
967 | ret = NOTIFY_STOP; | 946 | ret = NOTIFY_STOP; |
968 | break; | 947 | break; |
969 | case DIE_DEBUG: | 948 | case DIE_DEBUG: |
970 | if (post_kprobe_handler(args->regs)) | 949 | if (post_kprobe_handler(args->regs)) { |
950 | /* | ||
951 | * Reset the BS bit in dr6 (pointed to by args->err) to | ||
952 | * denote completion of processing. | ||
953 | */ | ||
954 | (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; | ||
971 | ret = NOTIFY_STOP; | 955 | ret = NOTIFY_STOP; |
956 | } | ||
972 | break; | 957 | break; |
973 | case DIE_GPF: | 958 | case DIE_GPF: |
974 | /* | 959 | /* |