about summary refs log tree commit diff stats
path: root/arch/x86/kernel/kprobes.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/kprobes.c')
-rw-r--r-- arch/x86/kernel/kprobes.c 243
1 files changed, 114 insertions, 129 deletions
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7b5169d2b000..3fe86d706a14 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -48,12 +48,15 @@
48#include <linux/preempt.h> 48#include <linux/preempt.h>
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/kdebug.h> 50#include <linux/kdebug.h>
51#include <linux/kallsyms.h>
51 52
52#include <asm/cacheflush.h> 53#include <asm/cacheflush.h>
53#include <asm/desc.h> 54#include <asm/desc.h>
54#include <asm/pgtable.h> 55#include <asm/pgtable.h>
55#include <asm/uaccess.h> 56#include <asm/uaccess.h>
56#include <asm/alternative.h> 57#include <asm/alternative.h>
58#include <asm/insn.h>
59#include <asm/debugreg.h>
57 60
58void jprobe_return_end(void); 61void jprobe_return_end(void);
59 62
@@ -106,50 +109,6 @@ static const u32 twobyte_is_boostable[256 / 32] = {
106 /* ----------------------------------------------- */ 109 /* ----------------------------------------------- */
107 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 110 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
108}; 111};
/*
 * Bitmap over the 256 one-byte opcodes. fix_riprel() tests the bit for an
 * opcode with test_bit() to decide whether a ModRM byte follows it.
 * Rows are joined pairwise with '|', so each u32 element covers two
 * 16-opcode rows.
 * NOTE(review): W() is defined outside this view; it presumably shifts the
 * 16 flags of a row into their bit positions - confirm.
 */
109static const u32 onebyte_has_modrm[256 / 32] = {
110 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
111 /* ----------------------------------------------- */
112 W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
113 W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
114 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
115 W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
116 W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
117 W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
118 W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
119 W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
120 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
121 W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
122 W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
123 W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
124 W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
125 W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
126 W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
127 W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */
128 /* ----------------------------------------------- */
129 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
130};
/*
 * Bitmap over the 256 two-byte opcodes, indexed by the byte that follows
 * the 0x0f escape. fix_riprel() tests the bit with test_bit() to decide
 * whether a ModRM byte follows the opcode.
 * Rows are joined pairwise with '|', so each u32 element covers two
 * 16-opcode rows. The trailing comment on each row names the last opcode
 * of that row (0f, 1f, ...), not the first.
 * NOTE(review): W() is defined outside this view; it presumably shifts the
 * 16 flags of a row into their bit positions - confirm.
 */
131static const u32 twobyte_has_modrm[256 / 32] = {
132 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
133 /* ----------------------------------------------- */
134 W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
135 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
136 W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
137 W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
138 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
139 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
140 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
141 W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
142 W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
143 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
144 W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
145 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
146 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
147 W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
148 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
149 W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */
150 /* ----------------------------------------------- */
151 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
152};
153#undef W 112#undef W
154 113
155struct kretprobe_blackpoint kretprobe_blacklist[] = { 114struct kretprobe_blackpoint kretprobe_blacklist[] = {
@@ -244,6 +203,75 @@ retry:
244 } 203 }
245} 204}
246 205
206/*
 * Recover the probed instruction at addr for further analysis.
 *
 * buf:  destination; must have room for MAX_INSN_SIZE kprobe_opcode_t
 *       elements, because that many are always copied.
 * addr: address that is looked up with get_kprobe().
 *
 * Returns 0 on success, or -EINVAL if get_kprobe() returns no kprobe for
 * addr (buf is left untouched in that case).
 */
207static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
208{
209 struct kprobe *kp;
210 kp = get_kprobe((void *)addr);
211 if (!kp)
212 return -EINVAL;
213
214 /*
215 * Normally kp->ainsn.insn holds a copy of the original instruction.
216 * However, a RIP-relative instruction cannot be single-stepped
217 * from a different location unchanged, so fix_riprel() rewrites
218 * the displacement of that copy. In that case the original
219 * instruction cannot be recovered from kp->ainsn.insn.
220 *
221 * On the other hand, kp->opcode holds a copy of the first byte of
222 * the probed instruction, the byte that int3 overwrote. Since
223 * kprobes modifies nothing at kp->addr except that first byte,
224 * the original instruction can be rebuilt from the bytes at
225 * kp->addr plus kp->opcode.
226 */
227 memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
228 buf[0] = kp->opcode; /* put back the byte replaced by the breakpoint */
229 return 0;
230}
231
232/* Dummy buffer for kallsyms_lookup() */
233static char __dummy_buf[KSYM_NAME_LEN];
234
235/*
 * Check if paddr is at an instruction boundary.
 *
 * Decodes instructions one by one, starting at paddr minus the offset
 * reported by kallsyms_lookup() (presumably the start of the enclosing
 * symbol - confirm), until the decode address reaches or passes paddr.
 *
 * Returns non-zero if decoding lands exactly on paddr. Returns 0 if it
 * overshoots paddr, if kallsyms_lookup() fails for paddr, or if a
 * breakpoint byte is met that recover_probed_instruction() cannot recover.
 */
236static int __kprobes can_probe(unsigned long paddr)
237{
238 int ret;
239 unsigned long addr, offset = 0;
240 struct insn insn;
241 kprobe_opcode_t buf[MAX_INSN_SIZE];
242
243 if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
244 return 0;
245
246 /* Decode instructions one at a time, from addr up to paddr */
247 addr = paddr - offset;
248 while (addr < paddr) {
249 kernel_insn_init(&insn, (void *)addr);
250 insn_get_opcode(&insn);
251
252 /*
253 * Check if the instruction has been modified by another
254 * kprobe, in which case we replace the breakpoint with the
255 * original instruction in our buffer.
256 */
257 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
258 ret = recover_probed_instruction(buf, addr);
259 if (ret)
260 /*
261 * Another debugging subsystem may have inserted
262 * this breakpoint. In that case, we can't
263 * recover it.
264 */
265 return 0;
266 kernel_insn_init(&insn, buf); /* decode the recovered copy instead */
267 }
268 insn_get_length(&insn);
269 addr += insn.length; /* advance to the next instruction */
270 }
271
272 return (addr == paddr); /* false when the last decode stepped past paddr */
273}
274
247/* 275/*
248 * Returns non-zero if opcode modifies the interrupt flag. 276 * Returns non-zero if opcode modifies the interrupt flag.
249 */ 277 */
@@ -277,68 +305,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
277static void __kprobes fix_riprel(struct kprobe *p) 305static void __kprobes fix_riprel(struct kprobe *p)
278{ 306{
279#ifdef CONFIG_X86_64 307#ifdef CONFIG_X86_64
280 u8 *insn = p->ainsn.insn; 308 struct insn insn;
281 s64 disp; 309 kernel_insn_init(&insn, p->ainsn.insn);
282 int need_modrm;
283
284 /* Skip legacy instruction prefixes. */
285 while (1) {
286 switch (*insn) {
287 case 0x66:
288 case 0x67:
289 case 0x2e:
290 case 0x3e:
291 case 0x26:
292 case 0x64:
293 case 0x65:
294 case 0x36:
295 case 0xf0:
296 case 0xf3:
297 case 0xf2:
298 ++insn;
299 continue;
300 }
301 break;
302 }
303 310
304 /* Skip REX instruction prefix. */ 311 if (insn_rip_relative(&insn)) {
305 if (is_REX_prefix(insn)) 312 s64 newdisp;
306 ++insn; 313 u8 *disp;
307 314 insn_get_displacement(&insn);
308 if (*insn == 0x0f) { 315 /*
309 /* Two-byte opcode. */ 316 * The copied instruction uses the %rip-relative addressing
310 ++insn; 317 * mode. Adjust the displacement for the difference between
311 need_modrm = test_bit(*insn, 318 * the original location of this instruction and the location
312 (unsigned long *)twobyte_has_modrm); 319 * of the copy that will actually be run. The tricky bit here
313 } else 320 * is making sure that the sign extension happens correctly in
314 /* One-byte opcode. */ 321 * this calculation, since we need a signed 32-bit result to
315 need_modrm = test_bit(*insn, 322 * be sign-extended to 64 bits when it's added to the %rip
316 (unsigned long *)onebyte_has_modrm); 323 * value and yield the same 64-bit result that the sign-
317 324 * extension of the original signed 32-bit displacement would
318 if (need_modrm) { 325 * have given.
319 u8 modrm = *++insn; 326 */
320 if ((modrm & 0xc7) == 0x05) { 327 newdisp = (u8 *) p->addr + (s64) insn.displacement.value -
321 /* %rip+disp32 addressing mode */ 328 (u8 *) p->ainsn.insn;
322 /* Displacement follows ModRM byte. */ 329 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
323 ++insn; 330 disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn);
324 /* 331 *(s32 *) disp = (s32) newdisp;
325 * The copied instruction uses the %rip-relative
326 * addressing mode. Adjust the displacement for the
327 * difference between the original location of this
328 * instruction and the location of the copy that will
329 * actually be run. The tricky bit here is making sure
330 * that the sign extension happens correctly in this
331 * calculation, since we need a signed 32-bit result to
332 * be sign-extended to 64 bits when it's added to the
333 * %rip value and yield the same 64-bit result that the
334 * sign-extension of the original signed 32-bit
335 * displacement would have given.
336 */
337 disp = (u8 *) p->addr + *((s32 *) insn) -
338 (u8 *) p->ainsn.insn;
339 BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
340 *(s32 *)insn = (s32) disp;
341 }
342 } 332 }
343#endif 333#endif
344} 334}
@@ -359,6 +349,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
359 349
360int __kprobes arch_prepare_kprobe(struct kprobe *p) 350int __kprobes arch_prepare_kprobe(struct kprobe *p)
361{ 351{
352 if (!can_probe((unsigned long)p->addr))
353 return -EILSEQ;
362 /* insn: must be on special executable page on x86. */ 354 /* insn: must be on special executable page on x86. */
363 p->ainsn.insn = get_insn_slot(); 355 p->ainsn.insn = get_insn_slot();
364 if (!p->ainsn.insn) 356 if (!p->ainsn.insn)
@@ -472,17 +464,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
472{ 464{
473 switch (kcb->kprobe_status) { 465 switch (kcb->kprobe_status) {
474 case KPROBE_HIT_SSDONE: 466 case KPROBE_HIT_SSDONE:
475#ifdef CONFIG_X86_64
476 /* TODO: Provide re-entrancy from post_kprobes_handler() and
477 * avoid exception stack corruption while single-stepping on
478 * the instruction of the new probe.
479 */
480 arch_disarm_kprobe(p);
481 regs->ip = (unsigned long)p->addr;
482 reset_current_kprobe();
483 preempt_enable_no_resched();
484 break;
485#endif
486 case KPROBE_HIT_ACTIVE: 467 case KPROBE_HIT_ACTIVE:
487 save_previous_kprobe(kcb); 468 save_previous_kprobe(kcb);
488 set_current_kprobe(p, regs, kcb); 469 set_current_kprobe(p, regs, kcb);
@@ -491,18 +472,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
491 kcb->kprobe_status = KPROBE_REENTER; 472 kcb->kprobe_status = KPROBE_REENTER;
492 break; 473 break;
493 case KPROBE_HIT_SS: 474 case KPROBE_HIT_SS:
494 if (p == kprobe_running()) { 475 /* A probe has been hit in the codepath leading up to, or just
495 regs->flags &= ~X86_EFLAGS_TF; 476 * after, single-stepping of a probed instruction. This entire
496 regs->flags |= kcb->kprobe_saved_flags; 477 * codepath should strictly reside in .kprobes.text section.
497 return 0; 478 * Raise a BUG or we'll continue in an endless reentering loop
498 } else { 479 * and eventually a stack overflow.
499 /* A probe has been hit in the codepath leading up 480 */
500 * to, or just after, single-stepping of a probed 481 printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
501 * instruction. This entire codepath should strictly 482 p->addr);
502 * reside in .kprobes.text section. Raise a warning 483 dump_kprobe(p);
503 * to highlight this peculiar case. 484 BUG();
504 */
505 }
506 default: 485 default:
507 /* impossible cases */ 486 /* impossible cases */
508 WARN_ON(1); 487 WARN_ON(1);
@@ -967,8 +946,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
967 ret = NOTIFY_STOP; 946 ret = NOTIFY_STOP;
968 break; 947 break;
969 case DIE_DEBUG: 948 case DIE_DEBUG:
970 if (post_kprobe_handler(args->regs)) 949 if (post_kprobe_handler(args->regs)) {
950 /*
951 * Reset the BS bit in dr6 (pointed to by args->err) to
952 * denote completion of processing
953 */
954 (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
971 ret = NOTIFY_STOP; 955 ret = NOTIFY_STOP;
956 }
972 break; 957 break;
973 case DIE_GPF: 958 case DIE_GPF:
974 /* 959 /*