 arch/x86/kernel/uprobes.c | 176 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 125 insertions(+), 51 deletions(-)

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 31dcb4d5ea46..159ca520ef5b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -41,8 +41,11 @@
 /* Instruction will modify TF, don't change it */
 #define UPROBE_FIX_SETF		0x04
 
-#define UPROBE_FIX_RIP_AX	0x08
-#define UPROBE_FIX_RIP_CX	0x10
+#define UPROBE_FIX_RIP_SI	0x08
+#define UPROBE_FIX_RIP_DI	0x10
+#define UPROBE_FIX_RIP_BX	0x20
+#define UPROBE_FIX_RIP_MASK	\
+	(UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX)
 
 #define	UPROBE_TRAP_NR		UINT_MAX
 
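
The replacement flags stay one bit per scratch register, so the new
UPROBE_FIX_RIP_MASK lets the pre/post single-step hooks further down ask
"any rip-relative fixup pending?" with a single AND instead of enumerating
flags. A minimal standalone sketch of that arithmetic (plain userspace C,
not kernel code; the fixups value is a hypothetical example):

	#include <stdio.h>

	#define UPROBE_FIX_RIP_SI	0x08
	#define UPROBE_FIX_RIP_DI	0x10
	#define UPROBE_FIX_RIP_BX	0x20
	#define UPROBE_FIX_RIP_MASK \
		(UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX)

	int main(void)
	{
		unsigned int fixups = UPROBE_FIX_RIP_DI;	/* hypothetical value */

		/* one mask test covers all three scratch-register flags */
		if (fixups & UPROBE_FIX_RIP_MASK)
			printf("rip-relative fixup pending\n");
		return 0;
	}
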
@@ -275,20 +278,109 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 *cursor;
 	u8 reg;
+	u8 reg2;
 
 	if (!insn_rip_relative(insn))
 		return;
 
 	/*
-	 * insn_rip_relative() would have decoded rex_prefix, modrm.
+	 * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm.
 	 * Clear REX.b bit (extension of MODRM.rm field):
-	 * we want to encode rax/rcx, not r8/r9.
+	 * we want to encode low numbered reg, not r8+.
 	 */
 	if (insn->rex_prefix.nbytes) {
 		cursor = auprobe->insn + insn_offset_rex_prefix(insn);
-		*cursor &= 0xfe;	/* Clearing REX.B bit */
+		/* REX byte has 0100wrxb layout, clearing REX.b bit */
+		*cursor &= 0xfe;
 	}
+	/*
+	 * Similar treatment for VEX3 prefix.
+	 * TODO: add XOP/EVEX treatment when insn decoder supports them
+	 */
+	if (insn->vex_prefix.nbytes == 3) {
+		/*
+		 * vex2:     c5    rvvvvLpp   (has no b bit)
+		 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
+		 * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
+		 *   (evex will need setting of both b and x since
+		 *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
+		 * Setting VEX3.b (setting because it has inverted meaning):
+		 */
+		cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
+		*cursor |= 0x20;
+	}
+
+	/*
+	 * Convert from rip-relative addressing to register-relative addressing
+	 * via a scratch register.
+	 *
+	 * This is tricky since there are insns with modrm byte
+	 * which also use registers not encoded in modrm byte:
+	 * [i]div/[i]mul: implicitly use dx:ax
+	 * shift ops: implicitly use cx
+	 * cmpxchg: implicitly uses ax
+	 * cmpxchg8/16b: implicitly uses dx:ax and bx:cx
+	 *   Encoding: 0f c7/1 modrm
+	 *   The code below thinks that reg=1 (cx), chooses si as scratch.
+	 * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m.
+	 *   First appeared in Haswell (BMI2 insn). It is vex-encoded.
+	 *   Example where none of bx,cx,dx can be used as scratch reg:
+	 *   c4 e2 63 f6 0d disp32   mulx disp32(%rip),%ebx,%ecx
+	 * [v]pcmpistri: implicitly uses cx, xmm0
+	 * [v]pcmpistrm: implicitly uses xmm0
+	 * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0
+	 * [v]pcmpestrm: implicitly uses ax, dx, xmm0
+	 * Evil SSE4.2 string comparison ops from hell.
+	 * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination.
+	 *   Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm.
+	 *   Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi).
+	 *   AMD says it has no 3-operand form (vex.vvvv must be 1111)
+	 *   and that it can have only register operands, not mem
+	 *   (its modrm byte must have mode=11).
+	 *   If these restrictions will ever be lifted,
+	 *   we'll need code to prevent selection of di as scratch reg!
+	 *
+	 * Summary: I don't know any insns with modrm byte which
+	 * use SI register implicitly. DI register is used only
+	 * by one insn (maskmovq) and BX register is used
+	 * only by one too (cmpxchg8b).
+	 * BP is stack-segment based (may be a problem?).
+	 * AX, DX, CX are off-limits (many implicit users).
+	 * SP is unusable (it's stack pointer - think about "pop mem";
+	 * also, rsp+disp32 needs sib encoding -> insn length change).
+	 */
 
+	reg = MODRM_REG(insn);	/* Fetch modrm.reg */
+	reg2 = 0xff;		/* Fetch vex.vvvv */
+	if (insn->vex_prefix.nbytes == 2)
+		reg2 = insn->vex_prefix.bytes[1];
+	else if (insn->vex_prefix.nbytes == 3)
+		reg2 = insn->vex_prefix.bytes[2];
+	/*
+	 * TODO: add XOP, EXEV vvvv reading.
+	 *
+	 * vex.vvvv field is in bits 6-3, bits are inverted.
+	 * But in 32-bit mode, high-order bit may be ignored.
+	 * Therefore, let's consider only 3 low-order bits.
+	 */
+	reg2 = ((reg2 >> 3) & 0x7) ^ 0x7;
+	/*
+	 * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15.
+	 *
+	 * Choose scratch reg. Order is important: must not select bx
+	 * if we can use si (cmpxchg8b case!)
+	 */
+	if (reg != 6 && reg2 != 6) {
+		reg2 = 6;
+		auprobe->def.fixups |= UPROBE_FIX_RIP_SI;
+	} else if (reg != 7 && reg2 != 7) {
+		reg2 = 7;
+		auprobe->def.fixups |= UPROBE_FIX_RIP_DI;
+		/* TODO (paranoia): force maskmovq to not use di */
+	} else {
+		reg2 = 3;
+		auprobe->def.fixups |= UPROBE_FIX_RIP_BX;
+	}
 	/*
 	 * Point cursor at the modrm byte. The next 4 bytes are the
 	 * displacement. Beyond the displacement, for some instructions,
@@ -296,41 +388,21 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 	 */
 	cursor = auprobe->insn + insn_offset_modrm(insn);
 	/*
-	 * Convert from rip-relative addressing
-	 * to register-relative addressing via a scratch register.
+	 * Change modrm from "00 reg 101" to "10 reg reg2". Example:
+	 * 89 05 disp32  mov %eax,disp32(%rip) becomes
+	 * 89 86 disp32  mov %eax,disp32(%rsi)
 	 */
-	reg = MODRM_REG(insn);
-	if (reg == 0) {
-		/*
-		 * The register operand (if any) is either the A register
-		 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
-		 * REX prefix) %r8. In any case, we know the C register
-		 * is NOT the register operand, so we use %rcx (register
-		 * #1) for the scratch register.
-		 */
-		auprobe->def.fixups |= UPROBE_FIX_RIP_CX;
-		/*
-		 * Change modrm from "00 000 101" to "10 000 001". Example:
-		 * 89 05 disp32  mov %eax,disp32(%rip) becomes
-		 * 89 81 disp32  mov %eax,disp32(%rcx)
-		 */
-		*cursor = 0x81;
-	} else {
-		/* Use %rax (register #0) for the scratch register. */
-		auprobe->def.fixups |= UPROBE_FIX_RIP_AX;
-		/*
-		 * Change modrm from "00 reg 101" to "10 reg 000". Example:
-		 * 89 1d disp32  mov %edx,disp32(%rip) becomes
-		 * 89 98 disp32  mov %edx,disp32(%rax)
-		 */
-		*cursor = (reg << 3) | 0x80;
-	}
+	*cursor = 0x80 | (reg << 3) | reg2;
 }
 
 static inline unsigned long *
 scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	return (auprobe->def.fixups & UPROBE_FIX_RIP_AX) ? &regs->ax : &regs->cx;
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_SI)
+		return &regs->si;
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_DI)
+		return &regs->di;
+	return &regs->bx;
 }
 
 /*
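
The vex.vvvv decode and the ModRM rewrite in the hunks above are pure byte
arithmetic, and the worked examples quoted in the comments can be checked in
isolation. A standalone sketch (userspace C mirroring the expressions in
riprel_analyze(); the sample bytes are the ones from the comments above):

	#include <stdio.h>

	/* vex.vvvv sits in bits 6-3 of the last VEX payload byte, inverted;
	 * only the 3 low-order bits matter for scratch-register selection. */
	static unsigned char vex_vvvv(unsigned char payload)
	{
		return ((payload >> 3) & 0x7) ^ 0x7;
	}

	/* "00 reg 101" (rip+disp32) -> "10 reg reg2" (reg2+disp32) */
	static unsigned char rewrite_modrm(unsigned char reg, unsigned char reg2)
	{
		return 0x80 | (reg << 3) | reg2;
	}

	int main(void)
	{
		/* mulx example from the comment, c4 e2 63 f6 ...:
		 * payload byte 0x63 encodes vvvv = 3, i.e. bx. */
		printf("vvvv = %u\n", vex_vvvv(0x63));		/* prints 3 */

		/* 89 05 disp32  mov %eax,disp32(%rip): reg = 0 (ax).
		 * With si (6) as scratch, modrm 05 becomes 86:
		 * 89 86 disp32  mov %eax,disp32(%rsi). */
		printf("modrm = %02x\n", rewrite_modrm(0, 6));	/* prints 86 */
		return 0;
	}
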
@@ -339,7 +411,7 @@ scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
  */
 static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) {
 		struct uprobe_task *utask = current->utask;
 		unsigned long *sr = scratch_reg(auprobe, regs);
 
@@ -350,7 +422,7 @@ static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 
 static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) {
 		struct uprobe_task *utask = current->utask;
 		unsigned long *sr = scratch_reg(auprobe, regs);
 
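
The remainder of each body is unchanged context and so falls outside these
hunks: the pre-xol hook saves the chosen scratch register and loads it with
the address the original rip would have held (the end of the original
instruction, since disp32 is relative to the next instruction), so that
reg2+disp32 resolves to the same target as rip+disp32; the post-xol hook
restores the user's value. A simplified sketch of that save/restore dance
(the struct and field names are illustrative stand-ins, not the real
uprobe_task layout):

	#include <assert.h>

	struct task_state {
		unsigned long saved_scratch;	/* user's register value */
		unsigned long vaddr;		/* address of the original insn */
	};

	/* Before the out-of-line step: park the end-of-insn address in the
	 * scratch register so reg2+disp32 == original rip+disp32. */
	static void pre_xol(struct task_state *t, unsigned long *sr,
			    unsigned long insn_len)
	{
		t->saved_scratch = *sr;
		*sr = t->vaddr + insn_len;	/* disp32 is relative to insn end */
	}

	/* After the step: undo our clobber of the user's register. */
	static void post_xol(struct task_state *t, unsigned long *sr)
	{
		*sr = t->saved_scratch;
	}

	int main(void)
	{
		struct task_state t = { 0, 0x400000 };	/* made-up insn address */
		unsigned long si = 0xdead;		/* user's %rsi value */

		pre_xol(&t, &si, 6);		/* 6-byte insn, e.g. 89 86 disp32 */
		assert(si == 0x400006);
		post_xol(&t, &si);
		assert(si == 0xdead);
		return 0;
	}
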
@@ -405,6 +477,23 @@ static int push_ret_address(struct pt_regs *regs, unsigned long ip)
 	return 0;
 }
 
+/*
+ * We have to fix things up as follows:
+ *
+ * Typically, the new ip is relative to the copied instruction. We need
+ * to make it relative to the original instruction (FIX_IP). Exceptions
+ * are return instructions and absolute or indirect jump or call instructions.
+ *
+ * If the single-stepped instruction was a call, the return address that
+ * is atop the stack is the address following the copied instruction. We
+ * need to make it the address following the original instruction (FIX_CALL).
+ *
+ * If the original instruction was a rip-relative instruction such as
+ * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
+ * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)".
+ * We need to restore the contents of the scratch register
+ * (FIX_RIP_reg).
+ */
 static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
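
The FIX_IP case described in this comment reduces to one subtraction: after
the trap, ip is some offset past the XOL copy, and adding (original address
- XOL slot address) rebases it onto the original instruction. A hedged
sketch of that correction (variable names are illustrative, not the
kernel's):

	#include <assert.h>

	/* Rebase a post-trap ip from the XOL copy onto the original insn. */
	static void fix_ip(unsigned long *ip,
			   unsigned long orig_vaddr,	/* original insn address */
			   unsigned long xol_vaddr)	/* address of the copy */
	{
		*ip += orig_vaddr - xol_vaddr;
	}

	int main(void)
	{
		/* made-up addresses: 6-byte insn at 0x400000, copy at 0x7f0000 */
		unsigned long ip = 0x7f0000 + 6;	/* trap left ip past the copy */

		fix_ip(&ip, 0x400000, 0x7f0000);
		assert(ip == 0x400006);		/* now past the original insn */
		return 0;
	}
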
@@ -711,21 +800,6 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
  * single-step, we single-stepped a copy of the instruction.
  *
  * This function prepares to resume execution after the single-step.
- * We have to fix things up as follows:
- *
- * Typically, the new ip is relative to the copied instruction. We need
- * to make it relative to the original instruction (FIX_IP). Exceptions
- * are return instructions and absolute or indirect jump or call instructions.
- *
- * If the single-stepped instruction was a call, the return address that
- * is atop the stack is the address following the copied instruction. We
- * need to make it the address following the original instruction (FIX_CALL).
- *
- * If the original instruction was a rip-relative instruction such as
- * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
- * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rax)".
- * We need to restore the contents of the scratch register
- * (FIX_RIP_AX or FIX_RIP_CX).
  */
 int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {