 arch/x86/kernel/uprobes.c | 176 ++++++++++++++++++++++++++++++++------------
 1 file changed, 125 insertions(+), 51 deletions(-)
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 31dcb4d5ea46..159ca520ef5b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -41,8 +41,11 @@
 /* Instruction will modify TF, don't change it */
 #define UPROBE_FIX_SETF		0x04
 
-#define UPROBE_FIX_RIP_AX	0x08
-#define UPROBE_FIX_RIP_CX	0x10
+#define UPROBE_FIX_RIP_SI	0x08
+#define UPROBE_FIX_RIP_DI	0x10
+#define UPROBE_FIX_RIP_BX	0x20
+#define UPROBE_FIX_RIP_MASK	\
+	(UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX)
 
 #define UPROBE_TRAP_NR		UINT_MAX
 
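
For illustration (not part of the patch), a minimal user-space sketch of how the new UPROBE_FIX_RIP_MASK groups the three per-register flags so that a single mask test replaces the old two-flag (AX | CX) check; the flag values mirror the definitions above, and "fixups" is a hypothetical stand-in for auprobe->def.fixups:

#include <stdio.h>

#define UPROBE_FIX_RIP_SI	0x08
#define UPROBE_FIX_RIP_DI	0x10
#define UPROBE_FIX_RIP_BX	0x20
#define UPROBE_FIX_RIP_MASK	\
	(UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX)

int main(void)
{
	unsigned long fixups = UPROBE_FIX_RIP_DI;	/* example: di chosen as scratch */

	/* one mask test covers all three scratch-register flags */
	if (fixups & UPROBE_FIX_RIP_MASK)
		printf("rip-relative fixup pending (0x%02lx)\n",
		       fixups & UPROBE_FIX_RIP_MASK);
	return 0;
}
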
@@ -275,20 +278,109 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 *cursor;
 	u8 reg;
+	u8 reg2;
 
 	if (!insn_rip_relative(insn))
 		return;
 
 	/*
-	 * insn_rip_relative() would have decoded rex_prefix, modrm.
+	 * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm.
 	 * Clear REX.b bit (extension of MODRM.rm field):
-	 * we want to encode rax/rcx, not r8/r9.
+	 * we want to encode low numbered reg, not r8+.
 	 */
 	if (insn->rex_prefix.nbytes) {
 		cursor = auprobe->insn + insn_offset_rex_prefix(insn);
-		*cursor &= 0xfe;	/* Clearing REX.B bit */
+		/* REX byte has 0100wrxb layout, clearing REX.b bit */
+		*cursor &= 0xfe;
 	}
+	/*
+	 * Similar treatment for VEX3 prefix.
+	 * TODO: add XOP/EVEX treatment when insn decoder supports them
+	 */
+	if (insn->vex_prefix.nbytes == 3) {
+		/*
+		 * vex2:     c5    rvvvvLpp   (has no b bit)
+		 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
+		 * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
+		 *   (evex will need setting of both b and x since
+		 *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
+		 * Setting VEX3.b (setting because it has inverted meaning):
+		 */
+		cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
+		*cursor |= 0x20;
+	}
+
+	/*
+	 * Convert from rip-relative addressing to register-relative addressing
+	 * via a scratch register.
+	 *
+	 * This is tricky since there are insns with modrm byte
+	 * which also use registers not encoded in modrm byte:
+	 *  [i]div/[i]mul: implicitly use dx:ax
+	 *  shift ops: implicitly use cx
+	 *  cmpxchg: implicitly uses ax
+	 *  cmpxchg8/16b: implicitly uses dx:ax and bx:cx
+	 *     Encoding: 0f c7/1 modrm
+	 *     The code below thinks that reg=1 (cx), chooses si as scratch.
+	 *  mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m.
+	 *     First appeared in Haswell (BMI2 insn). It is vex-encoded.
+	 *     Example where none of bx,cx,dx can be used as scratch reg:
+	 *     c4 e2 63 f6 0d disp32   mulx disp32(%rip),%ebx,%ecx
+	 *  [v]pcmpistri: implicitly uses cx, xmm0
+	 *  [v]pcmpistrm: implicitly uses xmm0
+	 *  [v]pcmpestri: implicitly uses ax, dx, cx, xmm0
+	 *  [v]pcmpestrm: implicitly uses ax, dx, xmm0
+	 *     Evil SSE4.2 string comparison ops from hell.
+	 *  maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination.
+	 *     Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm.
+	 *     Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi).
+	 *     AMD says it has no 3-operand form (vex.vvvv must be 1111)
+	 *     and that it can have only register operands, not mem
+	 *     (its modrm byte must have mode=11).
+	 *     If these restrictions will ever be lifted,
+	 *     we'll need code to prevent selection of di as scratch reg!
+	 *
+	 * Summary: I don't know any insns with modrm byte which
+	 * use SI register implicitly. DI register is used only
+	 * by one insn (maskmovq) and BX register is used
+	 * only by one too (cmpxchg8b).
+	 * BP is stack-segment based (may be a problem?).
+	 * AX, DX, CX are off-limits (many implicit users).
+	 * SP is unusable (it's stack pointer - think about "pop mem";
+	 * also, rsp+disp32 needs sib encoding -> insn length change).
+	 */
 
+	reg = MODRM_REG(insn);	/* Fetch modrm.reg */
+	reg2 = 0xff;		/* Fetch vex.vvvv */
+	if (insn->vex_prefix.nbytes == 2)
+		reg2 = insn->vex_prefix.bytes[1];
+	else if (insn->vex_prefix.nbytes == 3)
+		reg2 = insn->vex_prefix.bytes[2];
+	/*
+	 * TODO: add XOP, EVEX vvvv reading.
+	 *
+	 * vex.vvvv field is in bits 6-3, bits are inverted.
+	 * But in 32-bit mode, high-order bit may be ignored.
+	 * Therefore, let's consider only 3 low-order bits.
+	 */
+	reg2 = ((reg2 >> 3) & 0x7) ^ 0x7;
+	/*
+	 * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15.
+	 *
+	 * Choose scratch reg. Order is important: must not select bx
+	 * if we can use si (cmpxchg8b case!)
+	 */
+	if (reg != 6 && reg2 != 6) {
+		reg2 = 6;
+		auprobe->def.fixups |= UPROBE_FIX_RIP_SI;
+	} else if (reg != 7 && reg2 != 7) {
+		reg2 = 7;
+		auprobe->def.fixups |= UPROBE_FIX_RIP_DI;
+		/* TODO (paranoia): force maskmovq to not use di */
+	} else {
+		reg2 = 3;
+		auprobe->def.fixups |= UPROBE_FIX_RIP_BX;
+	}
 	/*
 	 * Point cursor at the modrm byte. The next 4 bytes are the
 	 * displacement. Beyond the displacement, for some instructions,
@@ -296,41 +388,21 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 	 */
 	cursor = auprobe->insn + insn_offset_modrm(insn);
 	/*
-	 * Convert from rip-relative addressing
-	 * to register-relative addressing via a scratch register.
+	 * Change modrm from "00 reg 101" to "10 reg reg2". Example:
+	 * 89 05 disp32   mov %eax,disp32(%rip) becomes
+	 * 89 86 disp32   mov %eax,disp32(%rsi)
 	 */
-	reg = MODRM_REG(insn);
-	if (reg == 0) {
-		/*
-		 * The register operand (if any) is either the A register
-		 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
-		 * REX prefix) %r8. In any case, we know the C register
-		 * is NOT the register operand, so we use %rcx (register
-		 * #1) for the scratch register.
-		 */
-		auprobe->def.fixups |= UPROBE_FIX_RIP_CX;
-		/*
-		 * Change modrm from "00 000 101" to "10 000 001". Example:
-		 * 89 05 disp32   mov %eax,disp32(%rip) becomes
-		 * 89 81 disp32   mov %eax,disp32(%rcx)
-		 */
-		*cursor = 0x81;
-	} else {
-		/* Use %rax (register #0) for the scratch register. */
-		auprobe->def.fixups |= UPROBE_FIX_RIP_AX;
-		/*
-		 * Change modrm from "00 reg 101" to "10 reg 000". Example:
-		 * 89 1d disp32   mov %edx,disp32(%rip) becomes
-		 * 89 98 disp32   mov %edx,disp32(%rax)
-		 */
-		*cursor = (reg << 3) | 0x80;
-	}
+	*cursor = 0x80 | (reg << 3) | reg2;
 }
 
 static inline unsigned long *
 scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	return (auprobe->def.fixups & UPROBE_FIX_RIP_AX) ? &regs->ax : &regs->cx;
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_SI)
+		return &regs->si;
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_DI)
+		return &regs->di;
+	return &regs->bx;
 }
 
 /*
@@ -339,7 +411,7 @@ scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
  */
 static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) {
 		struct uprobe_task *utask = current->utask;
 		unsigned long *sr = scratch_reg(auprobe, regs);
 
@@ -350,7 +422,7 @@ static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 
 static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) {
 		struct uprobe_task *utask = current->utask;
 		unsigned long *sr = scratch_reg(auprobe, regs);
 
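
To make the riprel_analyze() changes above concrete, here is a stand-alone sketch (not kernel code) that repeats the scratch-register selection and the modrm rewrite on two of the examples quoted in the patch comments. modrm_reg and vvvv_byte are hypothetical inputs standing in for the decoded instruction fields, and MODRM_REG here is a local helper that, unlike the kernel macro, takes a raw modrm byte:

#include <stdio.h>

#define MODRM_REG(modrm)	(((modrm) >> 3) & 0x7)

/* Pick the scratch register: si unless busy, then di, then bx. */
static int pick_scratch(int modrm_reg, int vvvv_byte)
{
	/* vex.vvvv lives in bits 6-3 and is stored inverted; keep 3 low bits */
	int reg2 = ((vvvv_byte >> 3) & 0x7) ^ 0x7;

	if (modrm_reg != 6 && reg2 != 6)
		return 6;	/* %rsi */
	if (modrm_reg != 7 && reg2 != 7)
		return 7;	/* %rdi */
	return 3;		/* %rbx */
}

int main(void)
{
	/* 89 05 disp32: mov %eax,disp32(%rip); no VEX prefix, so vvvv stays 0xff */
	unsigned char modrm = 0x05;
	int reg2 = pick_scratch(MODRM_REG(modrm), 0xff);
	unsigned char fixed = 0x80 | (MODRM_REG(modrm) << 3) | reg2;

	printf("mov:  modrm 0x%02x -> 0x%02x, scratch reg #%d\n", modrm, fixed, reg2);

	/* c4 e2 63 f6 0d disp32: mulx disp32(%rip),%ebx,%ecx; third VEX byte is 0x63 */
	modrm = 0x0d;
	reg2 = pick_scratch(MODRM_REG(modrm), 0x63);
	fixed = 0x80 | (MODRM_REG(modrm) << 3) | reg2;

	printf("mulx: modrm 0x%02x -> 0x%02x, scratch reg #%d\n", modrm, fixed, reg2);
	return 0;
}

For the mov case this prints modrm 0x05 -> 0x86 with %rsi as scratch, matching the "89 86 disp32" example in the hunk above; for the mulx case both modrm.reg (cx) and vvvv (bx) are busy registers, yet si is still free and gets chosen.
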
@@ -405,6 +477,23 @@ static int push_ret_address(struct pt_regs *regs, unsigned long ip)
 	return 0;
 }
 
+/*
+ * We have to fix things up as follows:
+ *
+ * Typically, the new ip is relative to the copied instruction. We need
+ * to make it relative to the original instruction (FIX_IP). Exceptions
+ * are return instructions and absolute or indirect jump or call instructions.
+ *
+ * If the single-stepped instruction was a call, the return address that
+ * is atop the stack is the address following the copied instruction. We
+ * need to make it the address following the original instruction (FIX_CALL).
+ *
+ * If the original instruction was a rip-relative instruction such as
+ * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
+ * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)".
+ * We need to restore the contents of the scratch register
+ * (FIX_RIP_reg).
+ */
 static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
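
As an illustration of the FIX_IP adjustment described in the comment above, a sketch with made-up addresses (not kernel code): the copy was single-stepped in the XOL slot, so the resulting ip is shifted back by the distance between the slot and the original probe address.

#include <stdio.h>

int main(void)
{
	unsigned long vaddr     = 0x00400512;	/* original instruction address */
	unsigned long xol_vaddr = 0x7f5e1000;	/* XOL slot the copy ran from   */
	unsigned long ip        = 0x7f5e1006;	/* ip after stepping the copy   */

	/* FIX_IP: rebase ip from the copy back onto the original insn */
	ip += vaddr - xol_vaddr;

	printf("fixed ip = 0x%lx\n", ip);	/* 0x400518 */
	return 0;
}
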
@@ -711,21 +800,6 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
  * single-step, we single-stepped a copy of the instruction.
  *
  * This function prepares to resume execution after the single-step.
- * We have to fix things up as follows:
- *
- * Typically, the new ip is relative to the copied instruction. We need
- * to make it relative to the original instruction (FIX_IP). Exceptions
- * are return instructions and absolute or indirect jump or call instructions.
- *
- * If the single-stepped instruction was a call, the return address that
- * is atop the stack is the address following the copied instruction. We
- * need to make it the address following the original instruction (FIX_CALL).
- *
- * If the original instruction was a rip-relative instruction such as
- * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
- * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rax)".
- * We need to restore the contents of the scratch register
- * (FIX_RIP_AX or FIX_RIP_CX).
  */
 int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
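
The FIX_CALL case described in the comment this hunk moves works the same way; a sketch with hypothetical addresses (not kernel code): the return address pushed by the copied call, which points into the XOL slot, is retargeted to the byte after the original call instruction.

#include <stdio.h>

int main(void)
{
	unsigned long vaddr     = 0x00400512;	/* original call instruction    */
	unsigned long xol_vaddr = 0x7f5e1000;	/* XOL slot the copy ran from   */
	unsigned long ret_addr  = 0x7f5e1005;	/* pushed by the copied call    */

	/* FIX_CALL: make the saved return address follow the original insn */
	ret_addr += vaddr - xol_vaddr;

	printf("fixed return address = 0x%lx\n", ret_addr);	/* 0x400517 */
	return 0;
}
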