author		Ingo Molnar <mingo@kernel.org>	2014-05-22 05:39:08 -0400
committer	Ingo Molnar <mingo@kernel.org>	2014-05-22 05:39:08 -0400
commit		a03b1e1c372b60183b8141cdd161316429fab5ac (patch)
tree		d0db097668940dce698fec8b00d228cd29c1dee0 /arch
parent		8e02ae573e6ae86930d08662790827a938203e70 (diff)
parent		b02ef20a9fba08948e643d3eec0efadf1da01a44 (diff)
Merge branch 'uprobes/core' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc into perf/uprobes
Pull uprobes fixes and changes from Oleg Nesterov:
" Denys found another nasty old bug in uprobes/x86: div, mul, shifts with
count in CL, and cmpxchg are not handled correctly.
Plus a couple of other minor fixes. Nobody acked the changes in x86/traps,
hopefully they are simple enough, and I believe that they make sense anyway
and allow us to do more cleanups."
Signed-off-by: Ingo Molnar <mingo@kernel.org>
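
The instruction class behind the headline fix is easy to demonstrate from
userspace. The sketch below is an illustration only, not part of the patch
(file name and values are ours): "divq" with a rip-relative memory operand
implicitly consumes rdx:rax, so the old XOL rewrite, which borrowed rax or
rcx as the scratch register, could clobber an implicit operand while
single-stepping the copied instruction. It assumes the compiler emits a
rip-relative access for the static global, which gcc normally does on x86-64.

    /* riprel-div.c -- userspace illustration, not part of the patch */
    #include <stdio.h>

    static unsigned long divisor = 7;       /* reached via disp32(%rip) */

    int main(void)
    {
            unsigned long q = 42, r;

            /* divq divisor(%rip): dividend in rdx:rax,
             * quotient to rax, remainder to rdx */
            asm volatile ("xorl %%edx, %%edx\n\tdivq %2"
                          : "+a" (q), "=d" (r)
                          : "m" (divisor));

            printf("q=%lu r=%lu\n", q, r);  /* expect q=6 r=0 */
            return 0;
    }

Placing a uprobe on the divq above would have produced a wrong quotient
before this series, since the rewritten copy went through rax or rcx.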
Diffstat (limited to 'arch')
-rw-r--r--	arch/x86/include/asm/traps.h	|   1
-rw-r--r--	arch/x86/include/asm/uprobes.h	|   3
-rw-r--r--	arch/x86/kernel/traps.c		| 110
-rw-r--r--	arch/x86/kernel/uprobes.c	| 213
4 files changed, 200 insertions(+), 127 deletions(-)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 58d66fe06b61..a7b212db9e04 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -98,7 +98,6 @@ static inline int get_si_code(unsigned long condition)
 
 extern int panic_on_unrecovered_nmi;
 
-void math_error(struct pt_regs *, int, int);
 void math_emulate(struct math_emu_info *);
 #ifndef CONFIG_X86_32
 asmlinkage void smp_thermal_interrupt(void);
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index a040d493a4f9..7be3c079e389 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -50,9 +50,6 @@ struct arch_uprobe {
 			u8	opc1;
 		}			branch;
 		struct {
-#ifdef CONFIG_X86_64
-			long	riprel_target;
-#endif
 			u8	fixups;
 			u8	ilen;
 		}			def;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 57409f6b8c62..3fdb20548c4b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
+#include <linux/uprobes.h>
 #include <linux/string.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
@@ -136,6 +137,37 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 	return -1;
 }
 
+static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
+				siginfo_t *info)
+{
+	unsigned long siaddr;
+	int sicode;
+
+	switch (trapnr) {
+	default:
+		return SEND_SIG_PRIV;
+
+	case X86_TRAP_DE:
+		sicode = FPE_INTDIV;
+		siaddr = uprobe_get_trap_addr(regs);
+		break;
+	case X86_TRAP_UD:
+		sicode = ILL_ILLOPN;
+		siaddr = uprobe_get_trap_addr(regs);
+		break;
+	case X86_TRAP_AC:
+		sicode = BUS_ADRALN;
+		siaddr = 0;
+		break;
+	}
+
+	info->si_signo = signr;
+	info->si_errno = 0;
+	info->si_code = sicode;
+	info->si_addr = (void __user *)siaddr;
+	return info;
+}
+
 static void __kprobes
 do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 	long error_code, siginfo_t *info)
@@ -168,60 +200,42 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 	}
 #endif
 
-	if (info)
-		force_sig_info(signr, info, tsk);
-	else
-		force_sig(signr, tsk);
+	force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
 }
 
-#define DO_ERROR(trapnr, signr, str, name)				\
-dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
-{									\
-	enum ctx_state prev_state;					\
-									\
-	prev_state = exception_enter();					\
-	if (notify_die(DIE_TRAP, str, regs, error_code,			\
-			trapnr, signr) == NOTIFY_STOP) {		\
-		exception_exit(prev_state);				\
-		return;							\
-	}								\
-	conditional_sti(regs);						\
-	do_trap(trapnr, signr, str, regs, error_code, NULL);		\
-	exception_exit(prev_state);					\
-}
-
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)	\
-dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
-{									\
-	siginfo_t info;							\
-	enum ctx_state prev_state;					\
-									\
-	info.si_signo = signr;						\
-	info.si_errno = 0;						\
-	info.si_code = sicode;						\
-	info.si_addr = (void __user *)siaddr;				\
-	prev_state = exception_enter();					\
-	if (notify_die(DIE_TRAP, str, regs, error_code,			\
-			trapnr, signr) == NOTIFY_STOP) {		\
-		exception_exit(prev_state);				\
-		return;							\
-	}								\
-	conditional_sti(regs);						\
-	do_trap(trapnr, signr, str, regs, error_code, &info);		\
-	exception_exit(prev_state);					\
+static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
+			  unsigned long trapnr, int signr)
+{
+	enum ctx_state prev_state = exception_enter();
+	siginfo_t info;
+
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
+			NOTIFY_STOP) {
+		conditional_sti(regs);
+		do_trap(trapnr, signr, str, regs, error_code,
+			fill_trap_info(regs, signr, trapnr, &info));
+	}
+
+	exception_exit(prev_state);
+}
+
+#define DO_ERROR(trapnr, signr, str, name)				\
+dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
+{									\
+	do_error_trap(regs, error_code, str, trapnr, signr);		\
 }
 
-DO_ERROR_INFO(X86_TRAP_DE,     SIGFPE,  "divide error",		divide_error,	FPE_INTDIV, regs->ip )
-DO_ERROR     (X86_TRAP_OF,     SIGSEGV, "overflow",			overflow )
-DO_ERROR     (X86_TRAP_BR,     SIGSEGV, "bounds",			bounds )
-DO_ERROR_INFO(X86_TRAP_UD,     SIGILL,  "invalid opcode",		invalid_op,	ILL_ILLOPN, regs->ip )
-DO_ERROR     (X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",	coprocessor_segment_overrun )
-DO_ERROR     (X86_TRAP_TS,     SIGSEGV, "invalid TSS",			invalid_TSS )
-DO_ERROR     (X86_TRAP_NP,     SIGBUS,  "segment not present",		segment_not_present )
+DO_ERROR(X86_TRAP_DE,     SIGFPE,  "divide error",		divide_error)
+DO_ERROR(X86_TRAP_OF,     SIGSEGV, "overflow",			overflow)
+DO_ERROR(X86_TRAP_BR,     SIGSEGV, "bounds",			bounds)
+DO_ERROR(X86_TRAP_UD,     SIGILL,  "invalid opcode",		invalid_op)
+DO_ERROR(X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",coprocessor_segment_overrun)
+DO_ERROR(X86_TRAP_TS,     SIGSEGV, "invalid TSS",		invalid_TSS)
+DO_ERROR(X86_TRAP_NP,     SIGBUS,  "segment not present",	segment_not_present)
 #ifdef CONFIG_X86_32
-DO_ERROR     (X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment )
+DO_ERROR(X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment)
 #endif
-DO_ERROR_INFO(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check, BUS_ADRALN, 0 )
+DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check)
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
@@ -305,7 +319,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
 		pr_cont("\n");
 	}
 
-	force_sig(SIGSEGV, tsk);
+	force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
 exit:
 	exception_exit(prev_state);
 }
@@ -488,7 +502,7 @@ exit:
  * the correct behaviour even in the presence of the asynchronous
  * IRQ13 behaviour
  */
-void math_error(struct pt_regs *regs, int error_code, int trapnr)
+static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 {
 	struct task_struct *task = current;
 	siginfo_t info;
@@ -518,7 +532,7 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr)
 	task->thread.error_code = error_code;
 	info.si_signo = SIGFPE;
 	info.si_errno = 0;
-	info.si_addr = (void __user *)regs->ip;
+	info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
 	if (trapnr == X86_TRAP_MF) {
 		unsigned short cwd, swd;
 		/*
@@ -645,7 +659,7 @@ void math_state_restore(void)
 	 */
 	if (unlikely(restore_fpu_checking(tsk))) {
 		drop_init_fpu(tsk);
-		force_sig(SIGSEGV, tsk);
+		force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
 		return;
 	}
 
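
With DO_ERROR_INFO folded into DO_ERROR, every handler is a one-line thunk,
and fill_trap_info() decides at run time whether a filled-in siginfo_t or
SEND_SIG_PRIV reaches do_trap(). For example, the first invocation above
expands to roughly this (expansion shown for illustration, not literal patch
text):

    dotraplinkage void do_divide_error(struct pt_regs *regs, long error_code)
    {
            do_error_trap(regs, error_code, "divide error", X86_TRAP_DE, SIGFPE);
    }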
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 2ebadb252093..159ca520ef5b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -41,8 +41,11 @@
 /* Instruction will modify TF, don't change it */
 #define UPROBE_FIX_SETF		0x04
 
-#define UPROBE_FIX_RIP_AX	0x08
-#define UPROBE_FIX_RIP_CX	0x10
+#define UPROBE_FIX_RIP_SI	0x08
+#define UPROBE_FIX_RIP_DI	0x10
+#define UPROBE_FIX_RIP_BX	0x20
+#define UPROBE_FIX_RIP_MASK	\
+	(UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX)
 
 #define UPROBE_TRAP_NR		UINT_MAX
 
@@ -251,9 +254,9 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
  * immediately. Otherwise, rewrite the instruction so that it accesses
  * its memory operand indirectly through a scratch register. Set
- * def->fixups and def->riprel_target accordingly. (The contents of the
- * scratch register will be saved before we single-step the modified
- * instruction, and restored afterward).
+ * def->fixups accordingly. (The contents of the scratch register
+ * will be saved before we single-step the modified instruction,
+ * and restored afterward).
  *
  * We do this because a rip-relative instruction can access only a
  * relatively small area (+/- 2 GB from the instruction), and the XOL
@@ -264,28 +267,120 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
  *
  * Some useful facts about rip-relative instructions:
  *
- * - There's always a modrm byte.
+ * - There's always a modrm byte with bit layout "00 reg 101".
  * - There's never a SIB byte.
  * - The displacement is always 4 bytes.
+ * - REX.B=1 bit in REX prefix, which normally extends r/m field,
+ *   has no effect on rip-relative mode. It doesn't make modrm byte
+ *   with r/m=101 refer to register 1101 = R13.
  */
 static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 *cursor;
 	u8 reg;
+	u8 reg2;
 
 	if (!insn_rip_relative(insn))
 		return;
 
 	/*
-	 * insn_rip_relative() would have decoded rex_prefix, modrm.
+	 * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm.
 	 * Clear REX.b bit (extension of MODRM.rm field):
-	 * we want to encode rax/rcx, not r8/r9.
+	 * we want to encode low numbered reg, not r8+.
 	 */
 	if (insn->rex_prefix.nbytes) {
 		cursor = auprobe->insn + insn_offset_rex_prefix(insn);
-		*cursor &= 0xfe;	/* Clearing REX.B bit */
+		/* REX byte has 0100wrxb layout, clearing REX.b bit */
+		*cursor &= 0xfe;
 	}
+	/*
+	 * Similar treatment for VEX3 prefix.
+	 * TODO: add XOP/EVEX treatment when insn decoder supports them
+	 */
+	if (insn->vex_prefix.nbytes == 3) {
+		/*
+		 * vex2:     c5    rvvvvLpp   (has no b bit)
+		 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
+		 * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
+		 * (evex will need setting of both b and x since
+		 * in non-sib encoding evex.x is 4th bit of MODRM.rm)
+		 * Setting VEX3.b (setting because it has inverted meaning):
+		 */
+		cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
+		*cursor |= 0x20;
+	}
+
+	/*
+	 * Convert from rip-relative addressing to register-relative addressing
+	 * via a scratch register.
+	 *
+	 * This is tricky since there are insns with modrm byte
+	 * which also use registers not encoded in modrm byte:
+	 * [i]div/[i]mul: implicitly use dx:ax
+	 * shift ops: implicitly use cx
+	 * cmpxchg: implicitly uses ax
+	 * cmpxchg8/16b: implicitly uses dx:ax and bx:cx
+	 *   Encoding: 0f c7/1 modrm
+	 *   The code below thinks that reg=1 (cx), chooses si as scratch.
+	 * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m.
+	 *   First appeared in Haswell (BMI2 insn). It is vex-encoded.
+	 *   Example where none of bx,cx,dx can be used as scratch reg:
+	 *   c4 e2 63 f6 0d disp32   mulx disp32(%rip),%ebx,%ecx
+	 * [v]pcmpistri: implicitly uses cx, xmm0
+	 * [v]pcmpistrm: implicitly uses xmm0
+	 * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0
+	 * [v]pcmpestrm: implicitly uses ax, dx, xmm0
+	 *   Evil SSE4.2 string comparison ops from hell.
+	 * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination.
+	 *   Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm.
+	 *   Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi).
+	 *   AMD says it has no 3-operand form (vex.vvvv must be 1111)
+	 *   and that it can have only register operands, not mem
+	 *   (its modrm byte must have mode=11).
+	 *   If these restrictions will ever be lifted,
+	 *   we'll need code to prevent selection of di as scratch reg!
+	 *
+	 * Summary: I don't know any insns with modrm byte which
+	 * use SI register implicitly. DI register is used only
+	 * by one insn (maskmovq) and BX register is used
+	 * only by one too (cmpxchg8b).
+	 * BP is stack-segment based (may be a problem?).
+	 * AX, DX, CX are off-limits (many implicit users).
+	 * SP is unusable (it's stack pointer - think about "pop mem";
+	 * also, rsp+disp32 needs sib encoding -> insn length change).
+	 */
 
+	reg = MODRM_REG(insn);	/* Fetch modrm.reg */
+	reg2 = 0xff;		/* Fetch vex.vvvv */
+	if (insn->vex_prefix.nbytes == 2)
+		reg2 = insn->vex_prefix.bytes[1];
+	else if (insn->vex_prefix.nbytes == 3)
+		reg2 = insn->vex_prefix.bytes[2];
+	/*
+	 * TODO: add XOP, EXEV vvvv reading.
+	 *
+	 * vex.vvvv field is in bits 6-3, bits are inverted.
+	 * But in 32-bit mode, high-order bit may be ignored.
+	 * Therefore, let's consider only 3 low-order bits.
+	 */
+	reg2 = ((reg2 >> 3) & 0x7) ^ 0x7;
+	/*
+	 * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15.
+	 *
	 * Choose scratch reg. Order is important: must not select bx
+	 * if we can use si (cmpxchg8b case!)
+	 */
+	if (reg != 6 && reg2 != 6) {
+		reg2 = 6;
+		auprobe->def.fixups |= UPROBE_FIX_RIP_SI;
+	} else if (reg != 7 && reg2 != 7) {
+		reg2 = 7;
+		auprobe->def.fixups |= UPROBE_FIX_RIP_DI;
+		/* TODO (paranoia): force maskmovq to not use di */
+	} else {
+		reg2 = 3;
+		auprobe->def.fixups |= UPROBE_FIX_RIP_BX;
+	}
 	/*
 	 * Point cursor at the modrm byte. The next 4 bytes are the
 	 * displacement. Beyond the displacement, for some instructions,
@@ -293,43 +388,21 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 	 */
 	cursor = auprobe->insn + insn_offset_modrm(insn);
 	/*
-	 * Convert from rip-relative addressing to indirect addressing
-	 * via a scratch register. Change the r/m field from 0x5 (%rip)
-	 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
+	 * Change modrm from "00 reg 101" to "10 reg reg2". Example:
+	 * 89 05 disp32  mov %eax,disp32(%rip) becomes
+	 * 89 86 disp32  mov %eax,disp32(%rsi)
 	 */
-	reg = MODRM_REG(insn);
-	if (reg == 0) {
-		/*
-		 * The register operand (if any) is either the A register
-		 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
-		 * REX prefix) %r8. In any case, we know the C register
-		 * is NOT the register operand, so we use %rcx (register
-		 * #1) for the scratch register.
-		 */
-		auprobe->def.fixups |= UPROBE_FIX_RIP_CX;
-		/* Change modrm from 00 000 101 to 00 000 001. */
-		*cursor = 0x1;
-	} else {
-		/* Use %rax (register #0) for the scratch register. */
-		auprobe->def.fixups |= UPROBE_FIX_RIP_AX;
-		/* Change modrm from 00 xxx 101 to 00 xxx 000 */
-		*cursor = (reg << 3);
-	}
-
-	/* Target address = address of next instruction + (signed) offset */
-	auprobe->def.riprel_target = (long)insn->length + insn->displacement.value;
-
-	/* Displacement field is gone; slide immediate field (if any) over. */
-	if (insn->immediate.nbytes) {
-		cursor++;
-		memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
-	}
+	*cursor = 0x80 | (reg << 3) | reg2;
 }
 
 static inline unsigned long *
 scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	return (auprobe->def.fixups & UPROBE_FIX_RIP_AX) ? &regs->ax : &regs->cx;
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_SI)
+		return &regs->si;
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_DI)
+		return &regs->di;
+	return &regs->bx;
 }
 
 /*
@@ -338,31 +411,22 @@ scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
  */
 static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) {
 		struct uprobe_task *utask = current->utask;
 		unsigned long *sr = scratch_reg(auprobe, regs);
 
 		utask->autask.saved_scratch_register = *sr;
-		*sr = utask->vaddr + auprobe->def.riprel_target;
+		*sr = utask->vaddr + auprobe->def.ilen;
 	}
 }
 
-static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
-				long *correction)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
+	if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) {
 		struct uprobe_task *utask = current->utask;
 		unsigned long *sr = scratch_reg(auprobe, regs);
 
 		*sr = utask->autask.saved_scratch_register;
-		/*
-		 * The original instruction includes a displacement, and so
-		 * is 4 bytes longer than what we've just single-stepped.
-		 * Caller may need to apply other fixups to handle stuff
-		 * like "jmpq *...(%rip)" and "callq *...(%rip)".
-		 */
-		if (correction)
-			*correction += 4;
 	}
 }
 #else /* 32-bit: */
@@ -379,8 +443,7 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
-static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
-				long *correction)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
 #endif /* CONFIG_X86_64 */
@@ -414,13 +477,30 @@ static int push_ret_address(struct pt_regs *regs, unsigned long ip)
 	return 0;
 }
 
+/*
+ * We have to fix things up as follows:
+ *
+ * Typically, the new ip is relative to the copied instruction. We need
+ * to make it relative to the original instruction (FIX_IP). Exceptions
+ * are return instructions and absolute or indirect jump or call instructions.
+ *
+ * If the single-stepped instruction was a call, the return address that
+ * is atop the stack is the address following the copied instruction. We
+ * need to make it the address following the original instruction (FIX_CALL).
+ *
+ * If the original instruction was a rip-relative instruction such as
+ * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
+ * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)".
+ * We need to restore the contents of the scratch register
+ * (FIX_RIP_reg).
+ */
 static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
-	long correction = (long)(utask->vaddr - utask->xol_vaddr);
 
-	riprel_post_xol(auprobe, regs, &correction);
+	riprel_post_xol(auprobe, regs);
 	if (auprobe->def.fixups & UPROBE_FIX_IP) {
+		long correction = utask->vaddr - utask->xol_vaddr;
 		regs->ip += correction;
 	} else if (auprobe->def.fixups & UPROBE_FIX_CALL) {
 		regs->sp += sizeof_long();
@@ -436,7 +516,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
 
 static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	riprel_post_xol(auprobe, regs, NULL);
+	riprel_post_xol(auprobe, regs);
 }
 
 static struct uprobe_xol_ops default_xol_ops = {
@@ -720,23 +800,6 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
 * single-step, we single-stepped a copy of the instruction.
 *
 * This function prepares to resume execution after the single-step.
- * We have to fix things up as follows:
- *
- * Typically, the new ip is relative to the copied instruction. We need
- * to make it relative to the original instruction (FIX_IP). Exceptions
- * are return instructions and absolute or indirect jump or call instructions.
- *
- * If the single-stepped instruction was a call, the return address that
- * is atop the stack is the address following the copied instruction. We
- * need to make it the address following the original instruction (FIX_CALL).
- *
- * If the original instruction was a rip-relative instruction such as
- * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
- * instruction using a scratch register -- e.g., "movl %edx,(%rax)".
- * We need to restore the contents of the scratch register and adjust
- * the ip, keeping in mind that the instruction we executed is 4 bytes
- * shorter than the original instruction (since we squeezed out the offset
- * field). (FIX_RIP_AX or FIX_RIP_CX)
 */
 int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
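
To see the modrm rewrite in riprel_analyze() with concrete numbers, here is a
small userspace check (illustration only, not kernel code) of the example from
the patch comment: modrm 0x05 is "00 000 101" (reg=0, rip-relative r/m), and
with si (register 6) chosen as the scratch it becomes 0x86, "10 000 110",
i.e. disp32(%rsi):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint8_t modrm = 0x05;               /* from "89 05 disp32" */
            uint8_t reg   = (modrm >> 3) & 7;   /* MODRM_REG: 0 (ax) */
            uint8_t reg2  = 6;                  /* chosen scratch: si */

            /* "00 reg 101" -> "10 reg reg2", as riprel_analyze() does */
            uint8_t rewritten = 0x80 | (reg << 3) | reg2;

            assert(rewritten == 0x86);          /* "89 86 disp32" */
            return 0;
    }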