author:    Ingo Molnar <mingo@kernel.org>  2014-06-05 05:18:03 -0400
committer: Ingo Molnar <mingo@kernel.org>  2014-06-05 06:26:27 -0400
commit:    c56d34064b6eb9f9cde9e35bbfe16eedf3d81f94
tree:      9877ef9e1b238e14a1878f10d51ea55fbca5f619 /arch/x86
parent:    b13fa91421213a8d1fd05086050f05e994f3b72d
parent:    a03b1e1c372b60183b8141cdd161316429fab5ac
Merge branch 'perf/uprobes' into perf/core
These bits from Oleg are fully cooked, ship them to Linus.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')

-rw-r--r--  arch/x86/include/asm/traps.h   |   1
-rw-r--r--  arch/x86/include/asm/uprobes.h |  10
-rw-r--r--  arch/x86/kernel/process_64.c   |   7
-rw-r--r--  arch/x86/kernel/traps.c        | 110
-rw-r--r--  arch/x86/kernel/uprobes.c      | 506

5 files changed, 351 insertions(+), 283 deletions(-)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 58d66fe06b61..a7b212db9e04 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -98,7 +98,6 @@ static inline int get_si_code(unsigned long condition)
 
 extern int panic_on_unrecovered_nmi;
 
-void math_error(struct pt_regs *, int, int);
 void math_emulate(struct math_emu_info *);
 #ifndef CONFIG_X86_32
 asmlinkage void smp_thermal_interrupt(void);
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 93bee7b93854..7be3c079e389 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -41,18 +41,18 @@ struct arch_uprobe {
                u8      ixol[MAX_UINSN_BYTES];
        };
 
-       u16                             fixups;
        const struct uprobe_xol_ops     *ops;
 
        union {
-#ifdef CONFIG_X86_64
-               unsigned long                   rip_rela_target_address;
-#endif
                struct {
                        s32     offs;
                        u8      ilen;
                        u8      opc1;
                }                       branch;
+               struct {
+                       u8      fixups;
+                       u8      ilen;
+               }                       def;
        };
 };
 
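Outside the diff, the shape of the new arch_uprobe is easier to see: fixups shrinks from a standalone u16 to a u8 inside the new def member, and branch and def overlay each other in the anonymous union, with ->ops deciding which view is live. A minimal stand-alone C sketch of that idea (type and field names here are illustrative mirrors, not the kernel's own definitions):

#include <stdint.h>
#include <stdio.h>

/* Illustrative mirror of the reshaped arch_uprobe (not the kernel struct). */
struct demo_uprobe {
        const void *ops;                /* selects which union member is valid */
        union {
                struct {
                        int32_t offs;
                        uint8_t ilen;
                        uint8_t opc1;
                } branch;               /* live when ops points at branch_xol_ops */
                struct {
                        uint8_t fixups;
                        uint8_t ilen;
                } def;                  /* live when ops points at default_xol_ops */
        };
};

int main(void)
{
        /* branch and def share storage; only one view is meaningful at a time */
        printf("demo_uprobe is %zu bytes; union members overlay\n",
               sizeof(struct demo_uprobe));
        return 0;
}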
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9c0280f93d05..9b53940981b7 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -413,12 +413,11 @@ void set_personality_ia32(bool x32)
        set_thread_flag(TIF_ADDR32);
 
        /* Mark the associated mm as containing 32-bit tasks. */
-       if (current->mm)
-               current->mm->context.ia32_compat = 1;
-
        if (x32) {
                clear_thread_flag(TIF_IA32);
                set_thread_flag(TIF_X32);
+               if (current->mm)
+                       current->mm->context.ia32_compat = TIF_X32;
                current->personality &= ~READ_IMPLIES_EXEC;
                /* is_compat_task() uses the presence of the x32
                   syscall bit flag to determine compat status */
@@ -426,6 +425,8 @@ void set_personality_ia32(bool x32)
        } else {
                set_thread_flag(TIF_IA32);
                clear_thread_flag(TIF_X32);
+               if (current->mm)
+                       current->mm->context.ia32_compat = TIF_IA32;
                current->personality |= force_personality32;
                /* Prepare the first "return" to user space */
                current_thread_info()->status |= TS_COMPAT;
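The point of storing the TIF value rather than the old literal 1 is that readers of ia32_compat can now tell an ia32 mm apart from an x32 mm; the uprobes side of this merge relies on exactly that in is_64bit_mm(). A stand-alone sketch of the check (the flag values are illustrative, and the real helper additionally honors CONFIG_IA32_EMULATION):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel's thread-flag constants. */
#define TIF_IA32 17
#define TIF_X32  30

struct demo_mm { unsigned long ia32_compat; };

/* Mirrors the is_64bit_mm() logic from this merge: an x32 mm still
 * executes 64-bit instructions; only a genuine ia32 mm does not. */
static bool is_64bit_mm(const struct demo_mm *mm)
{
        return mm->ia32_compat != TIF_IA32;
}

int main(void)
{
        struct demo_mm native = { 0 };
        struct demo_mm ia32   = { TIF_IA32 };
        struct demo_mm x32    = { TIF_X32 };

        printf("native: %d, ia32: %d, x32: %d\n",
               is_64bit_mm(&native), is_64bit_mm(&ia32), is_64bit_mm(&x32));
        return 0;
}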
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 57409f6b8c62..3fdb20548c4b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
+#include <linux/uprobes.h>
 #include <linux/string.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
@@ -136,6 +137,37 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
        return -1;
 }
 
+static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
+                               siginfo_t *info)
+{
+       unsigned long siaddr;
+       int sicode;
+
+       switch (trapnr) {
+       default:
+               return SEND_SIG_PRIV;
+
+       case X86_TRAP_DE:
+               sicode = FPE_INTDIV;
+               siaddr = uprobe_get_trap_addr(regs);
+               break;
+       case X86_TRAP_UD:
+               sicode = ILL_ILLOPN;
+               siaddr = uprobe_get_trap_addr(regs);
+               break;
+       case X86_TRAP_AC:
+               sicode = BUS_ADRALN;
+               siaddr = 0;
+               break;
+       }
+
+       info->si_signo = signr;
+       info->si_errno = 0;
+       info->si_code = sicode;
+       info->si_addr = (void __user *)siaddr;
+       return info;
+}
+
 static void __kprobes
 do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
        long error_code, siginfo_t *info)
@@ -168,60 +200,42 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
        }
 #endif
 
-       if (info)
-               force_sig_info(signr, info, tsk);
-       else
-               force_sig(signr, tsk);
+       force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
 }
 
-#define DO_ERROR(trapnr, signr, str, name)                             \
-dotraplinkage void do_##name(struct pt_regs *regs, long error_code)    \
-{                                                                      \
-       enum ctx_state prev_state;                                      \
-                                                                       \
-       prev_state = exception_enter();                                 \
-       if (notify_die(DIE_TRAP, str, regs, error_code,                 \
-                       trapnr, signr) == NOTIFY_STOP) {                \
-               exception_exit(prev_state);                             \
-               return;                                                 \
-       }                                                               \
-       conditional_sti(regs);                                          \
-       do_trap(trapnr, signr, str, regs, error_code, NULL);            \
-       exception_exit(prev_state);                                     \
+static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
+                         unsigned long trapnr, int signr)
+{
+       enum ctx_state prev_state = exception_enter();
+       siginfo_t info;
+
+       if (notify_die(DIE_TRAP, str, regs, error_code,
+                       trapnr, signr) != NOTIFY_STOP) {
+               conditional_sti(regs);
+               do_trap(trapnr, signr, str, regs, error_code,
+                       fill_trap_info(regs, signr, trapnr, &info));
+       }
+
+       exception_exit(prev_state);
 }
 
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)        \
+#define DO_ERROR(trapnr, signr, str, name)                             \
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)    \
 {                                                                      \
-       siginfo_t info;                                                 \
-       enum ctx_state prev_state;                                      \
-                                                                       \
-       info.si_signo = signr;                                          \
-       info.si_errno = 0;                                              \
-       info.si_code = sicode;                                          \
-       info.si_addr = (void __user *)siaddr;                           \
-       prev_state = exception_enter();                                 \
-       if (notify_die(DIE_TRAP, str, regs, error_code,                 \
-                       trapnr, signr) == NOTIFY_STOP) {                \
-               exception_exit(prev_state);                             \
-               return;                                                 \
-       }                                                               \
-       conditional_sti(regs);                                          \
-       do_trap(trapnr, signr, str, regs, error_code, &info);           \
-       exception_exit(prev_state);                                     \
+       do_error_trap(regs, error_code, str, trapnr, signr);            \
 }
 
-DO_ERROR_INFO(X86_TRAP_DE,     SIGFPE,  "divide error",                divide_error,                FPE_INTDIV, regs->ip )
-DO_ERROR     (X86_TRAP_OF,     SIGSEGV, "overflow",                    overflow                              )
-DO_ERROR     (X86_TRAP_BR,     SIGSEGV, "bounds",                      bounds                                )
-DO_ERROR_INFO(X86_TRAP_UD,     SIGILL,  "invalid opcode",              invalid_op,                  ILL_ILLOPN, regs->ip )
-DO_ERROR     (X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun          )
-DO_ERROR     (X86_TRAP_TS,     SIGSEGV, "invalid TSS",                 invalid_TSS                           )
-DO_ERROR     (X86_TRAP_NP,     SIGBUS,  "segment not present",         segment_not_present                   )
+DO_ERROR(X86_TRAP_DE,     SIGFPE,  "divide error",             divide_error)
+DO_ERROR(X86_TRAP_OF,     SIGSEGV, "overflow",                 overflow)
+DO_ERROR(X86_TRAP_BR,     SIGSEGV, "bounds",                   bounds)
+DO_ERROR(X86_TRAP_UD,     SIGILL,  "invalid opcode",           invalid_op)
+DO_ERROR(X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",coprocessor_segment_overrun)
+DO_ERROR(X86_TRAP_TS,     SIGSEGV, "invalid TSS",              invalid_TSS)
+DO_ERROR(X86_TRAP_NP,     SIGBUS,  "segment not present",      segment_not_present)
 #ifdef CONFIG_X86_32
-DO_ERROR     (X86_TRAP_SS,     SIGBUS,  "stack segment",               stack_segment                         )
+DO_ERROR(X86_TRAP_SS,     SIGBUS,  "stack segment",            stack_segment)
 #endif
-DO_ERROR_INFO(X86_TRAP_AC,     SIGBUS,  "alignment check",             alignment_check,             BUS_ADRALN, 0 )
+DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",          alignment_check)
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
@@ -305,7 +319,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
                pr_cont("\n");
        }
 
-       force_sig(SIGSEGV, tsk);
+       force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
 exit:
        exception_exit(prev_state);
 }
@@ -488,7 +502,7 @@ exit:
  * the correct behaviour even in the presence of the asynchronous
  * IRQ13 behaviour
  */
-void math_error(struct pt_regs *regs, int error_code, int trapnr)
+static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 {
        struct task_struct *task = current;
        siginfo_t info;
@@ -518,7 +532,7 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr)
        task->thread.error_code = error_code;
        info.si_signo = SIGFPE;
        info.si_errno = 0;
-       info.si_addr = (void __user *)regs->ip;
+       info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
        if (trapnr == X86_TRAP_MF) {
                unsigned short cwd, swd;
                /*
@@ -645,7 +659,7 @@ void math_state_restore(void)
         */
        if (unlikely(restore_fpu_checking(tsk))) {
                drop_init_fpu(tsk);
-               force_sig(SIGSEGV, tsk);
+               force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
                return;
        }
 
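What fill_trap_info() changes is visible from user space: the siginfo delivered with, say, SIGFPE now carries an si_addr taken from uprobe_get_trap_addr(), i.e. the probed instruction's original address rather than the XOL slot when a uprobe happens to be single-stepping. A small demo of reading that siginfo for an ordinary (non-uprobe) divide error on x86 Linux:

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

static void fpe_handler(int sig, siginfo_t *info, void *ctx)
{
        (void)sig; (void)ctx;
        /* si_code and si_addr are the fields fill_trap_info() populates */
        printf("SIGFPE: si_code=%d si_addr=%p\n", info->si_code, info->si_addr);
        exit(0);
}

int main(void)
{
        struct sigaction sa = { .sa_sigaction = fpe_handler,
                                .sa_flags = SA_SIGINFO };
        volatile int zero = 0;

        sigaction(SIGFPE, &sa, NULL);
        return 1 / zero;        /* X86_TRAP_DE -> SIGFPE, FPE_INTDIV */
}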
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index ace22916ade3..159ca520ef5b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -32,20 +32,20 @@
 
 /* Post-execution fixups. */
 
-/* No fixup needed */
-#define UPROBE_FIX_NONE                0x0
-
 /* Adjust IP back to vicinity of actual insn */
-#define UPROBE_FIX_IP          0x1
+#define UPROBE_FIX_IP          0x01
 
 /* Adjust the return address of a call insn */
-#define UPROBE_FIX_CALL                0x2
+#define UPROBE_FIX_CALL                0x02
 
 /* Instruction will modify TF, don't change it */
-#define UPROBE_FIX_SETF                0x4
+#define UPROBE_FIX_SETF                0x04
 
-#define UPROBE_FIX_RIP_AX      0x8000
-#define UPROBE_FIX_RIP_CX      0x4000
+#define UPROBE_FIX_RIP_SI      0x08
+#define UPROBE_FIX_RIP_DI      0x10
+#define UPROBE_FIX_RIP_BX      0x20
+#define UPROBE_FIX_RIP_MASK    \
+       (UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX)
 
 #define UPROBE_TRAP_NR         UINT_MAX
 
@@ -67,6 +67,7 @@
  * to keep gcc from statically optimizing it out, as variable_test_bit makes
  * some versions of gcc to think only *(unsigned long*) is used.
  */
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static volatile u32 good_insns_32[256 / 32] = {
        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
        /*      ----------------------------------------------  */
@@ -89,33 +90,12 @@ static volatile u32 good_insns_32[256 / 32] = {
        /*      ----------------------------------------------  */
        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
 };
+#else
+#define good_insns_32  NULL
+#endif
 
-/* Using this for both 64-bit and 32-bit apps */
-static volatile u32 good_2byte_insns[256 / 32] = {
-       /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
-       /*      ----------------------------------------------  */
-       W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
-       W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
-       W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
-       W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
-       W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
-       W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
-       W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
-       W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
-       W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
-       W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
-       W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
-       W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
-       W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
-       W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
-       W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
-       W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
-       /*      ----------------------------------------------  */
-       /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
-};
-
-#ifdef CONFIG_X86_64
 /* Good-instruction tables for 64-bit apps */
+#if defined(CONFIG_X86_64)
 static volatile u32 good_insns_64[256 / 32] = {
        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
        /*      ----------------------------------------------  */
@@ -138,7 +118,33 @@ static volatile u32 good_insns_64[256 / 32] = {
        /*      ----------------------------------------------  */
        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
 };
+#else
+#define good_insns_64  NULL
 #endif
+
+/* Using this for both 64-bit and 32-bit apps */
+static volatile u32 good_2byte_insns[256 / 32] = {
+       /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+       /*      ----------------------------------------------  */
+       W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
+       W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
+       W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
+       W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
+       W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
+       W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
+       W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
+       W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
+       W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+       W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+       W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
+       W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+       W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
+       W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+       W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
+       W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
+       /*      ----------------------------------------------  */
+       /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+};
 #undef W
 
 /*
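The W() tables above pack one "is this opcode probeable" bit per opcode byte into eight 32-bit words, which is what lets uprobe_init_insn() below answer with a single test_bit(). A stand-alone sketch of the same packing (the helper names are illustrative, not the kernel's):

#include <stdint.h>
#include <stdio.h>

/* 256 one-bit flags packed into eight 32-bit words, one bit per opcode. */
static uint32_t good_insns[256 / 32];

static void mark_good(uint8_t opc)
{
        good_insns[opc >> 5] |= 1u << (opc & 31);
}

static int is_good(uint8_t opc)
{
        return (good_insns[opc >> 5] >> (opc & 31)) & 1;
}

int main(void)
{
        mark_good(0x90);        /* nop */
        printf("0x90 good: %d, 0x0f good: %d\n", is_good(0x90), is_good(0x0f));
        return 0;
}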
@@ -209,16 +215,25 @@ static bool is_prefix_bad(struct insn *insn)
        return false;
 }
 
-static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
+static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64)
 {
-       insn_init(insn, auprobe->insn, false);
+       u32 volatile *good_insns;
+
+       insn_init(insn, auprobe->insn, x86_64);
+       /* has the side-effect of processing the entire instruction */
+       insn_get_length(insn);
+       if (WARN_ON_ONCE(!insn_complete(insn)))
+               return -ENOEXEC;
 
-       /* Skip good instruction prefixes; reject "bad" ones. */
-       insn_get_opcode(insn);
        if (is_prefix_bad(insn))
                return -ENOTSUPP;
 
-       if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
+       if (x86_64)
+               good_insns = good_insns_64;
+       else
+               good_insns = good_insns_32;
+
+       if (test_bit(OPCODE1(insn), (unsigned long *)good_insns))
                return 0;
 
        if (insn->opcode.nbytes == 2) {
@@ -230,14 +245,18 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
 }
 
 #ifdef CONFIG_X86_64
+static inline bool is_64bit_mm(struct mm_struct *mm)
+{
+       return  !config_enabled(CONFIG_IA32_EMULATION) ||
+               !(mm->context.ia32_compat == TIF_IA32);
+}
 /*
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
  * immediately. Otherwise, rewrite the instruction so that it accesses
- * its memory operand indirectly through a scratch register. Set
- * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
- * accordingly. (The contents of the scratch register will be saved
- * before we single-step the modified instruction, and restored
- * afterward.)
+ * its memory operand indirectly through a scratch register. Set
+ * def->fixups accordingly. (The contents of the scratch register
+ * will be saved before we single-step the modified instruction,
+ * and restored afterward).
  *
  * We do this because a rip-relative instruction can access only a
  * relatively small area (+/- 2 GB from the instruction), and the XOL
@@ -248,164 +267,192 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
  *
  * Some useful facts about rip-relative instructions:
  *
- * - There's always a modrm byte.
+ * - There's always a modrm byte with bit layout "00 reg 101".
  * - There's never a SIB byte.
  * - The displacement is always 4 bytes.
+ * - REX.B=1 bit in REX prefix, which normally extends r/m field,
+ *   has no effect on rip-relative mode. It doesn't make modrm byte
+ *   with r/m=101 refer to register 1101 = R13.
  */
-static void
-handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
        u8 *cursor;
        u8 reg;
+       u8 reg2;
 
        if (!insn_rip_relative(insn))
                return;
 
        /*
-        * insn_rip_relative() would have decoded rex_prefix, modrm.
+        * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm.
         * Clear REX.b bit (extension of MODRM.rm field):
-        * we want to encode rax/rcx, not r8/r9.
+        * we want to encode low numbered reg, not r8+.
         */
        if (insn->rex_prefix.nbytes) {
                cursor = auprobe->insn + insn_offset_rex_prefix(insn);
-               *cursor &= 0xfe;        /* Clearing REX.B bit */
+               /* REX byte has 0100wrxb layout, clearing REX.b bit */
+               *cursor &= 0xfe;
+       }
+       /*
+        * Similar treatment for VEX3 prefix.
+        * TODO: add XOP/EVEX treatment when insn decoder supports them
+        */
+       if (insn->vex_prefix.nbytes == 3) {
+               /*
+                * vex2:     c5    rvvvvLpp   (has no b bit)
+                * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
+                * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
+                *   (evex will need setting of both b and x since
+                *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
+                * Setting VEX3.b (setting because it has inverted meaning):
+                */
+               cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
+               *cursor |= 0x20;
        }
 
        /*
+        * Convert from rip-relative addressing to register-relative addressing
+        * via a scratch register.
+        *
+        * This is tricky since there are insns with modrm byte
+        * which also use registers not encoded in modrm byte:
+        * [i]div/[i]mul: implicitly use dx:ax
+        * shift ops: implicitly use cx
+        * cmpxchg: implicitly uses ax
+        * cmpxchg8/16b: implicitly uses dx:ax and bx:cx
+        *   Encoding: 0f c7/1 modrm
+        *   The code below thinks that reg=1 (cx), chooses si as scratch.
+        * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m.
+        *   First appeared in Haswell (BMI2 insn). It is vex-encoded.
+        *   Example where none of bx,cx,dx can be used as scratch reg:
+        *   c4 e2 63 f6 0d disp32   mulx disp32(%rip),%ebx,%ecx
+        * [v]pcmpistri: implicitly uses cx, xmm0
+        * [v]pcmpistrm: implicitly uses xmm0
+        * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0
+        * [v]pcmpestrm: implicitly uses ax, dx, xmm0
+        *   Evil SSE4.2 string comparison ops from hell.
+        * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination.
+        *   Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm.
+        *   Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi).
+        *   AMD says it has no 3-operand form (vex.vvvv must be 1111)
+        *   and that it can have only register operands, not mem
+        *   (its modrm byte must have mode=11).
+        *   If these restrictions will ever be lifted,
+        *   we'll need code to prevent selection of di as scratch reg!
+        *
+        * Summary: I don't know any insns with modrm byte which
+        * use SI register implicitly. DI register is used only
+        * by one insn (maskmovq) and BX register is used
+        * only by one too (cmpxchg8b).
+        * BP is stack-segment based (may be a problem?).
+        * AX, DX, CX are off-limits (many implicit users).
+        * SP is unusable (it's stack pointer - think about "pop mem";
+        * also, rsp+disp32 needs sib encoding -> insn length change).
+        */
+
+       reg = MODRM_REG(insn);  /* Fetch modrm.reg */
+       reg2 = 0xff;            /* Fetch vex.vvvv */
+       if (insn->vex_prefix.nbytes == 2)
+               reg2 = insn->vex_prefix.bytes[1];
+       else if (insn->vex_prefix.nbytes == 3)
+               reg2 = insn->vex_prefix.bytes[2];
+       /*
+        * TODO: add XOP, EXEV vvvv reading.
+        *
+        * vex.vvvv field is in bits 6-3, bits are inverted.
+        * But in 32-bit mode, high-order bit may be ignored.
+        * Therefore, let's consider only 3 low-order bits.
+        */
+       reg2 = ((reg2 >> 3) & 0x7) ^ 0x7;
+       /*
+        * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15.
+        *
+        * Choose scratch reg. Order is important: must not select bx
+        * if we can use si (cmpxchg8b case!)
+        */
+       if (reg != 6 && reg2 != 6) {
+               reg2 = 6;
+               auprobe->def.fixups |= UPROBE_FIX_RIP_SI;
+       } else if (reg != 7 && reg2 != 7) {
+               reg2 = 7;
+               auprobe->def.fixups |= UPROBE_FIX_RIP_DI;
+               /* TODO (paranoia): force maskmovq to not use di */
+       } else {
+               reg2 = 3;
+               auprobe->def.fixups |= UPROBE_FIX_RIP_BX;
+       }
+       /*
         * Point cursor at the modrm byte.  The next 4 bytes are the
         * displacement.  Beyond the displacement, for some instructions,
         * is the immediate operand.
         */
        cursor = auprobe->insn + insn_offset_modrm(insn);
-       insn_get_length(insn);
-
        /*
-        * Convert from rip-relative addressing to indirect addressing
-        * via a scratch register.  Change the r/m field from 0x5 (%rip)
-        * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
+        * Change modrm from "00 reg 101" to "10 reg reg2". Example:
+        * 89 05 disp32  mov %eax,disp32(%rip) becomes
+        * 89 86 disp32  mov %eax,disp32(%rsi)
         */
-       reg = MODRM_REG(insn);
-       if (reg == 0) {
-               /*
-                * The register operand (if any) is either the A register
-                * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
-                * REX prefix) %r8.  In any case, we know the C register
-                * is NOT the register operand, so we use %rcx (register
-                * #1) for the scratch register.
-                */
-               auprobe->fixups = UPROBE_FIX_RIP_CX;
-               /* Change modrm from 00 000 101 to 00 000 001. */
-               *cursor = 0x1;
-       } else {
-               /* Use %rax (register #0) for the scratch register. */
-               auprobe->fixups = UPROBE_FIX_RIP_AX;
-               /* Change modrm from 00 xxx 101 to 00 xxx 000 */
-               *cursor = (reg << 3);
-       }
-
-       /* Target address = address of next instruction + (signed) offset */
-       auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
+       *cursor = 0x80 | (reg << 3) | reg2;
+}
 
-       /* Displacement field is gone; slide immediate field (if any) over. */
-       if (insn->immediate.nbytes) {
-               cursor++;
-               memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
-       }
+static inline unsigned long *
+scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+       if (auprobe->def.fixups & UPROBE_FIX_RIP_SI)
+               return &regs->si;
+       if (auprobe->def.fixups & UPROBE_FIX_RIP_DI)
+               return &regs->di;
+       return &regs->bx;
 }
 
 /*
  * If we're emulating a rip-relative instruction, save the contents
  * of the scratch register and store the target address in that register.
  */
-static void
-pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
-                               struct arch_uprobe_task *autask)
-{
-       if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
-               autask->saved_scratch_register = regs->ax;
-               regs->ax = current->utask->vaddr;
-               regs->ax += auprobe->rip_rela_target_address;
-       } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
-               autask->saved_scratch_register = regs->cx;
-               regs->cx = current->utask->vaddr;
-               regs->cx += auprobe->rip_rela_target_address;
-       }
-}
-
-static void
-handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-       if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
-               struct arch_uprobe_task *autask;
-
-               autask = &current->utask->autask;
-               if (auprobe->fixups & UPROBE_FIX_RIP_AX)
-                       regs->ax = autask->saved_scratch_register;
-               else
-                       regs->cx = autask->saved_scratch_register;
+       if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) {
+               struct uprobe_task *utask = current->utask;
+               unsigned long *sr = scratch_reg(auprobe, regs);
 
-               /*
-                * The original instruction includes a displacement, and so
-                * is 4 bytes longer than what we've just single-stepped.
-                * Caller may need to apply other fixups to handle stuff
-                * like "jmpq *...(%rip)" and "callq *...(%rip)".
-                */
-               if (correction)
-                       *correction += 4;
+               utask->autask.saved_scratch_register = *sr;
+               *sr = utask->vaddr + auprobe->def.ilen;
        }
 }
 
-static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-       insn_init(insn, auprobe->insn, true);
-
-       /* Skip good instruction prefixes; reject "bad" ones. */
-       insn_get_opcode(insn);
-       if (is_prefix_bad(insn))
-               return -ENOTSUPP;
+       if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) {
+               struct uprobe_task *utask = current->utask;
+               unsigned long *sr = scratch_reg(auprobe, regs);
 
-       if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
-               return 0;
-
-       if (insn->opcode.nbytes == 2) {
-               if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
-                       return 0;
+               *sr = utask->autask.saved_scratch_register;
        }
-       return -ENOTSUPP;
 }
-
-static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+#else /* 32-bit: */
+static inline bool is_64bit_mm(struct mm_struct *mm)
 {
-       if (mm->context.ia32_compat)
-               return validate_insn_32bits(auprobe, insn);
-       return validate_insn_64bits(auprobe, insn);
+       return false;
 }
-#else /* 32-bit: */
 /*
  * No RIP-relative addressing on 32-bit
  */
-static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 }
-static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
-                               struct arch_uprobe_task *autask)
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
-static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
-                                       long *correction)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
-
-static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
-{
-       return validate_insn_32bits(auprobe, insn);
-}
 #endif /* CONFIG_X86_64 */
 
 struct uprobe_xol_ops {
        bool    (*emulate)(struct arch_uprobe *, struct pt_regs *);
        int     (*pre_xol)(struct arch_uprobe *, struct pt_regs *);
        int     (*post_xol)(struct arch_uprobe *, struct pt_regs *);
+       void    (*abort)(struct arch_uprobe *, struct pt_regs *);
 };
 
 static inline int sizeof_long(void)
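A worked example of the rewrite riprel_analyze() performs: for mov %eax,disp32(%rip) (bytes 89 05 disp32), modrm.reg is 0, so %rsi is picked as the scratch register (reg2 = 6) and the new modrm byte is 0x80 | (0 << 3) | 6 = 0x86, giving mov %eax,disp32(%rsi), exactly as the comment in the hunk shows. A stand-alone sketch of the selection and byte rewrite (decoder details elided; assumes a plain one-byte opcode with no VEX prefix):

#include <stdint.h>
#include <stdio.h>

/* Illustrative mirror of riprel_analyze()'s scratch choice:
 * prefer si, then di, then bx, avoiding whatever modrm.reg/vex.vvvv use. */
static uint8_t pick_scratch(uint8_t reg, uint8_t reg2)
{
        if (reg != 6 && reg2 != 6)
                return 6;       /* si */
        if (reg != 7 && reg2 != 7)
                return 7;       /* di */
        return 3;               /* bx */
}

int main(void)
{
        /* 89 05 <disp32> = mov %eax,disp32(%rip); modrm is insn[1] */
        uint8_t insn[6] = { 0x89, 0x05, 0x78, 0x56, 0x34, 0x12 };
        uint8_t reg = (insn[1] >> 3) & 7;       /* modrm.reg = 0 (eax) */
        /* no VEX prefix: the 0xff sentinel becomes ((0xff >> 3) & 7) ^ 7 = 0 */
        uint8_t reg2 = pick_scratch(reg, ((0xff >> 3) & 7) ^ 7);

        insn[1] = 0x80 | (reg << 3) | reg2;     /* "10 reg reg2" */
        printf("new modrm: %#x -> mov %%eax,disp32(%%rsi)\n", insn[1]);
        return 0;
}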
@@ -415,50 +462,67 @@ static inline int sizeof_long(void)
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-       pre_xol_rip_insn(auprobe, regs, &current->utask->autask);
+       riprel_pre_xol(auprobe, regs);
        return 0;
 }
 
-/*
- * Adjust the return address pushed by a call insn executed out of line.
- */
-static int adjust_ret_addr(unsigned long sp, long correction)
+static int push_ret_address(struct pt_regs *regs, unsigned long ip)
 {
-       int rasize = sizeof_long();
-       long ra;
-
-       if (copy_from_user(&ra, (void __user *)sp, rasize))
-               return -EFAULT;
+       unsigned long new_sp = regs->sp - sizeof_long();
 
-       ra += correction;
-       if (copy_to_user((void __user *)sp, &ra, rasize))
+       if (copy_to_user((void __user *)new_sp, &ip, sizeof_long()))
                return -EFAULT;
 
+       regs->sp = new_sp;
        return 0;
 }
 
+/*
+ * We have to fix things up as follows:
+ *
+ * Typically, the new ip is relative to the copied instruction.  We need
+ * to make it relative to the original instruction (FIX_IP).  Exceptions
+ * are return instructions and absolute or indirect jump or call instructions.
+ *
+ * If the single-stepped instruction was a call, the return address that
+ * is atop the stack is the address following the copied instruction.  We
+ * need to make it the address following the original instruction (FIX_CALL).
+ *
+ * If the original instruction was a rip-relative instruction such as
+ * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
+ * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)".
+ * We need to restore the contents of the scratch register
+ * (FIX_RIP_reg).
+ */
 static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
        struct uprobe_task *utask = current->utask;
-       long correction = (long)(utask->vaddr - utask->xol_vaddr);
 
-       handle_riprel_post_xol(auprobe, regs, &correction);
-       if (auprobe->fixups & UPROBE_FIX_IP)
+       riprel_post_xol(auprobe, regs);
+       if (auprobe->def.fixups & UPROBE_FIX_IP) {
+               long correction = utask->vaddr - utask->xol_vaddr;
                regs->ip += correction;
-
-       if (auprobe->fixups & UPROBE_FIX_CALL) {
-               if (adjust_ret_addr(regs->sp, correction)) {
-                       regs->sp += sizeof_long();
+       } else if (auprobe->def.fixups & UPROBE_FIX_CALL) {
+               regs->sp += sizeof_long();
+               if (push_ret_address(regs, utask->vaddr + auprobe->def.ilen))
                        return -ERESTART;
-               }
        }
+       /* popf; tell the caller to not touch TF */
+       if (auprobe->def.fixups & UPROBE_FIX_SETF)
+               utask->autask.saved_tf = true;
 
        return 0;
 }
 
+static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+       riprel_post_xol(auprobe, regs);
+}
+
 static struct uprobe_xol_ops default_xol_ops = {
        .pre_xol  = default_pre_xol_op,
        .post_xol = default_post_xol_op,
+       .abort    = default_abort_op,
 };
 
 static bool branch_is_call(struct arch_uprobe *auprobe)
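push_ret_address() replaces the old adjust_ret_addr() approach: instead of letting the copied call push a wrong return address and then patching it on the user stack, default_post_xol_op() now pops the bad address (regs->sp += sizeof_long()) and pushes the correct one itself. A user-space sketch of that emulated-CALL push, with a plain array standing in for the user stack (the names and addresses are illustrative):

#include <stdio.h>

/* Fake register file: only the stack pointer matters here. */
struct fake_regs { unsigned long *sp; };

static int push_ret_address(struct fake_regs *regs, unsigned long ip)
{
        unsigned long *new_sp = regs->sp - 1;   /* sp -= sizeof(long) */

        *new_sp = ip;           /* kernel: copy_to_user(new_sp, &ip, ...) */
        regs->sp = new_sp;
        return 0;
}

int main(void)
{
        unsigned long stack[8];
        struct fake_regs regs = { .sp = &stack[8] };

        /* FIX_CALL path: push the address after the *original* insn,
         * i.e. utask->vaddr + auprobe->def.ilen */
        push_ret_address(&regs, 0x401000 + 5);
        printf("ret addr on stack: %#lx\n", *regs.sp);
        return 0;
}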
@@ -520,7 +584,6 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
        unsigned long offs = (long)auprobe->branch.offs;
 
        if (branch_is_call(auprobe)) {
-               unsigned long new_sp = regs->sp - sizeof_long();
                /*
                 * If it fails we execute this (mangled, see the comment in
                 * branch_clear_offset) insn out-of-line. In the likely case
@@ -530,9 +593,8 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
                 *
                 * But there is corner case, see the comment in ->post_xol().
                 */
-               if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long()))
+               if (push_ret_address(regs, new_ip))
                        return false;
-               regs->sp = new_sp;
        } else if (!check_jmp_cond(auprobe, regs)) {
                offs = 0;
        }
@@ -583,11 +645,7 @@ static struct uprobe_xol_ops branch_xol_ops = {
 static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 {
        u8 opc1 = OPCODE1(insn);
-
-       /* has the side-effect of processing the entire instruction */
-       insn_get_length(insn);
-       if (WARN_ON_ONCE(!insn_complete(insn)))
-               return -ENOEXEC;
+       int i;
 
        switch (opc1) {
        case 0xeb:      /* jmp 8 */
@@ -612,6 +670,16 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
                return -ENOSYS;
        }
 
+       /*
+        * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported.
+        * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
+        * No one uses these insns, reject any branch insns with such prefix.
+        */
+       for (i = 0; i < insn->prefixes.nbytes; i++) {
+               if (insn->prefixes.bytes[i] == 0x66)
+                       return -ENOTSUPP;
+       }
+
        auprobe->branch.opc1 = opc1;
        auprobe->branch.ilen = insn->length;
        auprobe->branch.offs = insn->immediate.value;
@@ -630,10 +698,10 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
 {
        struct insn insn;
-       bool fix_ip = true, fix_call = false;
+       u8 fix_ip_or_call = UPROBE_FIX_IP;
        int ret;
 
-       ret = validate_insn_bits(auprobe, mm, &insn);
+       ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm));
        if (ret)
                return ret;
 
@@ -642,44 +710,40 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
                return ret;
 
        /*
-        * Figure out which fixups arch_uprobe_post_xol() will need to perform,
-        * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups
-        * is either zero or it reflects rip-related fixups.
+        * Figure out which fixups default_post_xol_op() will need to perform,
+        * and annotate def->fixups accordingly. To start with, ->fixups is
+        * either zero or it reflects rip-related fixups.
         */
        switch (OPCODE1(&insn)) {
        case 0x9d:              /* popf */
-               auprobe->fixups |= UPROBE_FIX_SETF;
+               auprobe->def.fixups |= UPROBE_FIX_SETF;
                break;
        case 0xc3:              /* ret or lret -- ip is correct */
        case 0xcb:
        case 0xc2:
        case 0xca:
-               fix_ip = false;
+       case 0xea:              /* jmp absolute -- ip is correct */
+               fix_ip_or_call = 0;
                break;
        case 0x9a:              /* call absolute - Fix return addr, not ip */
-               fix_call = true;
-               fix_ip = false;
-               break;
-       case 0xea:              /* jmp absolute -- ip is correct */
-               fix_ip = false;
+               fix_ip_or_call = UPROBE_FIX_CALL;
                break;
        case 0xff:
-               insn_get_modrm(&insn);
                switch (MODRM_REG(&insn)) {
                case 2: case 3:                 /* call or lcall, indirect */
-                       fix_call = true;
+                       fix_ip_or_call = UPROBE_FIX_CALL;
+                       break;
                case 4: case 5:                 /* jmp or ljmp, indirect */
-                       fix_ip = false;
+                       fix_ip_or_call = 0;
+                       break;
                }
                /* fall through */
        default:
-               handle_riprel_insn(auprobe, &insn);
+               riprel_analyze(auprobe, &insn);
        }
 
-       if (fix_ip)
-               auprobe->fixups |= UPROBE_FIX_IP;
-       if (fix_call)
-               auprobe->fixups |= UPROBE_FIX_CALL;
+       auprobe->def.ilen = insn.length;
+       auprobe->def.fixups |= fix_ip_or_call;
 
        auprobe->ops = &default_xol_ops;
        return 0;
@@ -694,6 +758,12 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
        struct uprobe_task *utask = current->utask;
 
+       if (auprobe->ops->pre_xol) {
+               int err = auprobe->ops->pre_xol(auprobe, regs);
+               if (err)
+                       return err;
+       }
+
        regs->ip = utask->xol_vaddr;
        utask->autask.saved_trap_nr = current->thread.trap_nr;
        current->thread.trap_nr = UPROBE_TRAP_NR;
@@ -703,8 +773,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
        if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
                set_task_blockstep(current, false);
 
-       if (auprobe->ops->pre_xol)
-               return auprobe->ops->pre_xol(auprobe, regs);
        return 0;
 }
 
@@ -732,56 +800,42 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
  * single-step, we single-stepped a copy of the instruction.
  *
  * This function prepares to resume execution after the single-step.
- * We have to fix things up as follows:
- *
- * Typically, the new ip is relative to the copied instruction.  We need
- * to make it relative to the original instruction (FIX_IP).  Exceptions
- * are return instructions and absolute or indirect jump or call instructions.
- *
- * If the single-stepped instruction was a call, the return address that
- * is atop the stack is the address following the copied instruction.  We
- * need to make it the address following the original instruction (FIX_CALL).
- *
- * If the original instruction was a rip-relative instruction such as
- * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
- * instruction using a scratch register -- e.g., "movl %edx,(%rax)".
- * We need to restore the contents of the scratch register and adjust
- * the ip, keeping in mind that the instruction we executed is 4 bytes
- * shorter than the original instruction (since we squeezed out the offset
- * field).  (FIX_RIP_AX or FIX_RIP_CX)
  */
 int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
        struct uprobe_task *utask = current->utask;
+       bool send_sigtrap = utask->autask.saved_tf;
+       int err = 0;
 
        WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
+       current->thread.trap_nr = utask->autask.saved_trap_nr;
 
        if (auprobe->ops->post_xol) {
-               int err = auprobe->ops->post_xol(auprobe, regs);
+               err = auprobe->ops->post_xol(auprobe, regs);
                if (err) {
-                       arch_uprobe_abort_xol(auprobe, regs);
                        /*
-                        * Restart the probed insn. ->post_xol() must ensure
-                        * this is really possible if it returns -ERESTART.
+                        * Restore ->ip for restart or post mortem analysis.
+                        * ->post_xol() must not return -ERESTART unless this
+                        * is really possible.
                         */
+                       regs->ip = utask->vaddr;
                        if (err == -ERESTART)
-                               return 0;
-                       return err;
+                               err = 0;
+                       send_sigtrap = false;
                }
        }
-
-       current->thread.trap_nr = utask->autask.saved_trap_nr;
        /*
         * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
         * so we can get an extra SIGTRAP if we do not clear TF. We need
         * to examine the opcode to make it right.
         */
-       if (utask->autask.saved_tf)
+       if (send_sigtrap)
                send_sig(SIGTRAP, current, 0);
-       else if (!(auprobe->fixups & UPROBE_FIX_SETF))
+
+       if (!utask->autask.saved_tf)
                regs->flags &= ~X86_EFLAGS_TF;
 
-       return 0;
+       return err;
 }
 
 /* callback routine for handling exceptions. */
@@ -815,18 +869,18 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
 
 /*
  * This function gets called when XOL instruction either gets trapped or
- * the thread has a fatal signal, or if arch_uprobe_post_xol() failed.
- * Reset the instruction pointer to its probed address for the potential
- * restart or for post mortem analysis.
+ * the thread has a fatal signal. Reset the instruction pointer to its
+ * probed address for the potential restart or for post mortem analysis.
  */
 void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
        struct uprobe_task *utask = current->utask;
 
-       current->thread.trap_nr = utask->autask.saved_trap_nr;
-       handle_riprel_post_xol(auprobe, regs, NULL);
-       instruction_pointer_set(regs, utask->vaddr);
+       if (auprobe->ops->abort)
+               auprobe->ops->abort(auprobe, regs);
 
+       current->thread.trap_nr = utask->autask.saved_trap_nr;
+       regs->ip = utask->vaddr;
        /* clear TF if it was set by us in arch_uprobe_pre_xol() */
        if (!utask->autask.saved_tf)
                regs->flags &= ~X86_EFLAGS_TF;