aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2014-06-05 05:18:03 -0400
committerIngo Molnar <mingo@kernel.org>2014-06-05 06:26:27 -0400
commitc56d34064b6eb9f9cde9e35bbfe16eedf3d81f94 (patch)
tree9877ef9e1b238e14a1878f10d51ea55fbca5f619 /arch/x86
parentb13fa91421213a8d1fd05086050f05e994f3b72d (diff)
parenta03b1e1c372b60183b8141cdd161316429fab5ac (diff)
Merge branch 'perf/uprobes' into perf/core
These bits from Oleg are fully cooked, ship them to Linus. Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/traps.h1
-rw-r--r--arch/x86/include/asm/uprobes.h10
-rw-r--r--arch/x86/kernel/process_64.c7
-rw-r--r--arch/x86/kernel/traps.c110
-rw-r--r--arch/x86/kernel/uprobes.c506
5 files changed, 351 insertions, 283 deletions
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 58d66fe06b61..a7b212db9e04 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -98,7 +98,6 @@ static inline int get_si_code(unsigned long condition)
98 98
99extern int panic_on_unrecovered_nmi; 99extern int panic_on_unrecovered_nmi;
100 100
101void math_error(struct pt_regs *, int, int);
102void math_emulate(struct math_emu_info *); 101void math_emulate(struct math_emu_info *);
103#ifndef CONFIG_X86_32 102#ifndef CONFIG_X86_32
104asmlinkage void smp_thermal_interrupt(void); 103asmlinkage void smp_thermal_interrupt(void);
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 93bee7b93854..7be3c079e389 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -41,18 +41,18 @@ struct arch_uprobe {
41 u8 ixol[MAX_UINSN_BYTES]; 41 u8 ixol[MAX_UINSN_BYTES];
42 }; 42 };
43 43
44 u16 fixups;
45 const struct uprobe_xol_ops *ops; 44 const struct uprobe_xol_ops *ops;
46 45
47 union { 46 union {
48#ifdef CONFIG_X86_64
49 unsigned long rip_rela_target_address;
50#endif
51 struct { 47 struct {
52 s32 offs; 48 s32 offs;
53 u8 ilen; 49 u8 ilen;
54 u8 opc1; 50 u8 opc1;
55 } branch; 51 } branch;
52 struct {
53 u8 fixups;
54 u8 ilen;
55 } def;
56 }; 56 };
57}; 57};
58 58
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9c0280f93d05..9b53940981b7 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -413,12 +413,11 @@ void set_personality_ia32(bool x32)
413 set_thread_flag(TIF_ADDR32); 413 set_thread_flag(TIF_ADDR32);
414 414
415 /* Mark the associated mm as containing 32-bit tasks. */ 415 /* Mark the associated mm as containing 32-bit tasks. */
416 if (current->mm)
417 current->mm->context.ia32_compat = 1;
418
419 if (x32) { 416 if (x32) {
420 clear_thread_flag(TIF_IA32); 417 clear_thread_flag(TIF_IA32);
421 set_thread_flag(TIF_X32); 418 set_thread_flag(TIF_X32);
419 if (current->mm)
420 current->mm->context.ia32_compat = TIF_X32;
422 current->personality &= ~READ_IMPLIES_EXEC; 421 current->personality &= ~READ_IMPLIES_EXEC;
423 /* is_compat_task() uses the presence of the x32 422 /* is_compat_task() uses the presence of the x32
424 syscall bit flag to determine compat status */ 423 syscall bit flag to determine compat status */
@@ -426,6 +425,8 @@ void set_personality_ia32(bool x32)
426 } else { 425 } else {
427 set_thread_flag(TIF_IA32); 426 set_thread_flag(TIF_IA32);
428 clear_thread_flag(TIF_X32); 427 clear_thread_flag(TIF_X32);
428 if (current->mm)
429 current->mm->context.ia32_compat = TIF_IA32;
429 current->personality |= force_personality32; 430 current->personality |= force_personality32;
430 /* Prepare the first "return" to user space */ 431 /* Prepare the first "return" to user space */
431 current_thread_info()->status |= TS_COMPAT; 432 current_thread_info()->status |= TS_COMPAT;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 57409f6b8c62..3fdb20548c4b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -23,6 +23,7 @@
23#include <linux/kernel.h> 23#include <linux/kernel.h>
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/ptrace.h> 25#include <linux/ptrace.h>
26#include <linux/uprobes.h>
26#include <linux/string.h> 27#include <linux/string.h>
27#include <linux/delay.h> 28#include <linux/delay.h>
28#include <linux/errno.h> 29#include <linux/errno.h>
@@ -136,6 +137,37 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
136 return -1; 137 return -1;
137} 138}
138 139
140static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
141 siginfo_t *info)
142{
143 unsigned long siaddr;
144 int sicode;
145
146 switch (trapnr) {
147 default:
148 return SEND_SIG_PRIV;
149
150 case X86_TRAP_DE:
151 sicode = FPE_INTDIV;
152 siaddr = uprobe_get_trap_addr(regs);
153 break;
154 case X86_TRAP_UD:
155 sicode = ILL_ILLOPN;
156 siaddr = uprobe_get_trap_addr(regs);
157 break;
158 case X86_TRAP_AC:
159 sicode = BUS_ADRALN;
160 siaddr = 0;
161 break;
162 }
163
164 info->si_signo = signr;
165 info->si_errno = 0;
166 info->si_code = sicode;
167 info->si_addr = (void __user *)siaddr;
168 return info;
169}
170
139static void __kprobes 171static void __kprobes
140do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, 172do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
141 long error_code, siginfo_t *info) 173 long error_code, siginfo_t *info)
@@ -168,60 +200,42 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
168 } 200 }
169#endif 201#endif
170 202
171 if (info) 203 force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
172 force_sig_info(signr, info, tsk);
173 else
174 force_sig(signr, tsk);
175} 204}
176 205
177#define DO_ERROR(trapnr, signr, str, name) \ 206static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
178dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ 207 unsigned long trapnr, int signr)
179{ \ 208{
180 enum ctx_state prev_state; \ 209 enum ctx_state prev_state = exception_enter();
181 \ 210 siginfo_t info;
182 prev_state = exception_enter(); \ 211
183 if (notify_die(DIE_TRAP, str, regs, error_code, \ 212 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
184 trapnr, signr) == NOTIFY_STOP) { \ 213 NOTIFY_STOP) {
185 exception_exit(prev_state); \ 214 conditional_sti(regs);
186 return; \ 215 do_trap(trapnr, signr, str, regs, error_code,
187 } \ 216 fill_trap_info(regs, signr, trapnr, &info));
188 conditional_sti(regs); \ 217 }
189 do_trap(trapnr, signr, str, regs, error_code, NULL); \ 218
190 exception_exit(prev_state); \ 219 exception_exit(prev_state);
191} 220}
192 221
193#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ 222#define DO_ERROR(trapnr, signr, str, name) \
194dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ 223dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
195{ \ 224{ \
196 siginfo_t info; \ 225 do_error_trap(regs, error_code, str, trapnr, signr); \
197 enum ctx_state prev_state; \
198 \
199 info.si_signo = signr; \
200 info.si_errno = 0; \
201 info.si_code = sicode; \
202 info.si_addr = (void __user *)siaddr; \
203 prev_state = exception_enter(); \
204 if (notify_die(DIE_TRAP, str, regs, error_code, \
205 trapnr, signr) == NOTIFY_STOP) { \
206 exception_exit(prev_state); \
207 return; \
208 } \
209 conditional_sti(regs); \
210 do_trap(trapnr, signr, str, regs, error_code, &info); \
211 exception_exit(prev_state); \
212} 226}
213 227
214DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip ) 228DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error)
215DO_ERROR (X86_TRAP_OF, SIGSEGV, "overflow", overflow ) 229DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow)
216DO_ERROR (X86_TRAP_BR, SIGSEGV, "bounds", bounds ) 230DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds)
217DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip ) 231DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op)
218DO_ERROR (X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun ) 232DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun)
219DO_ERROR (X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS ) 233DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS)
220DO_ERROR (X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present ) 234DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
221#ifdef CONFIG_X86_32 235#ifdef CONFIG_X86_32
222DO_ERROR (X86_TRAP_SS, SIGBUS, "stack segment", stack_segment ) 236DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
223#endif 237#endif
224DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0 ) 238DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check)
225 239
226#ifdef CONFIG_X86_64 240#ifdef CONFIG_X86_64
227/* Runs on IST stack */ 241/* Runs on IST stack */
@@ -305,7 +319,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
305 pr_cont("\n"); 319 pr_cont("\n");
306 } 320 }
307 321
308 force_sig(SIGSEGV, tsk); 322 force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
309exit: 323exit:
310 exception_exit(prev_state); 324 exception_exit(prev_state);
311} 325}
@@ -488,7 +502,7 @@ exit:
488 * the correct behaviour even in the presence of the asynchronous 502 * the correct behaviour even in the presence of the asynchronous
489 * IRQ13 behaviour 503 * IRQ13 behaviour
490 */ 504 */
491void math_error(struct pt_regs *regs, int error_code, int trapnr) 505static void math_error(struct pt_regs *regs, int error_code, int trapnr)
492{ 506{
493 struct task_struct *task = current; 507 struct task_struct *task = current;
494 siginfo_t info; 508 siginfo_t info;
@@ -518,7 +532,7 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr)
518 task->thread.error_code = error_code; 532 task->thread.error_code = error_code;
519 info.si_signo = SIGFPE; 533 info.si_signo = SIGFPE;
520 info.si_errno = 0; 534 info.si_errno = 0;
521 info.si_addr = (void __user *)regs->ip; 535 info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
522 if (trapnr == X86_TRAP_MF) { 536 if (trapnr == X86_TRAP_MF) {
523 unsigned short cwd, swd; 537 unsigned short cwd, swd;
524 /* 538 /*
@@ -645,7 +659,7 @@ void math_state_restore(void)
645 */ 659 */
646 if (unlikely(restore_fpu_checking(tsk))) { 660 if (unlikely(restore_fpu_checking(tsk))) {
647 drop_init_fpu(tsk); 661 drop_init_fpu(tsk);
648 force_sig(SIGSEGV, tsk); 662 force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
649 return; 663 return;
650 } 664 }
651 665
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index ace22916ade3..159ca520ef5b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -32,20 +32,20 @@
32 32
33/* Post-execution fixups. */ 33/* Post-execution fixups. */
34 34
35/* No fixup needed */
36#define UPROBE_FIX_NONE 0x0
37
38/* Adjust IP back to vicinity of actual insn */ 35/* Adjust IP back to vicinity of actual insn */
39#define UPROBE_FIX_IP 0x1 36#define UPROBE_FIX_IP 0x01
40 37
41/* Adjust the return address of a call insn */ 38/* Adjust the return address of a call insn */
42#define UPROBE_FIX_CALL 0x2 39#define UPROBE_FIX_CALL 0x02
43 40
44/* Instruction will modify TF, don't change it */ 41/* Instruction will modify TF, don't change it */
45#define UPROBE_FIX_SETF 0x4 42#define UPROBE_FIX_SETF 0x04
46 43
47#define UPROBE_FIX_RIP_AX 0x8000 44#define UPROBE_FIX_RIP_SI 0x08
48#define UPROBE_FIX_RIP_CX 0x4000 45#define UPROBE_FIX_RIP_DI 0x10
46#define UPROBE_FIX_RIP_BX 0x20
47#define UPROBE_FIX_RIP_MASK \
48 (UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX)
49 49
50#define UPROBE_TRAP_NR UINT_MAX 50#define UPROBE_TRAP_NR UINT_MAX
51 51
@@ -67,6 +67,7 @@
67 * to keep gcc from statically optimizing it out, as variable_test_bit makes 67 * to keep gcc from statically optimizing it out, as variable_test_bit makes
68 * some versions of gcc think only *(unsigned long*) is used. 68 * some versions of gcc think only *(unsigned long*) is used.
69 */ 69 */
70#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
70static volatile u32 good_insns_32[256 / 32] = { 71static volatile u32 good_insns_32[256 / 32] = {
71 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 72 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
72 /* ---------------------------------------------- */ 73 /* ---------------------------------------------- */
@@ -89,33 +90,12 @@ static volatile u32 good_insns_32[256 / 32] = {
89 /* ---------------------------------------------- */ 90 /* ---------------------------------------------- */
90 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 91 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
91}; 92};
93#else
94#define good_insns_32 NULL
95#endif
92 96
93/* Using this for both 64-bit and 32-bit apps */
94static volatile u32 good_2byte_insns[256 / 32] = {
95 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
96 /* ---------------------------------------------- */
97 W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
98 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
99 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
100 W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
101 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
102 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
103 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
104 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
105 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
106 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
107 W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
108 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
109 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
110 W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
111 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
112 W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */
113 /* ---------------------------------------------- */
114 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
115};
116
117#ifdef CONFIG_X86_64
118/* Good-instruction tables for 64-bit apps */ 97/* Good-instruction tables for 64-bit apps */
98#if defined(CONFIG_X86_64)
119static volatile u32 good_insns_64[256 / 32] = { 99static volatile u32 good_insns_64[256 / 32] = {
120 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 100 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
121 /* ---------------------------------------------- */ 101 /* ---------------------------------------------- */
@@ -138,7 +118,33 @@ static volatile u32 good_insns_64[256 / 32] = {
138 /* ---------------------------------------------- */ 118 /* ---------------------------------------------- */
139 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 119 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
140}; 120};
121#else
122#define good_insns_64 NULL
141#endif 123#endif
124
125/* Using this for both 64-bit and 32-bit apps */
126static volatile u32 good_2byte_insns[256 / 32] = {
127 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
128 /* ---------------------------------------------- */
129 W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
130 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
131 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
132 W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
133 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
134 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
135 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
136 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
137 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
138 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
139 W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
140 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
141 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
142 W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
143 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
144 W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */
145 /* ---------------------------------------------- */
146 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
147};
142#undef W 148#undef W
143 149
144/* 150/*
@@ -209,16 +215,25 @@ static bool is_prefix_bad(struct insn *insn)
209 return false; 215 return false;
210} 216}
211 217
212static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) 218static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64)
213{ 219{
214 insn_init(insn, auprobe->insn, false); 220 u32 volatile *good_insns;
221
222 insn_init(insn, auprobe->insn, x86_64);
223 /* has the side-effect of processing the entire instruction */
224 insn_get_length(insn);
225 if (WARN_ON_ONCE(!insn_complete(insn)))
226 return -ENOEXEC;
215 227
216 /* Skip good instruction prefixes; reject "bad" ones. */
217 insn_get_opcode(insn);
218 if (is_prefix_bad(insn)) 228 if (is_prefix_bad(insn))
219 return -ENOTSUPP; 229 return -ENOTSUPP;
220 230
221 if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32)) 231 if (x86_64)
232 good_insns = good_insns_64;
233 else
234 good_insns = good_insns_32;
235
236 if (test_bit(OPCODE1(insn), (unsigned long *)good_insns))
222 return 0; 237 return 0;
223 238
224 if (insn->opcode.nbytes == 2) { 239 if (insn->opcode.nbytes == 2) {
@@ -230,14 +245,18 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
230} 245}
231 246
232#ifdef CONFIG_X86_64 247#ifdef CONFIG_X86_64
248static inline bool is_64bit_mm(struct mm_struct *mm)
249{
250 return !config_enabled(CONFIG_IA32_EMULATION) ||
251 !(mm->context.ia32_compat == TIF_IA32);
252}
233/* 253/*
234 * If arch_uprobe->insn doesn't use rip-relative addressing, return 254 * If arch_uprobe->insn doesn't use rip-relative addressing, return
235 * immediately. Otherwise, rewrite the instruction so that it accesses 255 * immediately. Otherwise, rewrite the instruction so that it accesses
236 * its memory operand indirectly through a scratch register. Set 256 * its memory operand indirectly through a scratch register. Set
237 * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address 257 * def->fixups accordingly. (The contents of the scratch register
238 * accordingly. (The contents of the scratch register will be saved 258 * will be saved before we single-step the modified instruction,
239 * before we single-step the modified instruction, and restored 259 * and restored afterward).
240 * afterward.)
241 * 260 *
242 * We do this because a rip-relative instruction can access only a 261 * We do this because a rip-relative instruction can access only a
243 * relatively small area (+/- 2 GB from the instruction), and the XOL 262 * relatively small area (+/- 2 GB from the instruction), and the XOL
@@ -248,164 +267,192 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
248 * 267 *
249 * Some useful facts about rip-relative instructions: 268 * Some useful facts about rip-relative instructions:
250 * 269 *
251 * - There's always a modrm byte. 270 * - There's always a modrm byte with bit layout "00 reg 101".
252 * - There's never a SIB byte. 271 * - There's never a SIB byte.
253 * - The displacement is always 4 bytes. 272 * - The displacement is always 4 bytes.
273 * - REX.B=1 bit in REX prefix, which normally extends r/m field,
274 * has no effect on rip-relative mode. It doesn't make modrm byte
275 * with r/m=101 refer to register 1101 = R13.
254 */ 276 */
255static void 277static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
256handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
257{ 278{
258 u8 *cursor; 279 u8 *cursor;
259 u8 reg; 280 u8 reg;
281 u8 reg2;
260 282
261 if (!insn_rip_relative(insn)) 283 if (!insn_rip_relative(insn))
262 return; 284 return;
263 285
264 /* 286 /*
265 * insn_rip_relative() would have decoded rex_prefix, modrm. 287 * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm.
266 * Clear REX.b bit (extension of MODRM.rm field): 288 * Clear REX.b bit (extension of MODRM.rm field):
267 * we want to encode rax/rcx, not r8/r9. 289 * we want to encode low numbered reg, not r8+.
268 */ 290 */
269 if (insn->rex_prefix.nbytes) { 291 if (insn->rex_prefix.nbytes) {
270 cursor = auprobe->insn + insn_offset_rex_prefix(insn); 292 cursor = auprobe->insn + insn_offset_rex_prefix(insn);
271 *cursor &= 0xfe; /* Clearing REX.B bit */ 293 /* REX byte has 0100wrxb layout, clearing REX.b bit */
294 *cursor &= 0xfe;
295 }
296 /*
297 * Similar treatment for VEX3 prefix.
298 * TODO: add XOP/EVEX treatment when insn decoder supports them
299 */
300 if (insn->vex_prefix.nbytes == 3) {
301 /*
302 * vex2: c5 rvvvvLpp (has no b bit)
303 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
304 * evex: 62 rxbR00mm wvvvv1pp zllBVaaa
305 * (evex will need setting of both b and x since
306 * in non-sib encoding evex.x is 4th bit of MODRM.rm)
307 * Setting VEX3.b (setting because it has inverted meaning):
308 */
309 cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
310 *cursor |= 0x20;
272 } 311 }
273 312
274 /* 313 /*
314 * Convert from rip-relative addressing to register-relative addressing
315 * via a scratch register.
316 *
317 * This is tricky since there are insns with modrm byte
318 * which also use registers not encoded in modrm byte:
319 * [i]div/[i]mul: implicitly use dx:ax
320 * shift ops: implicitly use cx
321 * cmpxchg: implicitly uses ax
322 * cmpxchg8/16b: implicitly uses dx:ax and bx:cx
323 * Encoding: 0f c7/1 modrm
324 * The code below thinks that reg=1 (cx), chooses si as scratch.
325 * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m.
326 * First appeared in Haswell (BMI2 insn). It is vex-encoded.
327 * Example where none of bx,cx,dx can be used as scratch reg:
328 * c4 e2 63 f6 0d disp32 mulx disp32(%rip),%ebx,%ecx
329 * [v]pcmpistri: implicitly uses cx, xmm0
330 * [v]pcmpistrm: implicitly uses xmm0
331 * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0
332 * [v]pcmpestrm: implicitly uses ax, dx, xmm0
333 * Evil SSE4.2 string comparison ops from hell.
334 * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination.
335 * Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm.
336 * Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi).
337 * AMD says it has no 3-operand form (vex.vvvv must be 1111)
338 * and that it can have only register operands, not mem
339 * (its modrm byte must have mode=11).
340 * If these restrictions are ever lifted,
341 * we'll need code to prevent selection of di as scratch reg!
342 *
343 * Summary: I don't know of any insns with a modrm byte which
344 * use the SI register implicitly. The DI register is used by
345 * only one insn (maskmovq), and the BX register is likewise
346 * used by only one (cmpxchg8b).
347 * BP is stack-segment based (may be a problem?).
348 * AX, DX, CX are off-limits (many implicit users).
349 * SP is unusable (it's stack pointer - think about "pop mem";
350 * also, rsp+disp32 needs sib encoding -> insn length change).
351 */
352
353 reg = MODRM_REG(insn); /* Fetch modrm.reg */
354 reg2 = 0xff; /* Fetch vex.vvvv */
355 if (insn->vex_prefix.nbytes == 2)
356 reg2 = insn->vex_prefix.bytes[1];
357 else if (insn->vex_prefix.nbytes == 3)
358 reg2 = insn->vex_prefix.bytes[2];
359 /*
360 * TODO: add XOP, EVEX vvvv reading.
361 *
362 * vex.vvvv field is in bits 6-3, bits are inverted.
363 * But in 32-bit mode, high-order bit may be ignored.
364 * Therefore, let's consider only 3 low-order bits.
365 */
366 reg2 = ((reg2 >> 3) & 0x7) ^ 0x7;
367 /*
368 * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15.
369 *
370 * Choose scratch reg. Order is important: must not select bx
371 * if we can use si (cmpxchg8b case!)
372 */
373 if (reg != 6 && reg2 != 6) {
374 reg2 = 6;
375 auprobe->def.fixups |= UPROBE_FIX_RIP_SI;
376 } else if (reg != 7 && reg2 != 7) {
377 reg2 = 7;
378 auprobe->def.fixups |= UPROBE_FIX_RIP_DI;
379 /* TODO (paranoia): force maskmovq to not use di */
380 } else {
381 reg2 = 3;
382 auprobe->def.fixups |= UPROBE_FIX_RIP_BX;
383 }
384 /*
275 * Point cursor at the modrm byte. The next 4 bytes are the 385 * Point cursor at the modrm byte. The next 4 bytes are the
276 * displacement. Beyond the displacement, for some instructions, 386 * displacement. Beyond the displacement, for some instructions,
277 * is the immediate operand. 387 * is the immediate operand.
278 */ 388 */
279 cursor = auprobe->insn + insn_offset_modrm(insn); 389 cursor = auprobe->insn + insn_offset_modrm(insn);
280 insn_get_length(insn);
281
282 /* 390 /*
283 * Convert from rip-relative addressing to indirect addressing 391 * Change modrm from "00 reg 101" to "10 reg reg2". Example:
284 * via a scratch register. Change the r/m field from 0x5 (%rip) 392 * 89 05 disp32 mov %eax,disp32(%rip) becomes
285 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field. 393 * 89 86 disp32 mov %eax,disp32(%rsi)
286 */ 394 */
287 reg = MODRM_REG(insn); 395 *cursor = 0x80 | (reg << 3) | reg2;
288 if (reg == 0) { 396}
289 /*
290 * The register operand (if any) is either the A register
291 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
292 * REX prefix) %r8. In any case, we know the C register
293 * is NOT the register operand, so we use %rcx (register
294 * #1) for the scratch register.
295 */
296 auprobe->fixups = UPROBE_FIX_RIP_CX;
297 /* Change modrm from 00 000 101 to 00 000 001. */
298 *cursor = 0x1;
299 } else {
300 /* Use %rax (register #0) for the scratch register. */
301 auprobe->fixups = UPROBE_FIX_RIP_AX;
302 /* Change modrm from 00 xxx 101 to 00 xxx 000 */
303 *cursor = (reg << 3);
304 }
305
306 /* Target address = address of next instruction + (signed) offset */
307 auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
308 397
309 /* Displacement field is gone; slide immediate field (if any) over. */ 398static inline unsigned long *
310 if (insn->immediate.nbytes) { 399scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
311 cursor++; 400{
312 memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); 401 if (auprobe->def.fixups & UPROBE_FIX_RIP_SI)
313 } 402 return &regs->si;
403 if (auprobe->def.fixups & UPROBE_FIX_RIP_DI)
404 return &regs->di;
405 return &regs->bx;
314} 406}
315 407
316/* 408/*
317 * If we're emulating a rip-relative instruction, save the contents 409 * If we're emulating a rip-relative instruction, save the contents
318 * of the scratch register and store the target address in that register. 410 * of the scratch register and store the target address in that register.
319 */ 411 */
320static void 412static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
321pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
322 struct arch_uprobe_task *autask)
323{
324 if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
325 autask->saved_scratch_register = regs->ax;
326 regs->ax = current->utask->vaddr;
327 regs->ax += auprobe->rip_rela_target_address;
328 } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
329 autask->saved_scratch_register = regs->cx;
330 regs->cx = current->utask->vaddr;
331 regs->cx += auprobe->rip_rela_target_address;
332 }
333}
334
335static void
336handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
337{ 413{
338 if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { 414 if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) {
339 struct arch_uprobe_task *autask; 415 struct uprobe_task *utask = current->utask;
340 416 unsigned long *sr = scratch_reg(auprobe, regs);
341 autask = &current->utask->autask;
342 if (auprobe->fixups & UPROBE_FIX_RIP_AX)
343 regs->ax = autask->saved_scratch_register;
344 else
345 regs->cx = autask->saved_scratch_register;
346 417
347 /* 418 utask->autask.saved_scratch_register = *sr;
348 * The original instruction includes a displacement, and so 419 *sr = utask->vaddr + auprobe->def.ilen;
349 * is 4 bytes longer than what we've just single-stepped.
350 * Caller may need to apply other fixups to handle stuff
351 * like "jmpq *...(%rip)" and "callq *...(%rip)".
352 */
353 if (correction)
354 *correction += 4;
355 } 420 }
356} 421}
357 422
358static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) 423static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
359{ 424{
360 insn_init(insn, auprobe->insn, true); 425 if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) {
361 426 struct uprobe_task *utask = current->utask;
362 /* Skip good instruction prefixes; reject "bad" ones. */ 427 unsigned long *sr = scratch_reg(auprobe, regs);
363 insn_get_opcode(insn);
364 if (is_prefix_bad(insn))
365 return -ENOTSUPP;
366 428
367 if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64)) 429 *sr = utask->autask.saved_scratch_register;
368 return 0;
369
370 if (insn->opcode.nbytes == 2) {
371 if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
372 return 0;
373 } 430 }
374 return -ENOTSUPP;
375} 431}
376 432#else /* 32-bit: */
377static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) 433static inline bool is_64bit_mm(struct mm_struct *mm)
378{ 434{
379 if (mm->context.ia32_compat) 435 return false;
380 return validate_insn_32bits(auprobe, insn);
381 return validate_insn_64bits(auprobe, insn);
382} 436}
383#else /* 32-bit: */
384/* 437/*
385 * No RIP-relative addressing on 32-bit 438 * No RIP-relative addressing on 32-bit
386 */ 439 */
387static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) 440static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
388{ 441{
389} 442}
390static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, 443static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
391 struct arch_uprobe_task *autask)
392{ 444{
393} 445}
394static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, 446static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
395 long *correction)
396{ 447{
397} 448}
398
399static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
400{
401 return validate_insn_32bits(auprobe, insn);
402}
403#endif /* CONFIG_X86_64 */ 449#endif /* CONFIG_X86_64 */
404 450
405struct uprobe_xol_ops { 451struct uprobe_xol_ops {
406 bool (*emulate)(struct arch_uprobe *, struct pt_regs *); 452 bool (*emulate)(struct arch_uprobe *, struct pt_regs *);
407 int (*pre_xol)(struct arch_uprobe *, struct pt_regs *); 453 int (*pre_xol)(struct arch_uprobe *, struct pt_regs *);
408 int (*post_xol)(struct arch_uprobe *, struct pt_regs *); 454 int (*post_xol)(struct arch_uprobe *, struct pt_regs *);
455 void (*abort)(struct arch_uprobe *, struct pt_regs *);
409}; 456};
410 457
411static inline int sizeof_long(void) 458static inline int sizeof_long(void)
@@ -415,50 +462,67 @@ static inline int sizeof_long(void)
415 462
416static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) 463static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
417{ 464{
418 pre_xol_rip_insn(auprobe, regs, &current->utask->autask); 465 riprel_pre_xol(auprobe, regs);
419 return 0; 466 return 0;
420} 467}
421 468
422/* 469static int push_ret_address(struct pt_regs *regs, unsigned long ip)
423 * Adjust the return address pushed by a call insn executed out of line.
424 */
425static int adjust_ret_addr(unsigned long sp, long correction)
426{ 470{
427 int rasize = sizeof_long(); 471 unsigned long new_sp = regs->sp - sizeof_long();
428 long ra;
429
430 if (copy_from_user(&ra, (void __user *)sp, rasize))
431 return -EFAULT;
432 472
433 ra += correction; 473 if (copy_to_user((void __user *)new_sp, &ip, sizeof_long()))
434 if (copy_to_user((void __user *)sp, &ra, rasize))
435 return -EFAULT; 474 return -EFAULT;
436 475
476 regs->sp = new_sp;
437 return 0; 477 return 0;
438} 478}
439 479
480/*
481 * We have to fix things up as follows:
482 *
483 * Typically, the new ip is relative to the copied instruction. We need
484 * to make it relative to the original instruction (FIX_IP). Exceptions
485 * are return instructions and absolute or indirect jump or call instructions.
486 *
487 * If the single-stepped instruction was a call, the return address that
488 * is atop the stack is the address following the copied instruction. We
489 * need to make it the address following the original instruction (FIX_CALL).
490 *
491 * If the original instruction was a rip-relative instruction such as
492 * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
493 * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)".
494 * We need to restore the contents of the scratch register
495 * (FIX_RIP_reg).
496 */
440static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) 497static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
441{ 498{
442 struct uprobe_task *utask = current->utask; 499 struct uprobe_task *utask = current->utask;
443 long correction = (long)(utask->vaddr - utask->xol_vaddr);
444 500
445 handle_riprel_post_xol(auprobe, regs, &correction); 501 riprel_post_xol(auprobe, regs);
446 if (auprobe->fixups & UPROBE_FIX_IP) 502 if (auprobe->def.fixups & UPROBE_FIX_IP) {
503 long correction = utask->vaddr - utask->xol_vaddr;
447 regs->ip += correction; 504 regs->ip += correction;
448 505 } else if (auprobe->def.fixups & UPROBE_FIX_CALL) {
449 if (auprobe->fixups & UPROBE_FIX_CALL) { 506 regs->sp += sizeof_long();
450 if (adjust_ret_addr(regs->sp, correction)) { 507 if (push_ret_address(regs, utask->vaddr + auprobe->def.ilen))
451 regs->sp += sizeof_long();
452 return -ERESTART; 508 return -ERESTART;
453 }
454 } 509 }
510 /* popf; tell the caller to not touch TF */
511 if (auprobe->def.fixups & UPROBE_FIX_SETF)
512 utask->autask.saved_tf = true;
455 513
456 return 0; 514 return 0;
457} 515}
458 516
517static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
518{
519 riprel_post_xol(auprobe, regs);
520}
521
459static struct uprobe_xol_ops default_xol_ops = { 522static struct uprobe_xol_ops default_xol_ops = {
460 .pre_xol = default_pre_xol_op, 523 .pre_xol = default_pre_xol_op,
461 .post_xol = default_post_xol_op, 524 .post_xol = default_post_xol_op,
525 .abort = default_abort_op,
462}; 526};
463 527
464static bool branch_is_call(struct arch_uprobe *auprobe) 528static bool branch_is_call(struct arch_uprobe *auprobe)
@@ -520,7 +584,6 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
520 unsigned long offs = (long)auprobe->branch.offs; 584 unsigned long offs = (long)auprobe->branch.offs;
521 585
522 if (branch_is_call(auprobe)) { 586 if (branch_is_call(auprobe)) {
523 unsigned long new_sp = regs->sp - sizeof_long();
524 /* 587 /*
525 * If it fails we execute this (mangled, see the comment in 588 * If it fails we execute this (mangled, see the comment in
526 * branch_clear_offset) insn out-of-line. In the likely case 589 * branch_clear_offset) insn out-of-line. In the likely case
@@ -530,9 +593,8 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
530 * 593 *
531 * But there is corner case, see the comment in ->post_xol(). 594 * But there is corner case, see the comment in ->post_xol().
532 */ 595 */
533 if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long())) 596 if (push_ret_address(regs, new_ip))
534 return false; 597 return false;
535 regs->sp = new_sp;
536 } else if (!check_jmp_cond(auprobe, regs)) { 598 } else if (!check_jmp_cond(auprobe, regs)) {
537 offs = 0; 599 offs = 0;
538 } 600 }
@@ -583,11 +645,7 @@ static struct uprobe_xol_ops branch_xol_ops = {
583static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) 645static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
584{ 646{
585 u8 opc1 = OPCODE1(insn); 647 u8 opc1 = OPCODE1(insn);
586 648 int i;
587 /* has the side-effect of processing the entire instruction */
588 insn_get_length(insn);
589 if (WARN_ON_ONCE(!insn_complete(insn)))
590 return -ENOEXEC;
591 649
592 switch (opc1) { 650 switch (opc1) {
593 case 0xeb: /* jmp 8 */ 651 case 0xeb: /* jmp 8 */
@@ -612,6 +670,16 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
612 return -ENOSYS; 670 return -ENOSYS;
613 } 671 }
614 672
673 /*
674 * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported.
675 * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
676 * No one uses these insns, reject any branch insns with such prefix.
677 */
678 for (i = 0; i < insn->prefixes.nbytes; i++) {
679 if (insn->prefixes.bytes[i] == 0x66)
680 return -ENOTSUPP;
681 }
682
615 auprobe->branch.opc1 = opc1; 683 auprobe->branch.opc1 = opc1;
616 auprobe->branch.ilen = insn->length; 684 auprobe->branch.ilen = insn->length;
617 auprobe->branch.offs = insn->immediate.value; 685 auprobe->branch.offs = insn->immediate.value;
@@ -630,10 +698,10 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
630int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) 698int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
631{ 699{
632 struct insn insn; 700 struct insn insn;
633 bool fix_ip = true, fix_call = false; 701 u8 fix_ip_or_call = UPROBE_FIX_IP;
634 int ret; 702 int ret;
635 703
636 ret = validate_insn_bits(auprobe, mm, &insn); 704 ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm));
637 if (ret) 705 if (ret)
638 return ret; 706 return ret;
639 707
@@ -642,44 +710,40 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
642 return ret; 710 return ret;
643 711
644 /* 712 /*
645 * Figure out which fixups arch_uprobe_post_xol() will need to perform, 713 * Figure out which fixups default_post_xol_op() will need to perform,
646 * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups 714 * and annotate def->fixups accordingly. To start with, ->fixups is
647 * is either zero or it reflects rip-related fixups. 715 * either zero or it reflects rip-related fixups.
648 */ 716 */
649 switch (OPCODE1(&insn)) { 717 switch (OPCODE1(&insn)) {
650 case 0x9d: /* popf */ 718 case 0x9d: /* popf */
651 auprobe->fixups |= UPROBE_FIX_SETF; 719 auprobe->def.fixups |= UPROBE_FIX_SETF;
652 break; 720 break;
653 case 0xc3: /* ret or lret -- ip is correct */ 721 case 0xc3: /* ret or lret -- ip is correct */
654 case 0xcb: 722 case 0xcb:
655 case 0xc2: 723 case 0xc2:
656 case 0xca: 724 case 0xca:
657 fix_ip = false; 725 case 0xea: /* jmp absolute -- ip is correct */
726 fix_ip_or_call = 0;
658 break; 727 break;
659 case 0x9a: /* call absolute - Fix return addr, not ip */ 728 case 0x9a: /* call absolute - Fix return addr, not ip */
660 fix_call = true; 729 fix_ip_or_call = UPROBE_FIX_CALL;
661 fix_ip = false;
662 break;
663 case 0xea: /* jmp absolute -- ip is correct */
664 fix_ip = false;
665 break; 730 break;
666 case 0xff: 731 case 0xff:
667 insn_get_modrm(&insn);
668 switch (MODRM_REG(&insn)) { 732 switch (MODRM_REG(&insn)) {
669 case 2: case 3: /* call or lcall, indirect */ 733 case 2: case 3: /* call or lcall, indirect */
670 fix_call = true; 734 fix_ip_or_call = UPROBE_FIX_CALL;
735 break;
671 case 4: case 5: /* jmp or ljmp, indirect */ 736 case 4: case 5: /* jmp or ljmp, indirect */
672 fix_ip = false; 737 fix_ip_or_call = 0;
738 break;
673 } 739 }
674 /* fall through */ 740 /* fall through */
675 default: 741 default:
676 handle_riprel_insn(auprobe, &insn); 742 riprel_analyze(auprobe, &insn);
677 } 743 }
678 744
679 if (fix_ip) 745 auprobe->def.ilen = insn.length;
680 auprobe->fixups |= UPROBE_FIX_IP; 746 auprobe->def.fixups |= fix_ip_or_call;
681 if (fix_call)
682 auprobe->fixups |= UPROBE_FIX_CALL;
683 747
684 auprobe->ops = &default_xol_ops; 748 auprobe->ops = &default_xol_ops;
685 return 0; 749 return 0;
@@ -694,6 +758,12 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
694{ 758{
695 struct uprobe_task *utask = current->utask; 759 struct uprobe_task *utask = current->utask;
696 760
761 if (auprobe->ops->pre_xol) {
762 int err = auprobe->ops->pre_xol(auprobe, regs);
763 if (err)
764 return err;
765 }
766
697 regs->ip = utask->xol_vaddr; 767 regs->ip = utask->xol_vaddr;
698 utask->autask.saved_trap_nr = current->thread.trap_nr; 768 utask->autask.saved_trap_nr = current->thread.trap_nr;
699 current->thread.trap_nr = UPROBE_TRAP_NR; 769 current->thread.trap_nr = UPROBE_TRAP_NR;
@@ -703,8 +773,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
703 if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) 773 if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
704 set_task_blockstep(current, false); 774 set_task_blockstep(current, false);
705 775
706 if (auprobe->ops->pre_xol)
707 return auprobe->ops->pre_xol(auprobe, regs);
708 return 0; 776 return 0;
709} 777}
710 778
@@ -732,56 +800,42 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
732 * single-step, we single-stepped a copy of the instruction. 800 * single-step, we single-stepped a copy of the instruction.
733 * 801 *
734 * This function prepares to resume execution after the single-step. 802 * This function prepares to resume execution after the single-step.
735 * We have to fix things up as follows:
736 *
737 * Typically, the new ip is relative to the copied instruction. We need
738 * to make it relative to the original instruction (FIX_IP). Exceptions
739 * are return instructions and absolute or indirect jump or call instructions.
740 *
741 * If the single-stepped instruction was a call, the return address that
742 * is atop the stack is the address following the copied instruction. We
743 * need to make it the address following the original instruction (FIX_CALL).
744 *
745 * If the original instruction was a rip-relative instruction such as
746 * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
747 * instruction using a scratch register -- e.g., "movl %edx,(%rax)".
748 * We need to restore the contents of the scratch register and adjust
749 * the ip, keeping in mind that the instruction we executed is 4 bytes
750 * shorter than the original instruction (since we squeezed out the offset
751 * field). (FIX_RIP_AX or FIX_RIP_CX)
752 */ 803 */
753int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) 804int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
754{ 805{
755 struct uprobe_task *utask = current->utask; 806 struct uprobe_task *utask = current->utask;
807 bool send_sigtrap = utask->autask.saved_tf;
808 int err = 0;
756 809
757 WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); 810 WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
811 current->thread.trap_nr = utask->autask.saved_trap_nr;
758 812
759 if (auprobe->ops->post_xol) { 813 if (auprobe->ops->post_xol) {
760 int err = auprobe->ops->post_xol(auprobe, regs); 814 err = auprobe->ops->post_xol(auprobe, regs);
761 if (err) { 815 if (err) {
762 arch_uprobe_abort_xol(auprobe, regs);
763 /* 816 /*
764 * Restart the probed insn. ->post_xol() must ensure 817 * Restore ->ip for restart or post mortem analysis.
765 * this is really possible if it returns -ERESTART. 818 * ->post_xol() must not return -ERESTART unless this
819 * is really possible.
766 */ 820 */
821 regs->ip = utask->vaddr;
767 if (err == -ERESTART) 822 if (err == -ERESTART)
768 return 0; 823 err = 0;
769 return err; 824 send_sigtrap = false;
770 } 825 }
771 } 826 }
772
773 current->thread.trap_nr = utask->autask.saved_trap_nr;
774 /* 827 /*
775 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP 828 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
776 * so we can get an extra SIGTRAP if we do not clear TF. We need 829 * so we can get an extra SIGTRAP if we do not clear TF. We need
777 * to examine the opcode to make it right. 830 * to examine the opcode to make it right.
778 */ 831 */
779 if (utask->autask.saved_tf) 832 if (send_sigtrap)
780 send_sig(SIGTRAP, current, 0); 833 send_sig(SIGTRAP, current, 0);
781 else if (!(auprobe->fixups & UPROBE_FIX_SETF)) 834
835 if (!utask->autask.saved_tf)
782 regs->flags &= ~X86_EFLAGS_TF; 836 regs->flags &= ~X86_EFLAGS_TF;
783 837
784 return 0; 838 return err;
785} 839}
786 840
787/* callback routine for handling exceptions. */ 841/* callback routine for handling exceptions. */
@@ -815,18 +869,18 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
815 869
816/* 870/*
817 * This function gets called when XOL instruction either gets trapped or 871 * This function gets called when XOL instruction either gets trapped or
818 * the thread has a fatal signal, or if arch_uprobe_post_xol() failed. 872 * the thread has a fatal signal. Reset the instruction pointer to its
819 * Reset the instruction pointer to its probed address for the potential 873 * probed address for the potential restart or for post mortem analysis.
820 * restart or for post mortem analysis.
821 */ 874 */
822void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) 875void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
823{ 876{
824 struct uprobe_task *utask = current->utask; 877 struct uprobe_task *utask = current->utask;
825 878
826 current->thread.trap_nr = utask->autask.saved_trap_nr; 879 if (auprobe->ops->abort)
827 handle_riprel_post_xol(auprobe, regs, NULL); 880 auprobe->ops->abort(auprobe, regs);
828 instruction_pointer_set(regs, utask->vaddr);
829 881
882 current->thread.trap_nr = utask->autask.saved_trap_nr;
883 regs->ip = utask->vaddr;
830 /* clear TF if it was set by us in arch_uprobe_pre_xol() */ 884 /* clear TF if it was set by us in arch_uprobe_pre_xol() */
831 if (!utask->autask.saved_tf) 885 if (!utask->autask.saved_tf)
832 regs->flags &= ~X86_EFLAGS_TF; 886 regs->flags &= ~X86_EFLAGS_TF;