author    Ingo Molnar <mingo@kernel.org>  2014-05-05 04:59:47 -0400
committer Ingo Molnar <mingo@kernel.org>  2014-05-05 04:59:47 -0400
commit    8e02ae573e6ae86930d08662790827a938203e70 (patch)
tree      ef87732e10e1c4162c45774274da07b32bee32ef
parent    3617660e4e1618a888a2e3a4067224534302cb33 (diff)
parent    13f59c5e45be59665c11ddde19799b6295543b7d (diff)
Merge branch 'uprobes/core' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc into perf/uprobes
Pull uprobes updates from Oleg Nesterov:

 "This hopefully completes the previous 'fix the handling of relative
  jmp/call's' series; all changes except the last 3 unrelated fixes try
  to address TODOs mentioned in the changelogs."

Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  arch/x86/include/asm/uprobes.h |  13
-rw-r--r--  arch/x86/kernel/process_64.c   |   7
-rw-r--r--  arch/x86/kernel/uprobes.c      | 333
-rw-r--r--  kernel/events/uprobes.c        |   2
-rw-r--r--  kernel/trace/trace_uprobe.c    |  46
5 files changed, 200 insertions(+), 201 deletions(-)
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 93bee7b93854..a040d493a4f9 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -41,18 +41,21 @@ struct arch_uprobe {
                 u8                      ixol[MAX_UINSN_BYTES];
         };
 
-        u16                             fixups;
         const struct uprobe_xol_ops     *ops;
 
         union {
-#ifdef CONFIG_X86_64
-                unsigned long           rip_rela_target_address;
-#endif
                 struct {
                         s32     offs;
                         u8      ilen;
                         u8      opc1;
                 }                       branch;
+                struct {
+#ifdef CONFIG_X86_64
+                        long    riprel_target;
+#endif
+                        u8      fixups;
+                        u8      ilen;
+                }                       def;
         };
 };
 
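
For readers tracking the layout change, struct arch_uprobe ends up looking roughly like this after the patch. This is a sketch assembled from the hunk above, not a verbatim copy of the header; the insn/ixol union at the top is assumed from the surrounding context:

struct arch_uprobe {
        union {
                u8      insn[MAX_UINSN_BYTES];  /* original instruction bytes */
                u8      ixol[MAX_UINSN_BYTES];  /* copy executed out-of-line */
        };

        const struct uprobe_xol_ops     *ops;   /* default_xol_ops or branch_xol_ops */

        union {
                struct {                        /* used by branch_xol_ops */
                        s32     offs;
                        u8      ilen;
                        u8      opc1;
                } branch;
                struct {                        /* used by default_xol_ops */
#ifdef CONFIG_X86_64
                        long    riprel_target;  /* was rip_rela_target_address */
#endif
                        u8      fixups;         /* was a u16 outside the union */
                        u8      ilen;
                } def;
        };
};
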
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9c0280f93d05..9b53940981b7 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -413,12 +413,11 @@ void set_personality_ia32(bool x32)
         set_thread_flag(TIF_ADDR32);
 
         /* Mark the associated mm as containing 32-bit tasks. */
-        if (current->mm)
-                current->mm->context.ia32_compat = 1;
-
         if (x32) {
                 clear_thread_flag(TIF_IA32);
                 set_thread_flag(TIF_X32);
+                if (current->mm)
+                        current->mm->context.ia32_compat = TIF_X32;
                 current->personality &= ~READ_IMPLIES_EXEC;
                 /* is_compat_task() uses the presence of the x32
                    syscall bit flag to determine compat status */
@@ -426,6 +425,8 @@ void set_personality_ia32(bool x32)
         } else {
                 set_thread_flag(TIF_IA32);
                 clear_thread_flag(TIF_X32);
+                if (current->mm)
+                        current->mm->context.ia32_compat = TIF_IA32;
                 current->personality |= force_personality32;
                 /* Prepare the first "return" to user space */
                 current_thread_info()->status |= TS_COMPAT;
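
The point of storing TIF_X32 or TIF_IA32 here, rather than the old literal 1, is that later code can tell the two compat flavours apart: an x32 mm still executes the 64-bit instruction set, so only TIF_IA32 marks a truly 32-bit instruction model. The uprobes side of this series relies on exactly that in its is_64bit_mm() helper (further down in this diff); a standalone sketch of the idea, with a hypothetical name to avoid clashing with that helper:

static inline bool mm_runs_64bit_code(struct mm_struct *mm)
{
        /*
         * Without IA32 emulation every mm is 64-bit; otherwise only an mm
         * explicitly marked TIF_IA32 is 32-bit (TIF_X32 still means 64-bit
         * instructions, just the x32 ABI).
         */
        return !config_enabled(CONFIG_IA32_EMULATION) ||
               mm->context.ia32_compat != TIF_IA32;
}
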
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index ace22916ade3..2ebadb252093 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -32,20 +32,17 @@
 
 /* Post-execution fixups. */
 
-/* No fixup needed */
-#define UPROBE_FIX_NONE         0x0
-
 /* Adjust IP back to vicinity of actual insn */
-#define UPROBE_FIX_IP           0x1
+#define UPROBE_FIX_IP           0x01
 
 /* Adjust the return address of a call insn */
-#define UPROBE_FIX_CALL         0x2
+#define UPROBE_FIX_CALL         0x02
 
 /* Instruction will modify TF, don't change it */
-#define UPROBE_FIX_SETF         0x4
+#define UPROBE_FIX_SETF         0x04
 
-#define UPROBE_FIX_RIP_AX       0x8000
-#define UPROBE_FIX_RIP_CX       0x4000
+#define UPROBE_FIX_RIP_AX       0x08
+#define UPROBE_FIX_RIP_CX       0x10
 
 #define UPROBE_TRAP_NR          UINT_MAX
 
@@ -67,6 +64,7 @@
  * to keep gcc from statically optimizing it out, as variable_test_bit makes
  * some versions of gcc to think only *(unsigned long*) is used.
  */
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static volatile u32 good_insns_32[256 / 32] = {
         /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
         /*      ----------------------------------------------         */
@@ -89,33 +87,12 @@ static volatile u32 good_insns_32[256 / 32] = {
         /*      ----------------------------------------------         */
         /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
+#else
+#define good_insns_32   NULL
+#endif
 
-/* Using this for both 64-bit and 32-bit apps */
-static volatile u32 good_2byte_insns[256 / 32] = {
-        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
-        /*      ----------------------------------------------         */
-        W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
-        W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
-        W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
-        W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
-        W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
-        W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
-        W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
-        W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
-        W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
-        W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
-        W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
-        W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
-        W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
-        W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
-        W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
-        W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
-        /*      ----------------------------------------------         */
-        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
-};
-
-#ifdef CONFIG_X86_64
 /* Good-instruction tables for 64-bit apps */
+#if defined(CONFIG_X86_64)
 static volatile u32 good_insns_64[256 / 32] = {
         /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
         /*      ----------------------------------------------         */
@@ -138,7 +115,33 @@ static volatile u32 good_insns_64[256 / 32] = {
         /*      ----------------------------------------------         */
         /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
+#else
+#define good_insns_64   NULL
 #endif
+
+/* Using this for both 64-bit and 32-bit apps */
+static volatile u32 good_2byte_insns[256 / 32] = {
+        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
+        /*      ----------------------------------------------         */
+        W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
+        W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
+        W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
+        W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
+        W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
+        W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
+        W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
+        W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
+        W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+        W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+        W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
+        W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+        W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
+        W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+        W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
+        W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
+        /*      ----------------------------------------------         */
+        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
+};
 #undef W
 
 /*
@@ -209,16 +212,25 @@ static bool is_prefix_bad(struct insn *insn)
         return false;
 }
 
-static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
+static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64)
 {
-        insn_init(insn, auprobe->insn, false);
+        u32 volatile *good_insns;
+
+        insn_init(insn, auprobe->insn, x86_64);
+        /* has the side-effect of processing the entire instruction */
+        insn_get_length(insn);
+        if (WARN_ON_ONCE(!insn_complete(insn)))
+                return -ENOEXEC;
 
-        /* Skip good instruction prefixes; reject "bad" ones. */
-        insn_get_opcode(insn);
         if (is_prefix_bad(insn))
                 return -ENOTSUPP;
 
-        if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
+        if (x86_64)
+                good_insns = good_insns_64;
+        else
+                good_insns = good_insns_32;
+
+        if (test_bit(OPCODE1(insn), (unsigned long *)good_insns))
                 return 0;
 
         if (insn->opcode.nbytes == 2) {
@@ -230,14 +242,18 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
 }
 
 #ifdef CONFIG_X86_64
+static inline bool is_64bit_mm(struct mm_struct *mm)
+{
+        return  !config_enabled(CONFIG_IA32_EMULATION) ||
+                !(mm->context.ia32_compat == TIF_IA32);
+}
 /*
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
  * immediately. Otherwise, rewrite the instruction so that it accesses
  * its memory operand indirectly through a scratch register. Set
- * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
- * accordingly. (The contents of the scratch register will be saved
- * before we single-step the modified instruction, and restored
- * afterward.)
+ * def->fixups and def->riprel_target accordingly. (The contents of the
+ * scratch register will be saved before we single-step the modified
+ * instruction, and restored afterward).
  *
  * We do this because a rip-relative instruction can access only a
  * relatively small area (+/- 2 GB from the instruction), and the XOL
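
With uprobe_init_insn() in place, both the 32-bit and 64-bit validation paths collapse into one call that also fully decodes the instruction. A sketch of the calling pattern (it matches arch_uprobe_analyze_insn() further down; error values as in the hunk above):

        struct insn insn;
        int ret;

        ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm));
        if (ret)        /* -ENOEXEC: decode failed, -ENOTSUPP: bad prefix/opcode */
                return ret;
        /* insn is now fully decoded: length, prefixes, opcode, modrm, ... */
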
@@ -252,8 +268,7 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
  * - There's never a SIB byte.
  * - The displacement is always 4 bytes.
  */
-static void
-handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
         u8 *cursor;
         u8 reg;
@@ -277,8 +292,6 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
          * is the immediate operand.
          */
         cursor = auprobe->insn + insn_offset_modrm(insn);
-        insn_get_length(insn);
-
         /*
          * Convert from rip-relative addressing to indirect addressing
          * via a scratch register. Change the r/m field from 0x5 (%rip)
@@ -293,18 +306,18 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
                  * is NOT the register operand, so we use %rcx (register
                  * #1) for the scratch register.
                  */
-                auprobe->fixups = UPROBE_FIX_RIP_CX;
+                auprobe->def.fixups |= UPROBE_FIX_RIP_CX;
                 /* Change modrm from 00 000 101 to 00 000 001. */
                 *cursor = 0x1;
         } else {
                 /* Use %rax (register #0) for the scratch register. */
-                auprobe->fixups = UPROBE_FIX_RIP_AX;
+                auprobe->def.fixups |= UPROBE_FIX_RIP_AX;
                 /* Change modrm from 00 xxx 101 to 00 xxx 000 */
                 *cursor = (reg << 3);
         }
 
         /* Target address = address of next instruction + (signed) offset */
-        auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
+        auprobe->def.riprel_target = (long)insn->length + insn->displacement.value;
 
         /* Displacement field is gone; slide immediate field (if any) over. */
         if (insn->immediate.nbytes) {
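
A worked example of the rewrite (illustrative only, not part of the patch): probing the 7-byte instruction "mov 0x1000(%rip),%rdi", encoded 48 8b 3d 00 10 00 00. Its ModRM reg field is 7 (%rdi), i.e. not register 0, so %rax is chosen as the scratch register:

        /*
         * original:   48 8b 3d 00 10 00 00    mov 0x1000(%rip),%rdi
         * rewritten:  48 8b 38                mov (%rax),%rdi
         *             (modrm 00 111 101 -> 00 111 000, displacement dropped)
         *
         * def.fixups        |= UPROBE_FIX_RIP_AX;
         * def.riprel_target  = insn->length + insn->displacement.value
         *                    = 7 + 0x1000;
         *
         * riprel_pre_xol() later saves %rax and loads it with
         * utask->vaddr + def.riprel_target, the address the original
         * rip-relative operand would have referenced in place.
         */
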
@@ -313,37 +326,35 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
         }
 }
 
+static inline unsigned long *
+scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+        return (auprobe->def.fixups & UPROBE_FIX_RIP_AX) ? &regs->ax : &regs->cx;
+}
+
 /*
  * If we're emulating a rip-relative instruction, save the contents
  * of the scratch register and store the target address in that register.
  */
-static void
-pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
-                                struct arch_uprobe_task *autask)
-{
-        if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
-                autask->saved_scratch_register = regs->ax;
-                regs->ax = current->utask->vaddr;
-                regs->ax += auprobe->rip_rela_target_address;
-        } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
-                autask->saved_scratch_register = regs->cx;
-                regs->cx = current->utask->vaddr;
-                regs->cx += auprobe->rip_rela_target_address;
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+        if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
+                struct uprobe_task *utask = current->utask;
+                unsigned long *sr = scratch_reg(auprobe, regs);
+
+                utask->autask.saved_scratch_register = *sr;
+                *sr = utask->vaddr + auprobe->def.riprel_target;
         }
 }
 
-static void
-handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
+                                long *correction)
 {
-        if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
-                struct arch_uprobe_task *autask;
-
-                autask = &current->utask->autask;
-                if (auprobe->fixups & UPROBE_FIX_RIP_AX)
-                        regs->ax = autask->saved_scratch_register;
-                else
-                        regs->cx = autask->saved_scratch_register;
+        if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
+                struct uprobe_task *utask = current->utask;
+                unsigned long *sr = scratch_reg(auprobe, regs);
 
+                *sr = utask->autask.saved_scratch_register;
                 /*
                  * The original instruction includes a displacement, and so
                  * is 4 bytes longer than what we've just single-stepped.
@@ -354,58 +365,31 @@ handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *
                 *correction += 4;
         }
 }
-
-static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
-{
-        insn_init(insn, auprobe->insn, true);
-
-        /* Skip good instruction prefixes; reject "bad" ones. */
-        insn_get_opcode(insn);
-        if (is_prefix_bad(insn))
-                return -ENOTSUPP;
-
-        if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
-                return 0;
-
-        if (insn->opcode.nbytes == 2) {
-                if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
-                        return 0;
-        }
-        return -ENOTSUPP;
-}
-
-static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+#else /* 32-bit: */
+static inline bool is_64bit_mm(struct mm_struct *mm)
 {
-        if (mm->context.ia32_compat)
-                return validate_insn_32bits(auprobe, insn);
-        return validate_insn_64bits(auprobe, insn);
+        return false;
 }
-#else /* 32-bit: */
 /*
  * No RIP-relative addressing on 32-bit
  */
-static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 }
-static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
-                                struct arch_uprobe_task *autask)
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
-static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
-                                        long *correction)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
+                                long *correction)
 {
 }
-
-static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
-{
-        return validate_insn_32bits(auprobe, insn);
-}
 #endif /* CONFIG_X86_64 */
 
 struct uprobe_xol_ops {
         bool    (*emulate)(struct arch_uprobe *, struct pt_regs *);
         int     (*pre_xol)(struct arch_uprobe *, struct pt_regs *);
         int     (*post_xol)(struct arch_uprobe *, struct pt_regs *);
+        void    (*abort)(struct arch_uprobe *, struct pt_regs *);
 };
 
 static inline int sizeof_long(void)
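
The ->abort hook added to uprobe_xol_ops above completes the dispatch pattern used by the generic arch_uprobe_* entry points. A compressed sketch of that pattern (simplified from the functions changed later in this patch):

        /* pre-XOL: let the per-insn ops veto or prepare the single-step */
        if (auprobe->ops->pre_xol) {
                int err = auprobe->ops->pre_xol(auprobe, regs);   /* e.g. riprel_pre_xol() */
                if (err)
                        return err;
        }

        /* abort: undo whatever pre_xol did, e.g. restore the scratch register */
        if (auprobe->ops->abort)
                auprobe->ops->abort(auprobe, regs);               /* default_abort_op() */
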
@@ -415,25 +399,18 @@ static inline int sizeof_long(void)
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-        pre_xol_rip_insn(auprobe, regs, &current->utask->autask);
+        riprel_pre_xol(auprobe, regs);
         return 0;
 }
 
-/*
- * Adjust the return address pushed by a call insn executed out of line.
- */
-static int adjust_ret_addr(unsigned long sp, long correction)
+static int push_ret_address(struct pt_regs *regs, unsigned long ip)
 {
-        int rasize = sizeof_long();
-        long ra;
+        unsigned long new_sp = regs->sp - sizeof_long();
 
-        if (copy_from_user(&ra, (void __user *)sp, rasize))
-                return -EFAULT;
-
-        ra += correction;
-        if (copy_to_user((void __user *)sp, &ra, rasize))
+        if (copy_to_user((void __user *)new_sp, &ip, sizeof_long()))
                 return -EFAULT;
 
+        regs->sp = new_sp;
         return 0;
 }
 
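
push_ret_address() becomes the one helper for "push a return address onto the probed task's user stack"; both users in this patch follow the same shape. A sketch of the two call sites (both appear below in this diff):

        /* branch_emulate_op(): emulating "call rel32" without an XOL step */
        if (push_ret_address(regs, new_ip))             /* new_ip = addr + insn length */
                return false;                           /* fall back to single-stepping */

        /* default_post_xol_op(): fixing up a call that ran out of line */
        regs->sp += sizeof_long();                      /* drop the XOL-relative return addr */
        if (push_ret_address(regs, utask->vaddr + auprobe->def.ilen))
                return -ERESTART;                       /* uprobes will restart the insn */
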
@@ -442,23 +419,30 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
         struct uprobe_task *utask = current->utask;
         long correction = (long)(utask->vaddr - utask->xol_vaddr);
 
-        handle_riprel_post_xol(auprobe, regs, &correction);
-        if (auprobe->fixups & UPROBE_FIX_IP)
+        riprel_post_xol(auprobe, regs, &correction);
+        if (auprobe->def.fixups & UPROBE_FIX_IP) {
                 regs->ip += correction;
-
-        if (auprobe->fixups & UPROBE_FIX_CALL) {
-                if (adjust_ret_addr(regs->sp, correction)) {
-                        regs->sp += sizeof_long();
+        } else if (auprobe->def.fixups & UPROBE_FIX_CALL) {
+                regs->sp += sizeof_long();
+                if (push_ret_address(regs, utask->vaddr + auprobe->def.ilen))
                         return -ERESTART;
-                }
         }
+        /* popf; tell the caller to not touch TF */
+        if (auprobe->def.fixups & UPROBE_FIX_SETF)
+                utask->autask.saved_tf = true;
 
         return 0;
 }
 
+static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+        riprel_post_xol(auprobe, regs, NULL);
+}
+
 static struct uprobe_xol_ops default_xol_ops = {
         .pre_xol  = default_pre_xol_op,
         .post_xol = default_post_xol_op,
+        .abort    = default_abort_op,
 };
 
 static bool branch_is_call(struct arch_uprobe *auprobe)
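
The correction applied in default_post_xol_op() is simply the distance between the probed address and the XOL slot. A small worked example with invented addresses:

        /*
         * utask->vaddr     = 0x0000000000400500   (probed instruction)
         * utask->xol_vaddr = 0x00007fffff5ff000   (XOL slot that was stepped)
         * correction       = vaddr - xol_vaddr
         *
         * After the single-step, regs->ip points just past the XOL copy; for
         * an UPROBE_FIX_IP instruction, adding the correction moves ->ip back
         * behind the original location, as if the insn had run in place.
         * For a rip-relative copy, riprel_post_xol() first adds 4, because
         * the rewritten copy is 4 bytes shorter than the original.
         */
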
@@ -520,7 +504,6 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
         unsigned long offs = (long)auprobe->branch.offs;
 
         if (branch_is_call(auprobe)) {
-                unsigned long new_sp = regs->sp - sizeof_long();
                 /*
                  * If it fails we execute this (mangled, see the comment in
                  * branch_clear_offset) insn out-of-line. In the likely case
@@ -530,9 +513,8 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
                  *
                  * But there is corner case, see the comment in ->post_xol().
                  */
-                if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long()))
+                if (push_ret_address(regs, new_ip))
                         return false;
-                regs->sp = new_sp;
         } else if (!check_jmp_cond(auprobe, regs)) {
                 offs = 0;
         }
@@ -583,11 +565,7 @@ static struct uprobe_xol_ops branch_xol_ops = {
 static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 {
         u8 opc1 = OPCODE1(insn);
-
-        /* has the side-effect of processing the entire instruction */
-        insn_get_length(insn);
-        if (WARN_ON_ONCE(!insn_complete(insn)))
-                return -ENOEXEC;
+        int i;
 
         switch (opc1) {
         case 0xeb:      /* jmp 8 */
@@ -612,6 +590,16 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
                 return -ENOSYS;
         }
 
+        /*
+         * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported.
+         * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
+         * No one uses these insns, reject any branch insns with such prefix.
+         */
+        for (i = 0; i < insn->prefixes.nbytes; i++) {
+                if (insn->prefixes.bytes[i] == 0x66)
+                        return -ENOTSUPP;
+        }
+
         auprobe->branch.opc1 = opc1;
         auprobe->branch.ilen = insn->length;
         auprobe->branch.offs = insn->immediate.value;
@@ -630,10 +618,10 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
 {
         struct insn insn;
-        bool fix_ip = true, fix_call = false;
+        u8 fix_ip_or_call = UPROBE_FIX_IP;
         int ret;
 
-        ret = validate_insn_bits(auprobe, mm, &insn);
+        ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm));
         if (ret)
                 return ret;
 
@@ -642,44 +630,40 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
                 return ret;
 
         /*
-         * Figure out which fixups arch_uprobe_post_xol() will need to perform,
-         * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups
-         * is either zero or it reflects rip-related fixups.
+         * Figure out which fixups default_post_xol_op() will need to perform,
+         * and annotate def->fixups accordingly. To start with, ->fixups is
+         * either zero or it reflects rip-related fixups.
          */
         switch (OPCODE1(&insn)) {
         case 0x9d:              /* popf */
-                auprobe->fixups |= UPROBE_FIX_SETF;
+                auprobe->def.fixups |= UPROBE_FIX_SETF;
                 break;
         case 0xc3:              /* ret or lret -- ip is correct */
         case 0xcb:
         case 0xc2:
         case 0xca:
-                fix_ip = false;
+        case 0xea:              /* jmp absolute -- ip is correct */
+                fix_ip_or_call = 0;
                 break;
         case 0x9a:              /* call absolute - Fix return addr, not ip */
-                fix_call = true;
-                fix_ip = false;
-                break;
-        case 0xea:              /* jmp absolute -- ip is correct */
-                fix_ip = false;
+                fix_ip_or_call = UPROBE_FIX_CALL;
                 break;
         case 0xff:
-                insn_get_modrm(&insn);
                 switch (MODRM_REG(&insn)) {
                 case 2: case 3:                 /* call or lcall, indirect */
-                        fix_call = true;
+                        fix_ip_or_call = UPROBE_FIX_CALL;
+                        break;
                 case 4: case 5:                 /* jmp or ljmp, indirect */
-                        fix_ip = false;
+                        fix_ip_or_call = 0;
+                        break;
                 }
                 /* fall through */
         default:
-                handle_riprel_insn(auprobe, &insn);
+                riprel_analyze(auprobe, &insn);
         }
 
-        if (fix_ip)
-                auprobe->fixups |= UPROBE_FIX_IP;
-        if (fix_call)
-                auprobe->fixups |= UPROBE_FIX_CALL;
+        auprobe->def.ilen = insn.length;
+        auprobe->def.fixups |= fix_ip_or_call;
 
         auprobe->ops = &default_xol_ops;
         return 0;
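
To make the consolidated bookkeeping concrete: after this switch, def.ilen always holds the instruction length and def.fixups carries at most one of UPROBE_FIX_IP/UPROBE_FIX_CALL plus any rip-related bits. An illustrative trace (not from the patch) for an indirect, rip-relative call:

        /*
         * Probing "call *0x200(%rip)"  =  ff 15 00 02 00 00  (6 bytes)
         *
         *   OPCODE1 = 0xff, MODRM_REG = 2   ->  fix_ip_or_call = UPROBE_FIX_CALL
         *   fall through to riprel_analyze(): rip-relative operand, ModRM reg
         *   field != 0                      ->  def.fixups |= UPROBE_FIX_RIP_AX
         *
         *   Result:  def.ilen   = 6
         *            def.fixups = UPROBE_FIX_CALL | UPROBE_FIX_RIP_AX
         */
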
@@ -694,6 +678,12 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
         struct uprobe_task *utask = current->utask;
 
+        if (auprobe->ops->pre_xol) {
+                int err = auprobe->ops->pre_xol(auprobe, regs);
+                if (err)
+                        return err;
+        }
+
         regs->ip = utask->xol_vaddr;
         utask->autask.saved_trap_nr = current->thread.trap_nr;
         current->thread.trap_nr = UPROBE_TRAP_NR;
@@ -703,8 +693,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
         if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
                 set_task_blockstep(current, false);
 
-        if (auprobe->ops->pre_xol)
-                return auprobe->ops->pre_xol(auprobe, regs);
         return 0;
 }
 
@@ -753,35 +741,38 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
 int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
         struct uprobe_task *utask = current->utask;
+        bool send_sigtrap = utask->autask.saved_tf;
+        int err = 0;
 
         WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
+        current->thread.trap_nr = utask->autask.saved_trap_nr;
 
         if (auprobe->ops->post_xol) {
-                int err = auprobe->ops->post_xol(auprobe, regs);
+                err = auprobe->ops->post_xol(auprobe, regs);
                 if (err) {
-                        arch_uprobe_abort_xol(auprobe, regs);
                         /*
-                         * Restart the probed insn. ->post_xol() must ensure
-                         * this is really possible if it returns -ERESTART.
+                         * Restore ->ip for restart or post mortem analysis.
+                         * ->post_xol() must not return -ERESTART unless this
+                         * is really possible.
                          */
+                        regs->ip = utask->vaddr;
                         if (err == -ERESTART)
-                                return 0;
-                        return err;
+                                err = 0;
+                        send_sigtrap = false;
                 }
         }
-
-        current->thread.trap_nr = utask->autask.saved_trap_nr;
         /*
          * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
          * so we can get an extra SIGTRAP if we do not clear TF. We need
          * to examine the opcode to make it right.
          */
-        if (utask->autask.saved_tf)
+        if (send_sigtrap)
                 send_sig(SIGTRAP, current, 0);
-        else if (!(auprobe->fixups & UPROBE_FIX_SETF))
+
+        if (!utask->autask.saved_tf)
                 regs->flags &= ~X86_EFLAGS_TF;
 
-        return 0;
+        return err;
 }
 
 /* callback routine for handling exceptions. */
@@ -815,18 +806,18 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
 
 /*
  * This function gets called when XOL instruction either gets trapped or
- * the thread has a fatal signal, or if arch_uprobe_post_xol() failed.
- * Reset the instruction pointer to its probed address for the potential
- * restart or for post mortem analysis.
+ * the thread has a fatal signal. Reset the instruction pointer to its
+ * probed address for the potential restart or for post mortem analysis.
  */
 void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
         struct uprobe_task *utask = current->utask;
 
-        current->thread.trap_nr = utask->autask.saved_trap_nr;
-        handle_riprel_post_xol(auprobe, regs, NULL);
-        instruction_pointer_set(regs, utask->vaddr);
+        if (auprobe->ops->abort)
+                auprobe->ops->abort(auprobe, regs);
 
+        current->thread.trap_nr = utask->autask.saved_trap_nr;
+        regs->ip = utask->vaddr;
         /* clear TF if it was set by us in arch_uprobe_pre_xol() */
         if (!utask->autask.saved_tf)
                 regs->flags &= ~X86_EFLAGS_TF;
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index d1edc5e6fd03..7716c40f2c50 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -127,7 +127,7 @@ struct xol_area {
  */
 static bool valid_vma(struct vm_area_struct *vma, bool is_register)
 {
-        vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED;
+        vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE;
 
         if (is_register)
                 flags |= VM_WRITE;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c082a7441345..5a7f1a6b3b8b 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1009,56 +1009,60 @@ uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
         return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
 }
 
-static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
+static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
 {
         bool done;
 
         write_lock(&tu->filter.rwlock);
         if (event->hw.tp_target) {
-                /*
-                 * event->parent != NULL means copy_process(), we can avoid
-                 * uprobe_apply(). current->mm must be probed and we can rely
-                 * on dup_mmap() which preserves the already installed bp's.
-                 *
-                 * attr.enable_on_exec means that exec/mmap will install the
-                 * breakpoints we need.
-                 */
+                list_del(&event->hw.tp_list);
                 done = tu->filter.nr_systemwide ||
-                        event->parent || event->attr.enable_on_exec ||
+                        (event->hw.tp_target->flags & PF_EXITING) ||
                         uprobe_filter_event(tu, event);
-                list_add(&event->hw.tp_list, &tu->filter.perf_events);
         } else {
+                tu->filter.nr_systemwide--;
                 done = tu->filter.nr_systemwide;
-                tu->filter.nr_systemwide++;
         }
         write_unlock(&tu->filter.rwlock);
 
         if (!done)
-                uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
+                return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
 
         return 0;
 }
 
-static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
+static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
 {
         bool done;
+        int err;
 
         write_lock(&tu->filter.rwlock);
         if (event->hw.tp_target) {
-                list_del(&event->hw.tp_list);
+                /*
+                 * event->parent != NULL means copy_process(), we can avoid
+                 * uprobe_apply(). current->mm must be probed and we can rely
+                 * on dup_mmap() which preserves the already installed bp's.
+                 *
+                 * attr.enable_on_exec means that exec/mmap will install the
+                 * breakpoints we need.
+                 */
                 done = tu->filter.nr_systemwide ||
-                        (event->hw.tp_target->flags & PF_EXITING) ||
+                        event->parent || event->attr.enable_on_exec ||
                         uprobe_filter_event(tu, event);
+                list_add(&event->hw.tp_list, &tu->filter.perf_events);
         } else {
-                tu->filter.nr_systemwide--;
                 done = tu->filter.nr_systemwide;
+                tu->filter.nr_systemwide++;
         }
         write_unlock(&tu->filter.rwlock);
 
-        if (!done)
-                uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
-
-        return 0;
+        err = 0;
+        if (!done) {
+                err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
+                if (err)
+                        uprobe_perf_close(tu, event);
+        }
+        return err;
 }
 
 static bool uprobe_perf_filter(struct uprobe_consumer *uc,
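
The swap of uprobe_perf_open()/uprobe_perf_close() above also tightens the error contract: open now propagates a uprobe_apply() failure and rolls back its own filter bookkeeping by calling close itself. A sketch of what that buys the caller (the function name here is hypothetical; the real caller is the trace event enable path):

        static int attach_event_sketch(struct trace_uprobe *tu, struct perf_event *event)
        {
                int err = uprobe_perf_open(tu, event);

                /*
                 * Nothing to unwind on failure: uprobe_perf_open() has already
                 * taken the event back out of the filter via uprobe_perf_close(),
                 * so the caller only has to report err.
                 */
                return err;
        }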