author	Srikar Dronamraju <srikar@linux.vnet.ibm.com>	2012-03-13 14:00:11 -0400
committer	Ingo Molnar <mingo@elte.hu>	2012-03-14 02:41:36 -0400
commit	0326f5a94ddea33fa331b2519f4172f4fb387baa (patch)
tree	5485c637754a126c90852e5285842e8462d2826a
parent	ef15eda98217f5183f457e7a2de8b79555ef908b (diff)
uprobes/core: Handle breakpoint and singlestep exceptions
Uprobes uses exception notifiers to get to know if a thread hit a
breakpoint or a singlestep exception.

When a thread hits a uprobe or is singlestepping post a uprobe hit, the
uprobe exception notifier sets its TIF_UPROBE bit, which will then be
checked on its return-to-userspace path (do_notify_resume() ->
uprobe_notify_resume()), where the consumers' handlers are run (in task
context) based on the defined filters.

Uprobe hits are thread-specific, so we need to maintain information about
whether a task hit a uprobe, which uprobe was hit, and the slot where the
original instruction was copied for xol so that it can be singlestepped
with the appropriate fixups.

In some cases, special care is needed for instructions that are executed
out of line (xol). These are architecture-specific artefacts, such as
handling RIP-relative instructions on x86_64. Since the instruction at
which the uprobe was inserted is executed out of line, architecture-specific
fixups are added so that the thread continues normal execution in the
presence of a uprobe.

Postpone signals until we execute the probed insn. The post-xol path does
a recalc_sigpending() before returning to user mode; this ensures the
signal can't be lost.

Uprobes relies on the DIE_DEBUG notification to learn when a singlestep
is complete.

Add x86-specific uprobe exception notifiers and the hooks needed to
determine a uprobe hit and do the subsequent post-processing. Add the
requisite x86 fixups for xol for uprobes; specific cases needing fixups
include relative jumps (x86_64), calls, etc.

Where possible, we check and skip singlestepping the breakpointed
instruction. For now we skip single-byte as well as a few multi-byte NOP
instructions; this can be extended to other instructions too.

Credits to Oleg Nesterov for suggestions/patches related to the signal,
breakpoint and singlestep handling code.

Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jim Keniston <jkenisto@linux.vnet.ibm.com>
Cc: Linux-mm <linux-mm@kvack.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120313180011.29771.89027.sendpatchset@srdronam.in.ibm.com
[ Performed various cleanliness edits ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
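For context, a minimal in-kernel consumer of the API this patch plugs into
might look roughly as follows. This sketch is not part of the patch: the
/bin/ls path, the 0x4a0 file offset and the hit counter are hypothetical,
chosen only to show how uprobe_register()/uprobe_unregister() and a
uprobe_consumer handler (run from handler_chain() in task context) fit
together. Error handling is elided.

	#include <linux/module.h>
	#include <linux/namei.h>
	#include <linux/fs.h>
	#include <linux/uprobes.h>

	static int probe_hits;

	/* Runs in the context of the task that hit the breakpoint. */
	static int sample_handler(struct uprobe_consumer *self, struct pt_regs *regs)
	{
		probe_hits++;
		return 0;
	}

	static struct uprobe_consumer sample_consumer = {
		.handler = sample_handler,
		/* no ->filter: handler_chain() runs the handler for every task */
	};

	static struct inode *sample_inode;

	static int __init sample_init(void)
	{
		struct path path;
		int ret;

		ret = kern_path("/bin/ls", LOOKUP_FOLLOW, &path);
		if (ret)
			return ret;

		sample_inode = igrab(path.dentry->d_inode);
		path_put(&path);

		/* 0x4a0 is a made-up instruction boundary in the file */
		return uprobe_register(sample_inode, 0x4a0, &sample_consumer);
	}

	static void __exit sample_exit(void)
	{
		uprobe_unregister(sample_inode, 0x4a0, &sample_consumer);
		iput(sample_inode);
	}

	module_init(sample_init);
	module_exit(sample_exit);
	MODULE_LICENSE("GPL");

Note the probe is keyed by (inode, file offset), not by virtual address:
the same registration fires in every process that maps the file.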
-rw-r--r--  arch/x86/include/asm/thread_info.h  |   2
-rw-r--r--  arch/x86/include/asm/uprobes.h      |  16
-rw-r--r--  arch/x86/kernel/signal.c            |   6
-rw-r--r--  arch/x86/kernel/uprobes.c           | 265
-rw-r--r--  include/linux/sched.h               |   4
-rw-r--r--  include/linux/uprobes.h             |  55
-rw-r--r--  kernel/events/uprobes.c             | 323
-rw-r--r--  kernel/fork.c                       |   4
-rw-r--r--  kernel/signal.c                     |   4
9 files changed, 664 insertions(+), 15 deletions(-)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ad6df8ccd715..0710c11305d4 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -85,6 +85,7 @@ struct thread_info {
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
 #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
+#define TIF_UPROBE		12	/* breakpointed or singlestepping */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* IA32 compatibility process */
 #define TIF_FORK		18	/* ret_from_fork */
@@ -109,6 +110,7 @@ struct thread_info {
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
 #define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
+#define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 0500391f57d0..1e9bed14f7ae 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -23,6 +23,8 @@
  * Jim Keniston
  */

+#include <linux/notifier.h>
+
 typedef u8 uprobe_opcode_t;

 #define MAX_UINSN_BYTES 16
@@ -39,5 +41,17 @@ struct arch_uprobe {
 #endif
 };

-extern int arch_uprobes_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm);
+struct arch_uprobe_task {
+	unsigned long			saved_trap_nr;
+#ifdef CONFIG_X86_64
+	unsigned long			saved_scratch_register;
+#endif
+};
+
+extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm);
+extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
+extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
+extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 #endif /* _ASM_UPROBES_H */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 9c73acc1c860..b3cd6913ceea 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -18,6 +18,7 @@
 #include <linux/personality.h>
 #include <linux/uaccess.h>
 #include <linux/user-return-notifier.h>
+#include <linux/uprobes.h>

 #include <asm/processor.h>
 #include <asm/ucontext.h>
@@ -823,6 +824,11 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 		mce_notify_process();
 #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */

+	if (thread_info_flags & _TIF_UPROBE) {
+		clear_thread_flag(TIF_UPROBE);
+		uprobe_notify_resume(regs);
+	}
+
 	/* deal with pending signal delivery */
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 851a11b0d38c..dc4e910a7d96 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -24,22 +24,28 @@
 #include <linux/sched.h>
 #include <linux/ptrace.h>
 #include <linux/uprobes.h>
+#include <linux/uaccess.h>

 #include <linux/kdebug.h>
+#include <asm/processor.h>
 #include <asm/insn.h>

 /* Post-execution fixups. */

 /* No fixup needed */
 #define UPROBE_FIX_NONE		0x0
+
 /* Adjust IP back to vicinity of actual insn */
 #define UPROBE_FIX_IP		0x1
+
 /* Adjust the return address of a call insn */
 #define UPROBE_FIX_CALL	0x2

 #define UPROBE_FIX_RIP_AX	0x8000
 #define UPROBE_FIX_RIP_CX	0x4000

+#define UPROBE_TRAP_NR		UINT_MAX
+
 /* Adaptations for mhiramat x86 decoder v14. */
 #define OPCODE1(insn)		((insn)->opcode.bytes[0])
 #define OPCODE2(insn)		((insn)->opcode.bytes[1])
@@ -221,10 +227,9 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
 }

 /*
- * Figure out which fixups post_xol() will need to perform, and annotate
- * arch_uprobe->fixups accordingly. To start with,
- * arch_uprobe->fixups is either zero or it reflects rip-related
- * fixups.
+ * Figure out which fixups arch_uprobe_post_xol() will need to perform, and
+ * annotate arch_uprobe->fixups accordingly. To start with,
+ * arch_uprobe->fixups is either zero or it reflects rip-related fixups.
  */
 static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
 {
@@ -401,12 +406,12 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,
 #endif /* CONFIG_X86_64 */

 /**
- * arch_uprobes_analyze_insn - instruction analysis including validity and fixups.
+ * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
  * @mm: the probed address space.
  * @arch_uprobe: the probepoint information.
  * Return 0 on success or a -ve number on error.
  */
-int arch_uprobes_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm)
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm)
 {
 	int ret;
 	struct insn insn;
@@ -421,3 +426,249 @@ int arch_uprobes_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm)

 	return 0;
 }
+
+#ifdef CONFIG_X86_64
+/*
+ * If we're emulating a rip-relative instruction, save the contents
+ * of the scratch register and store the target address in that register.
+ */
+static void
+pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
+				struct arch_uprobe_task *autask)
+{
+	if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
+		autask->saved_scratch_register = regs->ax;
+		regs->ax = current->utask->vaddr;
+		regs->ax += auprobe->rip_rela_target_address;
+	} else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
+		autask->saved_scratch_register = regs->cx;
+		regs->cx = current->utask->vaddr;
+		regs->cx += auprobe->rip_rela_target_address;
+	}
+}
+#else
+static void
+pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
+				struct arch_uprobe_task *autask)
+{
+	/* No RIP-relative addressing on 32-bit */
+}
+#endif
+
+/*
+ * arch_uprobe_pre_xol - prepare to execute out of line.
+ * @auprobe: the probepoint information.
+ * @regs: reflects the saved user state of current task.
+ */
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct arch_uprobe_task *autask;
+
+	autask = &current->utask->autask;
+	autask->saved_trap_nr = current->thread.trap_nr;
+	current->thread.trap_nr = UPROBE_TRAP_NR;
+	regs->ip = current->utask->xol_vaddr;
+	pre_xol_rip_insn(auprobe, regs, autask);
+
+	return 0;
+}
+
+/*
+ * This function is called by arch_uprobe_post_xol() to adjust the return
+ * address pushed by a call instruction executed out of line.
+ */
+static int adjust_ret_addr(unsigned long sp, long correction)
+{
+	int rasize, ncopied;
+	long ra = 0;
+
+	if (is_ia32_task())
+		rasize = 4;
+	else
+		rasize = 8;
+
+	ncopied = copy_from_user(&ra, (void __user *)sp, rasize);
+	if (unlikely(ncopied))
+		return -EFAULT;
+
+	ra += correction;
+	ncopied = copy_to_user((void __user *)sp, &ra, rasize);
+	if (unlikely(ncopied))
+		return -EFAULT;
+
+	return 0;
+}
+
+#ifdef CONFIG_X86_64
+static bool is_riprel_insn(struct arch_uprobe *auprobe)
+{
+	return ((auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) != 0);
+}
+
+static void
+handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+{
+	if (is_riprel_insn(auprobe)) {
+		struct arch_uprobe_task *autask;
+
+		autask = &current->utask->autask;
+		if (auprobe->fixups & UPROBE_FIX_RIP_AX)
+			regs->ax = autask->saved_scratch_register;
+		else
+			regs->cx = autask->saved_scratch_register;
+
+		/*
+		 * The original instruction includes a displacement, and so
+		 * is 4 bytes longer than what we've just single-stepped.
+		 * Fall through to handle stuff like "jmpq *...(%rip)" and
+		 * "callq *...(%rip)".
+		 */
+		if (correction)
+			*correction += 4;
+	}
+}
+#else
+static void
+handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+{
+	/* No RIP-relative addressing on 32-bit */
+}
+#endif
+
+/*
+ * If the xol insn itself traps and generates a signal (say,
+ * SIGILL/SIGSEGV/etc), then detect the case where a singlestepped
+ * instruction jumps back to its own address. It is assumed that anything
+ * like do_page_fault/do_trap/etc sets thread.trap_nr != -1.
+ *
+ * arch_uprobe_pre_xol/arch_uprobe_post_xol save/restore thread.trap_nr,
+ * arch_uprobe_xol_was_trapped() simply checks that ->trap_nr is not equal to
+ * UPROBE_TRAP_NR == -1 set by arch_uprobe_pre_xol().
+ */
+bool arch_uprobe_xol_was_trapped(struct task_struct *t)
+{
+	if (t->thread.trap_nr != UPROBE_TRAP_NR)
+		return true;
+
+	return false;
+}
+
+/*
+ * Called after single-stepping. To avoid the SMP problems that can
+ * occur when we temporarily put back the original opcode to
+ * single-step, we single-stepped a copy of the instruction.
+ *
+ * This function prepares to resume execution after the single-step.
+ * We have to fix things up as follows:
+ *
+ * Typically, the new ip is relative to the copied instruction. We need
+ * to make it relative to the original instruction (FIX_IP). Exceptions
+ * are return instructions and absolute or indirect jump or call instructions.
+ *
+ * If the single-stepped instruction was a call, the return address that
+ * is atop the stack is the address following the copied instruction. We
+ * need to make it the address following the original instruction (FIX_CALL).
+ *
+ * If the original instruction was a rip-relative instruction such as
+ * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
+ * instruction using a scratch register -- e.g., "movl %edx,(%rax)".
+ * We need to restore the contents of the scratch register and adjust
+ * the ip, keeping in mind that the instruction we executed is 4 bytes
+ * shorter than the original instruction (since we squeezed out the offset
+ * field). (FIX_RIP_AX or FIX_RIP_CX)
+ */
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct uprobe_task *utask;
+	long correction;
+	int result = 0;
+
+	WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
+
+	utask = current->utask;
+	current->thread.trap_nr = utask->autask.saved_trap_nr;
+	correction = (long)(utask->vaddr - utask->xol_vaddr);
+	handle_riprel_post_xol(auprobe, regs, &correction);
+	if (auprobe->fixups & UPROBE_FIX_IP)
+		regs->ip += correction;
+
+	if (auprobe->fixups & UPROBE_FIX_CALL)
+		result = adjust_ret_addr(regs->sp, correction);
+
+	return result;
+}
+
+/* callback routine for handling exceptions. */
+int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data)
+{
+	struct die_args *args = data;
+	struct pt_regs *regs = args->regs;
+	int ret = NOTIFY_DONE;
+
+	/* We are only interested in userspace traps */
+	if (regs && !user_mode_vm(regs))
+		return NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_INT3:
+		if (uprobe_pre_sstep_notifier(regs))
+			ret = NOTIFY_STOP;
+
+		break;
+
+	case DIE_DEBUG:
+		if (uprobe_post_sstep_notifier(regs))
+			ret = NOTIFY_STOP;
+
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * This function gets called when the XOL instruction either gets trapped or
+ * the thread has a fatal signal, so reset the instruction pointer to its
+ * probed address.
+ */
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+
+	current->thread.trap_nr = utask->autask.saved_trap_nr;
+	handle_riprel_post_xol(auprobe, regs, NULL);
+	instruction_pointer_set(regs, utask->vaddr);
+}
+
+/*
+ * Skip these instructions as per the currently known x86 ISA.
+ * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
+ */
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	int i;
+
+	for (i = 0; i < MAX_UINSN_BYTES; i++) {
+		if (auprobe->insn[i] == 0x66)
+			continue;
+
+		if (auprobe->insn[i] == 0x90)
+			return true;
+
+		if (i == (MAX_UINSN_BYTES - 1))
+			break;
+
+		if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x1f))
+			return true;
+
+		if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x19))
+			return true;
+
+		if ((auprobe->insn[i] == 0x87) && (auprobe->insn[i+1] == 0xc0))
+			return true;
+
+		break;
+	}
+	return false;
+}
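A quick worked example of the correction arithmetic implemented by
arch_uprobe_post_xol() above. The addresses and instruction lengths are
hypothetical (a 6-byte "movl %edx,0xnnnn(%rip)" rewritten as the 2-byte
"movl %edx,(%rax)"); the point is only to show how UPROBE_FIX_IP plus the
rip-relative +4 land the ip right after the original instruction:

	#include <assert.h>

	int main(void)
	{
		/* Hypothetical addresses, for illustration only. */
		unsigned long vaddr     = 0x00400500UL;     /* probed insn in the binary  */
		unsigned long xol_vaddr = 0x7f0000001000UL; /* xol slot the copy ran from */

		/* ip after single-stepping the 2-byte scratch-register copy */
		unsigned long ip = xol_vaddr + 2;

		/* arch_uprobe_post_xol(): correction = vaddr - xol_vaddr */
		long correction = (long)(vaddr - xol_vaddr);

		/* handle_riprel_post_xol(): the original insn was 4 bytes longer */
		correction += 4;

		/* UPROBE_FIX_IP */
		ip += correction;

		/* execution resumes right after the original 6-byte instruction */
		assert(ip == vaddr + 6);
		return 0;
	}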
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6bfd88..8379e3771690 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1590,6 +1590,10 @@ struct task_struct {
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	atomic_t ptrace_bp_refcnt;
 #endif
+#ifdef CONFIG_UPROBES
+	struct uprobe_task *utask;
+	int uprobe_srcu_id;
+#endif
 };

 /* Future-safe accessor for struct task_struct's cpus_allowed. */
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index eac525f41b94..5ec778fdce6f 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -28,8 +28,9 @@
 #include <linux/rbtree.h>

 struct vm_area_struct;
+
 #ifdef CONFIG_ARCH_SUPPORTS_UPROBES
-#include <asm/uprobes.h>
+# include <asm/uprobes.h>
 #endif

 /* flags that denote/change uprobes behaviour */
@@ -39,6 +40,8 @@ struct vm_area_struct;

 /* Dont run handlers when first register/ last unregister in progress*/
 #define UPROBE_RUN_HANDLER	0x2
+/* Can skip singlestep */
+#define UPROBE_SKIP_SSTEP	0x4

 struct uprobe_consumer {
 	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
@@ -52,13 +55,42 @@ struct uprobe_consumer {
 };

 #ifdef CONFIG_UPROBES
+enum uprobe_task_state {
+	UTASK_RUNNING,
+	UTASK_BP_HIT,
+	UTASK_SSTEP,
+	UTASK_SSTEP_ACK,
+	UTASK_SSTEP_TRAPPED,
+};
+
+/*
+ * uprobe_task: Metadata of a task while it singlesteps.
+ */
+struct uprobe_task {
+	enum uprobe_task_state		state;
+	struct arch_uprobe_task		autask;
+
+	struct uprobe			*active_uprobe;
+
+	unsigned long			xol_vaddr;
+	unsigned long			vaddr;
+};
+
 extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
 extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify);
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_mmap(struct vm_area_struct *vma);
-#else /* CONFIG_UPROBES is not defined */
+extern void uprobe_free_utask(struct task_struct *t);
+extern void uprobe_copy_process(struct task_struct *t);
+extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
+extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
+extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
+extern void uprobe_notify_resume(struct pt_regs *regs);
+extern bool uprobe_deny_signal(void);
+extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
+#else /* !CONFIG_UPROBES */
 static inline int
 uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {
@@ -72,5 +104,22 @@ static inline int uprobe_mmap(struct vm_area_struct *vma)
 {
 	return 0;
 }
-#endif /* CONFIG_UPROBES */
+static inline void uprobe_notify_resume(struct pt_regs *regs)
+{
+}
+static inline bool uprobe_deny_signal(void)
+{
+	return false;
+}
+static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
+{
+	return 0;
+}
+static inline void uprobe_free_utask(struct task_struct *t)
+{
+}
+static inline void uprobe_copy_process(struct task_struct *t)
+{
+}
+#endif /* !CONFIG_UPROBES */
 #endif /* _LINUX_UPROBES_H */
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index e56e56aa7535..b807d1566b64 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -30,9 +30,12 @@
 #include <linux/rmap.h>		/* anon_vma_prepare */
 #include <linux/mmu_notifier.h>	/* set_pte_at_notify */
 #include <linux/swap.h>		/* try_to_free_swap */
+#include <linux/ptrace.h>	/* user_enable_single_step */
+#include <linux/kdebug.h>	/* notifier mechanism */

 #include <linux/uprobes.h>

+static struct srcu_struct uprobes_srcu;
 static struct rb_root uprobes_tree = RB_ROOT;

 static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */
@@ -486,6 +489,9 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
 	u = __insert_uprobe(uprobe);
 	spin_unlock_irqrestore(&uprobes_treelock, flags);

+	/* For now assume that the instruction need not be single-stepped */
+	uprobe->flags |= UPROBE_SKIP_SSTEP;
+
 	return u;
 }
491 497
@@ -523,6 +529,21 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 	return uprobe;
 }

+static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
+{
+	struct uprobe_consumer *uc;
+
+	if (!(uprobe->flags & UPROBE_RUN_HANDLER))
+		return;
+
+	down_read(&uprobe->consumer_rwsem);
+	for (uc = uprobe->consumers; uc; uc = uc->next) {
+		if (!uc->filter || uc->filter(uc, current))
+			uc->handler(uc, regs);
+	}
+	up_read(&uprobe->consumer_rwsem);
+}
+
 /* Returns the previous consumer */
 static struct uprobe_consumer *
 consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
@@ -645,7 +666,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 	if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
 		return -EEXIST;

-	ret = arch_uprobes_analyze_insn(&uprobe->arch, mm);
+	ret = arch_uprobe_analyze_insn(&uprobe->arch, mm);
 	if (ret)
 		return ret;

@@ -662,10 +683,21 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
 	set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true);
 }

+/*
+ * There could be threads that have hit the breakpoint and are entering the
+ * notifier code and trying to acquire the uprobes_treelock. The thread
+ * calling delete_uprobe() that is removing the uprobe from the rb_tree can
+ * race with these threads and might acquire the uprobes_treelock compared
+ * to some of the breakpoint hit threads. In such a case, the breakpoint
+ * hit threads will not find the uprobe. The current unregistering thread
+ * waits till all other threads have hit a breakpoint, to acquire the
+ * uprobes_treelock before the uprobe is removed from the rbtree.
+ */
 static void delete_uprobe(struct uprobe *uprobe)
 {
 	unsigned long flags;

+	synchronize_srcu(&uprobes_srcu);
 	spin_lock_irqsave(&uprobes_treelock, flags);
 	rb_erase(&uprobe->rb_node, &uprobes_tree);
 	spin_unlock_irqrestore(&uprobes_treelock, flags);
@@ -1010,6 +1042,288 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	return ret;
 }

+/**
+ * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs
+ * @regs: Reflects the saved state of the task after it has hit a breakpoint
+ * instruction.
+ * Return the address of the breakpoint instruction.
+ */
+unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
+{
+	return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
+}
+
+/*
+ * Called with no locks held.
+ * Called in context of an exiting or an exec-ing thread.
+ */
+void uprobe_free_utask(struct task_struct *t)
+{
+	struct uprobe_task *utask = t->utask;
+
+	if (t->uprobe_srcu_id != -1)
+		srcu_read_unlock_raw(&uprobes_srcu, t->uprobe_srcu_id);
+
+	if (!utask)
+		return;
+
+	if (utask->active_uprobe)
+		put_uprobe(utask->active_uprobe);
+
+	kfree(utask);
+	t->utask = NULL;
+}
+
+/*
+ * Called in context of a new clone/fork from copy_process.
+ */
+void uprobe_copy_process(struct task_struct *t)
+{
+	t->utask = NULL;
+	t->uprobe_srcu_id = -1;
+}
+
+/*
+ * Allocate a uprobe_task object for the task.
+ * Called when the thread hits a breakpoint for the first time.
+ *
+ * Returns:
+ * - pointer to new uprobe_task on success
+ * - NULL otherwise
+ */
+static struct uprobe_task *add_utask(void)
+{
+	struct uprobe_task *utask;
+
+	utask = kzalloc(sizeof *utask, GFP_KERNEL);
+	if (unlikely(!utask))
+		return NULL;
+
+	utask->active_uprobe = NULL;
+	current->utask = utask;
+	return utask;
+}
+
+/* Prepare to single-step probed instruction out of line. */
+static int
+pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr)
+{
+	return -EFAULT;
+}
+
+/*
+ * If we are singlestepping, then ensure this thread is not connected to
+ * non-fatal signals until completion of singlestep. When the xol insn
+ * itself triggers the signal, restart the original insn even if the task
+ * is already SIGKILL'ed (since coredump should report the correct ip).
+ * This is even more important if the task has a handler for SIGSEGV/etc:
+ * the _same_ instruction should be repeated again after return from the
+ * signal handler, and SSTEP can never finish in this case.
+ */
+bool uprobe_deny_signal(void)
+{
+	struct task_struct *t = current;
+	struct uprobe_task *utask = t->utask;
+
+	if (likely(!utask || !utask->active_uprobe))
+		return false;
+
+	WARN_ON_ONCE(utask->state != UTASK_SSTEP);
+
+	if (signal_pending(t)) {
+		spin_lock_irq(&t->sighand->siglock);
+		clear_tsk_thread_flag(t, TIF_SIGPENDING);
+		spin_unlock_irq(&t->sighand->siglock);
+
+		if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) {
+			utask->state = UTASK_SSTEP_TRAPPED;
+			set_tsk_thread_flag(t, TIF_UPROBE);
+			set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Avoid singlestepping the original instruction if the original instruction
+ * is a NOP or can be emulated.
+ */
+static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
+{
+	if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
+		return true;
+
+	uprobe->flags &= ~UPROBE_SKIP_SSTEP;
+	return false;
+}
+
+/*
+ * Run handler and ask thread to singlestep.
+ * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
+ */
+static void handle_swbp(struct pt_regs *regs)
+{
+	struct vm_area_struct *vma;
+	struct uprobe_task *utask;
+	struct uprobe *uprobe;
+	struct mm_struct *mm;
+	unsigned long bp_vaddr;
+
+	uprobe = NULL;
+	bp_vaddr = uprobe_get_swbp_addr(regs);
+	mm = current->mm;
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, bp_vaddr);
+
+	if (vma && vma->vm_start <= bp_vaddr && valid_vma(vma, false)) {
+		struct inode *inode;
+		loff_t offset;
+
+		inode = vma->vm_file->f_mapping->host;
+		offset = bp_vaddr - vma->vm_start;
+		offset += (vma->vm_pgoff << PAGE_SHIFT);
+		uprobe = find_uprobe(inode, offset);
+	}
+
+	srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id);
+	current->uprobe_srcu_id = -1;
+	up_read(&mm->mmap_sem);
+
+	if (!uprobe) {
+		/* No matching uprobe; signal SIGTRAP. */
+		send_sig(SIGTRAP, current, 0);
+		return;
+	}
+
+	utask = current->utask;
+	if (!utask) {
+		utask = add_utask();
+		/* Cannot allocate; re-execute the instruction. */
+		if (!utask)
+			goto cleanup_ret;
+	}
+	utask->active_uprobe = uprobe;
+	handler_chain(uprobe, regs);
+	if (uprobe->flags & UPROBE_SKIP_SSTEP && can_skip_sstep(uprobe, regs))
+		goto cleanup_ret;
+
+	utask->state = UTASK_SSTEP;
+	if (!pre_ssout(uprobe, regs, bp_vaddr)) {
+		user_enable_single_step(current);
+		return;
+	}
+
+cleanup_ret:
+	if (utask) {
+		utask->active_uprobe = NULL;
+		utask->state = UTASK_RUNNING;
+	}
+	if (uprobe) {
+		if (!(uprobe->flags & UPROBE_SKIP_SSTEP))
+
+			/*
+			 * cannot singlestep; cannot skip instruction;
+			 * re-execute the instruction.
+			 */
+			instruction_pointer_set(regs, bp_vaddr);
+
+		put_uprobe(uprobe);
+	}
+}
+
+/*
+ * Perform required fix-ups and disable singlestep.
+ * Allow pending signals to take effect.
+ */
+static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
+{
+	struct uprobe *uprobe;
+
+	uprobe = utask->active_uprobe;
+	if (utask->state == UTASK_SSTEP_ACK)
+		arch_uprobe_post_xol(&uprobe->arch, regs);
+	else if (utask->state == UTASK_SSTEP_TRAPPED)
+		arch_uprobe_abort_xol(&uprobe->arch, regs);
+	else
+		WARN_ON_ONCE(1);
+
+	put_uprobe(uprobe);
+	utask->active_uprobe = NULL;
+	utask->state = UTASK_RUNNING;
+	user_disable_single_step(current);
+
+	spin_lock_irq(&current->sighand->siglock);
+	recalc_sigpending(); /* see uprobe_deny_signal() */
+	spin_unlock_irq(&current->sighand->siglock);
+}
+
+/*
+ * On breakpoint hit, the breakpoint notifier sets the TIF_UPROBE flag (and
+ * on subsequent probe hits on the thread sets the state to UTASK_BP_HIT)
+ * and allows the thread to return from interrupt.
+ *
+ * On singlestep exception, the singlestep notifier sets the TIF_UPROBE flag
+ * and also sets the state to UTASK_SSTEP_ACK and allows the thread to return
+ * from interrupt.
+ *
+ * While returning to userspace, the thread notices the TIF_UPROBE flag and
+ * calls uprobe_notify_resume().
+ */
+void uprobe_notify_resume(struct pt_regs *regs)
+{
+	struct uprobe_task *utask;
+
+	utask = current->utask;
+	if (!utask || utask->state == UTASK_BP_HIT)
+		handle_swbp(regs);
+	else
+		handle_singlestep(utask, regs);
+}
+
+/*
+ * uprobe_pre_sstep_notifier gets called from interrupt context as part of
+ * notifier mechanism. Set TIF_UPROBE flag and indicate breakpoint hit.
+ */
+int uprobe_pre_sstep_notifier(struct pt_regs *regs)
+{
+	struct uprobe_task *utask;
+
+	if (!current->mm)
+		return 0;
+
+	utask = current->utask;
+	if (utask)
+		utask->state = UTASK_BP_HIT;
+
+	set_thread_flag(TIF_UPROBE);
+	current->uprobe_srcu_id = srcu_read_lock_raw(&uprobes_srcu);
+
+	return 1;
+}
+
+/*
+ * uprobe_post_sstep_notifier gets called in interrupt context as part of
+ * notifier mechanism. Set TIF_UPROBE flag and indicate completion of
+ * singlestep.
+ */
+int uprobe_post_sstep_notifier(struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+
+	if (!current->mm || !utask || !utask->active_uprobe)
+		/* task is currently not uprobed */
+		return 0;
+
+	utask->state = UTASK_SSTEP_ACK;
+	set_thread_flag(TIF_UPROBE);
+	return 1;
+}
+
+static struct notifier_block uprobe_exception_nb = {
+	.notifier_call		= arch_uprobe_exception_notify,
+	.priority		= INT_MAX-1,	/* notified after kprobes, kgdb */
+};
+
 static int __init init_uprobes(void)
 {
 	int i;
@@ -1018,12 +1332,13 @@ static int __init init_uprobes(void)
 		mutex_init(&uprobes_mutex[i]);
 		mutex_init(&uprobes_mmap_mutex[i]);
 	}
-	return 0;
+	init_srcu_struct(&uprobes_srcu);
+
+	return register_die_notifier(&uprobe_exception_nb);
 }
+module_init(init_uprobes);

 static void __exit exit_uprobes(void)
 {
 }
-
-module_init(init_uprobes);
 module_exit(exit_uprobes);
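The srcu usage above follows the standard SRCU reader/updater pattern, with
one twist: delete_uprobe() calls synchronize_srcu() *before* erasing the
node, so a thread that already took the read lock in
uprobe_pre_sstep_notifier() is guaranteed to still find the uprobe in the
tree. A generic sketch of the pattern with the uprobes specifics stripped
out (illustration only; the demo_* names are made up):

	#include <linux/srcu.h>
	#include <linux/slab.h>

	static struct srcu_struct demo_srcu;	/* init_srcu_struct() at init, as above */

	struct demo_obj {
		int payload;
	};
	static struct demo_obj __rcu *demo_published;

	/* Reader: pin a grace period across the lookup, cf. the breakpoint path. */
	static int demo_read(void)
	{
		struct demo_obj *obj;
		int idx, val = -1;

		idx = srcu_read_lock(&demo_srcu);
		obj = srcu_dereference(demo_published, &demo_srcu);
		if (obj)
			val = obj->payload;
		srcu_read_unlock(&demo_srcu, idx);

		return val;
	}

	/* Updater: unpublish, wait out all readers, then free. */
	static void demo_retire(void)
	{
		struct demo_obj *obj = rcu_dereference_protected(demo_published, 1);

		rcu_assign_pointer(demo_published, NULL);
		synchronize_srcu(&demo_srcu);	/* cf. delete_uprobe() */
		kfree(obj);			/* no reader can still see it */
	}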
diff --git a/kernel/fork.c b/kernel/fork.c
index e2cd3e2a5ae8..eb7b63334009 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -67,6 +67,7 @@
 #include <linux/oom.h>
 #include <linux/khugepaged.h>
 #include <linux/signalfd.h>
+#include <linux/uprobes.h>

 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -701,6 +702,8 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 		exit_pi_state_list(tsk);
 #endif

+	uprobe_free_utask(tsk);
+
 	/* Get rid of any cached register state */
 	deactivate_mm(tsk, mm);

@@ -1295,6 +1298,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	INIT_LIST_HEAD(&p->pi_state_list);
 	p->pi_state_cache = NULL;
 #endif
+	uprobe_copy_process(p);
 	/*
 	 * sigaltstack should be cleared when sharing the same VM
 	 */
diff --git a/kernel/signal.c b/kernel/signal.c
index 8511e39813c7..e93ff0a719a0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -29,6 +29,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
 #include <linux/user_namespace.h>
+#include <linux/uprobes.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>

@@ -2192,6 +2193,9 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
 	struct signal_struct *signal = current->signal;
 	int signr;

+	if (unlikely(uprobe_deny_signal()))
+		return 0;
+
 relock:
 	/*
 	 * We'll jump back here after any time we were stopped in TASK_STOPPED.