author     Ingo Molnar <mingo@kernel.org>   2013-02-11 04:41:53 -0500
committer  Ingo Molnar <mingo@kernel.org>   2013-02-11 04:41:53 -0500
commit     a3d4fd7a2d81604fedfa270d29c824b8d3380c2e (patch)
tree       1bd1defbfe3f285dfa7c77f94bc5523ac4a82679
parent     661e591525ffbb0439270bf2a4d165c04f87543d (diff)
parent     b2fe8ba674e8acbb9e8e63510b802c6d054d88a3 (diff)
Merge branch 'uprobes/core' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc into perf/core
Improve uprobes performance by adding 'pre-filtering' support, by Oleg Nesterov:

  # time perl -e 'syscall -1 for 1..100_000'
  real    0m0.040s
  user    0m0.027s
  sys     0m0.010s

  # perf probe -x /lib/libc.so.6 syscall
  # perf record -e probe_libc:syscall sleep 100 &

Before this series:

  # time perl -e 'syscall -1 for 1..100_000'
  real    0m1.714s
  user    0m0.103s
  sys     0m1.607s

After:

  # time perl -e 'syscall -1 for 1..100_000'
  real    0m0.037s
  user    0m0.013s
  sys     0m0.023s

Signed-off-by: Ingo Molnar <mingo@kernel.org>
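For context, a minimal sketch (not part of this merge) of what the reworked consumer interface looks like to an in-kernel user: the optional ->filter() callback added by this series is consulted per mm_struct, so a breakpoint is installed only into address spaces the consumer actually cares about instead of into every process that maps the file. All names here (my_handler, my_filter, watched_mm, my_attach) are hypothetical; the prototypes follow the include/linux/uprobes.h hunk below.

/*
 * Illustrative sketch only -- not code from this merge.  A hypothetical
 * in-kernel consumer of the reworked interface: ->filter() is consulted
 * per mm, so the breakpoint is installed only where it is wanted.
 */
#include <linux/uprobes.h>
#include <linux/ptrace.h>
#include <linux/printk.h>

static struct mm_struct *watched_mm;	/* hypothetical: the one mm we trace */

static int my_handler(struct uprobe_consumer *self, struct pt_regs *regs)
{
	pr_info("uprobe hit, ip=%lx\n", instruction_pointer(regs));
	return 0;				/* 0 == keep the breakpoint */
}

static bool my_filter(struct uprobe_consumer *self,
		      enum uprobe_filter_ctx ctx, struct mm_struct *mm)
{
	/* install/retain the breakpoint only in the mm we care about */
	return mm == watched_mm;
}

static struct uprobe_consumer my_consumer = {
	.handler = my_handler,
	.filter  = my_filter,
};

/* inode + offset identify the probed instruction in the mapped file */
static int my_attach(struct inode *inode, loff_t offset)
{
	return uprobe_register(inode, offset, &my_consumer);
}

static void my_detach(struct inode *inode, loff_t offset)
{
	uprobe_unregister(inode, offset, &my_consumer);
}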
-rw-r--r--  arch/x86/kernel/uprobes.c      4
-rw-r--r--  include/linux/perf_event.h     9
-rw-r--r--  include/linux/uprobes.h       23
-rw-r--r--  kernel/events/core.c           5
-rw-r--r--  kernel/events/uprobes.c      465
-rw-r--r--  kernel/ptrace.c                6
-rw-r--r--  kernel/trace/trace_probe.h     1
-rw-r--r--  kernel/trace/trace_uprobe.c  217
8 files changed, 456 insertions, 274 deletions
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index c71025b67462..0ba4cfb4f412 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -680,8 +680,10 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
680 if (auprobe->insn[i] == 0x66) 680 if (auprobe->insn[i] == 0x66)
681 continue; 681 continue;
682 682
683 if (auprobe->insn[i] == 0x90) 683 if (auprobe->insn[i] == 0x90) {
684 regs->ip += i + 1;
684 return true; 685 return true;
686 }
685 687
686 break; 688 break;
687 } 689 }
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 42adf012145d..e47ee462c2f2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -135,16 +135,21 @@ struct hw_perf_event {
135 struct { /* software */ 135 struct { /* software */
136 struct hrtimer hrtimer; 136 struct hrtimer hrtimer;
137 }; 137 };
138 struct { /* tracepoint */
139 struct task_struct *tp_target;
140 /* for tp_event->class */
141 struct list_head tp_list;
142 };
138#ifdef CONFIG_HAVE_HW_BREAKPOINT 143#ifdef CONFIG_HAVE_HW_BREAKPOINT
139 struct { /* breakpoint */ 144 struct { /* breakpoint */
140 struct arch_hw_breakpoint info;
141 struct list_head bp_list;
142 /* 145 /*
143 * Crufty hack to avoid the chicken and egg 146 * Crufty hack to avoid the chicken and egg
144 * problem hw_breakpoint has with context 147 * problem hw_breakpoint has with context
145 * creation and event initalization. 148 * creation and event initalization.
146 */ 149 */
147 struct task_struct *bp_target; 150 struct task_struct *bp_target;
151 struct arch_hw_breakpoint info;
152 struct list_head bp_list;
148 }; 153 };
149#endif 154#endif
150 }; 155 };
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 4f628a6fc5b4..02b83db8e2c5 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -35,13 +35,20 @@ struct inode;
35# include <asm/uprobes.h> 35# include <asm/uprobes.h>
36#endif 36#endif
37 37
38#define UPROBE_HANDLER_REMOVE 1
39#define UPROBE_HANDLER_MASK 1
40
41enum uprobe_filter_ctx {
42 UPROBE_FILTER_REGISTER,
43 UPROBE_FILTER_UNREGISTER,
44 UPROBE_FILTER_MMAP,
45};
46
38struct uprobe_consumer { 47struct uprobe_consumer {
39 int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); 48 int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
40 /* 49 bool (*filter)(struct uprobe_consumer *self,
41 * filter is optional; If a filter exists, handler is run 50 enum uprobe_filter_ctx ctx,
42 * if and only if filter returns true. 51 struct mm_struct *mm);
43 */
44 bool (*filter)(struct uprobe_consumer *self, struct task_struct *task);
45 52
46 struct uprobe_consumer *next; 53 struct uprobe_consumer *next;
47}; 54};
@@ -94,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign
94extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); 101extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
95extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); 102extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
96extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); 103extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
104extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
97extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); 105extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
98extern int uprobe_mmap(struct vm_area_struct *vma); 106extern int uprobe_mmap(struct vm_area_struct *vma);
99extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); 107extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
@@ -117,6 +125,11 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
117{ 125{
118 return -ENOSYS; 126 return -ENOSYS;
119} 127}
128static inline int
129uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add)
130{
131 return -ENOSYS;
132}
120static inline void 133static inline void
121uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) 134uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
122{ 135{
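A note on the return-value convention introduced in the header above (again a sketch, not code from this merge): a handler may return UPROBE_HANDLER_REMOVE to tell the core that this consumer is done with the current process; when every consumer agrees, handler_chain()/unapply_uprobe() in kernel/events/uprobes.c below strip the breakpoints out of that mm. Independently, uprobe_apply() lets an already-registered consumer re-walk the VMAs and install or remove breakpoints after its filter has changed, which is what the perf open/close paths in trace_uprobe.c use. The hit budget below is a made-up example:

/*
 * Illustrative sketch only.  After a fixed number of hits this consumer
 * asks the core to remove its breakpoint from the current mm.
 */
#include <linux/uprobes.h>
#include <linux/atomic.h>

static atomic_t hits_left = ATOMIC_INIT(16);	/* hypothetical budget */

static int budgeted_handler(struct uprobe_consumer *self, struct pt_regs *regs)
{
	if (atomic_dec_return(&hits_left) > 0)
		return 0;			/* keep the probe alive */

	/* budget exhausted: drop the breakpoint from this process */
	return UPROBE_HANDLER_REMOVE;
}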
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 301079d06f24..e2d4323c6ae6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6162,11 +6162,14 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
6162 6162
6163 if (task) { 6163 if (task) {
6164 event->attach_state = PERF_ATTACH_TASK; 6164 event->attach_state = PERF_ATTACH_TASK;
6165
6166 if (attr->type == PERF_TYPE_TRACEPOINT)
6167 event->hw.tp_target = task;
6165#ifdef CONFIG_HAVE_HW_BREAKPOINT 6168#ifdef CONFIG_HAVE_HW_BREAKPOINT
6166 /* 6169 /*
6167 * hw_breakpoint is a bit difficult here.. 6170 * hw_breakpoint is a bit difficult here..
6168 */ 6171 */
6169 if (attr->type == PERF_TYPE_BREAKPOINT) 6172 else if (attr->type == PERF_TYPE_BREAKPOINT)
6170 event->hw.bp_target = task; 6173 event->hw.bp_target = task;
6171#endif 6174#endif
6172 } 6175 }
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 30ea9a4f4ab4..a567c8c7ef31 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -27,6 +27,7 @@
27#include <linux/pagemap.h> /* read_mapping_page */ 27#include <linux/pagemap.h> /* read_mapping_page */
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/sched.h> 29#include <linux/sched.h>
30#include <linux/export.h>
30#include <linux/rmap.h> /* anon_vma_prepare */ 31#include <linux/rmap.h> /* anon_vma_prepare */
31#include <linux/mmu_notifier.h> /* set_pte_at_notify */ 32#include <linux/mmu_notifier.h> /* set_pte_at_notify */
32#include <linux/swap.h> /* try_to_free_swap */ 33#include <linux/swap.h> /* try_to_free_swap */
@@ -41,58 +42,31 @@
41#define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE 42#define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE
42 43
43static struct rb_root uprobes_tree = RB_ROOT; 44static struct rb_root uprobes_tree = RB_ROOT;
44
45static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
46
47#define UPROBES_HASH_SZ 13
48
49/* 45/*
50 * We need separate register/unregister and mmap/munmap lock hashes because 46 * allows us to skip the uprobe_mmap if there are no uprobe events active
51 * of mmap_sem nesting. 47 * at this time. Probably a fine grained per inode count is better?
52 *
53 * uprobe_register() needs to install probes on (potentially) all processes
54 * and thus needs to acquire multiple mmap_sems (consequtively, not
55 * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
56 * for the particular process doing the mmap.
57 *
58 * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
59 * because of lock order against i_mmap_mutex. This means there's a hole in
60 * the register vma iteration where a mmap() can happen.
61 *
62 * Thus uprobe_register() can race with uprobe_mmap() and we can try and
63 * install a probe where one is already installed.
64 */ 48 */
49#define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree)
65 50
66/* serialize (un)register */ 51static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
67static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
68
69#define uprobes_hash(v) (&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
70 52
53#define UPROBES_HASH_SZ 13
71/* serialize uprobe->pending_list */ 54/* serialize uprobe->pending_list */
72static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; 55static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
73#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) 56#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
74 57
75static struct percpu_rw_semaphore dup_mmap_sem; 58static struct percpu_rw_semaphore dup_mmap_sem;
76 59
77/*
78 * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe
79 * events active at this time. Probably a fine grained per inode count is
80 * better?
81 */
82static atomic_t uprobe_events = ATOMIC_INIT(0);
83
84/* Have a copy of original instruction */ 60/* Have a copy of original instruction */
85#define UPROBE_COPY_INSN 0 61#define UPROBE_COPY_INSN 0
86/* Dont run handlers when first register/ last unregister in progress*/
87#define UPROBE_RUN_HANDLER 1
88/* Can skip singlestep */ 62/* Can skip singlestep */
89#define UPROBE_SKIP_SSTEP 2 63#define UPROBE_SKIP_SSTEP 1
90 64
91struct uprobe { 65struct uprobe {
92 struct rb_node rb_node; /* node in the rb tree */ 66 struct rb_node rb_node; /* node in the rb tree */
93 atomic_t ref; 67 atomic_t ref;
68 struct rw_semaphore register_rwsem;
94 struct rw_semaphore consumer_rwsem; 69 struct rw_semaphore consumer_rwsem;
95 struct mutex copy_mutex; /* TODO: kill me and UPROBE_COPY_INSN */
96 struct list_head pending_list; 70 struct list_head pending_list;
97 struct uprobe_consumer *consumers; 71 struct uprobe_consumer *consumers;
98 struct inode *inode; /* Also hold a ref to inode */ 72 struct inode *inode; /* Also hold a ref to inode */
@@ -430,9 +404,6 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
430 u = __insert_uprobe(uprobe); 404 u = __insert_uprobe(uprobe);
431 spin_unlock(&uprobes_treelock); 405 spin_unlock(&uprobes_treelock);
432 406
433 /* For now assume that the instruction need not be single-stepped */
434 __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
435
436 return u; 407 return u;
437} 408}
438 409
@@ -452,8 +423,10 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
452 423
453 uprobe->inode = igrab(inode); 424 uprobe->inode = igrab(inode);
454 uprobe->offset = offset; 425 uprobe->offset = offset;
426 init_rwsem(&uprobe->register_rwsem);
455 init_rwsem(&uprobe->consumer_rwsem); 427 init_rwsem(&uprobe->consumer_rwsem);
456 mutex_init(&uprobe->copy_mutex); 428 /* For now assume that the instruction need not be single-stepped */
429 __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
457 430
458 /* add to uprobes_tree, sorted on inode:offset */ 431 /* add to uprobes_tree, sorted on inode:offset */
459 cur_uprobe = insert_uprobe(uprobe); 432 cur_uprobe = insert_uprobe(uprobe);
@@ -463,38 +436,17 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
463 kfree(uprobe); 436 kfree(uprobe);
464 uprobe = cur_uprobe; 437 uprobe = cur_uprobe;
465 iput(inode); 438 iput(inode);
466 } else {
467 atomic_inc(&uprobe_events);
468 } 439 }
469 440
470 return uprobe; 441 return uprobe;
471} 442}
472 443
473static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) 444static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
474{
475 struct uprobe_consumer *uc;
476
477 if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags))
478 return;
479
480 down_read(&uprobe->consumer_rwsem);
481 for (uc = uprobe->consumers; uc; uc = uc->next) {
482 if (!uc->filter || uc->filter(uc, current))
483 uc->handler(uc, regs);
484 }
485 up_read(&uprobe->consumer_rwsem);
486}
487
488/* Returns the previous consumer */
489static struct uprobe_consumer *
490consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
491{ 445{
492 down_write(&uprobe->consumer_rwsem); 446 down_write(&uprobe->consumer_rwsem);
493 uc->next = uprobe->consumers; 447 uc->next = uprobe->consumers;
494 uprobe->consumers = uc; 448 uprobe->consumers = uc;
495 up_write(&uprobe->consumer_rwsem); 449 up_write(&uprobe->consumer_rwsem);
496
497 return uc->next;
498} 450}
499 451
500/* 452/*
@@ -588,7 +540,8 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
588 if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) 540 if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
589 return ret; 541 return ret;
590 542
591 mutex_lock(&uprobe->copy_mutex); 543 /* TODO: move this into _register, until then we abuse this sem. */
544 down_write(&uprobe->consumer_rwsem);
592 if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) 545 if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
593 goto out; 546 goto out;
594 547
@@ -612,7 +565,30 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
612 set_bit(UPROBE_COPY_INSN, &uprobe->flags); 565 set_bit(UPROBE_COPY_INSN, &uprobe->flags);
613 566
614 out: 567 out:
615 mutex_unlock(&uprobe->copy_mutex); 568 up_write(&uprobe->consumer_rwsem);
569
570 return ret;
571}
572
573static inline bool consumer_filter(struct uprobe_consumer *uc,
574 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
575{
576 return !uc->filter || uc->filter(uc, ctx, mm);
577}
578
579static bool filter_chain(struct uprobe *uprobe,
580 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
581{
582 struct uprobe_consumer *uc;
583 bool ret = false;
584
585 down_read(&uprobe->consumer_rwsem);
586 for (uc = uprobe->consumers; uc; uc = uc->next) {
587 ret = consumer_filter(uc, ctx, mm);
588 if (ret)
589 break;
590 }
591 up_read(&uprobe->consumer_rwsem);
616 592
617 return ret; 593 return ret;
618} 594}
@@ -624,16 +600,6 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
624 bool first_uprobe; 600 bool first_uprobe;
625 int ret; 601 int ret;
626 602
627 /*
628 * If probe is being deleted, unregister thread could be done with
629 * the vma-rmap-walk through. Adding a probe now can be fatal since
630 * nobody will be able to cleanup. Also we could be from fork or
631 * mremap path, where the probe might have already been inserted.
632 * Hence behave as if probe already existed.
633 */
634 if (!uprobe->consumers)
635 return 0;
636
637 ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr); 603 ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr);
638 if (ret) 604 if (ret)
639 return ret; 605 return ret;
@@ -658,14 +624,14 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
658static int 624static int
659remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) 625remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
660{ 626{
661 /* can happen if uprobe_register() fails */
662 if (!test_bit(MMF_HAS_UPROBES, &mm->flags))
663 return 0;
664
665 set_bit(MMF_RECALC_UPROBES, &mm->flags); 627 set_bit(MMF_RECALC_UPROBES, &mm->flags);
666 return set_orig_insn(&uprobe->arch, mm, vaddr); 628 return set_orig_insn(&uprobe->arch, mm, vaddr);
667} 629}
668 630
631static inline bool uprobe_is_active(struct uprobe *uprobe)
632{
633 return !RB_EMPTY_NODE(&uprobe->rb_node);
634}
669/* 635/*
670 * There could be threads that have already hit the breakpoint. They 636 * There could be threads that have already hit the breakpoint. They
671 * will recheck the current insn and restart if find_uprobe() fails. 637 * will recheck the current insn and restart if find_uprobe() fails.
@@ -673,12 +639,15 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad
673 */ 639 */
674static void delete_uprobe(struct uprobe *uprobe) 640static void delete_uprobe(struct uprobe *uprobe)
675{ 641{
642 if (WARN_ON(!uprobe_is_active(uprobe)))
643 return;
644
676 spin_lock(&uprobes_treelock); 645 spin_lock(&uprobes_treelock);
677 rb_erase(&uprobe->rb_node, &uprobes_tree); 646 rb_erase(&uprobe->rb_node, &uprobes_tree);
678 spin_unlock(&uprobes_treelock); 647 spin_unlock(&uprobes_treelock);
648 RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */
679 iput(uprobe->inode); 649 iput(uprobe->inode);
680 put_uprobe(uprobe); 650 put_uprobe(uprobe);
681 atomic_dec(&uprobe_events);
682} 651}
683 652
684struct map_info { 653struct map_info {
@@ -764,8 +733,10 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
764 return curr; 733 return curr;
765} 734}
766 735
767static int register_for_each_vma(struct uprobe *uprobe, bool is_register) 736static int
737register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
768{ 738{
739 bool is_register = !!new;
769 struct map_info *info; 740 struct map_info *info;
770 int err = 0; 741 int err = 0;
771 742
@@ -794,10 +765,16 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
794 vaddr_to_offset(vma, info->vaddr) != uprobe->offset) 765 vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
795 goto unlock; 766 goto unlock;
796 767
797 if (is_register) 768 if (is_register) {
798 err = install_breakpoint(uprobe, mm, vma, info->vaddr); 769 /* consult only the "caller", new consumer. */
799 else 770 if (consumer_filter(new,
800 err |= remove_breakpoint(uprobe, mm, info->vaddr); 771 UPROBE_FILTER_REGISTER, mm))
772 err = install_breakpoint(uprobe, mm, vma, info->vaddr);
773 } else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) {
774 if (!filter_chain(uprobe,
775 UPROBE_FILTER_UNREGISTER, mm))
776 err |= remove_breakpoint(uprobe, mm, info->vaddr);
777 }
801 778
802 unlock: 779 unlock:
803 up_write(&mm->mmap_sem); 780 up_write(&mm->mmap_sem);
@@ -810,17 +787,23 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
810 return err; 787 return err;
811} 788}
812 789
813static int __uprobe_register(struct uprobe *uprobe) 790static int __uprobe_register(struct uprobe *uprobe, struct uprobe_consumer *uc)
814{ 791{
815 return register_for_each_vma(uprobe, true); 792 consumer_add(uprobe, uc);
793 return register_for_each_vma(uprobe, uc);
816} 794}
817 795
818static void __uprobe_unregister(struct uprobe *uprobe) 796static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc)
819{ 797{
820 if (!register_for_each_vma(uprobe, false)) 798 int err;
821 delete_uprobe(uprobe); 799
800 if (!consumer_del(uprobe, uc)) /* WARN? */
801 return;
822 802
803 err = register_for_each_vma(uprobe, NULL);
823 /* TODO : cant unregister? schedule a worker thread */ 804 /* TODO : cant unregister? schedule a worker thread */
805 if (!uprobe->consumers && !err)
806 delete_uprobe(uprobe);
824} 807}
825 808
826/* 809/*
@@ -845,31 +828,59 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
845 struct uprobe *uprobe; 828 struct uprobe *uprobe;
846 int ret; 829 int ret;
847 830
848 if (!inode || !uc || uc->next) 831 /* Racy, just to catch the obvious mistakes */
849 return -EINVAL;
850
851 if (offset > i_size_read(inode)) 832 if (offset > i_size_read(inode))
852 return -EINVAL; 833 return -EINVAL;
853 834
854 ret = 0; 835 retry:
855 mutex_lock(uprobes_hash(inode));
856 uprobe = alloc_uprobe(inode, offset); 836 uprobe = alloc_uprobe(inode, offset);
857 837 if (!uprobe)
858 if (!uprobe) { 838 return -ENOMEM;
859 ret = -ENOMEM; 839 /*
860 } else if (!consumer_add(uprobe, uc)) { 840 * We can race with uprobe_unregister()->delete_uprobe().
861 ret = __uprobe_register(uprobe); 841 * Check uprobe_is_active() and retry if it is false.
862 if (ret) { 842 */
863 uprobe->consumers = NULL; 843 down_write(&uprobe->register_rwsem);
864 __uprobe_unregister(uprobe); 844 ret = -EAGAIN;
865 } else { 845 if (likely(uprobe_is_active(uprobe))) {
866 set_bit(UPROBE_RUN_HANDLER, &uprobe->flags); 846 ret = __uprobe_register(uprobe, uc);
867 } 847 if (ret)
848 __uprobe_unregister(uprobe, uc);
868 } 849 }
850 up_write(&uprobe->register_rwsem);
851 put_uprobe(uprobe);
869 852
870 mutex_unlock(uprobes_hash(inode)); 853 if (unlikely(ret == -EAGAIN))
871 if (uprobe) 854 goto retry;
872 put_uprobe(uprobe); 855 return ret;
856}
857EXPORT_SYMBOL_GPL(uprobe_register);
858
859/*
860 * uprobe_apply - unregister a already registered probe.
861 * @inode: the file in which the probe has to be removed.
862 * @offset: offset from the start of the file.
863 * @uc: consumer which wants to add more or remove some breakpoints
864 * @add: add or remove the breakpoints
865 */
866int uprobe_apply(struct inode *inode, loff_t offset,
867 struct uprobe_consumer *uc, bool add)
868{
869 struct uprobe *uprobe;
870 struct uprobe_consumer *con;
871 int ret = -ENOENT;
872
873 uprobe = find_uprobe(inode, offset);
874 if (!uprobe)
875 return ret;
876
877 down_write(&uprobe->register_rwsem);
878 for (con = uprobe->consumers; con && con != uc ; con = con->next)
879 ;
880 if (con)
881 ret = register_for_each_vma(uprobe, add ? uc : NULL);
882 up_write(&uprobe->register_rwsem);
883 put_uprobe(uprobe);
873 884
874 return ret; 885 return ret;
875} 886}
@@ -884,24 +895,42 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
884{ 895{
885 struct uprobe *uprobe; 896 struct uprobe *uprobe;
886 897
887 if (!inode || !uc)
888 return;
889
890 uprobe = find_uprobe(inode, offset); 898 uprobe = find_uprobe(inode, offset);
891 if (!uprobe) 899 if (!uprobe)
892 return; 900 return;
893 901
894 mutex_lock(uprobes_hash(inode)); 902 down_write(&uprobe->register_rwsem);
903 __uprobe_unregister(uprobe, uc);
904 up_write(&uprobe->register_rwsem);
905 put_uprobe(uprobe);
906}
907EXPORT_SYMBOL_GPL(uprobe_unregister);
895 908
896 if (consumer_del(uprobe, uc)) { 909static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
897 if (!uprobe->consumers) { 910{
898 __uprobe_unregister(uprobe); 911 struct vm_area_struct *vma;
899 clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags); 912 int err = 0;
900 } 913
914 down_read(&mm->mmap_sem);
915 for (vma = mm->mmap; vma; vma = vma->vm_next) {
916 unsigned long vaddr;
917 loff_t offset;
918
919 if (!valid_vma(vma, false) ||
920 vma->vm_file->f_mapping->host != uprobe->inode)
921 continue;
922
923 offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
924 if (uprobe->offset < offset ||
925 uprobe->offset >= offset + vma->vm_end - vma->vm_start)
926 continue;
927
928 vaddr = offset_to_vaddr(vma, uprobe->offset);
929 err |= remove_breakpoint(uprobe, mm, vaddr);
901 } 930 }
931 up_read(&mm->mmap_sem);
902 932
903 mutex_unlock(uprobes_hash(inode)); 933 return err;
904 put_uprobe(uprobe);
905} 934}
906 935
907static struct rb_node * 936static struct rb_node *
@@ -978,7 +1007,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
978 struct uprobe *uprobe, *u; 1007 struct uprobe *uprobe, *u;
979 struct inode *inode; 1008 struct inode *inode;
980 1009
981 if (!atomic_read(&uprobe_events) || !valid_vma(vma, true)) 1010 if (no_uprobe_events() || !valid_vma(vma, true))
982 return 0; 1011 return 0;
983 1012
984 inode = vma->vm_file->f_mapping->host; 1013 inode = vma->vm_file->f_mapping->host;
@@ -987,9 +1016,14 @@ int uprobe_mmap(struct vm_area_struct *vma)
987 1016
988 mutex_lock(uprobes_mmap_hash(inode)); 1017 mutex_lock(uprobes_mmap_hash(inode));
989 build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); 1018 build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list);
990 1019 /*
1020 * We can race with uprobe_unregister(), this uprobe can be already
1021 * removed. But in this case filter_chain() must return false, all
1022 * consumers have gone away.
1023 */
991 list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { 1024 list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
992 if (!fatal_signal_pending(current)) { 1025 if (!fatal_signal_pending(current) &&
1026 filter_chain(uprobe, UPROBE_FILTER_MMAP, vma->vm_mm)) {
993 unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); 1027 unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
994 install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); 1028 install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
995 } 1029 }
@@ -1024,7 +1058,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e
1024 */ 1058 */
1025void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) 1059void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
1026{ 1060{
1027 if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) 1061 if (no_uprobe_events() || !valid_vma(vma, false))
1028 return; 1062 return;
1029 1063
1030 if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ 1064 if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
@@ -1041,22 +1075,14 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
1041/* Slot allocation for XOL */ 1075/* Slot allocation for XOL */
1042static int xol_add_vma(struct xol_area *area) 1076static int xol_add_vma(struct xol_area *area)
1043{ 1077{
1044 struct mm_struct *mm; 1078 struct mm_struct *mm = current->mm;
1045 int ret; 1079 int ret = -EALREADY;
1046
1047 area->page = alloc_page(GFP_HIGHUSER);
1048 if (!area->page)
1049 return -ENOMEM;
1050
1051 ret = -EALREADY;
1052 mm = current->mm;
1053 1080
1054 down_write(&mm->mmap_sem); 1081 down_write(&mm->mmap_sem);
1055 if (mm->uprobes_state.xol_area) 1082 if (mm->uprobes_state.xol_area)
1056 goto fail; 1083 goto fail;
1057 1084
1058 ret = -ENOMEM; 1085 ret = -ENOMEM;
1059
1060 /* Try to map as high as possible, this is only a hint. */ 1086 /* Try to map as high as possible, this is only a hint. */
1061 area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); 1087 area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
1062 if (area->vaddr & ~PAGE_MASK) { 1088 if (area->vaddr & ~PAGE_MASK) {
@@ -1072,54 +1098,53 @@ static int xol_add_vma(struct xol_area *area)
1072 smp_wmb(); /* pairs with get_xol_area() */ 1098 smp_wmb(); /* pairs with get_xol_area() */
1073 mm->uprobes_state.xol_area = area; 1099 mm->uprobes_state.xol_area = area;
1074 ret = 0; 1100 ret = 0;
1075 1101 fail:
1076fail:
1077 up_write(&mm->mmap_sem); 1102 up_write(&mm->mmap_sem);
1078 if (ret)
1079 __free_page(area->page);
1080 1103
1081 return ret; 1104 return ret;
1082} 1105}
1083 1106
1084static struct xol_area *get_xol_area(struct mm_struct *mm)
1085{
1086 struct xol_area *area;
1087
1088 area = mm->uprobes_state.xol_area;
1089 smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */
1090
1091 return area;
1092}
1093
1094/* 1107/*
1095 * xol_alloc_area - Allocate process's xol_area. 1108 * get_xol_area - Allocate process's xol_area if necessary.
1096 * This area will be used for storing instructions for execution out of 1109 * This area will be used for storing instructions for execution out of line.
1097 * line.
1098 * 1110 *
1099 * Returns the allocated area or NULL. 1111 * Returns the allocated area or NULL.
1100 */ 1112 */
1101static struct xol_area *xol_alloc_area(void) 1113static struct xol_area *get_xol_area(void)
1102{ 1114{
1115 struct mm_struct *mm = current->mm;
1103 struct xol_area *area; 1116 struct xol_area *area;
1104 1117
1118 area = mm->uprobes_state.xol_area;
1119 if (area)
1120 goto ret;
1121
1105 area = kzalloc(sizeof(*area), GFP_KERNEL); 1122 area = kzalloc(sizeof(*area), GFP_KERNEL);
1106 if (unlikely(!area)) 1123 if (unlikely(!area))
1107 return NULL; 1124 goto out;
1108 1125
1109 area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL); 1126 area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL);
1110
1111 if (!area->bitmap) 1127 if (!area->bitmap)
1112 goto fail; 1128 goto free_area;
1129
1130 area->page = alloc_page(GFP_HIGHUSER);
1131 if (!area->page)
1132 goto free_bitmap;
1113 1133
1114 init_waitqueue_head(&area->wq); 1134 init_waitqueue_head(&area->wq);
1115 if (!xol_add_vma(area)) 1135 if (!xol_add_vma(area))
1116 return area; 1136 return area;
1117 1137
1118fail: 1138 __free_page(area->page);
1139 free_bitmap:
1119 kfree(area->bitmap); 1140 kfree(area->bitmap);
1141 free_area:
1120 kfree(area); 1142 kfree(area);
1121 1143 out:
1122 return get_xol_area(current->mm); 1144 area = mm->uprobes_state.xol_area;
1145 ret:
1146 smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */
1147 return area;
1123} 1148}
1124 1149
1125/* 1150/*
@@ -1185,33 +1210,26 @@ static unsigned long xol_take_insn_slot(struct xol_area *area)
1185} 1210}
1186 1211
1187/* 1212/*
1188 * xol_get_insn_slot - If was not allocated a slot, then 1213 * xol_get_insn_slot - allocate a slot for xol.
1189 * allocate a slot.
1190 * Returns the allocated slot address or 0. 1214 * Returns the allocated slot address or 0.
1191 */ 1215 */
1192static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot_addr) 1216static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
1193{ 1217{
1194 struct xol_area *area; 1218 struct xol_area *area;
1195 unsigned long offset; 1219 unsigned long offset;
1220 unsigned long xol_vaddr;
1196 void *vaddr; 1221 void *vaddr;
1197 1222
1198 area = get_xol_area(current->mm); 1223 area = get_xol_area();
1199 if (!area) { 1224 if (!area)
1200 area = xol_alloc_area(); 1225 return 0;
1201 if (!area)
1202 return 0;
1203 }
1204 current->utask->xol_vaddr = xol_take_insn_slot(area);
1205 1226
1206 /* 1227 xol_vaddr = xol_take_insn_slot(area);
1207 * Initialize the slot if xol_vaddr points to valid 1228 if (unlikely(!xol_vaddr))
1208 * instruction slot.
1209 */
1210 if (unlikely(!current->utask->xol_vaddr))
1211 return 0; 1229 return 0;
1212 1230
1213 current->utask->vaddr = slot_addr; 1231 /* Initialize the slot */
1214 offset = current->utask->xol_vaddr & ~PAGE_MASK; 1232 offset = xol_vaddr & ~PAGE_MASK;
1215 vaddr = kmap_atomic(area->page); 1233 vaddr = kmap_atomic(area->page);
1216 memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES); 1234 memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
1217 kunmap_atomic(vaddr); 1235 kunmap_atomic(vaddr);
@@ -1221,7 +1239,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot
1221 */ 1239 */
1222 flush_dcache_page(area->page); 1240 flush_dcache_page(area->page);
1223 1241
1224 return current->utask->xol_vaddr; 1242 return xol_vaddr;
1225} 1243}
1226 1244
1227/* 1245/*
@@ -1239,8 +1257,7 @@ static void xol_free_insn_slot(struct task_struct *tsk)
1239 return; 1257 return;
1240 1258
1241 slot_addr = tsk->utask->xol_vaddr; 1259 slot_addr = tsk->utask->xol_vaddr;
1242 1260 if (unlikely(!slot_addr))
1243 if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr)))
1244 return; 1261 return;
1245 1262
1246 area = tsk->mm->uprobes_state.xol_area; 1263 area = tsk->mm->uprobes_state.xol_area;
@@ -1302,33 +1319,48 @@ void uprobe_copy_process(struct task_struct *t)
1302} 1319}
1303 1320
1304/* 1321/*
1305 * Allocate a uprobe_task object for the task. 1322 * Allocate a uprobe_task object for the task if if necessary.
1306 * Called when the thread hits a breakpoint for the first time. 1323 * Called when the thread hits a breakpoint.
1307 * 1324 *
1308 * Returns: 1325 * Returns:
1309 * - pointer to new uprobe_task on success 1326 * - pointer to new uprobe_task on success
1310 * - NULL otherwise 1327 * - NULL otherwise
1311 */ 1328 */
1312static struct uprobe_task *add_utask(void) 1329static struct uprobe_task *get_utask(void)
1313{ 1330{
1314 struct uprobe_task *utask; 1331 if (!current->utask)
1315 1332 current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
1316 utask = kzalloc(sizeof *utask, GFP_KERNEL); 1333 return current->utask;
1317 if (unlikely(!utask))
1318 return NULL;
1319
1320 current->utask = utask;
1321 return utask;
1322} 1334}
1323 1335
1324/* Prepare to single-step probed instruction out of line. */ 1336/* Prepare to single-step probed instruction out of line. */
1325static int 1337static int
1326pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr) 1338pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
1327{ 1339{
1328 if (xol_get_insn_slot(uprobe, vaddr) && !arch_uprobe_pre_xol(&uprobe->arch, regs)) 1340 struct uprobe_task *utask;
1329 return 0; 1341 unsigned long xol_vaddr;
1342 int err;
1343
1344 utask = get_utask();
1345 if (!utask)
1346 return -ENOMEM;
1347
1348 xol_vaddr = xol_get_insn_slot(uprobe);
1349 if (!xol_vaddr)
1350 return -ENOMEM;
1351
1352 utask->xol_vaddr = xol_vaddr;
1353 utask->vaddr = bp_vaddr;
1354
1355 err = arch_uprobe_pre_xol(&uprobe->arch, regs);
1356 if (unlikely(err)) {
1357 xol_free_insn_slot(current);
1358 return err;
1359 }
1330 1360
1331 return -EFAULT; 1361 utask->active_uprobe = uprobe;
1362 utask->state = UTASK_SSTEP;
1363 return 0;
1332} 1364}
1333 1365
1334/* 1366/*
@@ -1390,6 +1422,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)
1390 * This is not strictly accurate, we can race with 1422 * This is not strictly accurate, we can race with
1391 * uprobe_unregister() and see the already removed 1423 * uprobe_unregister() and see the already removed
1392 * uprobe if delete_uprobe() was not yet called. 1424 * uprobe if delete_uprobe() was not yet called.
1425 * Or this uprobe can be filtered out.
1393 */ 1426 */
1394 if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) 1427 if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end))
1395 return; 1428 return;
@@ -1451,13 +1484,33 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
1451 return uprobe; 1484 return uprobe;
1452} 1485}
1453 1486
1487static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
1488{
1489 struct uprobe_consumer *uc;
1490 int remove = UPROBE_HANDLER_REMOVE;
1491
1492 down_read(&uprobe->register_rwsem);
1493 for (uc = uprobe->consumers; uc; uc = uc->next) {
1494 int rc = uc->handler(uc, regs);
1495
1496 WARN(rc & ~UPROBE_HANDLER_MASK,
1497 "bad rc=0x%x from %pf()\n", rc, uc->handler);
1498 remove &= rc;
1499 }
1500
1501 if (remove && uprobe->consumers) {
1502 WARN_ON(!uprobe_is_active(uprobe));
1503 unapply_uprobe(uprobe, current->mm);
1504 }
1505 up_read(&uprobe->register_rwsem);
1506}
1507
1454/* 1508/*
1455 * Run handler and ask thread to singlestep. 1509 * Run handler and ask thread to singlestep.
1456 * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. 1510 * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
1457 */ 1511 */
1458static void handle_swbp(struct pt_regs *regs) 1512static void handle_swbp(struct pt_regs *regs)
1459{ 1513{
1460 struct uprobe_task *utask;
1461 struct uprobe *uprobe; 1514 struct uprobe *uprobe;
1462 unsigned long bp_vaddr; 1515 unsigned long bp_vaddr;
1463 int uninitialized_var(is_swbp); 1516 int uninitialized_var(is_swbp);
@@ -1482,6 +1535,10 @@ static void handle_swbp(struct pt_regs *regs)
1482 } 1535 }
1483 return; 1536 return;
1484 } 1537 }
1538
1539 /* change it in advance for ->handler() and restart */
1540 instruction_pointer_set(regs, bp_vaddr);
1541
1485 /* 1542 /*
1486 * TODO: move copy_insn/etc into _register and remove this hack. 1543 * TODO: move copy_insn/etc into _register and remove this hack.
1487 * After we hit the bp, _unregister + _register can install the 1544 * After we hit the bp, _unregister + _register can install the
@@ -1489,32 +1546,16 @@ static void handle_swbp(struct pt_regs *regs)
1489 */ 1546 */
1490 smp_rmb(); /* pairs with wmb() in install_breakpoint() */ 1547 smp_rmb(); /* pairs with wmb() in install_breakpoint() */
1491 if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) 1548 if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
1492 goto restart; 1549 goto out;
1493
1494 utask = current->utask;
1495 if (!utask) {
1496 utask = add_utask();
1497 /* Cannot allocate; re-execute the instruction. */
1498 if (!utask)
1499 goto restart;
1500 }
1501 1550
1502 handler_chain(uprobe, regs); 1551 handler_chain(uprobe, regs);
1503 if (can_skip_sstep(uprobe, regs)) 1552 if (can_skip_sstep(uprobe, regs))
1504 goto out; 1553 goto out;
1505 1554
1506 if (!pre_ssout(uprobe, regs, bp_vaddr)) { 1555 if (!pre_ssout(uprobe, regs, bp_vaddr))
1507 utask->active_uprobe = uprobe;
1508 utask->state = UTASK_SSTEP;
1509 return; 1556 return;
1510 }
1511 1557
1512restart: 1558 /* can_skip_sstep() succeeded, or restart if can't singlestep */
1513 /*
1514 * cannot singlestep; cannot skip instruction;
1515 * re-execute the instruction.
1516 */
1517 instruction_pointer_set(regs, bp_vaddr);
1518out: 1559out:
1519 put_uprobe(uprobe); 1560 put_uprobe(uprobe);
1520} 1561}
@@ -1608,10 +1649,8 @@ static int __init init_uprobes(void)
1608{ 1649{
1609 int i; 1650 int i;
1610 1651
1611 for (i = 0; i < UPROBES_HASH_SZ; i++) { 1652 for (i = 0; i < UPROBES_HASH_SZ; i++)
1612 mutex_init(&uprobes_mutex[i]);
1613 mutex_init(&uprobes_mmap_mutex[i]); 1653 mutex_init(&uprobes_mmap_mutex[i]);
1614 }
1615 1654
1616 if (percpu_init_rwsem(&dup_mmap_sem)) 1655 if (percpu_init_rwsem(&dup_mmap_sem))
1617 return -ENOMEM; 1656 return -ENOMEM;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 6cbeaae4406d..acbd28424d81 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -712,6 +712,12 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type,
712 kiov->iov_len, kiov->iov_base); 712 kiov->iov_len, kiov->iov_base);
713} 713}
714 714
715/*
716 * This is declared in linux/regset.h and defined in machine-dependent
717 * code. We put the export here, near the primary machine-neutral use,
718 * to ensure no machine forgets it.
719 */
720EXPORT_SYMBOL_GPL(task_user_regset_view);
715#endif 721#endif
716 722
717int ptrace_request(struct task_struct *child, long request, 723int ptrace_request(struct task_struct *child, long request,
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 933708677814..5c7e09d10d74 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -66,7 +66,6 @@
66#define TP_FLAG_TRACE 1 66#define TP_FLAG_TRACE 1
67#define TP_FLAG_PROFILE 2 67#define TP_FLAG_PROFILE 2
68#define TP_FLAG_REGISTERED 4 68#define TP_FLAG_REGISTERED 4
69#define TP_FLAG_UPROBE 8
70 69
71 70
72/* data_rloc: data relative location, compatible with u32 */ 71/* data_rloc: data relative location, compatible with u32 */
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 87b6db4ccbc5..8dad2a92dee9 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -28,20 +28,21 @@
28 28
29#define UPROBE_EVENT_SYSTEM "uprobes" 29#define UPROBE_EVENT_SYSTEM "uprobes"
30 30
31struct trace_uprobe_filter {
32 rwlock_t rwlock;
33 int nr_systemwide;
34 struct list_head perf_events;
35};
36
31/* 37/*
32 * uprobe event core functions 38 * uprobe event core functions
33 */ 39 */
34struct trace_uprobe;
35struct uprobe_trace_consumer {
36 struct uprobe_consumer cons;
37 struct trace_uprobe *tu;
38};
39
40struct trace_uprobe { 40struct trace_uprobe {
41 struct list_head list; 41 struct list_head list;
42 struct ftrace_event_class class; 42 struct ftrace_event_class class;
43 struct ftrace_event_call call; 43 struct ftrace_event_call call;
44 struct uprobe_trace_consumer *consumer; 44 struct trace_uprobe_filter filter;
45 struct uprobe_consumer consumer;
45 struct inode *inode; 46 struct inode *inode;
46 char *filename; 47 char *filename;
47 unsigned long offset; 48 unsigned long offset;
@@ -64,6 +65,18 @@ static LIST_HEAD(uprobe_list);
64 65
65static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); 66static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
66 67
68static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
69{
70 rwlock_init(&filter->rwlock);
71 filter->nr_systemwide = 0;
72 INIT_LIST_HEAD(&filter->perf_events);
73}
74
75static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
76{
77 return !filter->nr_systemwide && list_empty(&filter->perf_events);
78}
79
67/* 80/*
68 * Allocate new trace_uprobe and initialize it (including uprobes). 81 * Allocate new trace_uprobe and initialize it (including uprobes).
69 */ 82 */
@@ -92,6 +105,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs)
92 goto error; 105 goto error;
93 106
94 INIT_LIST_HEAD(&tu->list); 107 INIT_LIST_HEAD(&tu->list);
108 tu->consumer.handler = uprobe_dispatcher;
109 init_trace_uprobe_filter(&tu->filter);
95 return tu; 110 return tu;
96 111
97error: 112error:
@@ -253,16 +268,18 @@ static int create_trace_uprobe(int argc, char **argv)
253 if (ret) 268 if (ret)
254 goto fail_address_parse; 269 goto fail_address_parse;
255 270
256 ret = kstrtoul(arg, 0, &offset);
257 if (ret)
258 goto fail_address_parse;
259
260 inode = igrab(path.dentry->d_inode); 271 inode = igrab(path.dentry->d_inode);
261 if (!S_ISREG(inode->i_mode)) { 272 path_put(&path);
273
274 if (!inode || !S_ISREG(inode->i_mode)) {
262 ret = -EINVAL; 275 ret = -EINVAL;
263 goto fail_address_parse; 276 goto fail_address_parse;
264 } 277 }
265 278
279 ret = kstrtoul(arg, 0, &offset);
280 if (ret)
281 goto fail_address_parse;
282
266 argc -= 2; 283 argc -= 2;
267 argv += 2; 284 argv += 2;
268 285
@@ -469,7 +486,7 @@ static const struct file_operations uprobe_profile_ops = {
469}; 486};
470 487
471/* uprobe handler */ 488/* uprobe handler */
472static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) 489static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
473{ 490{
474 struct uprobe_trace_entry_head *entry; 491 struct uprobe_trace_entry_head *entry;
475 struct ring_buffer_event *event; 492 struct ring_buffer_event *event;
@@ -479,8 +496,6 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
479 unsigned long irq_flags; 496 unsigned long irq_flags;
480 struct ftrace_event_call *call = &tu->call; 497 struct ftrace_event_call *call = &tu->call;
481 498
482 tu->nhit++;
483
484 local_save_flags(irq_flags); 499 local_save_flags(irq_flags);
485 pc = preempt_count(); 500 pc = preempt_count();
486 501
@@ -489,16 +504,18 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
489 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 504 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
490 size, irq_flags, pc); 505 size, irq_flags, pc);
491 if (!event) 506 if (!event)
492 return; 507 return 0;
493 508
494 entry = ring_buffer_event_data(event); 509 entry = ring_buffer_event_data(event);
495 entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); 510 entry->ip = instruction_pointer(task_pt_regs(current));
496 data = (u8 *)&entry[1]; 511 data = (u8 *)&entry[1];
497 for (i = 0; i < tu->nr_args; i++) 512 for (i = 0; i < tu->nr_args; i++)
498 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); 513 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
499 514
500 if (!filter_current_check_discard(buffer, call, entry, event)) 515 if (!filter_current_check_discard(buffer, call, entry, event))
501 trace_buffer_unlock_commit(buffer, event, irq_flags, pc); 516 trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
517
518 return 0;
502} 519}
503 520
504/* Event entry printers */ 521/* Event entry printers */
@@ -537,42 +554,43 @@ partial:
537 return TRACE_TYPE_PARTIAL_LINE; 554 return TRACE_TYPE_PARTIAL_LINE;
538} 555}
539 556
540static int probe_event_enable(struct trace_uprobe *tu, int flag) 557static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
541{ 558{
542 struct uprobe_trace_consumer *utc; 559 return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
543 int ret = 0; 560}
544 561
545 if (!tu->inode || tu->consumer) 562typedef bool (*filter_func_t)(struct uprobe_consumer *self,
546 return -EINTR; 563 enum uprobe_filter_ctx ctx,
564 struct mm_struct *mm);
565
566static int
567probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
568{
569 int ret = 0;
547 570
548 utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL); 571 if (is_trace_uprobe_enabled(tu))
549 if (!utc)
550 return -EINTR; 572 return -EINTR;
551 573
552 utc->cons.handler = uprobe_dispatcher; 574 WARN_ON(!uprobe_filter_is_empty(&tu->filter));
553 utc->cons.filter = NULL;
554 ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
555 if (ret) {
556 kfree(utc);
557 return ret;
558 }
559 575
560 tu->flags |= flag; 576 tu->flags |= flag;
561 utc->tu = tu; 577 tu->consumer.filter = filter;
562 tu->consumer = utc; 578 ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
579 if (ret)
580 tu->flags &= ~flag;
563 581
564 return 0; 582 return ret;
565} 583}
566 584
567static void probe_event_disable(struct trace_uprobe *tu, int flag) 585static void probe_event_disable(struct trace_uprobe *tu, int flag)
568{ 586{
569 if (!tu->inode || !tu->consumer) 587 if (!is_trace_uprobe_enabled(tu))
570 return; 588 return;
571 589
572 uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons); 590 WARN_ON(!uprobe_filter_is_empty(&tu->filter));
591
592 uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
573 tu->flags &= ~flag; 593 tu->flags &= ~flag;
574 kfree(tu->consumer);
575 tu->consumer = NULL;
576} 594}
577 595
578static int uprobe_event_define_fields(struct ftrace_event_call *event_call) 596static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
@@ -646,8 +664,96 @@ static int set_print_fmt(struct trace_uprobe *tu)
646} 664}
647 665
648#ifdef CONFIG_PERF_EVENTS 666#ifdef CONFIG_PERF_EVENTS
667static bool
668__uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
669{
670 struct perf_event *event;
671
672 if (filter->nr_systemwide)
673 return true;
674
675 list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
676 if (event->hw.tp_target->mm == mm)
677 return true;
678 }
679
680 return false;
681}
682
683static inline bool
684uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
685{
686 return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
687}
688
689static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
690{
691 bool done;
692
693 write_lock(&tu->filter.rwlock);
694 if (event->hw.tp_target) {
695 /*
696 * event->parent != NULL means copy_process(), we can avoid
697 * uprobe_apply(). current->mm must be probed and we can rely
698 * on dup_mmap() which preserves the already installed bp's.
699 *
700 * attr.enable_on_exec means that exec/mmap will install the
701 * breakpoints we need.
702 */
703 done = tu->filter.nr_systemwide ||
704 event->parent || event->attr.enable_on_exec ||
705 uprobe_filter_event(tu, event);
706 list_add(&event->hw.tp_list, &tu->filter.perf_events);
707 } else {
708 done = tu->filter.nr_systemwide;
709 tu->filter.nr_systemwide++;
710 }
711 write_unlock(&tu->filter.rwlock);
712
713 if (!done)
714 uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
715
716 return 0;
717}
718
719static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
720{
721 bool done;
722
723 write_lock(&tu->filter.rwlock);
724 if (event->hw.tp_target) {
725 list_del(&event->hw.tp_list);
726 done = tu->filter.nr_systemwide ||
727 (event->hw.tp_target->flags & PF_EXITING) ||
728 uprobe_filter_event(tu, event);
729 } else {
730 tu->filter.nr_systemwide--;
731 done = tu->filter.nr_systemwide;
732 }
733 write_unlock(&tu->filter.rwlock);
734
735 if (!done)
736 uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
737
738 return 0;
739}
740
741static bool uprobe_perf_filter(struct uprobe_consumer *uc,
742 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
743{
744 struct trace_uprobe *tu;
745 int ret;
746
747 tu = container_of(uc, struct trace_uprobe, consumer);
748 read_lock(&tu->filter.rwlock);
749 ret = __uprobe_perf_filter(&tu->filter, mm);
750 read_unlock(&tu->filter.rwlock);
751
752 return ret;
753}
754
649/* uprobe profile handler */ 755/* uprobe profile handler */
650static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) 756static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
651{ 757{
652 struct ftrace_event_call *call = &tu->call; 758 struct ftrace_event_call *call = &tu->call;
653 struct uprobe_trace_entry_head *entry; 759 struct uprobe_trace_entry_head *entry;
@@ -656,11 +762,14 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
656 int size, __size, i; 762 int size, __size, i;
657 int rctx; 763 int rctx;
658 764
765 if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
766 return UPROBE_HANDLER_REMOVE;
767
659 __size = sizeof(*entry) + tu->size; 768 __size = sizeof(*entry) + tu->size;
660 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 769 size = ALIGN(__size + sizeof(u32), sizeof(u64));
661 size -= sizeof(u32); 770 size -= sizeof(u32);
662 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) 771 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
663 return; 772 return 0;
664 773
665 preempt_disable(); 774 preempt_disable();
666 775
@@ -668,7 +777,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
668 if (!entry) 777 if (!entry)
669 goto out; 778 goto out;
670 779
671 entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); 780 entry->ip = instruction_pointer(task_pt_regs(current));
672 data = (u8 *)&entry[1]; 781 data = (u8 *)&entry[1];
673 for (i = 0; i < tu->nr_args; i++) 782 for (i = 0; i < tu->nr_args; i++)
674 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); 783 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
@@ -678,6 +787,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
678 787
679 out: 788 out:
680 preempt_enable(); 789 preempt_enable();
790 return 0;
681} 791}
682#endif /* CONFIG_PERF_EVENTS */ 792#endif /* CONFIG_PERF_EVENTS */
683 793
@@ -688,7 +798,7 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
688 798
689 switch (type) { 799 switch (type) {
690 case TRACE_REG_REGISTER: 800 case TRACE_REG_REGISTER:
691 return probe_event_enable(tu, TP_FLAG_TRACE); 801 return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
692 802
693 case TRACE_REG_UNREGISTER: 803 case TRACE_REG_UNREGISTER:
694 probe_event_disable(tu, TP_FLAG_TRACE); 804 probe_event_disable(tu, TP_FLAG_TRACE);
@@ -696,11 +806,18 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
696 806
697#ifdef CONFIG_PERF_EVENTS 807#ifdef CONFIG_PERF_EVENTS
698 case TRACE_REG_PERF_REGISTER: 808 case TRACE_REG_PERF_REGISTER:
699 return probe_event_enable(tu, TP_FLAG_PROFILE); 809 return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
700 810
701 case TRACE_REG_PERF_UNREGISTER: 811 case TRACE_REG_PERF_UNREGISTER:
702 probe_event_disable(tu, TP_FLAG_PROFILE); 812 probe_event_disable(tu, TP_FLAG_PROFILE);
703 return 0; 813 return 0;
814
815 case TRACE_REG_PERF_OPEN:
816 return uprobe_perf_open(tu, data);
817
818 case TRACE_REG_PERF_CLOSE:
819 return uprobe_perf_close(tu, data);
820
704#endif 821#endif
705 default: 822 default:
706 return 0; 823 return 0;
@@ -710,22 +827,20 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
710 827
711static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) 828static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
712{ 829{
713 struct uprobe_trace_consumer *utc;
714 struct trace_uprobe *tu; 830 struct trace_uprobe *tu;
831 int ret = 0;
715 832
716 utc = container_of(con, struct uprobe_trace_consumer, cons); 833 tu = container_of(con, struct trace_uprobe, consumer);
717 tu = utc->tu; 834 tu->nhit++;
718 if (!tu || tu->consumer != utc)
719 return 0;
720 835
721 if (tu->flags & TP_FLAG_TRACE) 836 if (tu->flags & TP_FLAG_TRACE)
722 uprobe_trace_func(tu, regs); 837 ret |= uprobe_trace_func(tu, regs);
723 838
724#ifdef CONFIG_PERF_EVENTS 839#ifdef CONFIG_PERF_EVENTS
725 if (tu->flags & TP_FLAG_PROFILE) 840 if (tu->flags & TP_FLAG_PROFILE)
726 uprobe_perf_func(tu, regs); 841 ret |= uprobe_perf_func(tu, regs);
727#endif 842#endif
728 return 0; 843 return ret;
729} 844}
730 845
731static struct trace_event_functions uprobe_funcs = { 846static struct trace_event_functions uprobe_funcs = {