author     Srikar Dronamraju <srikar@linux.vnet.ibm.com>   2012-03-30 14:26:46 -0400
committer  Ingo Molnar <mingo@kernel.org>                  2012-03-31 05:50:02 -0400
commit     682968e0c425c60f0dde37977e5beb2b12ddc4cc (patch)
tree       875eb289aa0e08189b2cf7dc99ee522f91e647e7 /kernel
parent     d4b3b6384f98f8692ad0209891ccdbc7e78bbefe (diff)
uprobes/core: Optimize probe hits with the help of a counter
Maintain a per-mm counter: the number of uprobes inserted in this
process's address space.
This counter can be used at probe hit time to determine if we need
a lookup in the uprobes rbtree. Every time a probe is inserted
successfully, the probe count is incremented, and every time a
probe is removed, the probe count is decremented.
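At probe hit time, the counter enables a cheap early-out; this is a
condensed excerpt of the check the patch adds to
uprobe_pre_sstep_notifier() in the diff below (uprobes_state.count is
the new per-mm counter), not additional code:

	/* Fast path on a breakpoint trap: no uprobes in this mm,
	 * so skip the rbtree lookup altogether. */
	if (!current->mm || !atomic_read(&current->mm->uprobes_state.count))
		/* task is currently not uprobed */
		return 0;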
The new uprobe_munmap hook ensures the count is correct on an
unmap or remap of a region. We expect that once uprobe_munmap()
is called, the vma goes away. So if uprobe_unregister() finds a
probe to unregister, it means either the unmap event hasn't
occurred yet, or an mmap event on the same executable file
occurred after an unmap event.
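The decrement side is guarded so that an unregister racing ahead of
the unmap cannot skew the count; condensed from the new
uprobe_munmap() in the diff below:

	/* An unregister could already have removed the probe, so
	 * only drop the count if the breakpoint is still present. */
	if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1)
		atomic_dec(&vma->vm_mm->uprobes_state.count);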
Additionally, the uprobe_mmap hook now also gets called:
a. on every executable vma that is COWed at fork.
b. when a vma of interest is newly mapped; breakpoint insertion
also happens at the required address.
On process creation, make sure the probe count in the child is
set correctly.
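Two pieces of the diff below cooperate on the fork path; condensed
excerpts, with no code beyond what the hunks themselves add:

	/* uprobe_reset_state(): a freshly created mm starts at zero */
	atomic_set(&mm->uprobes_state.count, 0);

	/* dup_mmap(): for each copied file-backed vma, re-install
	 * breakpoints in the child; uprobe_mmap() bumps the child's
	 * count for every breakpoint it installs or finds underneath. */
	if (file && uprobe_mmap(tmp))
		goto out;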
Special cases that are taken care of include:
a. mremap
b. VM_DONTCOPY vmas on fork()
c. insertion/removal races in the parent during fork().
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jim Keniston <jkenisto@linux.vnet.ibm.com>
Cc: Linux-mm <linux-mm@kvack.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120330182646.10018.85805.sendpatchset@srdronam.in.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/events/uprobes.c   119
-rw-r--r--   kernel/fork.c               3
2 files changed, 114 insertions, 8 deletions
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index b395edb97f53..29e881b0137d 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -642,6 +642,29 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
 	return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset);
 }
 
+/*
+ * How mm->uprobes_state.count gets updated
+ * uprobe_mmap() increments the count if
+ *	- it successfully adds a breakpoint.
+ *	- it cannot add a breakpoint, but sees that there is a underlying
+ *	  breakpoint (via a is_swbp_at_addr()).
+ *
+ * uprobe_munmap() decrements the count if
+ *	- it sees a underlying breakpoint, (via is_swbp_at_addr)
+ *	  (Subsequent uprobe_unregister wouldnt find the breakpoint
+ *	  unless a uprobe_mmap kicks in, since the old vma would be
+ *	  dropped just after uprobe_munmap.)
+ *
+ * uprobe_register increments the count if:
+ *	- it successfully adds a breakpoint.
+ *
+ * uprobe_unregister decrements the count if:
+ *	- it sees a underlying breakpoint and removes successfully.
+ *	  (via is_swbp_at_addr)
+ *	  (Subsequent uprobe_munmap wouldnt find the breakpoint
+ *	  since there is no underlying breakpoint after the
+ *	  breakpoint removal.)
+ */
 static int
 install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 			struct vm_area_struct *vma, loff_t vaddr)
@@ -675,7 +698,19 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 
 		uprobe->flags |= UPROBE_COPY_INSN;
 	}
+
+	/*
+	 * Ideally, should be updating the probe count after the breakpoint
+	 * has been successfully inserted. However a thread could hit the
+	 * breakpoint we just inserted even before the probe count is
+	 * incremented. If this is the first breakpoint placed, breakpoint
+	 * notifier might ignore uprobes and pass the trap to the thread.
+	 * Hence increment before and decrement on failure.
+	 */
+	atomic_inc(&mm->uprobes_state.count);
 	ret = set_swbp(&uprobe->arch, mm, addr);
+	if (ret)
+		atomic_dec(&mm->uprobes_state.count);
 
 	return ret;
 }
@@ -683,7 +718,8 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 static void
 remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
 {
-	set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true);
+	if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true))
+		atomic_dec(&mm->uprobes_state.count);
 }
 
 /*
@@ -1009,7 +1045,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	struct list_head tmp_list;
 	struct uprobe *uprobe, *u;
 	struct inode *inode;
-	int ret;
+	int ret, count;
 
 	if (!atomic_read(&uprobe_events) || !valid_vma(vma, true))
 		return 0;
@@ -1023,6 +1059,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	build_probe_list(inode, &tmp_list);
 
 	ret = 0;
+	count = 0;
 
 	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
 		loff_t vaddr;
@@ -1030,21 +1067,85 @@ int uprobe_mmap(struct vm_area_struct *vma)
 		list_del(&uprobe->pending_list);
 		if (!ret) {
 			vaddr = vma_address(vma, uprobe->offset);
-			if (vaddr >= vma->vm_start && vaddr < vma->vm_end) {
-				ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
-				/* Ignore double add: */
-				if (ret == -EEXIST)
-					ret = 0;
+
+			if (vaddr < vma->vm_start || vaddr >= vma->vm_end) {
+				put_uprobe(uprobe);
+				continue;
 			}
+
+			ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
+
+			/* Ignore double add: */
+			if (ret == -EEXIST) {
+				ret = 0;
+
+				if (!is_swbp_at_addr(vma->vm_mm, vaddr))
+					continue;
+
+				/*
+				 * Unable to insert a breakpoint, but
+				 * breakpoint lies underneath. Increment the
+				 * probe count.
+				 */
+				atomic_inc(&vma->vm_mm->uprobes_state.count);
+			}
+
+			if (!ret)
+				count++;
 		}
 		put_uprobe(uprobe);
 	}
 
 	mutex_unlock(uprobes_mmap_hash(inode));
 
+	if (ret)
+		atomic_sub(count, &vma->vm_mm->uprobes_state.count);
+
 	return ret;
 }
 
+/*
+ * Called in context of a munmap of a vma.
+ */
+void uprobe_munmap(struct vm_area_struct *vma)
+{
+	struct list_head tmp_list;
+	struct uprobe *uprobe, *u;
+	struct inode *inode;
+
+	if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
+		return;
+
+	if (!atomic_read(&vma->vm_mm->uprobes_state.count))
+		return;
+
+	inode = vma->vm_file->f_mapping->host;
+	if (!inode)
+		return;
+
+	INIT_LIST_HEAD(&tmp_list);
+	mutex_lock(uprobes_mmap_hash(inode));
+	build_probe_list(inode, &tmp_list);
+
+	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
+		loff_t vaddr;
+
+		list_del(&uprobe->pending_list);
+		vaddr = vma_address(vma, uprobe->offset);
+
+		if (vaddr >= vma->vm_start && vaddr < vma->vm_end) {
+			/*
+			 * An unregister could have removed the probe before
+			 * unmap. So check before we decrement the count.
+			 */
+			if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1)
+				atomic_dec(&vma->vm_mm->uprobes_state.count);
+		}
+		put_uprobe(uprobe);
+	}
+	mutex_unlock(uprobes_mmap_hash(inode));
+}
+
 /* Slot allocation for XOL */
 static int xol_add_vma(struct xol_area *area)
 {
@@ -1150,6 +1251,7 @@ void uprobe_clear_state(struct mm_struct *mm)
 void uprobe_reset_state(struct mm_struct *mm)
 {
 	mm->uprobes_state.xol_area = NULL;
+	atomic_set(&mm->uprobes_state.count, 0);
 }
 
 /*
@@ -1504,7 +1606,8 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs)
 {
 	struct uprobe_task *utask;
 
-	if (!current->mm)
+	if (!current->mm || !atomic_read(&current->mm->uprobes_state.count))
+		/* task is currently not uprobed */
 		return 0;
 
 	utask = current->utask;
diff --git a/kernel/fork.c b/kernel/fork.c
index 3133b9da59d5..26a8f5c25805 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -421,6 +421,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 
 		if (retval)
 			goto out;
+
+		if (file && uprobe_mmap(tmp))
+			goto out;
 	}
 	/* a new mm has just been created */
 	arch_dup_mmap(oldmm, mm);