aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/events/uprobes.c
diff options
context:
space:
mode:
authorOleg Nesterov <oleg@redhat.com>2012-06-15 11:43:33 -0400
committerIngo Molnar <mingo@kernel.org>2012-06-16 03:10:43 -0400
commit268720903f87e0b84b161626c4447b81671b5d18 (patch)
tree295b7e51c3e59a9f1baeac80dda132b0d6011b44 /kernel/events/uprobes.c
parentc1914a0936f79ed0236f670122e06e36e4d332ee (diff)
uprobes: Rework register_for_each_vma() to make it O(n)
Currently register_for_each_vma() is O(n ** 2) + O(n ** 3), every time find_next_vma_info() "restarts" the vma_prio_tree_foreach() loop and each iteration rechecks the whole try_list. This also means that try_list can grow "indefinitely" if register/unregister races with munmap/mmap activity even if the number of mappings is bounded at any time. With this patch register_for_each_vma() builds the list of mm/vaddr structures only once and does install_breakpoint() for each entry. We do not care about the new mappings which can be created after build_map_info() drops mapping->i_mmap_mutex, uprobe_mmap() should do its work. Note that we do not allocate map_info under i_mmap_mutex, this can deadlock with page reclaim (but see the next patch). So we use 2 lists, "curr" which we are going to return, and "prev" which holds the already allocated memory. The main loop dequeues the entry from "prev" (initially it is empty), and if "prev" becomes empty again it counts the number of entries we need to pre-allocate outside of i_mmap_mutex. Signed-off-by: Oleg Nesterov <oleg@redhat.com> Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Acked-by: Peter Zijlstra <peterz@infradead.org> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Cc: Anton Arapov <anton@redhat.com> Link: http://lkml.kernel.org/r/20120615154333.GA9581@redhat.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/events/uprobes.c')
-rw-r--r--kernel/events/uprobes.c199
1 files changed, 86 insertions, 113 deletions
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ec78152e32e9..4e0db3496d70 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -60,17 +60,6 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
60 */ 60 */
61static atomic_t uprobe_events = ATOMIC_INIT(0); 61static atomic_t uprobe_events = ATOMIC_INIT(0);
62 62
63/*
64 * Maintain a temporary per vma info that can be used to search if a vma
65 * has already been handled. This structure is introduced since extending
66 * vm_area_struct wasnt recommended.
67 */
68struct vma_info {
69 struct list_head probe_list;
70 struct mm_struct *mm;
71 loff_t vaddr;
72};
73
74struct uprobe { 63struct uprobe {
75 struct rb_node rb_node; /* node in the rb tree */ 64 struct rb_node rb_node; /* node in the rb tree */
76 atomic_t ref; 65 atomic_t ref;
@@ -742,139 +731,123 @@ static void delete_uprobe(struct uprobe *uprobe)
742 atomic_dec(&uprobe_events); 731 atomic_dec(&uprobe_events);
743} 732}
744 733
745static struct vma_info * 734struct map_info {
746__find_next_vma_info(struct address_space *mapping, struct list_head *head, 735 struct map_info *next;
747 struct vma_info *vi, loff_t offset, bool is_register) 736 struct mm_struct *mm;
737 loff_t vaddr;
738};
739
740static inline struct map_info *free_map_info(struct map_info *info)
748{ 741{
742 struct map_info *next = info->next;
743 kfree(info);
744 return next;
745}
746
747static struct map_info *
748build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
749{
750 unsigned long pgoff = offset >> PAGE_SHIFT;
749 struct prio_tree_iter iter; 751 struct prio_tree_iter iter;
750 struct vm_area_struct *vma; 752 struct vm_area_struct *vma;
751 struct vma_info *tmpvi; 753 struct map_info *curr = NULL;
752 unsigned long pgoff; 754 struct map_info *prev = NULL;
753 int existing_vma; 755 struct map_info *info;
754 loff_t vaddr; 756 int more = 0;
755
756 pgoff = offset >> PAGE_SHIFT;
757 757
758 again:
759 mutex_lock(&mapping->i_mmap_mutex);
758 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 760 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
759 if (!valid_vma(vma, is_register)) 761 if (!valid_vma(vma, is_register))
760 continue; 762 continue;
761 763
762 existing_vma = 0; 764 if (!prev) {
763 vaddr = vma_address(vma, offset); 765 more++;
764 766 continue;
765 list_for_each_entry(tmpvi, head, probe_list) {
766 if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) {
767 existing_vma = 1;
768 break;
769 }
770 }
771
772 /*
773 * Another vma needs a probe to be installed. However skip
774 * installing the probe if the vma is about to be unlinked.
775 */
776 if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) {
777 vi->mm = vma->vm_mm;
778 vi->vaddr = vaddr;
779 list_add(&vi->probe_list, head);
780
781 return vi;
782 } 767 }
783 }
784
785 return NULL;
786}
787 768
788/* 769 if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
789 * Iterate in the rmap prio tree and find a vma where a probe has not 770 continue;
790 * yet been inserted.
791 */
792static struct vma_info *
793find_next_vma_info(struct address_space *mapping, struct list_head *head,
794 loff_t offset, bool is_register)
795{
796 struct vma_info *vi, *retvi;
797 771
798 vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL); 772 info = prev;
799 if (!vi) 773 prev = prev->next;
800 return ERR_PTR(-ENOMEM); 774 info->next = curr;
775 curr = info;
801 776
802 mutex_lock(&mapping->i_mmap_mutex); 777 info->mm = vma->vm_mm;
803 retvi = __find_next_vma_info(mapping, head, vi, offset, is_register); 778 info->vaddr = vma_address(vma, offset);
779 }
804 mutex_unlock(&mapping->i_mmap_mutex); 780 mutex_unlock(&mapping->i_mmap_mutex);
805 781
806 if (!retvi) 782 if (!more)
807 kfree(vi); 783 goto out;
784
785 prev = curr;
786 while (curr) {
787 mmput(curr->mm);
788 curr = curr->next;
789 }
808 790
809 return retvi; 791 do {
792 info = kmalloc(sizeof(struct map_info), GFP_KERNEL);
793 if (!info) {
794 curr = ERR_PTR(-ENOMEM);
795 goto out;
796 }
797 info->next = prev;
798 prev = info;
799 } while (--more);
800
801 goto again;
802 out:
803 while (prev)
804 prev = free_map_info(prev);
805 return curr;
810} 806}
811 807
812static int register_for_each_vma(struct uprobe *uprobe, bool is_register) 808static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
813{ 809{
814 struct list_head try_list; 810 struct map_info *info;
815 struct vm_area_struct *vma; 811 int err = 0;
816 struct address_space *mapping;
817 struct vma_info *vi, *tmpvi;
818 struct mm_struct *mm;
819 loff_t vaddr;
820 int ret;
821 812
822 mapping = uprobe->inode->i_mapping; 813 info = build_map_info(uprobe->inode->i_mapping,
823 INIT_LIST_HEAD(&try_list); 814 uprobe->offset, is_register);
824 815 if (IS_ERR(info))
825 ret = 0; 816 return PTR_ERR(info);
826 817
827 for (;;) { 818 while (info) {
828 vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register); 819 struct mm_struct *mm = info->mm;
829 if (!vi) 820 struct vm_area_struct *vma;
830 break; 821 loff_t vaddr;
831 822
832 if (IS_ERR(vi)) { 823 if (err)
833 ret = PTR_ERR(vi); 824 goto free;
834 break;
835 }
836 825
837 mm = vi->mm;
838 down_write(&mm->mmap_sem); 826 down_write(&mm->mmap_sem);
839 vma = find_vma(mm, (unsigned long)vi->vaddr); 827 vma = find_vma(mm, (unsigned long)info->vaddr);
840 if (!vma || !valid_vma(vma, is_register)) { 828 if (!vma || !valid_vma(vma, is_register))
841 list_del(&vi->probe_list); 829 goto unlock;
842 kfree(vi); 830
843 up_write(&mm->mmap_sem);
844 mmput(mm);
845 continue;
846 }
847 vaddr = vma_address(vma, uprobe->offset); 831 vaddr = vma_address(vma, uprobe->offset);
848 if (vma->vm_file->f_mapping->host != uprobe->inode || 832 if (vma->vm_file->f_mapping->host != uprobe->inode ||
849 vaddr != vi->vaddr) { 833 vaddr != info->vaddr)
850 list_del(&vi->probe_list); 834 goto unlock;
851 kfree(vi);
852 up_write(&mm->mmap_sem);
853 mmput(mm);
854 continue;
855 }
856 835
857 if (is_register)
858 ret = install_breakpoint(uprobe, mm, vma, vi->vaddr);
859 else
860 remove_breakpoint(uprobe, mm, vi->vaddr);
861
862 up_write(&mm->mmap_sem);
863 mmput(mm);
864 if (is_register) { 836 if (is_register) {
865 if (ret && ret == -EEXIST) 837 err = install_breakpoint(uprobe, mm, vma, info->vaddr);
866 ret = 0; 838 if (err == -EEXIST)
867 if (ret) 839 err = 0;
868 break; 840 } else {
841 remove_breakpoint(uprobe, mm, info->vaddr);
869 } 842 }
843 unlock:
844 up_write(&mm->mmap_sem);
845 free:
846 mmput(mm);
847 info = free_map_info(info);
870 } 848 }
871 849
872 list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) { 850 return err;
873 list_del(&vi->probe_list);
874 kfree(vi);
875 }
876
877 return ret;
878} 851}
879 852
880static int __uprobe_register(struct uprobe *uprobe) 853static int __uprobe_register(struct uprobe *uprobe)