diff options
author | Oleg Nesterov <oleg@redhat.com> | 2012-06-15 11:43:33 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2012-06-16 03:10:43 -0400 |
commit | 268720903f87e0b84b161626c4447b81671b5d18 (patch) | |
tree | 295b7e51c3e59a9f1baeac80dda132b0d6011b44 /kernel/events/uprobes.c | |
parent | c1914a0936f79ed0236f670122e06e36e4d332ee (diff) |
uprobes: Rework register_for_each_vma() to make it O(n)
Currently register_for_each_vma() is O(n ** 2) + O(n ** 3),
every time find_next_vma_info() "restarts" the
vma_prio_tree_foreach() loop and each iteration rechecks the
whole try_list. This also means that try_list can grow
"indefinitely" if register/unregister races with munmap/mmap
activity even if the number of mapping is bounded at any time.
With this patch register_for_each_vma() builds the list of
mm/vaddr structures only once and does install_breakpoint() for
each entry.
We do not care about the new mappings which can be created after
build_map_info() drops mapping->i_mmap_mutex, uprobe_mmap()
should do its work.
Note that we do not allocate map_info under i_mmap_mutex, this
can deadlock with page reclaim (but see the next patch). So we
use 2 lists, "curr" which we are going to return, and "prev"
which holds the already allocated memory. The main loop deques
the entry from "prev" (initially it is empty), and if "prev"
becomes empty again it counts the number of entries we need to
pre-allocate outside of i_mmap_mutex.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Link: http://lkml.kernel.org/r/20120615154333.GA9581@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/events/uprobes.c')
-rw-r--r-- | kernel/events/uprobes.c | 199 |
1 files changed, 86 insertions, 113 deletions
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ec78152e32e9..4e0db3496d70 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c | |||
@@ -60,17 +60,6 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; | |||
60 | */ | 60 | */ |
61 | static atomic_t uprobe_events = ATOMIC_INIT(0); | 61 | static atomic_t uprobe_events = ATOMIC_INIT(0); |
62 | 62 | ||
63 | /* | ||
64 | * Maintain a temporary per vma info that can be used to search if a vma | ||
65 | * has already been handled. This structure is introduced since extending | ||
66 | * vm_area_struct wasnt recommended. | ||
67 | */ | ||
68 | struct vma_info { | ||
69 | struct list_head probe_list; | ||
70 | struct mm_struct *mm; | ||
71 | loff_t vaddr; | ||
72 | }; | ||
73 | |||
74 | struct uprobe { | 63 | struct uprobe { |
75 | struct rb_node rb_node; /* node in the rb tree */ | 64 | struct rb_node rb_node; /* node in the rb tree */ |
76 | atomic_t ref; | 65 | atomic_t ref; |
@@ -742,139 +731,123 @@ static void delete_uprobe(struct uprobe *uprobe) | |||
742 | atomic_dec(&uprobe_events); | 731 | atomic_dec(&uprobe_events); |
743 | } | 732 | } |
744 | 733 | ||
745 | static struct vma_info * | 734 | struct map_info { |
746 | __find_next_vma_info(struct address_space *mapping, struct list_head *head, | 735 | struct map_info *next; |
747 | struct vma_info *vi, loff_t offset, bool is_register) | 736 | struct mm_struct *mm; |
737 | loff_t vaddr; | ||
738 | }; | ||
739 | |||
740 | static inline struct map_info *free_map_info(struct map_info *info) | ||
748 | { | 741 | { |
742 | struct map_info *next = info->next; | ||
743 | kfree(info); | ||
744 | return next; | ||
745 | } | ||
746 | |||
747 | static struct map_info * | ||
748 | build_map_info(struct address_space *mapping, loff_t offset, bool is_register) | ||
749 | { | ||
750 | unsigned long pgoff = offset >> PAGE_SHIFT; | ||
749 | struct prio_tree_iter iter; | 751 | struct prio_tree_iter iter; |
750 | struct vm_area_struct *vma; | 752 | struct vm_area_struct *vma; |
751 | struct vma_info *tmpvi; | 753 | struct map_info *curr = NULL; |
752 | unsigned long pgoff; | 754 | struct map_info *prev = NULL; |
753 | int existing_vma; | 755 | struct map_info *info; |
754 | loff_t vaddr; | 756 | int more = 0; |
755 | |||
756 | pgoff = offset >> PAGE_SHIFT; | ||
757 | 757 | ||
758 | again: | ||
759 | mutex_lock(&mapping->i_mmap_mutex); | ||
758 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { | 760 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { |
759 | if (!valid_vma(vma, is_register)) | 761 | if (!valid_vma(vma, is_register)) |
760 | continue; | 762 | continue; |
761 | 763 | ||
762 | existing_vma = 0; | 764 | if (!prev) { |
763 | vaddr = vma_address(vma, offset); | 765 | more++; |
764 | 766 | continue; | |
765 | list_for_each_entry(tmpvi, head, probe_list) { | ||
766 | if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) { | ||
767 | existing_vma = 1; | ||
768 | break; | ||
769 | } | ||
770 | } | ||
771 | |||
772 | /* | ||
773 | * Another vma needs a probe to be installed. However skip | ||
774 | * installing the probe if the vma is about to be unlinked. | ||
775 | */ | ||
776 | if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) { | ||
777 | vi->mm = vma->vm_mm; | ||
778 | vi->vaddr = vaddr; | ||
779 | list_add(&vi->probe_list, head); | ||
780 | |||
781 | return vi; | ||
782 | } | 767 | } |
783 | } | ||
784 | |||
785 | return NULL; | ||
786 | } | ||
787 | 768 | ||
788 | /* | 769 | if (!atomic_inc_not_zero(&vma->vm_mm->mm_users)) |
789 | * Iterate in the rmap prio tree and find a vma where a probe has not | 770 | continue; |
790 | * yet been inserted. | ||
791 | */ | ||
792 | static struct vma_info * | ||
793 | find_next_vma_info(struct address_space *mapping, struct list_head *head, | ||
794 | loff_t offset, bool is_register) | ||
795 | { | ||
796 | struct vma_info *vi, *retvi; | ||
797 | 771 | ||
798 | vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL); | 772 | info = prev; |
799 | if (!vi) | 773 | prev = prev->next; |
800 | return ERR_PTR(-ENOMEM); | 774 | info->next = curr; |
775 | curr = info; | ||
801 | 776 | ||
802 | mutex_lock(&mapping->i_mmap_mutex); | 777 | info->mm = vma->vm_mm; |
803 | retvi = __find_next_vma_info(mapping, head, vi, offset, is_register); | 778 | info->vaddr = vma_address(vma, offset); |
779 | } | ||
804 | mutex_unlock(&mapping->i_mmap_mutex); | 780 | mutex_unlock(&mapping->i_mmap_mutex); |
805 | 781 | ||
806 | if (!retvi) | 782 | if (!more) |
807 | kfree(vi); | 783 | goto out; |
784 | |||
785 | prev = curr; | ||
786 | while (curr) { | ||
787 | mmput(curr->mm); | ||
788 | curr = curr->next; | ||
789 | } | ||
808 | 790 | ||
809 | return retvi; | 791 | do { |
792 | info = kmalloc(sizeof(struct map_info), GFP_KERNEL); | ||
793 | if (!info) { | ||
794 | curr = ERR_PTR(-ENOMEM); | ||
795 | goto out; | ||
796 | } | ||
797 | info->next = prev; | ||
798 | prev = info; | ||
799 | } while (--more); | ||
800 | |||
801 | goto again; | ||
802 | out: | ||
803 | while (prev) | ||
804 | prev = free_map_info(prev); | ||
805 | return curr; | ||
810 | } | 806 | } |
811 | 807 | ||
812 | static int register_for_each_vma(struct uprobe *uprobe, bool is_register) | 808 | static int register_for_each_vma(struct uprobe *uprobe, bool is_register) |
813 | { | 809 | { |
814 | struct list_head try_list; | 810 | struct map_info *info; |
815 | struct vm_area_struct *vma; | 811 | int err = 0; |
816 | struct address_space *mapping; | ||
817 | struct vma_info *vi, *tmpvi; | ||
818 | struct mm_struct *mm; | ||
819 | loff_t vaddr; | ||
820 | int ret; | ||
821 | 812 | ||
822 | mapping = uprobe->inode->i_mapping; | 813 | info = build_map_info(uprobe->inode->i_mapping, |
823 | INIT_LIST_HEAD(&try_list); | 814 | uprobe->offset, is_register); |
824 | 815 | if (IS_ERR(info)) | |
825 | ret = 0; | 816 | return PTR_ERR(info); |
826 | 817 | ||
827 | for (;;) { | 818 | while (info) { |
828 | vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register); | 819 | struct mm_struct *mm = info->mm; |
829 | if (!vi) | 820 | struct vm_area_struct *vma; |
830 | break; | 821 | loff_t vaddr; |
831 | 822 | ||
832 | if (IS_ERR(vi)) { | 823 | if (err) |
833 | ret = PTR_ERR(vi); | 824 | goto free; |
834 | break; | ||
835 | } | ||
836 | 825 | ||
837 | mm = vi->mm; | ||
838 | down_write(&mm->mmap_sem); | 826 | down_write(&mm->mmap_sem); |
839 | vma = find_vma(mm, (unsigned long)vi->vaddr); | 827 | vma = find_vma(mm, (unsigned long)info->vaddr); |
840 | if (!vma || !valid_vma(vma, is_register)) { | 828 | if (!vma || !valid_vma(vma, is_register)) |
841 | list_del(&vi->probe_list); | 829 | goto unlock; |
842 | kfree(vi); | 830 | |
843 | up_write(&mm->mmap_sem); | ||
844 | mmput(mm); | ||
845 | continue; | ||
846 | } | ||
847 | vaddr = vma_address(vma, uprobe->offset); | 831 | vaddr = vma_address(vma, uprobe->offset); |
848 | if (vma->vm_file->f_mapping->host != uprobe->inode || | 832 | if (vma->vm_file->f_mapping->host != uprobe->inode || |
849 | vaddr != vi->vaddr) { | 833 | vaddr != info->vaddr) |
850 | list_del(&vi->probe_list); | 834 | goto unlock; |
851 | kfree(vi); | ||
852 | up_write(&mm->mmap_sem); | ||
853 | mmput(mm); | ||
854 | continue; | ||
855 | } | ||
856 | 835 | ||
857 | if (is_register) | ||
858 | ret = install_breakpoint(uprobe, mm, vma, vi->vaddr); | ||
859 | else | ||
860 | remove_breakpoint(uprobe, mm, vi->vaddr); | ||
861 | |||
862 | up_write(&mm->mmap_sem); | ||
863 | mmput(mm); | ||
864 | if (is_register) { | 836 | if (is_register) { |
865 | if (ret && ret == -EEXIST) | 837 | err = install_breakpoint(uprobe, mm, vma, info->vaddr); |
866 | ret = 0; | 838 | if (err == -EEXIST) |
867 | if (ret) | 839 | err = 0; |
868 | break; | 840 | } else { |
841 | remove_breakpoint(uprobe, mm, info->vaddr); | ||
869 | } | 842 | } |
843 | unlock: | ||
844 | up_write(&mm->mmap_sem); | ||
845 | free: | ||
846 | mmput(mm); | ||
847 | info = free_map_info(info); | ||
870 | } | 848 | } |
871 | 849 | ||
872 | list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) { | 850 | return err; |
873 | list_del(&vi->probe_list); | ||
874 | kfree(vi); | ||
875 | } | ||
876 | |||
877 | return ret; | ||
878 | } | 851 | } |
879 | 852 | ||
880 | static int __uprobe_register(struct uprobe *uprobe) | 853 | static int __uprobe_register(struct uprobe *uprobe) |