diff options
| author | Oleg Nesterov <oleg@redhat.com> | 2012-11-25 16:48:37 -0500 |
|---|---|---|
| committer | Oleg Nesterov <oleg@redhat.com> | 2013-02-08 11:47:10 -0500 |
| commit | 66d06dffa5ef6f3544997440af63a91ef36a2171 (patch) | |
| tree | a95c81e492b5c714caa7f24f48fdb20fe0a33024 | |
| parent | 06b7bcd8cbd7eb1af331e437ec3d8f5182ae1b7e (diff) | |
uprobes: Kill uprobes_mutex[], separate alloc_uprobe() and __uprobe_register()
uprobe_register() and uprobe_unregister() are the only users of
mutex_lock(uprobes_hash(inode)), and the only reason why we can't
simply remove it is that we need to ensure that delete_uprobe() is
not possible after alloc_uprobe() and before consumer_add().
IOW, we need to ensure that when we take uprobe->register_rwsem
this uprobe is still valid and we didn't race with _unregister()
which called delete_uprobe() in between.
With this patch uprobe_register() simply checks uprobe_is_active()
and retries if it hits this very unlikely race. uprobes_mutex[] is
no longer needed and can be removed.
There is another reason for this change, prepare_uprobe() should be
folded into alloc_uprobe() and we do not want to hold the extra locks
around read_mapping_page/etc.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Anton Arapov <anton@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
| -rw-r--r-- | kernel/events/uprobes.c | 51 |
1 files changed, 15 insertions, 36 deletions
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 358baddc8ac2..c3b65d1c8443 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c | |||
| @@ -50,29 +50,6 @@ static struct rb_root uprobes_tree = RB_ROOT; | |||
| 50 | static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ | 50 | static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ |
| 51 | 51 | ||
| 52 | #define UPROBES_HASH_SZ 13 | 52 | #define UPROBES_HASH_SZ 13 |
| 53 | |||
| 54 | /* | ||
| 55 | * We need separate register/unregister and mmap/munmap lock hashes because | ||
| 56 | * of mmap_sem nesting. | ||
| 57 | * | ||
| 58 | * uprobe_register() needs to install probes on (potentially) all processes | ||
| 59 | * and thus needs to acquire multiple mmap_sems (consequtively, not | ||
| 60 | * concurrently), whereas uprobe_mmap() is called while holding mmap_sem | ||
| 61 | * for the particular process doing the mmap. | ||
| 62 | * | ||
| 63 | * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem | ||
| 64 | * because of lock order against i_mmap_mutex. This means there's a hole in | ||
| 65 | * the register vma iteration where a mmap() can happen. | ||
| 66 | * | ||
| 67 | * Thus uprobe_register() can race with uprobe_mmap() and we can try and | ||
| 68 | * install a probe where one is already installed. | ||
| 69 | */ | ||
| 70 | |||
| 71 | /* serialize (un)register */ | ||
| 72 | static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; | ||
| 73 | |||
| 74 | #define uprobes_hash(v) (&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) | ||
| 75 | |||
| 76 | /* serialize uprobe->pending_list */ | 53 | /* serialize uprobe->pending_list */ |
| 77 | static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; | 54 | static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; |
| 78 | #define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) | 55 | #define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) |
| @@ -865,20 +842,26 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * | |||
| 865 | if (offset > i_size_read(inode)) | 842 | if (offset > i_size_read(inode)) |
| 866 | return -EINVAL; | 843 | return -EINVAL; |
| 867 | 844 | ||
| 868 | ret = -ENOMEM; | 845 | retry: |
| 869 | mutex_lock(uprobes_hash(inode)); | ||
| 870 | uprobe = alloc_uprobe(inode, offset); | 846 | uprobe = alloc_uprobe(inode, offset); |
| 871 | if (uprobe) { | 847 | if (!uprobe) |
| 872 | down_write(&uprobe->register_rwsem); | 848 | return -ENOMEM; |
| 849 | /* | ||
| 850 | * We can race with uprobe_unregister()->delete_uprobe(). | ||
| 851 | * Check uprobe_is_active() and retry if it is false. | ||
| 852 | */ | ||
| 853 | down_write(&uprobe->register_rwsem); | ||
| 854 | ret = -EAGAIN; | ||
| 855 | if (likely(uprobe_is_active(uprobe))) { | ||
| 873 | ret = __uprobe_register(uprobe, uc); | 856 | ret = __uprobe_register(uprobe, uc); |
| 874 | if (ret) | 857 | if (ret) |
| 875 | __uprobe_unregister(uprobe, uc); | 858 | __uprobe_unregister(uprobe, uc); |
| 876 | up_write(&uprobe->register_rwsem); | ||
| 877 | } | 859 | } |
| 878 | mutex_unlock(uprobes_hash(inode)); | 860 | up_write(&uprobe->register_rwsem); |
| 879 | if (uprobe) | 861 | put_uprobe(uprobe); |
| 880 | put_uprobe(uprobe); | ||
| 881 | 862 | ||
| 863 | if (unlikely(ret == -EAGAIN)) | ||
| 864 | goto retry; | ||
| 882 | return ret; | 865 | return ret; |
| 883 | } | 866 | } |
| 884 | 867 | ||
| @@ -896,11 +879,9 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume | |||
| 896 | if (!uprobe) | 879 | if (!uprobe) |
| 897 | return; | 880 | return; |
| 898 | 881 | ||
| 899 | mutex_lock(uprobes_hash(inode)); | ||
| 900 | down_write(&uprobe->register_rwsem); | 882 | down_write(&uprobe->register_rwsem); |
| 901 | __uprobe_unregister(uprobe, uc); | 883 | __uprobe_unregister(uprobe, uc); |
| 902 | up_write(&uprobe->register_rwsem); | 884 | up_write(&uprobe->register_rwsem); |
| 903 | mutex_unlock(uprobes_hash(inode)); | ||
| 904 | put_uprobe(uprobe); | 885 | put_uprobe(uprobe); |
| 905 | } | 886 | } |
| 906 | 887 | ||
| @@ -1609,10 +1590,8 @@ static int __init init_uprobes(void) | |||
| 1609 | { | 1590 | { |
| 1610 | int i; | 1591 | int i; |
| 1611 | 1592 | ||
| 1612 | for (i = 0; i < UPROBES_HASH_SZ; i++) { | 1593 | for (i = 0; i < UPROBES_HASH_SZ; i++) |
| 1613 | mutex_init(&uprobes_mutex[i]); | ||
| 1614 | mutex_init(&uprobes_mmap_mutex[i]); | 1594 | mutex_init(&uprobes_mmap_mutex[i]); |
| 1615 | } | ||
| 1616 | 1595 | ||
| 1617 | if (percpu_init_rwsem(&dup_mmap_sem)) | 1596 | if (percpu_init_rwsem(&dup_mmap_sem)) |
| 1618 | return -ENOMEM; | 1597 | return -ENOMEM; |
