aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOleg Nesterov <oleg@redhat.com>2012-11-14 13:03:42 -0500
committerOleg Nesterov <oleg@redhat.com>2012-11-16 08:52:51 -0500
commit32cdba1e05418909708a17e52505e8b2ba4381d1 (patch)
tree63ab2edf02cbf72e71f4ba3d6c751eaf956d4e91
parent65b6ecc03838fd263cf7fafdfa6cf13012b91d56 (diff)
uprobes: Use percpu_rw_semaphore to fix register/unregister vs dup_mmap() race
This was always racy, but 268720903f87e0b84b161626c4447b81671b5d18 "uprobes: Rework register_for_each_vma() to make it O(n)" should be blamed anyway, it made everything worse and I didn't notice. register/unregister call build_map_info() and then do install/remove breakpoint for every mm which mmaps inode/offset. This can obviously race with fork()->dup_mmap() in between and we can miss the child. uprobe_register() could be easily fixed but unregister is much worse, the new mm inherits "int3" from parent and there is no way to detect this if uprobe goes away. So this patch simply adds percpu_down_read/up_read around dup_mmap(), and percpu_down_write/up_write into register_for_each_vma(). This adds 2 new hooks into dup_mmap() but we can kill uprobe_dup_mmap() and fold it into uprobe_end_dup_mmap(). Reported-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
-rw-r--r--include/linux/uprobes.h8
-rw-r--r--kernel/events/uprobes.c26
-rw-r--r--kernel/fork.c2
3 files changed, 33 insertions, 3 deletions
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 2615c4d7788d..4f628a6fc5b4 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -97,6 +97,8 @@ extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_con
97extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); 97extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
98extern int uprobe_mmap(struct vm_area_struct *vma); 98extern int uprobe_mmap(struct vm_area_struct *vma);
99extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); 99extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
100extern void uprobe_start_dup_mmap(void);
101extern void uprobe_end_dup_mmap(void);
100extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); 102extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
101extern void uprobe_free_utask(struct task_struct *t); 103extern void uprobe_free_utask(struct task_struct *t);
102extern void uprobe_copy_process(struct task_struct *t); 104extern void uprobe_copy_process(struct task_struct *t);
@@ -127,6 +129,12 @@ static inline void
127uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) 129uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
128{ 130{
129} 131}
132static inline void uprobe_start_dup_mmap(void)
133{
134}
135static inline void uprobe_end_dup_mmap(void)
136{
137}
130static inline void 138static inline void
131uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) 139uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
132{ 140{
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 5ce99cfd2e6e..dea7acfbb071 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -33,6 +33,7 @@
33#include <linux/ptrace.h> /* user_enable_single_step */ 33#include <linux/ptrace.h> /* user_enable_single_step */
34#include <linux/kdebug.h> /* notifier mechanism */ 34#include <linux/kdebug.h> /* notifier mechanism */
35#include "../../mm/internal.h" /* munlock_vma_page */ 35#include "../../mm/internal.h" /* munlock_vma_page */
36#include <linux/percpu-rwsem.h>
36 37
37#include <linux/uprobes.h> 38#include <linux/uprobes.h>
38 39
@@ -71,6 +72,8 @@ static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
71static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; 72static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
72#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) 73#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
73 74
75static struct percpu_rw_semaphore dup_mmap_sem;
76
74/* 77/*
75 * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe 78 * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe
76 * events active at this time. Probably a fine grained per inode count is 79 * events active at this time. Probably a fine grained per inode count is
@@ -766,10 +769,13 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
766 struct map_info *info; 769 struct map_info *info;
767 int err = 0; 770 int err = 0;
768 771
772 percpu_down_write(&dup_mmap_sem);
769 info = build_map_info(uprobe->inode->i_mapping, 773 info = build_map_info(uprobe->inode->i_mapping,
770 uprobe->offset, is_register); 774 uprobe->offset, is_register);
771 if (IS_ERR(info)) 775 if (IS_ERR(info)) {
772 return PTR_ERR(info); 776 err = PTR_ERR(info);
777 goto out;
778 }
773 779
774 while (info) { 780 while (info) {
775 struct mm_struct *mm = info->mm; 781 struct mm_struct *mm = info->mm;
@@ -799,7 +805,8 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
799 mmput(mm); 805 mmput(mm);
800 info = free_map_info(info); 806 info = free_map_info(info);
801 } 807 }
802 808 out:
809 percpu_up_write(&dup_mmap_sem);
803 return err; 810 return err;
804} 811}
805 812
@@ -1131,6 +1138,16 @@ void uprobe_clear_state(struct mm_struct *mm)
1131 kfree(area); 1138 kfree(area);
1132} 1139}
1133 1140
1141void uprobe_start_dup_mmap(void)
1142{
1143 percpu_down_read(&dup_mmap_sem);
1144}
1145
1146void uprobe_end_dup_mmap(void)
1147{
1148 percpu_up_read(&dup_mmap_sem);
1149}
1150
1134void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) 1151void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
1135{ 1152{
1136 newmm->uprobes_state.xol_area = NULL; 1153 newmm->uprobes_state.xol_area = NULL;
@@ -1597,6 +1614,9 @@ static int __init init_uprobes(void)
1597 mutex_init(&uprobes_mmap_mutex[i]); 1614 mutex_init(&uprobes_mmap_mutex[i]);
1598 } 1615 }
1599 1616
1617 if (percpu_init_rwsem(&dup_mmap_sem))
1618 return -ENOMEM;
1619
1600 return register_die_notifier(&uprobe_exception_nb); 1620 return register_die_notifier(&uprobe_exception_nb);
1601} 1621}
1602module_init(init_uprobes); 1622module_init(init_uprobes);
diff --git a/kernel/fork.c b/kernel/fork.c
index 8b20ab7d3aa2..c497e57aa654 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -352,6 +352,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
352 unsigned long charge; 352 unsigned long charge;
353 struct mempolicy *pol; 353 struct mempolicy *pol;
354 354
355 uprobe_start_dup_mmap();
355 down_write(&oldmm->mmap_sem); 356 down_write(&oldmm->mmap_sem);
356 flush_cache_dup_mm(oldmm); 357 flush_cache_dup_mm(oldmm);
357 uprobe_dup_mmap(oldmm, mm); 358 uprobe_dup_mmap(oldmm, mm);
@@ -469,6 +470,7 @@ out:
469 up_write(&mm->mmap_sem); 470 up_write(&mm->mmap_sem);
470 flush_tlb_mm(oldmm); 471 flush_tlb_mm(oldmm);
471 up_write(&oldmm->mmap_sem); 472 up_write(&oldmm->mmap_sem);
473 uprobe_end_dup_mmap();
472 return retval; 474 return retval;
473fail_nomem_anon_vma_fork: 475fail_nomem_anon_vma_fork:
474 mpol_put(pol); 476 mpol_put(pol);