path: root/kernel/events/uprobes.c
Diffstat (limited to 'kernel/events/uprobes.c')
-rw-r--r--	kernel/events/uprobes.c	622
1 files changed, 299 insertions, 323 deletions
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 985be4d80fe8..c08a22d02f72 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -32,19 +32,36 @@
 #include <linux/swap.h>		/* try_to_free_swap */
 #include <linux/ptrace.h>	/* user_enable_single_step */
 #include <linux/kdebug.h>	/* notifier mechanism */
+#include "../../mm/internal.h"	/* munlock_vma_page */
 
 #include <linux/uprobes.h>
 
 #define UINSNS_PER_PAGE			(PAGE_SIZE/UPROBE_XOL_SLOT_BYTES)
 #define MAX_UPROBE_XOL_SLOTS		UINSNS_PER_PAGE
 
-static struct srcu_struct uprobes_srcu;
 static struct rb_root uprobes_tree = RB_ROOT;
 
 static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */
 
 #define UPROBES_HASH_SZ	13
 
+/*
+ * We need separate register/unregister and mmap/munmap lock hashes because
+ * of mmap_sem nesting.
+ *
+ * uprobe_register() needs to install probes on (potentially) all processes
+ * and thus needs to acquire multiple mmap_sems (consequtively, not
+ * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
+ * for the particular process doing the mmap.
+ *
+ * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
+ * because of lock order against i_mmap_mutex. This means there's a hole in
+ * the register vma iteration where a mmap() can happen.
+ *
+ * Thus uprobe_register() can race with uprobe_mmap() and we can try and
+ * install a probe where one is already installed.
+ */
+
 /* serialize (un)register */
 static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
 
@@ -61,17 +78,6 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
  */
 static atomic_t uprobe_events = ATOMIC_INIT(0);
 
-/*
- * Maintain a temporary per vma info that can be used to search if a vma
- * has already been handled. This structure is introduced since extending
- * vm_area_struct wasnt recommended.
- */
-struct vma_info {
-	struct list_head	probe_list;
-	struct mm_struct	*mm;
-	loff_t			vaddr;
-};
-
 struct uprobe {
 	struct rb_node		rb_node;	/* node in the rb tree */
 	atomic_t		ref;
@@ -100,20 +106,21 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register)
 	if (!is_register)
 		return true;
 
-	if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC))
+	if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED))
+				== (VM_READ|VM_EXEC))
 		return true;
 
 	return false;
 }
 
-static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
+static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset)
 {
-	loff_t vaddr;
-
-	vaddr = vma->vm_start + offset;
-	vaddr -= vma->vm_pgoff << PAGE_SHIFT;
+	return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+}
 
-	return vaddr;
+static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
+{
+	return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start);
 }
 
 /**
@@ -121,41 +128,27 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
  * based on replace_page in mm/ksm.c
  *
  * @vma:   vma that holds the pte pointing to page
+ * @addr:  address the old @page is mapped at
  * @page:  the cowed page we are replacing by kpage
  * @kpage: the modified page we replace page by
  *
  * Returns 0 on success, -EFAULT on failure.
  */
-static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
+static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
+				struct page *page, struct page *kpage)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *ptep;
 	spinlock_t *ptl;
-	unsigned long addr;
-	int err = -EFAULT;
-
-	addr = page_address_in_vma(page, vma);
-	if (addr == -EFAULT)
-		goto out;
-
-	pgd = pgd_offset(mm, addr);
-	if (!pgd_present(*pgd))
-		goto out;
-
-	pud = pud_offset(pgd, addr);
-	if (!pud_present(*pud))
-		goto out;
+	pte_t *ptep;
+	int err;
 
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
-		goto out;
+	/* For try_to_free_swap() and munlock_vma_page() below */
+	lock_page(page);
 
-	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	err = -EAGAIN;
+	ptep = page_check_address(page, mm, addr, &ptl, 0);
 	if (!ptep)
-		goto out;
+		goto unlock;
 
 	get_page(kpage);
 	page_add_new_anon_rmap(kpage, vma, addr);
@@ -172,11 +165,15 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct
 	page_remove_rmap(page);
 	if (!page_mapped(page))
 		try_to_free_swap(page);
-	put_page(page);
 	pte_unmap_unlock(ptep, ptl);
-	err = 0;
 
-out:
+	if (vma->vm_flags & VM_LOCKED)
+		munlock_vma_page(page);
+	put_page(page);
+
+	err = 0;
+ unlock:
+	unlock_page(page);
 	return err;
 }
 
@@ -218,79 +215,46 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 			unsigned long vaddr, uprobe_opcode_t opcode)
 {
 	struct page *old_page, *new_page;
-	struct address_space *mapping;
 	void *vaddr_old, *vaddr_new;
 	struct vm_area_struct *vma;
-	struct uprobe *uprobe;
-	loff_t addr;
 	int ret;
 
+retry:
 	/* Read the page with vaddr into memory */
 	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
 	if (ret <= 0)
 		return ret;
 
-	ret = -EINVAL;
-
-	/*
-	 * We are interested in text pages only. Our pages of interest
-	 * should be mapped for read and execute only. We desist from
-	 * adding probes in write mapped pages since the breakpoints
-	 * might end up in the file copy.
-	 */
-	if (!valid_vma(vma, is_swbp_insn(&opcode)))
-		goto put_out;
-
-	uprobe = container_of(auprobe, struct uprobe, arch);
-	mapping = uprobe->inode->i_mapping;
-	if (mapping != vma->vm_file->f_mapping)
-		goto put_out;
-
-	addr = vma_address(vma, uprobe->offset);
-	if (vaddr != (unsigned long)addr)
-		goto put_out;
-
 	ret = -ENOMEM;
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
 	if (!new_page)
-		goto put_out;
+		goto put_old;
 
 	__SetPageUptodate(new_page);
 
-	/*
-	 * lock page will serialize against do_wp_page()'s
-	 * PageAnon() handling
-	 */
-	lock_page(old_page);
 	/* copy the page now that we've got it stable */
 	vaddr_old = kmap_atomic(old_page);
 	vaddr_new = kmap_atomic(new_page);
 
 	memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
-
-	/* poke the new insn in, ASSUMES we don't cross page boundary */
-	vaddr &= ~PAGE_MASK;
-	BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
-	memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
+	memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);
 
 	kunmap_atomic(vaddr_new);
 	kunmap_atomic(vaddr_old);
 
 	ret = anon_vma_prepare(vma);
 	if (ret)
-		goto unlock_out;
+		goto put_new;
 
-	lock_page(new_page);
-	ret = __replace_page(vma, old_page, new_page);
-	unlock_page(new_page);
+	ret = __replace_page(vma, vaddr, old_page, new_page);
 
-unlock_out:
-	unlock_page(old_page);
+put_new:
 	page_cache_release(new_page);
-
-put_out:
+put_old:
 	put_page(old_page);
 
+	if (unlikely(ret == -EAGAIN))
+		goto retry;
 	return ret;
 }
 
@@ -312,7 +276,7 @@ static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_
 	void *vaddr_new;
 	int ret;
 
-	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL);
+	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
 	if (ret <= 0)
 		return ret;
 
@@ -333,10 +297,20 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
 	uprobe_opcode_t opcode;
 	int result;
 
+	if (current->mm == mm) {
+		pagefault_disable();
+		result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr,
+								sizeof(opcode));
+		pagefault_enable();
+
+		if (likely(result == 0))
+			goto out;
+	}
+
 	result = read_opcode(mm, vaddr, &opcode);
 	if (result)
 		return result;
-
+ out:
 	if (is_swbp_insn(&opcode))
 		return 1;
 
@@ -355,7 +329,9 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
 	int result;
-
+	/*
+	 * See the comment near uprobes_hash().
+	 */
 	result = is_swbp_at_addr(mm, vaddr);
 	if (result == 1)
 		return -EEXIST;
@@ -520,7 +496,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 	uprobe->inode = igrab(inode);
 	uprobe->offset = offset;
 	init_rwsem(&uprobe->consumer_rwsem);
-	INIT_LIST_HEAD(&uprobe->pending_list);
 
 	/* add to uprobes_tree, sorted on inode:offset */
 	cur_uprobe = insert_uprobe(uprobe);
@@ -588,20 +563,22 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
 }
 
 static int
-__copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn,
-			unsigned long nbytes, unsigned long offset)
+__copy_insn(struct address_space *mapping, struct file *filp, char *insn,
+			unsigned long nbytes, loff_t offset)
 {
-	struct file *filp = vma->vm_file;
 	struct page *page;
 	void *vaddr;
-	unsigned long off1;
-	unsigned long idx;
+	unsigned long off;
+	pgoff_t idx;
 
 	if (!filp)
 		return -EINVAL;
 
-	idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT);
-	off1 = offset &= ~PAGE_MASK;
+	if (!mapping->a_ops->readpage)
+		return -EIO;
+
+	idx = offset >> PAGE_CACHE_SHIFT;
+	off = offset & ~PAGE_MASK;
 
 	/*
 	 * Ensure that the page that has the original instruction is
@@ -612,22 +589,20 @@ __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *ins
 		return PTR_ERR(page);
 
 	vaddr = kmap_atomic(page);
-	memcpy(insn, vaddr + off1, nbytes);
+	memcpy(insn, vaddr + off, nbytes);
 	kunmap_atomic(vaddr);
 	page_cache_release(page);
 
 	return 0;
 }
 
-static int
-copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
+static int copy_insn(struct uprobe *uprobe, struct file *filp)
 {
 	struct address_space *mapping;
 	unsigned long nbytes;
 	int bytes;
 
-	addr &= ~PAGE_MASK;
-	nbytes = PAGE_SIZE - addr;
+	nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK);
 	mapping = uprobe->inode->i_mapping;
 
 	/* Instruction at end of binary; copy only available bytes */
@@ -638,13 +613,13 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
 
 	/* Instruction at the page-boundary; copy bytes in second page */
 	if (nbytes < bytes) {
-		if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes,
-					bytes - nbytes, uprobe->offset + nbytes))
-			return -ENOMEM;
-
+		int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
+				bytes - nbytes, uprobe->offset + nbytes);
+		if (err)
+			return err;
 		bytes = nbytes;
 	}
-	return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset);
+	return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
 }
 
 /*
@@ -672,9 +647,8 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
  */
 static int
 install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
-			struct vm_area_struct *vma, loff_t vaddr)
+			struct vm_area_struct *vma, unsigned long vaddr)
 {
-	unsigned long addr;
 	int ret;
 
 	/*
@@ -687,20 +661,22 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 	if (!uprobe->consumers)
 		return -EEXIST;
 
-	addr = (unsigned long)vaddr;
-
 	if (!(uprobe->flags & UPROBE_COPY_INSN)) {
-		ret = copy_insn(uprobe, vma, addr);
+		ret = copy_insn(uprobe, vma->vm_file);
 		if (ret)
 			return ret;
 
 		if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
-			return -EEXIST;
+			return -ENOTSUPP;
 
-		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm);
+		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
 		if (ret)
 			return ret;
 
+		/* write_opcode() assumes we don't cross page boundary */
+		BUG_ON((uprobe->offset & ~PAGE_MASK) +
+				UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
+
 		uprobe->flags |= UPROBE_COPY_INSN;
 	}
 
@@ -713,7 +689,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 	 * Hence increment before and decrement on failure.
 	 */
 	atomic_inc(&mm->uprobes_state.count);
-	ret = set_swbp(&uprobe->arch, mm, addr);
+	ret = set_swbp(&uprobe->arch, mm, vaddr);
 	if (ret)
 		atomic_dec(&mm->uprobes_state.count);
 
@@ -721,27 +697,21 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 }
 
 static void
-remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
+remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true))
+	if (!set_orig_insn(&uprobe->arch, mm, vaddr, true))
 		atomic_dec(&mm->uprobes_state.count);
 }
 
 /*
- * There could be threads that have hit the breakpoint and are entering the
- * notifier code and trying to acquire the uprobes_treelock. The thread
- * calling delete_uprobe() that is removing the uprobe from the rb_tree can
- * race with these threads and might acquire the uprobes_treelock compared
- * to some of the breakpoint hit threads. In such a case, the breakpoint
- * hit threads will not find the uprobe. The current unregistering thread
- * waits till all other threads have hit a breakpoint, to acquire the
- * uprobes_treelock before the uprobe is removed from the rbtree.
+ * There could be threads that have already hit the breakpoint. They
+ * will recheck the current insn and restart if find_uprobe() fails.
+ * See find_active_uprobe().
  */
 static void delete_uprobe(struct uprobe *uprobe)
 {
 	unsigned long flags;
 
-	synchronize_srcu(&uprobes_srcu);
 	spin_lock_irqsave(&uprobes_treelock, flags);
 	rb_erase(&uprobe->rb_node, &uprobes_tree);
 	spin_unlock_irqrestore(&uprobes_treelock, flags);
@@ -750,139 +720,136 @@ static void delete_uprobe(struct uprobe *uprobe)
 	atomic_dec(&uprobe_events);
 }
 
-static struct vma_info *
-__find_next_vma_info(struct address_space *mapping, struct list_head *head,
-			struct vma_info *vi, loff_t offset, bool is_register)
-{
-	struct prio_tree_iter iter;
-	struct vm_area_struct *vma;
-	struct vma_info *tmpvi;
-	unsigned long pgoff;
-	int existing_vma;
-	loff_t vaddr;
-
-	pgoff = offset >> PAGE_SHIFT;
-
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
-		if (!valid_vma(vma, is_register))
-			continue;
-
-		existing_vma = 0;
-		vaddr = vma_address(vma, offset);
-
-		list_for_each_entry(tmpvi, head, probe_list) {
-			if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) {
-				existing_vma = 1;
-				break;
-			}
-		}
-
-		/*
-		 * Another vma needs a probe to be installed. However skip
-		 * installing the probe if the vma is about to be unlinked.
-		 */
-		if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) {
-			vi->mm = vma->vm_mm;
-			vi->vaddr = vaddr;
-			list_add(&vi->probe_list, head);
-
-			return vi;
-		}
-	}
-
-	return NULL;
-}
-
-/*
- * Iterate in the rmap prio tree and find a vma where a probe has not
- * yet been inserted.
- */
-static struct vma_info *
-find_next_vma_info(struct address_space *mapping, struct list_head *head,
-			loff_t offset, bool is_register)
-{
-	struct vma_info *vi, *retvi;
-
-	vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL);
-	if (!vi)
-		return ERR_PTR(-ENOMEM);
-
-	mutex_lock(&mapping->i_mmap_mutex);
-	retvi = __find_next_vma_info(mapping, head, vi, offset, is_register);
-	mutex_unlock(&mapping->i_mmap_mutex);
-
-	if (!retvi)
-		kfree(vi);
-
-	return retvi;
+struct map_info {
+	struct map_info *next;
+	struct mm_struct *mm;
+	unsigned long vaddr;
+};
+
+static inline struct map_info *free_map_info(struct map_info *info)
+{
+	struct map_info *next = info->next;
+	kfree(info);
+	return next;
+}
+
+static struct map_info *
+build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
+{
+	unsigned long pgoff = offset >> PAGE_SHIFT;
+	struct prio_tree_iter iter;
+	struct vm_area_struct *vma;
+	struct map_info *curr = NULL;
+	struct map_info *prev = NULL;
+	struct map_info *info;
+	int more = 0;
+
+ again:
+	mutex_lock(&mapping->i_mmap_mutex);
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+		if (!valid_vma(vma, is_register))
+			continue;
+
+		if (!prev && !more) {
+			/*
+			 * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through
+			 * reclaim. This is optimistic, no harm done if it fails.
+			 */
+			prev = kmalloc(sizeof(struct map_info),
+					GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN);
+			if (prev)
+				prev->next = NULL;
+		}
+		if (!prev) {
+			more++;
+			continue;
+		}
+
+		if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
+			continue;
+
+		info = prev;
+		prev = prev->next;
+		info->next = curr;
+		curr = info;
+
+		info->mm = vma->vm_mm;
+		info->vaddr = offset_to_vaddr(vma, offset);
+	}
+	mutex_unlock(&mapping->i_mmap_mutex);
+
+	if (!more)
+		goto out;
+
+	prev = curr;
+	while (curr) {
+		mmput(curr->mm);
+		curr = curr->next;
+	}
+
+	do {
+		info = kmalloc(sizeof(struct map_info), GFP_KERNEL);
+		if (!info) {
+			curr = ERR_PTR(-ENOMEM);
+			goto out;
+		}
+		info->next = prev;
+		prev = info;
+	} while (--more);
+
+	goto again;
+ out:
+	while (prev)
+		prev = free_map_info(prev);
+	return curr;
 }
 
 static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 {
-	struct list_head try_list;
-	struct vm_area_struct *vma;
-	struct address_space *mapping;
-	struct vma_info *vi, *tmpvi;
-	struct mm_struct *mm;
-	loff_t vaddr;
-	int ret;
-
-	mapping = uprobe->inode->i_mapping;
-	INIT_LIST_HEAD(&try_list);
-
-	ret = 0;
-
-	for (;;) {
-		vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register);
-		if (!vi)
-			break;
-
-		if (IS_ERR(vi)) {
-			ret = PTR_ERR(vi);
-			break;
-		}
-
-		mm = vi->mm;
-		down_read(&mm->mmap_sem);
-		vma = find_vma(mm, (unsigned long)vi->vaddr);
-		if (!vma || !valid_vma(vma, is_register)) {
-			list_del(&vi->probe_list);
-			kfree(vi);
-			up_read(&mm->mmap_sem);
-			mmput(mm);
-			continue;
-		}
-		vaddr = vma_address(vma, uprobe->offset);
-		if (vma->vm_file->f_mapping->host != uprobe->inode ||
-						vaddr != vi->vaddr) {
-			list_del(&vi->probe_list);
-			kfree(vi);
-			up_read(&mm->mmap_sem);
-			mmput(mm);
-			continue;
-		}
-
-		if (is_register)
-			ret = install_breakpoint(uprobe, mm, vma, vi->vaddr);
-		else
-			remove_breakpoint(uprobe, mm, vi->vaddr);
-
-		up_read(&mm->mmap_sem);
-		mmput(mm);
-		if (is_register) {
-			if (ret && ret == -EEXIST)
-				ret = 0;
-			if (ret)
-				break;
-		}
-	}
-
-	list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) {
-		list_del(&vi->probe_list);
-		kfree(vi);
-	}
-
-	return ret;
+	struct map_info *info;
+	int err = 0;
+
+	info = build_map_info(uprobe->inode->i_mapping,
+					uprobe->offset, is_register);
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+
+	while (info) {
+		struct mm_struct *mm = info->mm;
+		struct vm_area_struct *vma;
+
+		if (err)
+			goto free;
+
+		down_write(&mm->mmap_sem);
+		vma = find_vma(mm, info->vaddr);
+		if (!vma || !valid_vma(vma, is_register) ||
+		    vma->vm_file->f_mapping->host != uprobe->inode)
+			goto unlock;
+
+		if (vma->vm_start > info->vaddr ||
+		    vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
+			goto unlock;
+
+		if (is_register) {
+			err = install_breakpoint(uprobe, mm, vma, info->vaddr);
+			/*
+			 * We can race against uprobe_mmap(), see the
+			 * comment near uprobe_hash().
+			 */
+			if (err == -EEXIST)
+				err = 0;
+		} else {
+			remove_breakpoint(uprobe, mm, info->vaddr);
+		}
+ unlock:
+		up_write(&mm->mmap_sem);
+ free:
+		mmput(mm);
+		info = free_map_info(info);
+	}
+
+	return err;
 }
 
 static int __uprobe_register(struct uprobe *uprobe)
@@ -977,59 +944,66 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
 	put_uprobe(uprobe);
 }
 
-/*
- * Of all the nodes that correspond to the given inode, return the node
- * with the least offset.
- */
-static struct rb_node *find_least_offset_node(struct inode *inode)
+static struct rb_node *
+find_node_in_range(struct inode *inode, loff_t min, loff_t max)
 {
-	struct uprobe u = { .inode = inode, .offset = 0};
 	struct rb_node *n = uprobes_tree.rb_node;
-	struct rb_node *close_node = NULL;
-	struct uprobe *uprobe;
-	int match;
 
 	while (n) {
-		uprobe = rb_entry(n, struct uprobe, rb_node);
-		match = match_uprobe(&u, uprobe);
+		struct uprobe *u = rb_entry(n, struct uprobe, rb_node);
 
-		if (uprobe->inode == inode)
-			close_node = n;
-
-		if (!match)
-			return close_node;
-
-		if (match < 0)
+		if (inode < u->inode) {
 			n = n->rb_left;
-		else
+		} else if (inode > u->inode) {
 			n = n->rb_right;
+		} else {
+			if (max < u->offset)
+				n = n->rb_left;
+			else if (min > u->offset)
+				n = n->rb_right;
+			else
+				break;
+		}
 	}
 
-	return close_node;
+	return n;
 }
 
 /*
- * For a given inode, build a list of probes that need to be inserted.
+ * For a given range in vma, build a list of probes that need to be inserted.
  */
-static void build_probe_list(struct inode *inode, struct list_head *head)
+static void build_probe_list(struct inode *inode,
+				struct vm_area_struct *vma,
+				unsigned long start, unsigned long end,
+				struct list_head *head)
 {
-	struct uprobe *uprobe;
+	loff_t min, max;
 	unsigned long flags;
-	struct rb_node *n;
-
-	spin_lock_irqsave(&uprobes_treelock, flags);
-
-	n = find_least_offset_node(inode);
+	struct rb_node *n, *t;
+	struct uprobe *u;
 
-	for (; n; n = rb_next(n)) {
-		uprobe = rb_entry(n, struct uprobe, rb_node);
-		if (uprobe->inode != inode)
-			break;
+	INIT_LIST_HEAD(head);
+	min = vaddr_to_offset(vma, start);
+	max = min + (end - start) - 1;
 
-		list_add(&uprobe->pending_list, head);
-		atomic_inc(&uprobe->ref);
+	spin_lock_irqsave(&uprobes_treelock, flags);
+	n = find_node_in_range(inode, min, max);
+	if (n) {
+		for (t = n; t; t = rb_prev(t)) {
+			u = rb_entry(t, struct uprobe, rb_node);
+			if (u->inode != inode || u->offset < min)
+				break;
+			list_add(&u->pending_list, head);
+			atomic_inc(&u->ref);
+		}
+		for (t = n; (t = rb_next(t)); ) {
+			u = rb_entry(t, struct uprobe, rb_node);
+			if (u->inode != inode || u->offset > max)
+				break;
+			list_add(&u->pending_list, head);
+			atomic_inc(&u->ref);
+		}
 	}
-
 	spin_unlock_irqrestore(&uprobes_treelock, flags);
 }
 
@@ -1059,28 +1033,21 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	if (!inode)
 		return 0;
 
-	INIT_LIST_HEAD(&tmp_list);
 	mutex_lock(uprobes_mmap_hash(inode));
-	build_probe_list(inode, &tmp_list);
+	build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list);
 
 	ret = 0;
 	count = 0;
 
 	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
-		loff_t vaddr;
-
-		list_del(&uprobe->pending_list);
 		if (!ret) {
-			vaddr = vma_address(vma, uprobe->offset);
-
-			if (vaddr < vma->vm_start || vaddr >= vma->vm_end) {
-				put_uprobe(uprobe);
-				continue;
-			}
+			unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
 
 			ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
-
-			/* Ignore double add: */
+			/*
+			 * We can race against uprobe_register(), see the
+			 * comment near uprobe_hash().
+			 */
 			if (ret == -EEXIST) {
 				ret = 0;
 
@@ -1121,6 +1088,9 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 	if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
 		return;
 
+	if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
+		return;
+
 	if (!atomic_read(&vma->vm_mm->uprobes_state.count))
 		return;
 
@@ -1128,24 +1098,17 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 	if (!inode)
 		return;
 
-	INIT_LIST_HEAD(&tmp_list);
 	mutex_lock(uprobes_mmap_hash(inode));
-	build_probe_list(inode, &tmp_list);
+	build_probe_list(inode, vma, start, end, &tmp_list);
 
 	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
-		loff_t vaddr;
-
-		list_del(&uprobe->pending_list);
-		vaddr = vma_address(vma, uprobe->offset);
-
-		if (vaddr >= start && vaddr < end) {
-			/*
-			 * An unregister could have removed the probe before
-			 * unmap. So check before we decrement the count.
-			 */
-			if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1)
-				atomic_dec(&vma->vm_mm->uprobes_state.count);
-		}
+		unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
+		/*
+		 * An unregister could have removed the probe before
+		 * unmap. So check before we decrement the count.
+		 */
+		if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1)
+			atomic_dec(&vma->vm_mm->uprobes_state.count);
 		put_uprobe(uprobe);
 	}
 	mutex_unlock(uprobes_mmap_hash(inode));
@@ -1378,9 +1341,6 @@ void uprobe_free_utask(struct task_struct *t)
 {
 	struct uprobe_task *utask = t->utask;
 
-	if (t->uprobe_srcu_id != -1)
-		srcu_read_unlock_raw(&uprobes_srcu, t->uprobe_srcu_id);
-
 	if (!utask)
 		return;
 
@@ -1398,7 +1358,6 @@ void uprobe_free_utask(struct task_struct *t)
 void uprobe_copy_process(struct task_struct *t)
 {
 	t->utask = NULL;
-	t->uprobe_srcu_id = -1;
 }
 
 /*
@@ -1417,7 +1376,6 @@ static struct uprobe_task *add_utask(void)
 	if (unlikely(!utask))
 		return NULL;
 
-	utask->active_uprobe = NULL;
 	current->utask = utask;
 	return utask;
 }
@@ -1479,41 +1437,61 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
 	return false;
 }
 
+static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
+{
+	struct mm_struct *mm = current->mm;
+	struct uprobe *uprobe = NULL;
+	struct vm_area_struct *vma;
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, bp_vaddr);
+	if (vma && vma->vm_start <= bp_vaddr) {
+		if (valid_vma(vma, false)) {
+			struct inode *inode = vma->vm_file->f_mapping->host;
+			loff_t offset = vaddr_to_offset(vma, bp_vaddr);
+
+			uprobe = find_uprobe(inode, offset);
+		}
+
+		if (!uprobe)
+			*is_swbp = is_swbp_at_addr(mm, bp_vaddr);
+	} else {
+		*is_swbp = -EFAULT;
+	}
+	up_read(&mm->mmap_sem);
+
+	return uprobe;
+}
+
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
  */
 static void handle_swbp(struct pt_regs *regs)
 {
-	struct vm_area_struct *vma;
 	struct uprobe_task *utask;
 	struct uprobe *uprobe;
-	struct mm_struct *mm;
 	unsigned long bp_vaddr;
+	int uninitialized_var(is_swbp);
 
-	uprobe = NULL;
 	bp_vaddr = uprobe_get_swbp_addr(regs);
-	mm = current->mm;
-	down_read(&mm->mmap_sem);
-	vma = find_vma(mm, bp_vaddr);
-
-	if (vma && vma->vm_start <= bp_vaddr && valid_vma(vma, false)) {
-		struct inode *inode;
-		loff_t offset;
-
-		inode = vma->vm_file->f_mapping->host;
-		offset = bp_vaddr - vma->vm_start;
-		offset += (vma->vm_pgoff << PAGE_SHIFT);
-		uprobe = find_uprobe(inode, offset);
-	}
-
-	srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id);
-	current->uprobe_srcu_id = -1;
-	up_read(&mm->mmap_sem);
+	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
 
 	if (!uprobe) {
-		/* No matching uprobe; signal SIGTRAP. */
-		send_sig(SIGTRAP, current, 0);
+		if (is_swbp > 0) {
+			/* No matching uprobe; signal SIGTRAP. */
+			send_sig(SIGTRAP, current, 0);
+		} else {
+			/*
+			 * Either we raced with uprobe_unregister() or we can't
+			 * access this memory. The latter is only possible if
+			 * another thread plays with our ->mm. In both cases
+			 * we can simply restart. If this vma was unmapped we
+			 * can pretend this insn was not executed yet and get
+			 * the (correct) SIGSEGV after restart.
+			 */
+			instruction_pointer_set(regs, bp_vaddr);
+		}
 		return;
 	}
 
@@ -1620,7 +1598,6 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs)
 	utask->state = UTASK_BP_HIT;
 
 	set_thread_flag(TIF_UPROBE);
-	current->uprobe_srcu_id = srcu_read_lock_raw(&uprobes_srcu);
 
 	return 1;
 }
@@ -1655,7 +1632,6 @@ static int __init init_uprobes(void)
 		mutex_init(&uprobes_mutex[i]);
 		mutex_init(&uprobes_mmap_mutex[i]);
 	}
-	init_srcu_struct(&uprobes_srcu);
 
 	return register_die_notifier(&uprobe_exception_nb);
 }
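
For reference, a minimal standalone sketch of the address arithmetic that the new offset_to_vaddr()/vaddr_to_offset() helpers implement. This is not kernel code: struct fake_vma is a hypothetical stand-in carrying only the two vm_area_struct fields the helpers touch, and PAGE_SHIFT is assumed to be 12 (4K pages).

/* Standalone sketch, userspace-buildable; types and values are illustrative only. */
#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12			/* assumed 4K pages */

struct fake_vma {			/* hypothetical stand-in for vm_area_struct */
	unsigned long vm_start;		/* first user address covered by the mapping */
	unsigned long vm_pgoff;		/* file offset of vm_start, in pages */
};

static unsigned long offset_to_vaddr(struct fake_vma *vma, long long offset)
{
	return vma->vm_start + offset - ((long long)vma->vm_pgoff << PAGE_SHIFT);
}

static long long vaddr_to_offset(struct fake_vma *vma, unsigned long vaddr)
{
	return ((long long)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start);
}

int main(void)
{
	/* a mapping of file page 3 (file offset 0x3000) placed at user address 0x400000 */
	struct fake_vma vma = { .vm_start = 0x400000, .vm_pgoff = 3 };
	long long probe_offset = 0x3456;	/* e.g. uprobe->offset inside the file */

	unsigned long vaddr = offset_to_vaddr(&vma, probe_offset);
	printf("offset %#llx -> vaddr %#lx\n", (unsigned long long)probe_offset, vaddr);

	/* the two helpers are inverses for addresses that fall inside the mapping */
	assert(vaddr_to_offset(&vma, vaddr) == probe_offset);
	return 0;
}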