path: root/kernel/events
author	Ingo Molnar <mingo@elte.hu>	2012-02-22 05:01:49 -0500
committer	Ingo Molnar <mingo@elte.hu>	2012-02-22 05:08:00 -0500
commit	a5f4374a9610fd7286c2164d4e680436727eff71 (patch)
tree	8ccabfd8afe9f58a847ab80b8a6a10eee08009a8 /kernel/events
parent	7b2d81d48a2d8e37efb6ce7b4d5ef58822b30d89 (diff)
uprobes: Move to kernel/events/
Consolidate the uprobes code under kernel/events/, where the various
core kernel event handling routines live.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Anton Arapov <anton@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Link: http://lkml.kernel.org/n/tip-biuyhhwohxgbp2vzbap5yr8o@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/events')
-rw-r--r--	kernel/events/Makefile	3
-rw-r--r--	kernel/events/uprobes.c	1011
2 files changed, 1014 insertions(+), 0 deletions(-)
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 22d901f9caf4..103f5d147b2f 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -3,4 +3,7 @@ CFLAGS_REMOVE_core.o = -pg
 endif
 
 obj-y := core.o ring_buffer.o callchain.o
+
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_UPROBES) += uprobes.o
+
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
new file mode 100644
index 000000000000..884817f1b0d3
--- /dev/null
+++ b/kernel/events/uprobes.c
@@ -0,0 +1,1011 @@
1/*
2 * User-space Probes (UProbes)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2008-2011
19 * Authors:
20 * Srikar Dronamraju
21 * Jim Keniston
22 */
23
24#include <linux/kernel.h>
25#include <linux/highmem.h>
26#include <linux/pagemap.h> /* read_mapping_page */
27#include <linux/slab.h>
28#include <linux/sched.h>
29#include <linux/rmap.h> /* anon_vma_prepare */
30#include <linux/mmu_notifier.h> /* set_pte_at_notify */
31#include <linux/swap.h> /* try_to_free_swap */
32
33#include <linux/uprobes.h>
34
35static struct rb_root uprobes_tree = RB_ROOT;
36
37static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
38
39#define UPROBES_HASH_SZ 13
40
41/* serialize (un)register */
42static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
43
44#define uprobes_hash(v) (&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
45
46/* serialize uprobe->pending_list */
47static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
48#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
49
50/*
51 * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe
52 * events active at this time. Probably a fine-grained per-inode count is
53 * better?
54 */
55static atomic_t uprobe_events = ATOMIC_INIT(0);
56
57/*
58 * Maintain temporary per-vma info that can be used to check whether a vma
59 * has already been handled. This structure is introduced since extending
60 * vm_area_struct wasn't recommended.
61 */
62struct vma_info {
63 struct list_head probe_list;
64 struct mm_struct *mm;
65 loff_t vaddr;
66};
67
68/*
69 * valid_vma: Verify if the specified vma is an executable vma
70 * Relax restrictions while unregistering: vm_flags might have
71 * changed after breakpoint was inserted.
72 * - is_register: indicates if we are in register context.
73 * - Return true if the specified vma is an
74 * executable vma.
75 */
76static bool valid_vma(struct vm_area_struct *vma, bool is_register)
77{
78 if (!vma->vm_file)
79 return false;
80
81 if (!is_register)
82 return true;
83
84 if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC))
85 return true;
86
87 return false;
88}
89
90static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
91{
92 loff_t vaddr;
93
94 vaddr = vma->vm_start + offset;
95 vaddr -= vma->vm_pgoff << PAGE_SHIFT;
96
97 return vaddr;
98}
99
100/**
101 * __replace_page - replace page in vma by new page.
102 * based on replace_page in mm/ksm.c
103 *
104 * @vma: vma that holds the pte pointing to page
105 * @page: the cowed page we are replacing by kpage
106 * @kpage: the modified page we replace page by
107 *
108 * Returns 0 on success, -EFAULT on failure.
109 */
110static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
111{
112 struct mm_struct *mm = vma->vm_mm;
113 pgd_t *pgd;
114 pud_t *pud;
115 pmd_t *pmd;
116 pte_t *ptep;
117 spinlock_t *ptl;
118 unsigned long addr;
119 int err = -EFAULT;
120
121 addr = page_address_in_vma(page, vma);
122 if (addr == -EFAULT)
123 goto out;
124
125 pgd = pgd_offset(mm, addr);
126 if (!pgd_present(*pgd))
127 goto out;
128
129 pud = pud_offset(pgd, addr);
130 if (!pud_present(*pud))
131 goto out;
132
133 pmd = pmd_offset(pud, addr);
134 if (!pmd_present(*pmd))
135 goto out;
136
137 ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
138 if (!ptep)
139 goto out;
140
141 get_page(kpage);
142 page_add_new_anon_rmap(kpage, vma, addr);
143
144 flush_cache_page(vma, addr, pte_pfn(*ptep));
145 ptep_clear_flush(vma, addr, ptep);
146 set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
147
148 page_remove_rmap(page);
149 if (!page_mapped(page))
150 try_to_free_swap(page);
151 put_page(page);
152 pte_unmap_unlock(ptep, ptl);
153 err = 0;
154
155out:
156 return err;
157}
158
159/**
160 * is_bkpt_insn - check if instruction is breakpoint instruction.
161 * @insn: instruction to be checked.
162 * Default implementation of is_bkpt_insn
163 * Returns true if @insn is a breakpoint instruction.
164 */
165bool __weak is_bkpt_insn(uprobe_opcode_t *insn)
166{
167 return *insn == UPROBES_BKPT_INSN;
168}
169
170/*
171 * NOTE:
172 * Expect the breakpoint instruction to be the smallest size instruction for
173 * the architecture. If an arch has variable-length instructions and the
174 * breakpoint instruction is not the smallest-length instruction
175 * supported by that architecture, then we need to modify read_opcode /
176 * write_opcode accordingly. This would never be a problem for archs that
177 * have fixed length instructions.
178 */
179
180/*
181 * write_opcode - write the opcode at a given virtual address.
182 * @mm: the probed process address space.
183 * @uprobe: the breakpointing information.
184 * @vaddr: the virtual address to store the opcode.
185 * @opcode: opcode to be written at @vaddr.
186 *
187 * Called with mm->mmap_sem held (for read and with a reference to
188 * mm).
189 *
190 * For mm @mm, write the opcode at @vaddr.
191 * Return 0 (success) or a negative errno.
192 */
193static int write_opcode(struct mm_struct *mm, struct uprobe *uprobe,
194 unsigned long vaddr, uprobe_opcode_t opcode)
195{
196 struct page *old_page, *new_page;
197 struct address_space *mapping;
198 void *vaddr_old, *vaddr_new;
199 struct vm_area_struct *vma;
200 loff_t addr;
201 int ret;
202
203 /* Read the page with vaddr into memory */
204 ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
205 if (ret <= 0)
206 return ret;
207
208 ret = -EINVAL;
209
210 /*
211 * We are interested in text pages only. Our pages of interest
212 * should be mapped for read and execute only. We desist from
213 * adding probes in write mapped pages since the breakpoints
214 * might end up in the file copy.
215 */
216 if (!valid_vma(vma, is_bkpt_insn(&opcode)))
217 goto put_out;
218
219 mapping = uprobe->inode->i_mapping;
220 if (mapping != vma->vm_file->f_mapping)
221 goto put_out;
222
223 addr = vma_address(vma, uprobe->offset);
224 if (vaddr != (unsigned long)addr)
225 goto put_out;
226
227 ret = -ENOMEM;
228 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
229 if (!new_page)
230 goto put_out;
231
232 __SetPageUptodate(new_page);
233
234 /*
235 * lock page will serialize against do_wp_page()'s
236 * PageAnon() handling
237 */
238 lock_page(old_page);
239 /* copy the page now that we've got it stable */
240 vaddr_old = kmap_atomic(old_page);
241 vaddr_new = kmap_atomic(new_page);
242
243 memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
244
245 /* poke the new insn in, ASSUMES we don't cross page boundary */
246 vaddr &= ~PAGE_MASK;
247 BUG_ON(vaddr + uprobe_opcode_sz > PAGE_SIZE);
248 memcpy(vaddr_new + vaddr, &opcode, uprobe_opcode_sz);
249
250 kunmap_atomic(vaddr_new);
251 kunmap_atomic(vaddr_old);
252
253 ret = anon_vma_prepare(vma);
254 if (ret)
255 goto unlock_out;
256
257 lock_page(new_page);
258 ret = __replace_page(vma, old_page, new_page);
259 unlock_page(new_page);
260
261unlock_out:
262 unlock_page(old_page);
263 page_cache_release(new_page);
264
265put_out:
266 put_page(old_page);
267
268 return ret;
269}
270
271/**
272 * read_opcode - read the opcode at a given virtual address.
273 * @mm: the probed process address space.
274 * @vaddr: the virtual address to read the opcode.
275 * @opcode: location to store the read opcode.
276 *
277 * Called with mm->mmap_sem held (for read and with a reference to
278 * mm).
279 *
280 * For mm @mm, read the opcode at @vaddr and store it in @opcode.
281 * Return 0 (success) or a negative errno.
282 */
283static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t *opcode)
284{
285 struct page *page;
286 void *vaddr_new;
287 int ret;
288
289 ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL);
290 if (ret <= 0)
291 return ret;
292
293 lock_page(page);
294 vaddr_new = kmap_atomic(page);
295 vaddr &= ~PAGE_MASK;
296 memcpy(opcode, vaddr_new + vaddr, uprobe_opcode_sz);
297 kunmap_atomic(vaddr_new);
298 unlock_page(page);
299
300 put_page(page);
301
302 return 0;
303}
304
305static int is_bkpt_at_addr(struct mm_struct *mm, unsigned long vaddr)
306{
307 uprobe_opcode_t opcode;
308 int result;
309
310 result = read_opcode(mm, vaddr, &opcode);
311 if (result)
312 return result;
313
314 if (is_bkpt_insn(&opcode))
315 return 1;
316
317 return 0;
318}
319
320/**
321 * set_bkpt - store breakpoint at a given address.
322 * @mm: the probed process address space.
323 * @uprobe: the probepoint information.
324 * @vaddr: the virtual address to insert the opcode.
325 *
326 * For mm @mm, store the breakpoint instruction at @vaddr.
327 * Return 0 (success) or a negative errno.
328 */
329int __weak set_bkpt(struct mm_struct *mm, struct uprobe *uprobe, unsigned long vaddr)
330{
331 int result;
332
333 result = is_bkpt_at_addr(mm, vaddr);
334 if (result == 1)
335 return -EEXIST;
336
337 if (result)
338 return result;
339
340 return write_opcode(mm, uprobe, vaddr, UPROBES_BKPT_INSN);
341}
342
343/**
344 * set_orig_insn - Restore the original instruction.
345 * @mm: the probed process address space.
346 * @uprobe: the probepoint information.
347 * @vaddr: the virtual address to insert the opcode.
348 * @verify: if true, verify existence of breakpoint instruction.
349 *
350 * For mm @mm, restore the original opcode (opcode) at @vaddr.
351 * Return 0 (success) or a negative errno.
352 */
353int __weak
354set_orig_insn(struct mm_struct *mm, struct uprobe *uprobe, unsigned long vaddr, bool verify)
355{
356 if (verify) {
357 int result;
358
359 result = is_bkpt_at_addr(mm, vaddr);
360 if (!result)
361 return -EINVAL;
362
363 if (result != 1)
364 return result;
365 }
366 return write_opcode(mm, uprobe, vaddr, *(uprobe_opcode_t *)uprobe->insn);
367}
368
369static int match_uprobe(struct uprobe *l, struct uprobe *r)
370{
371 if (l->inode < r->inode)
372 return -1;
373
374 if (l->inode > r->inode)
375 return 1;
376
377 if (l->offset < r->offset)
378 return -1;
379
380 if (l->offset > r->offset)
381 return 1;
382
383 return 0;
384}
385
386static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
387{
388 struct uprobe u = { .inode = inode, .offset = offset };
389 struct rb_node *n = uprobes_tree.rb_node;
390 struct uprobe *uprobe;
391 int match;
392
393 while (n) {
394 uprobe = rb_entry(n, struct uprobe, rb_node);
395 match = match_uprobe(&u, uprobe);
396 if (!match) {
397 atomic_inc(&uprobe->ref);
398 return uprobe;
399 }
400
401 if (match < 0)
402 n = n->rb_left;
403 else
404 n = n->rb_right;
405 }
406 return NULL;
407}
408
409/*
410 * Find a uprobe corresponding to a given inode:offset
411 * Acquires uprobes_treelock
412 */
413static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
414{
415 struct uprobe *uprobe;
416 unsigned long flags;
417
418 spin_lock_irqsave(&uprobes_treelock, flags);
419 uprobe = __find_uprobe(inode, offset);
420 spin_unlock_irqrestore(&uprobes_treelock, flags);
421
422 return uprobe;
423}
424
425static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
426{
427 struct rb_node **p = &uprobes_tree.rb_node;
428 struct rb_node *parent = NULL;
429 struct uprobe *u;
430 int match;
431
432 while (*p) {
433 parent = *p;
434 u = rb_entry(parent, struct uprobe, rb_node);
435 match = match_uprobe(uprobe, u);
436 if (!match) {
437 atomic_inc(&u->ref);
438 return u;
439 }
440
441 if (match < 0)
442 p = &parent->rb_left;
443 else
444 p = &parent->rb_right;
445
446 }
447
448 u = NULL;
449 rb_link_node(&uprobe->rb_node, parent, p);
450 rb_insert_color(&uprobe->rb_node, &uprobes_tree);
451 /* get access + creation ref */
452 atomic_set(&uprobe->ref, 2);
453
454 return u;
455}
456
457/*
458 * Acquire uprobes_treelock.
459 * Matching uprobe already exists in rbtree;
460 * increment (access refcount) and return the matching uprobe.
461 *
462 * No matching uprobe; insert the uprobe in rb_tree;
463 * get a double refcount (access + creation) and return NULL.
464 */
465static struct uprobe *insert_uprobe(struct uprobe *uprobe)
466{
467 unsigned long flags;
468 struct uprobe *u;
469
470 spin_lock_irqsave(&uprobes_treelock, flags);
471 u = __insert_uprobe(uprobe);
472 spin_unlock_irqrestore(&uprobes_treelock, flags);
473
474 return u;
475}
476
477static void put_uprobe(struct uprobe *uprobe)
478{
479 if (atomic_dec_and_test(&uprobe->ref))
480 kfree(uprobe);
481}
482
483static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
484{
485 struct uprobe *uprobe, *cur_uprobe;
486
487 uprobe = kzalloc(sizeof(struct uprobe), GFP_KERNEL);
488 if (!uprobe)
489 return NULL;
490
491 uprobe->inode = igrab(inode);
492 uprobe->offset = offset;
493 init_rwsem(&uprobe->consumer_rwsem);
494 INIT_LIST_HEAD(&uprobe->pending_list);
495
496 /* add to uprobes_tree, sorted on inode:offset */
497 cur_uprobe = insert_uprobe(uprobe);
498
499 /* a uprobe exists for this inode:offset combination */
500 if (cur_uprobe) {
501 kfree(uprobe);
502 uprobe = cur_uprobe;
503 iput(inode);
504 } else {
505 atomic_inc(&uprobe_events);
506 }
507
508 return uprobe;
509}
510
511/* Returns the previous consumer */
512static struct uprobe_consumer *
513consumer_add(struct uprobe *uprobe, struct uprobe_consumer *consumer)
514{
515 down_write(&uprobe->consumer_rwsem);
516 consumer->next = uprobe->consumers;
517 uprobe->consumers = consumer;
518 up_write(&uprobe->consumer_rwsem);
519
520 return consumer->next;
521}
522
523/*
524 * For uprobe @uprobe, delete the consumer @consumer.
525 * Return true if the @consumer is deleted successfully,
526 * false otherwise.
527 */
528static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *consumer)
529{
530 struct uprobe_consumer **con;
531 bool ret = false;
532
533 down_write(&uprobe->consumer_rwsem);
534 for (con = &uprobe->consumers; *con; con = &(*con)->next) {
535 if (*con == consumer) {
536 *con = consumer->next;
537 ret = true;
538 break;
539 }
540 }
541 up_write(&uprobe->consumer_rwsem);
542
543 return ret;
544}
545
546static int __copy_insn(struct address_space *mapping,
547 struct vm_area_struct *vma, char *insn,
548 unsigned long nbytes, unsigned long offset)
549{
550 struct file *filp = vma->vm_file;
551 struct page *page;
552 void *vaddr;
553 unsigned long off1;
554 unsigned long idx;
555
556 if (!filp)
557 return -EINVAL;
558
559 idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT);
560 off1 = offset &= ~PAGE_MASK;
561
562 /*
563 * Ensure that the page that has the original instruction is
564 * populated and in page-cache.
565 */
566 page = read_mapping_page(mapping, idx, filp);
567 if (IS_ERR(page))
568 return PTR_ERR(page);
569
570 vaddr = kmap_atomic(page);
571 memcpy(insn, vaddr + off1, nbytes);
572 kunmap_atomic(vaddr);
573 page_cache_release(page);
574
575 return 0;
576}
577
578static int copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
579{
580 struct address_space *mapping;
581 unsigned long nbytes;
582 int bytes;
583
584 addr &= ~PAGE_MASK;
585 nbytes = PAGE_SIZE - addr;
586 mapping = uprobe->inode->i_mapping;
587
588 /* Instruction at end of binary; copy only available bytes */
589 if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size)
590 bytes = uprobe->inode->i_size - uprobe->offset;
591 else
592 bytes = MAX_UINSN_BYTES;
593
594 /* Instruction at the page-boundary; copy bytes in second page */
595 if (nbytes < bytes) {
596 if (__copy_insn(mapping, vma, uprobe->insn + nbytes,
597 bytes - nbytes, uprobe->offset + nbytes))
598 return -ENOMEM;
599
600 bytes = nbytes;
601 }
602 return __copy_insn(mapping, vma, uprobe->insn, bytes, uprobe->offset);
603}
604
605static int install_breakpoint(struct mm_struct *mm, struct uprobe *uprobe,
606 struct vm_area_struct *vma, loff_t vaddr)
607{
608 unsigned long addr;
609 int ret;
610
611 /*
612 * If the probe is being deleted, the unregistering thread could already
613 * be done with its vma-rmap walk. Adding a probe now can be fatal since
614 * nobody will be able to clean it up. Also we could be on the fork or
615 * mremap path, where the probe might have already been inserted.
616 * Hence behave as if probe already existed.
617 */
618 if (!uprobe->consumers)
619 return -EEXIST;
620
621 addr = (unsigned long)vaddr;
622
623 if (!(uprobe->flags & UPROBES_COPY_INSN)) {
624 ret = copy_insn(uprobe, vma, addr);
625 if (ret)
626 return ret;
627
628 if (is_bkpt_insn((uprobe_opcode_t *)uprobe->insn))
629 return -EEXIST;
630
631 ret = arch_uprobes_analyze_insn(mm, uprobe);
632 if (ret)
633 return ret;
634
635 uprobe->flags |= UPROBES_COPY_INSN;
636 }
637 ret = set_bkpt(mm, uprobe, addr);
638
639 return ret;
640}
641
642static void remove_breakpoint(struct mm_struct *mm, struct uprobe *uprobe, loff_t vaddr)
643{
644 set_orig_insn(mm, uprobe, (unsigned long)vaddr, true);
645}
646
647static void delete_uprobe(struct uprobe *uprobe)
648{
649 unsigned long flags;
650
651 spin_lock_irqsave(&uprobes_treelock, flags);
652 rb_erase(&uprobe->rb_node, &uprobes_tree);
653 spin_unlock_irqrestore(&uprobes_treelock, flags);
654 iput(uprobe->inode);
655 put_uprobe(uprobe);
656 atomic_dec(&uprobe_events);
657}
658
659static struct vma_info *__find_next_vma_info(struct list_head *head,
660 loff_t offset, struct address_space *mapping,
661 struct vma_info *vi, bool is_register)
662{
663 struct prio_tree_iter iter;
664 struct vm_area_struct *vma;
665 struct vma_info *tmpvi;
666 unsigned long pgoff;
667 int existing_vma;
668 loff_t vaddr;
669
670 pgoff = offset >> PAGE_SHIFT;
671
672 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
673 if (!valid_vma(vma, is_register))
674 continue;
675
676 existing_vma = 0;
677 vaddr = vma_address(vma, offset);
678
679 list_for_each_entry(tmpvi, head, probe_list) {
680 if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) {
681 existing_vma = 1;
682 break;
683 }
684 }
685
686 /*
687 * Another vma needs a probe to be installed. However skip
688 * installing the probe if the vma is about to be unlinked.
689 */
690 if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) {
691 vi->mm = vma->vm_mm;
692 vi->vaddr = vaddr;
693 list_add(&vi->probe_list, head);
694
695 return vi;
696 }
697 }
698
699 return NULL;
700}
701
702/*
703 * Iterate in the rmap prio tree and find a vma where a probe has not
704 * yet been inserted.
705 */
706static struct vma_info *
707find_next_vma_info(struct list_head *head, loff_t offset, struct address_space *mapping,
708 bool is_register)
709{
710 struct vma_info *vi, *retvi;
711
712 vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL);
713 if (!vi)
714 return ERR_PTR(-ENOMEM);
715
716 mutex_lock(&mapping->i_mmap_mutex);
717 retvi = __find_next_vma_info(head, offset, mapping, vi, is_register);
718 mutex_unlock(&mapping->i_mmap_mutex);
719
720 if (!retvi)
721 kfree(vi);
722
723 return retvi;
724}
725
726static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
727{
728 struct list_head try_list;
729 struct vm_area_struct *vma;
730 struct address_space *mapping;
731 struct vma_info *vi, *tmpvi;
732 struct mm_struct *mm;
733 loff_t vaddr;
734 int ret;
735
736 mapping = uprobe->inode->i_mapping;
737 INIT_LIST_HEAD(&try_list);
738
739 ret = 0;
740
741 for (;;) {
742 vi = find_next_vma_info(&try_list, uprobe->offset, mapping, is_register);
743 if (!vi)
744 break;
745
746 if (IS_ERR(vi)) {
747 ret = PTR_ERR(vi);
748 break;
749 }
750
751 mm = vi->mm;
752 down_read(&mm->mmap_sem);
753 vma = find_vma(mm, (unsigned long)vi->vaddr);
754 if (!vma || !valid_vma(vma, is_register)) {
755 list_del(&vi->probe_list);
756 kfree(vi);
757 up_read(&mm->mmap_sem);
758 mmput(mm);
759 continue;
760 }
761 vaddr = vma_address(vma, uprobe->offset);
762 if (vma->vm_file->f_mapping->host != uprobe->inode ||
763 vaddr != vi->vaddr) {
764 list_del(&vi->probe_list);
765 kfree(vi);
766 up_read(&mm->mmap_sem);
767 mmput(mm);
768 continue;
769 }
770
771 if (is_register)
772 ret = install_breakpoint(mm, uprobe, vma, vi->vaddr);
773 else
774 remove_breakpoint(mm, uprobe, vi->vaddr);
775
776 up_read(&mm->mmap_sem);
777 mmput(mm);
778 if (is_register) {
779 if (ret && ret == -EEXIST)
780 ret = 0;
781 if (ret)
782 break;
783 }
784 }
785
786 list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) {
787 list_del(&vi->probe_list);
788 kfree(vi);
789 }
790
791 return ret;
792}
793
794static int __uprobe_register(struct uprobe *uprobe)
795{
796 return register_for_each_vma(uprobe, true);
797}
798
799static void __uprobe_unregister(struct uprobe *uprobe)
800{
801 if (!register_for_each_vma(uprobe, false))
802 delete_uprobe(uprobe);
803
804	/* TODO: can't unregister? schedule a worker thread */
805}
806
807/*
808 * uprobe_register - register a probe
809 * @inode: the file in which the probe has to be placed.
810 * @offset: offset from the start of the file.
811 * @consumer: information on how to handle the probe.
812 *
813 * Apart from the access refcount, uprobe_register() takes a creation
814 * refcount (via alloc_uprobe) if and only if this @uprobe is getting
815 * inserted into the rbtree (i.e. first consumer for a @inode:@offset
816 * tuple). Creation refcount stops uprobe_unregister from freeing the
817 * @uprobe even before the register operation is complete. Creation
818 * refcount is released when the last @consumer for the @uprobe
819 * unregisters.
820 *
821 * Return errno if it cannot successfully install probes,
822 * else return 0 (success)
823 */
824int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *consumer)
825{
826 struct uprobe *uprobe;
827 int ret;
828
829 if (!inode || !consumer || consumer->next)
830 return -EINVAL;
831
832 if (offset > i_size_read(inode))
833 return -EINVAL;
834
835 ret = 0;
836 mutex_lock(uprobes_hash(inode));
837 uprobe = alloc_uprobe(inode, offset);
838
839 if (uprobe && !consumer_add(uprobe, consumer)) {
840 ret = __uprobe_register(uprobe);
841 if (ret) {
842 uprobe->consumers = NULL;
843 __uprobe_unregister(uprobe);
844 } else {
845 uprobe->flags |= UPROBES_RUN_HANDLER;
846 }
847 }
848
849 mutex_unlock(uprobes_hash(inode));
850 put_uprobe(uprobe);
851
852 return ret;
853}
854
855/*
856 * uprobe_unregister - unregister an already registered probe.
857 * @inode: the file in which the probe has to be removed.
858 * @offset: offset from the start of the file.
859 * @consumer: identify which probe if multiple probes are colocated.
860 */
861void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *consumer)
862{
863 struct uprobe *uprobe;
864
865 if (!inode || !consumer)
866 return;
867
868 uprobe = find_uprobe(inode, offset);
869 if (!uprobe)
870 return;
871
872 mutex_lock(uprobes_hash(inode));
873
874 if (consumer_del(uprobe, consumer)) {
875 if (!uprobe->consumers) {
876 __uprobe_unregister(uprobe);
877 uprobe->flags &= ~UPROBES_RUN_HANDLER;
878 }
879 }
880
881 mutex_unlock(uprobes_hash(inode));
882 if (uprobe)
883 put_uprobe(uprobe);
884}
885
886/*
887 * Of all the nodes that correspond to the given inode, return the node
888 * with the least offset.
889 */
890static struct rb_node *find_least_offset_node(struct inode *inode)
891{
892 struct uprobe u = { .inode = inode, .offset = 0};
893 struct rb_node *n = uprobes_tree.rb_node;
894 struct rb_node *close_node = NULL;
895 struct uprobe *uprobe;
896 int match;
897
898 while (n) {
899 uprobe = rb_entry(n, struct uprobe, rb_node);
900 match = match_uprobe(&u, uprobe);
901
902 if (uprobe->inode == inode)
903 close_node = n;
904
905 if (!match)
906 return close_node;
907
908 if (match < 0)
909 n = n->rb_left;
910 else
911 n = n->rb_right;
912 }
913
914 return close_node;
915}
916
917/*
918 * For a given inode, build a list of probes that need to be inserted.
919 */
920static void build_probe_list(struct inode *inode, struct list_head *head)
921{
922 struct uprobe *uprobe;
923 unsigned long flags;
924 struct rb_node *n;
925
926 spin_lock_irqsave(&uprobes_treelock, flags);
927
928 n = find_least_offset_node(inode);
929
930 for (; n; n = rb_next(n)) {
931 uprobe = rb_entry(n, struct uprobe, rb_node);
932 if (uprobe->inode != inode)
933 break;
934
935 list_add(&uprobe->pending_list, head);
936 atomic_inc(&uprobe->ref);
937 }
938
939 spin_unlock_irqrestore(&uprobes_treelock, flags);
940}
941
942/*
943 * Called from mmap_region.
944 * called with mm->mmap_sem acquired.
945 *
946 * Return a negative error number if we fail to insert probes and we
947 * cannot bail out.
948 * Return 0 otherwise, i.e.:
949 *
950 * - successful insertion of probes
951 * - (or) no possible probes to be inserted.
952 * - (or) insertion of probes failed but we can bail-out.
953 */
954int uprobe_mmap(struct vm_area_struct *vma)
955{
956 struct list_head tmp_list;
957 struct uprobe *uprobe, *u;
958 struct inode *inode;
959 int ret;
960
961 if (!atomic_read(&uprobe_events) || !valid_vma(vma, true))
962 return 0;
963
964 inode = vma->vm_file->f_mapping->host;
965 if (!inode)
966 return 0;
967
968 INIT_LIST_HEAD(&tmp_list);
969 mutex_lock(uprobes_mmap_hash(inode));
970 build_probe_list(inode, &tmp_list);
971
972 ret = 0;
973
974 list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
975 loff_t vaddr;
976
977 list_del(&uprobe->pending_list);
978 if (!ret) {
979 vaddr = vma_address(vma, uprobe->offset);
980 if (vaddr >= vma->vm_start && vaddr < vma->vm_end) {
981 ret = install_breakpoint(vma->vm_mm, uprobe, vma, vaddr);
982 /* Ignore double add: */
983 if (ret == -EEXIST)
984 ret = 0;
985 }
986 }
987 put_uprobe(uprobe);
988 }
989
990 mutex_unlock(uprobes_mmap_hash(inode));
991
992 return ret;
993}
994
995static int __init init_uprobes(void)
996{
997 int i;
998
999 for (i = 0; i < UPROBES_HASH_SZ; i++) {
1000 mutex_init(&uprobes_mutex[i]);
1001 mutex_init(&uprobes_mmap_mutex[i]);
1002 }
1003 return 0;
1004}
1005
1006static void __exit exit_uprobes(void)
1007{
1008}
1009
1010module_init(init_uprobes);
1011module_exit(exit_uprobes);
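
A minimal usage sketch of the interface this patch adds (uprobe_register/uprobe_unregister, whose signatures appear above). It is illustrative only, not part of the commit: it assumes the struct uprobe_consumer layout from include/linux/uprobes.h at this stage of the series (a handler callback plus the next pointer used by consumer_add()/consumer_del()), and the file offset and all "sample_" names are hypothetical.

#include <linux/fs.h>
#include <linux/ptrace.h>
#include <linux/uprobes.h>

/*
 * Hypothetical consumer; assumes uprobe_consumer provides .handler and
 * .next. The .next field must be NULL when registering, per the
 * consumer->next check in uprobe_register().
 */
static int sample_handler(struct uprobe_consumer *self, struct pt_regs *regs)
{
	/* Runs when a task hits the probed instruction. */
	return 0;
}

static struct uprobe_consumer sample_consumer = {
	.handler = sample_handler,
};

/* Place a probe at a hypothetical file offset within @inode. */
static int sample_attach(struct inode *inode)
{
	return uprobe_register(inode, 0x4a0, &sample_consumer);
}

static void sample_detach(struct inode *inode)
{
	uprobe_unregister(inode, 0x4a0, &sample_consumer);
}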