about | summary | refs | log | tree | commit | diff | stats
path: root/mm/mmap.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-24 14:39:34 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-24 14:39:34 -0400
commit: 654443e20dfc0617231f28a07c96a979ee1a0239 (patch)
tree: a0dc3f093eb13892539082e663607c34b4fc2d07 /mm/mmap.c
parent: 2c01e7bc46f10e9190818437e564f7e0db875ae9 (diff)
parent: 9cba26e66d09bf394ae5a739627a1dc8b7cae6f4 (diff)
Merge branch 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull user-space probe instrumentation from Ingo Molnar: "The uprobes code originates from SystemTap and has been used for years in Fedora and RHEL kernels. This version is much rewritten; reviews from PeterZ, Oleg and myself shaped the end result. This tree includes uprobes support in 'perf probe' - but SystemTap (and other tools) can take advantage of user probe points as well. Sample usage of uprobes via perf, for example to profile malloc() calls without modifying user-space binaries. First boot a new kernel with CONFIG_UPROBE_EVENT=y enabled. If you don't know which function you want to probe you can pick one from 'perf top' or can get a list of all functions that can be probed within libc (binaries can be specified as well): $ perf probe -F -x /lib/libc.so.6 To probe libc's malloc(): $ perf probe -x /lib64/libc.so.6 malloc Added new event: probe_libc:malloc (on 0x7eac0) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc -aR sleep 1 Make use of it to create a call graph (as the flat profile is going to look very boring): $ perf record -e probe_libc:malloc -gR make [ perf record: Woken up 173 times to write data ] [ perf record: Captured and wrote 44.190 MB perf.data (~1930712 samples) ] $ perf report | less 32.03% git libc-2.15.so [.] malloc | --- malloc 29.49% cc1 libc-2.15.so [.] malloc | --- malloc | |--0.95%-- 0x208eb1000000000 | |--0.63%-- htab_traverse_noresize 11.04% as libc-2.15.so [.] malloc | --- malloc | 7.15% ld libc-2.15.so [.] malloc | --- malloc | 5.07% sh libc-2.15.so [.] malloc | --- malloc | 4.99% python-config libc-2.15.so [.] malloc | --- malloc | 4.54% make libc-2.15.so [.] malloc | --- malloc | |--7.34%-- glob | | | |--93.18%-- 0x41588f | | | --6.82%-- glob | 0x41588f ... Or: $ perf report -g flat | less # Overhead Command Shared Object Symbol # ........ ............. ............. .......... # 32.03% git libc-2.15.so [.] malloc 27.19% malloc 29.49% cc1 libc-2.15.so [.] malloc 24.77% malloc 11.04% as libc-2.15.so [.]
malloc 11.02% malloc 7.15% ld libc-2.15.so [.] malloc 6.57% malloc ... The core uprobes design is fairly straightforward: uprobes probe points register themselves at (inode:offset) addresses of libraries/binaries, after which all existing (or new) vmas that map that address will have a software breakpoint injected at that address. vmas are COW-ed to preserve original content. The probe points are kept in an rbtree. If user-space executes the probed inode:offset instruction address then an event is generated which can be recovered from the regular perf event channels and mmap-ed ring-buffer. Multiple probes at the same address are supported; they create a dynamic callback list of event consumers. The basic model is further complicated by the XOL speedup: the original instruction that is probed is copied (in an architecture specific fashion) and executed out of line when the probe triggers. The XOL area is a single vma per process, with a fixed number of entries (which limits probe execution parallelism). The API: uprobes are installed/removed via /sys/kernel/debug/tracing/uprobe_events; the API is integrated to align with the kprobes interface as much as possible, but is separate from it. Injecting a probe point is a privileged operation, which can be relaxed by setting perf_paranoid to -1. You can use multiple probes as well and mix them with kprobes and regular PMU events or tracepoints, when instrumenting a task." Fix up trivial conflicts in mm/memory.c due to previous cleanup of unmap_single_vma().
* 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits) perf probe: Detect probe target when m/x options are absent perf probe: Provide perf interface for uprobes tracing: Fix kconfig warning due to a typo tracing: Provide trace events interface for uprobes tracing: Extract out common code for kprobes/uprobes trace events tracing: Modify is_delete, is_return from int to bool uprobes/core: Decrement uprobe count before the pages are unmapped uprobes/core: Make background page replacement logic account for rss_stat counters uprobes/core: Optimize probe hits with the help of a counter uprobes/core: Allocate XOL slots for uprobes use uprobes/core: Handle breakpoint and singlestep exceptions uprobes/core: Rename bkpt to swbp uprobes/core: Make order of function parameters consistent across functions uprobes/core: Make macro names consistent uprobes: Update copyright notices uprobes/core: Move insn to arch specific structure uprobes/core: Remove uprobe_opcode_sz uprobes/core: Make instruction tables volatile uprobes: Move to kernel/events/ uprobes/core: Clean up, refactor and improve the code ...
Diffstat (limited to 'mm/mmap.c')
-rw-r--r-- mm/mmap.c 33
1 files changed, 32 insertions, 1 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index 69a1889f3790..e8dcfc7de866 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -30,6 +30,7 @@
30#include <linux/perf_event.h> 30#include <linux/perf_event.h>
31#include <linux/audit.h> 31#include <linux/audit.h>
32#include <linux/khugepaged.h> 32#include <linux/khugepaged.h>
33#include <linux/uprobes.h>
33 34
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
35#include <asm/cacheflush.h> 36#include <asm/cacheflush.h>
@@ -546,8 +547,15 @@ again: remove_next = 1 + (end > next->vm_end);
546 547
547 if (file) { 548 if (file) {
548 mapping = file->f_mapping; 549 mapping = file->f_mapping;
549 if (!(vma->vm_flags & VM_NONLINEAR)) 550 if (!(vma->vm_flags & VM_NONLINEAR)) {
550 root = &mapping->i_mmap; 551 root = &mapping->i_mmap;
552 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
553
554 if (adjust_next)
555 uprobe_munmap(next, next->vm_start,
556 next->vm_end);
557 }
558
551 mutex_lock(&mapping->i_mmap_mutex); 559 mutex_lock(&mapping->i_mmap_mutex);
552 if (insert) { 560 if (insert) {
553 /* 561 /*
@@ -617,8 +625,16 @@ again: remove_next = 1 + (end > next->vm_end);
617 if (mapping) 625 if (mapping)
618 mutex_unlock(&mapping->i_mmap_mutex); 626 mutex_unlock(&mapping->i_mmap_mutex);
619 627
628 if (root) {
629 uprobe_mmap(vma);
630
631 if (adjust_next)
632 uprobe_mmap(next);
633 }
634
620 if (remove_next) { 635 if (remove_next) {
621 if (file) { 636 if (file) {
637 uprobe_munmap(next, next->vm_start, next->vm_end);
622 fput(file); 638 fput(file);
623 if (next->vm_flags & VM_EXECUTABLE) 639 if (next->vm_flags & VM_EXECUTABLE)
624 removed_exe_file_vma(mm); 640 removed_exe_file_vma(mm);
@@ -638,6 +654,8 @@ again: remove_next = 1 + (end > next->vm_end);
638 goto again; 654 goto again;
639 } 655 }
640 } 656 }
657 if (insert && file)
658 uprobe_mmap(insert);
641 659
642 validate_mm(mm); 660 validate_mm(mm);
643 661
@@ -1371,6 +1389,11 @@ out:
1371 mm->locked_vm += (len >> PAGE_SHIFT); 1389 mm->locked_vm += (len >> PAGE_SHIFT);
1372 } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) 1390 } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
1373 make_pages_present(addr, addr + len); 1391 make_pages_present(addr, addr + len);
1392
1393 if (file && uprobe_mmap(vma))
1394 /* matching probes but cannot insert */
1395 goto unmap_and_free_vma;
1396
1374 return addr; 1397 return addr;
1375 1398
1376unmap_and_free_vma: 1399unmap_and_free_vma:
@@ -2358,6 +2381,10 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
2358 if ((vma->vm_flags & VM_ACCOUNT) && 2381 if ((vma->vm_flags & VM_ACCOUNT) &&
2359 security_vm_enough_memory_mm(mm, vma_pages(vma))) 2382 security_vm_enough_memory_mm(mm, vma_pages(vma)))
2360 return -ENOMEM; 2383 return -ENOMEM;
2384
2385 if (vma->vm_file && uprobe_mmap(vma))
2386 return -EINVAL;
2387
2361 vma_link(mm, vma, prev, rb_link, rb_parent); 2388 vma_link(mm, vma, prev, rb_link, rb_parent);
2362 return 0; 2389 return 0;
2363} 2390}
@@ -2427,6 +2454,10 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2427 new_vma->vm_pgoff = pgoff; 2454 new_vma->vm_pgoff = pgoff;
2428 if (new_vma->vm_file) { 2455 if (new_vma->vm_file) {
2429 get_file(new_vma->vm_file); 2456 get_file(new_vma->vm_file);
2457
2458 if (uprobe_mmap(new_vma))
2459 goto out_free_mempol;
2460
2430 if (vma->vm_flags & VM_EXECUTABLE) 2461 if (vma->vm_flags & VM_EXECUTABLE)
2431 added_exe_file_vma(mm); 2462 added_exe_file_vma(mm);
2432 } 2463 }