diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-24 14:39:34 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-24 14:39:34 -0400 |
commit | 654443e20dfc0617231f28a07c96a979ee1a0239 (patch) | |
tree | a0dc3f093eb13892539082e663607c34b4fc2d07 /kernel/fork.c | |
parent | 2c01e7bc46f10e9190818437e564f7e0db875ae9 (diff) | |
parent | 9cba26e66d09bf394ae5a739627a1dc8b7cae6f4 (diff) |
Merge branch 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull user-space probe instrumentation from Ingo Molnar:
"The uprobes code originates from SystemTap and has been used for years
in Fedora and RHEL kernels. This version is much rewritten, reviews
from PeterZ, Oleg and myself shaped the end result.
This tree includes uprobes support in 'perf probe' - but SystemTap
(and other tools) can take advantage of user probe points as well.
Here is a sample usage of uprobes via perf: profiling malloc() calls
without modifying user-space binaries.
First boot a new kernel with CONFIG_UPROBE_EVENT=y enabled.
If you don't know which function you want to probe, you can pick one
from 'perf top' or get a list of all functions that can be probed
within libc (binaries can be specified as well):
$ perf probe -F -x /lib/libc.so.6
To probe libc's malloc():
$ perf probe -x /lib64/libc.so.6 malloc
Added new event:
probe_libc:malloc (on 0x7eac0)
You can now use it in all perf tools, such as:
perf record -e probe_libc:malloc -aR sleep 1
Make use of it to create a call graph (as the flat profile is going to
look very boring):
$ perf record -e probe_libc:malloc -gR make
[ perf record: Woken up 173 times to write data ]
[ perf record: Captured and wrote 44.190 MB perf.data (~1930712 samples) ]
$ perf report | less
32.03% git libc-2.15.so [.] malloc
|
--- malloc
29.49% cc1 libc-2.15.so [.] malloc
|
--- malloc
|
|--0.95%-- 0x208eb1000000000
|
|--0.63%-- htab_traverse_noresize
11.04% as libc-2.15.so [.] malloc
|
--- malloc
|
7.15% ld libc-2.15.so [.] malloc
|
--- malloc
|
5.07% sh libc-2.15.so [.] malloc
|
--- malloc
|
4.99% python-config libc-2.15.so [.] malloc
|
--- malloc
|
4.54% make libc-2.15.so [.] malloc
|
--- malloc
|
|--7.34%-- glob
| |
| |--93.18%-- 0x41588f
| |
| --6.82%-- glob
| 0x41588f
...
Or:
$ perf report -g flat | less
# Overhead Command Shared Object Symbol
# ........ ............. ............. ..........
#
32.03% git libc-2.15.so [.] malloc
27.19%
malloc
29.49% cc1 libc-2.15.so [.] malloc
24.77%
malloc
11.04% as libc-2.15.so [.] malloc
11.02%
malloc
7.15% ld libc-2.15.so [.] malloc
6.57%
malloc
...
The core uprobes design is fairly straightforward: uprobes probe
points register themselves at (inode:offset) addresses of
libraries/binaries, after which all existing (or new) vmas that map
that address will have a software breakpoint injected at that address.
vmas are COW-ed to preserve original content. The probe points are
kept in an rbtree.
If user-space executes the probed inode:offset instruction address
then an event is generated which can be recovered from the regular
perf event channels and mmap-ed ring-buffer.
Multiple probes at the same address are supported, they create a
dynamic callback list of event consumers.
The basic model is further complicated by the XOL speedup: the
original instruction that is probed is copied (in an architecture
specific fashion) and executed out of line when the probe triggers.
The XOL area is a single vma per process, with a fixed number of
entries (which limits probe execution parallelism).
The API: uprobes are installed/removed via
/sys/kernel/debug/tracing/uprobe_events. The API is aligned with the
kprobes interface as much as possible, but is separate from it.
Injecting a probe point is a privileged operation, which can be relaxed
by setting perf_paranoid to -1.
You can use multiple probes as well and mix them with kprobes and
regular PMU events or tracepoints, when instrumenting a task."
Fix up trivial conflicts in mm/memory.c due to previous cleanup of
unmap_single_vma().
* 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
perf probe: Detect probe target when m/x options are absent
perf probe: Provide perf interface for uprobes
tracing: Fix kconfig warning due to a typo
tracing: Provide trace events interface for uprobes
tracing: Extract out common code for kprobes/uprobes trace events
tracing: Modify is_delete, is_return from int to bool
uprobes/core: Decrement uprobe count before the pages are unmapped
uprobes/core: Make background page replacement logic account for rss_stat counters
uprobes/core: Optimize probe hits with the help of a counter
uprobes/core: Allocate XOL slots for uprobes use
uprobes/core: Handle breakpoint and singlestep exceptions
uprobes/core: Rename bkpt to swbp
uprobes/core: Make order of function parameters consistent across functions
uprobes/core: Make macro names consistent
uprobes: Update copyright notices
uprobes/core: Move insn to arch specific structure
uprobes/core: Remove uprobe_opcode_sz
uprobes/core: Make instruction tables volatile
uprobes: Move to kernel/events/
uprobes/core: Clean up, refactor and improve the code
...
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 9 |
1 files changed, 9 insertions, 0 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 05c813dc9ecc..47b4e4f379f9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -69,6 +69,7 @@ | |||
69 | #include <linux/oom.h> | 69 | #include <linux/oom.h> |
70 | #include <linux/khugepaged.h> | 70 | #include <linux/khugepaged.h> |
71 | #include <linux/signalfd.h> | 71 | #include <linux/signalfd.h> |
72 | #include <linux/uprobes.h> | ||
72 | 73 | ||
73 | #include <asm/pgtable.h> | 74 | #include <asm/pgtable.h> |
74 | #include <asm/pgalloc.h> | 75 | #include <asm/pgalloc.h> |
@@ -451,6 +452,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
451 | 452 | ||
452 | if (retval) | 453 | if (retval) |
453 | goto out; | 454 | goto out; |
455 | |||
456 | if (file && uprobe_mmap(tmp)) | ||
457 | goto out; | ||
454 | } | 458 | } |
455 | /* a new mm has just been created */ | 459 | /* a new mm has just been created */ |
456 | arch_dup_mmap(oldmm, mm); | 460 | arch_dup_mmap(oldmm, mm); |
@@ -599,6 +603,7 @@ void mmput(struct mm_struct *mm) | |||
599 | might_sleep(); | 603 | might_sleep(); |
600 | 604 | ||
601 | if (atomic_dec_and_test(&mm->mm_users)) { | 605 | if (atomic_dec_and_test(&mm->mm_users)) { |
606 | uprobe_clear_state(mm); | ||
602 | exit_aio(mm); | 607 | exit_aio(mm); |
603 | ksm_exit(mm); | 608 | ksm_exit(mm); |
604 | khugepaged_exit(mm); /* must run before exit_mmap */ | 609 | khugepaged_exit(mm); /* must run before exit_mmap */ |
@@ -777,6 +782,8 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) | |||
777 | exit_pi_state_list(tsk); | 782 | exit_pi_state_list(tsk); |
778 | #endif | 783 | #endif |
779 | 784 | ||
785 | uprobe_free_utask(tsk); | ||
786 | |||
780 | /* Get rid of any cached register state */ | 787 | /* Get rid of any cached register state */ |
781 | deactivate_mm(tsk, mm); | 788 | deactivate_mm(tsk, mm); |
782 | 789 | ||
@@ -831,6 +838,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
831 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 838 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
832 | mm->pmd_huge_pte = NULL; | 839 | mm->pmd_huge_pte = NULL; |
833 | #endif | 840 | #endif |
841 | uprobe_reset_state(mm); | ||
834 | 842 | ||
835 | if (!mm_init(mm, tsk)) | 843 | if (!mm_init(mm, tsk)) |
836 | goto fail_nomem; | 844 | goto fail_nomem; |
@@ -1373,6 +1381,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1373 | INIT_LIST_HEAD(&p->pi_state_list); | 1381 | INIT_LIST_HEAD(&p->pi_state_list); |
1374 | p->pi_state_cache = NULL; | 1382 | p->pi_state_cache = NULL; |
1375 | #endif | 1383 | #endif |
1384 | uprobe_copy_process(p); | ||
1376 | /* | 1385 | /* |
1377 | * sigaltstack should be cleared when sharing the same VM | 1386 | * sigaltstack should be cleared when sharing the same VM |
1378 | */ | 1387 | */ |