aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/fork.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2012-05-24 14:39:34 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-05-24 14:39:34 -0400
commit 654443e20dfc0617231f28a07c96a979ee1a0239 (patch)
tree a0dc3f093eb13892539082e663607c34b4fc2d07 /kernel/fork.c
parent 2c01e7bc46f10e9190818437e564f7e0db875ae9 (diff)
parent 9cba26e66d09bf394ae5a739627a1dc8b7cae6f4 (diff)
Merge branch 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull user-space probe instrumentation from Ingo Molnar: "The uprobes code originates from SystemTap and has been used for years in Fedora and RHEL kernels. This version is much rewritten; reviews from PeterZ, Oleg and myself shaped the end result. This tree includes uprobes support in 'perf probe' - but SystemTap (and other tools) can take advantage of user probe points as well. Sample usage of uprobes via perf, for example to profile malloc() calls without modifying user-space binaries. First boot a new kernel with CONFIG_UPROBE_EVENT=y enabled. If you don't know which function you want to probe you can pick one from 'perf top' or can get a list of all functions that can be probed within libc (binaries can be specified as well): $ perf probe -F -x /lib/libc.so.6 To probe libc's malloc(): $ perf probe -x /lib64/libc.so.6 malloc Added new event: probe_libc:malloc (on 0x7eac0) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc -aR sleep 1 Make use of it to create a call graph (as the flat profile is going to look very boring): $ perf record -e probe_libc:malloc -gR make [ perf record: Woken up 173 times to write data ] [ perf record: Captured and wrote 44.190 MB perf.data (~1930712 samples) ] $ perf report | less 32.03% git libc-2.15.so [.] malloc | --- malloc 29.49% cc1 libc-2.15.so [.] malloc | --- malloc | |--0.95%-- 0x208eb1000000000 | |--0.63%-- htab_traverse_noresize 11.04% as libc-2.15.so [.] malloc | --- malloc | 7.15% ld libc-2.15.so [.] malloc | --- malloc | 5.07% sh libc-2.15.so [.] malloc | --- malloc | 4.99% python-config libc-2.15.so [.] malloc | --- malloc | 4.54% make libc-2.15.so [.] malloc | --- malloc | |--7.34%-- glob | | | |--93.18%-- 0x41588f | | | --6.82%-- glob | 0x41588f ... Or: $ perf report -g flat | less # Overhead Command Shared Object Symbol # ........ ............. ............. .......... # 32.03% git libc-2.15.so [.] malloc 27.19% malloc 29.49% cc1 libc-2.15.so [.] malloc 24.77% malloc 11.04% as libc-2.15.so [.] 
malloc 11.02% malloc 7.15% ld libc-2.15.so [.] malloc 6.57% malloc ... The core uprobes design is fairly straightforward: uprobes probe points register themselves at (inode:offset) addresses of libraries/binaries, after which all existing (or new) vmas that map that address will have a software breakpoint injected at that address. vmas are COW-ed to preserve original content. The probe points are kept in an rbtree. If user-space executes the probed inode:offset instruction address then an event is generated which can be recovered from the regular perf event channels and mmap-ed ring-buffer. Multiple probes at the same address are supported; they create a dynamic callback list of event consumers. The basic model is further complicated by the XOL speedup: the original instruction that is probed is copied (in an architecture-specific fashion) and executed out of line when the probe triggers. The XOL area is a single vma per process, with a fixed number of entries (which limits probe execution parallelism). The API: uprobes are installed/removed via /sys/kernel/debug/tracing/uprobe_events; the API is integrated to align with the kprobes interface as much as possible, but is separate from it. Injecting a probe point is a privileged operation, which can be relaxed by setting perf_paranoid to -1. You can use multiple probes as well and mix them with kprobes and regular PMU events or tracepoints, when instrumenting a task." Fix up trivial conflicts in mm/memory.c due to previous cleanup of unmap_single_vma(). 
* 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits) perf probe: Detect probe target when m/x options are absent perf probe: Provide perf interface for uprobes tracing: Fix kconfig warning due to a typo tracing: Provide trace events interface for uprobes tracing: Extract out common code for kprobes/uprobes trace events tracing: Modify is_delete, is_return from int to bool uprobes/core: Decrement uprobe count before the pages are unmapped uprobes/core: Make background page replacement logic account for rss_stat counters uprobes/core: Optimize probe hits with the help of a counter uprobes/core: Allocate XOL slots for uprobes use uprobes/core: Handle breakpoint and singlestep exceptions uprobes/core: Rename bkpt to swbp uprobes/core: Make order of function parameters consistent across functions uprobes/core: Make macro names consistent uprobes: Update copyright notices uprobes/core: Move insn to arch specific structure uprobes/core: Remove uprobe_opcode_sz uprobes/core: Make instruction tables volatile uprobes: Move to kernel/events/ uprobes/core: Clean up, refactor and improve the code ...
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- kernel/fork.c 9
1 files changed, 9 insertions, 0 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 05c813dc9ecc..47b4e4f379f9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -69,6 +69,7 @@
69#include <linux/oom.h> 69#include <linux/oom.h>
70#include <linux/khugepaged.h> 70#include <linux/khugepaged.h>
71#include <linux/signalfd.h> 71#include <linux/signalfd.h>
72#include <linux/uprobes.h>
72 73
73#include <asm/pgtable.h> 74#include <asm/pgtable.h>
74#include <asm/pgalloc.h> 75#include <asm/pgalloc.h>
@@ -451,6 +452,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
451 452
452 if (retval) 453 if (retval)
453 goto out; 454 goto out;
455
456 if (file && uprobe_mmap(tmp))
457 goto out;
454 } 458 }
455 /* a new mm has just been created */ 459 /* a new mm has just been created */
456 arch_dup_mmap(oldmm, mm); 460 arch_dup_mmap(oldmm, mm);
@@ -599,6 +603,7 @@ void mmput(struct mm_struct *mm)
599 might_sleep(); 603 might_sleep();
600 604
601 if (atomic_dec_and_test(&mm->mm_users)) { 605 if (atomic_dec_and_test(&mm->mm_users)) {
606 uprobe_clear_state(mm);
602 exit_aio(mm); 607 exit_aio(mm);
603 ksm_exit(mm); 608 ksm_exit(mm);
604 khugepaged_exit(mm); /* must run before exit_mmap */ 609 khugepaged_exit(mm); /* must run before exit_mmap */
@@ -777,6 +782,8 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
777 exit_pi_state_list(tsk); 782 exit_pi_state_list(tsk);
778#endif 783#endif
779 784
785 uprobe_free_utask(tsk);
786
780 /* Get rid of any cached register state */ 787 /* Get rid of any cached register state */
781 deactivate_mm(tsk, mm); 788 deactivate_mm(tsk, mm);
782 789
@@ -831,6 +838,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
831#ifdef CONFIG_TRANSPARENT_HUGEPAGE 838#ifdef CONFIG_TRANSPARENT_HUGEPAGE
832 mm->pmd_huge_pte = NULL; 839 mm->pmd_huge_pte = NULL;
833#endif 840#endif
841 uprobe_reset_state(mm);
834 842
835 if (!mm_init(mm, tsk)) 843 if (!mm_init(mm, tsk))
836 goto fail_nomem; 844 goto fail_nomem;
@@ -1373,6 +1381,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1373 INIT_LIST_HEAD(&p->pi_state_list); 1381 INIT_LIST_HEAD(&p->pi_state_list);
1374 p->pi_state_cache = NULL; 1382 p->pi_state_cache = NULL;
1375#endif 1383#endif
1384 uprobe_copy_process(p);
1376 /* 1385 /*
1377 * sigaltstack should be cleared when sharing the same VM 1386 * sigaltstack should be cleared when sharing the same VM
1378 */ 1387 */