-rw-r--r--  Documentation/trace/uprobetracer.txt    |  113
-rw-r--r--  arch/Kconfig                            |   17
-rw-r--r--  arch/x86/Kconfig                        |    5
-rw-r--r--  arch/x86/include/asm/thread_info.h      |    2
-rw-r--r--  arch/x86/include/asm/uprobes.h          |   57
-rw-r--r--  arch/x86/kernel/Makefile                |    1
-rw-r--r--  arch/x86/kernel/signal.c                |    6
-rw-r--r--  arch/x86/kernel/uprobes.c               |  674
-rw-r--r--  include/linux/mm_types.h                |    2
-rw-r--r--  include/linux/sched.h                   |    4
-rw-r--r--  include/linux/uprobes.h                 |  165
-rw-r--r--  kernel/events/Makefile                  |    3
-rw-r--r--  kernel/events/uprobes.c                 | 1667
-rw-r--r--  kernel/fork.c                           |    9
-rw-r--r--  kernel/signal.c                         |    4
-rw-r--r--  kernel/trace/Kconfig                    |   20
-rw-r--r--  kernel/trace/Makefile                   |    2
-rw-r--r--  kernel/trace/trace.h                    |    5
-rw-r--r--  kernel/trace/trace_kprobe.c             |  899
-rw-r--r--  kernel/trace/trace_probe.c              |  839
-rw-r--r--  kernel/trace/trace_probe.h              |  161
-rw-r--r--  kernel/trace/trace_uprobe.c             |  788
-rw-r--r--  mm/memory.c                             |    3
-rw-r--r--  mm/mmap.c                               |   33
-rw-r--r--  tools/perf/Documentation/perf-probe.txt |   19
-rw-r--r--  tools/perf/builtin-probe.c              |   86
-rw-r--r--  tools/perf/util/probe-event.c           |  422
-rw-r--r--  tools/perf/util/probe-event.h           |   12
-rw-r--r--  tools/perf/util/symbol.c                |    8
-rw-r--r--  tools/perf/util/symbol.h                |    1
30 files changed, 5048 insertions, 979 deletions
diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt
new file mode 100644
index 000000000000..24ce6823a09e
--- /dev/null
+++ b/Documentation/trace/uprobetracer.txt
@@ -0,0 +1,113 @@
1 Uprobe-tracer: Uprobe-based Event Tracing
2 =========================================
3 Documentation written by Srikar Dronamraju
4
5Overview
6--------
7Uprobe based trace events are similar to kprobe based trace events.
8To enable this feature, build your kernel with CONFIG_UPROBE_EVENT=y.
9
10Similar to the kprobe-event tracer, this doesn't need to be activated via
 11current_tracer. Instead, add probe points via
12/sys/kernel/debug/tracing/uprobe_events, and enable it via
13/sys/kernel/debug/tracing/events/uprobes/<EVENT>/enabled.
14
 15However, unlike the kprobe-event tracer, the uprobe event interface expects
 16the user to calculate the offset of the probepoint in the object.
17
18Synopsis of uprobe_tracer
19-------------------------
20 p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a probe
21
 22 GRP : Group name. If omitted, "uprobes" is used.
23 EVENT : Event name. If omitted, the event name is generated
24 based on SYMBOL+offs.
25 PATH : path to an executable or a library.
26 SYMBOL[+offs] : Symbol+offset where the probe is inserted.
27
28 FETCHARGS : Arguments. Each probe can have up to 128 args.
29 %REG : Fetch register REG
30
31Event Profiling
32---------------
33 You can check the total number of probe hits and probe miss-hits via
34/sys/kernel/debug/tracing/uprobe_profile.
35 The first column is event name, the second is the number of probe hits,
36the third is the number of probe miss-hits.
37
38Usage examples
39--------------
40To add a probe as a new event, write a new definition to uprobe_events
41as below.
42
43 echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
44
 45 This sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash.
46
47 echo > /sys/kernel/debug/tracing/uprobe_events
48
49 This clears all probe points.
50
 51The following example shows how to dump the instruction pointer and the %ax
 52register at the probed text address. Here we are trying to probe the
 53function zfree in /bin/zsh:
54
55 # cd /sys/kernel/debug/tracing/
56 # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp
57 00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh
58 # objdump -T /bin/zsh | grep -w zfree
59 0000000000446420 g DF .text 0000000000000012 Base zfree
60
610x46420 is the offset of zfree in object /bin/zsh that is loaded at
 620x00400000. Hence the command to probe would be:
63
64 # echo 'p /bin/zsh:0x46420 %ip %ax' > uprobe_events
65
 66Please note: the user has to explicitly calculate the offset of the probepoint
67in the object. We can see the events that are registered by looking at the
68uprobe_events file.
69
70 # cat uprobe_events
71 p:uprobes/p_zsh_0x46420 /bin/zsh:0x00046420 arg1=%ip arg2=%ax
72
73The format of events can be seen by viewing the file events/uprobes/p_zsh_0x46420/format
74
75 # cat events/uprobes/p_zsh_0x46420/format
76 name: p_zsh_0x46420
77 ID: 922
78 format:
79 field:unsigned short common_type; offset:0; size:2; signed:0;
80 field:unsigned char common_flags; offset:2; size:1; signed:0;
81 field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
82 field:int common_pid; offset:4; size:4; signed:1;
83 field:int common_padding; offset:8; size:4; signed:1;
84
85 field:unsigned long __probe_ip; offset:12; size:4; signed:0;
86 field:u32 arg1; offset:16; size:4; signed:0;
87 field:u32 arg2; offset:20; size:4; signed:0;
88
89 print fmt: "(%lx) arg1=%lx arg2=%lx", REC->__probe_ip, REC->arg1, REC->arg2
90
91Right after definition, each event is disabled by default. For tracing these
 92events, you need to enable them:
93
94 # echo 1 > events/uprobes/enable
95
 96Let's disable the event after sleeping for some time.
97 # sleep 20
98 # echo 0 > events/uprobes/enable
99
100And you can see the traced information via /sys/kernel/debug/tracing/trace.
101
102 # cat trace
103 # tracer: nop
104 #
105 # TASK-PID CPU# TIMESTAMP FUNCTION
106 # | | | | |
107 zsh-24842 [006] 258544.995456: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
108 zsh-24842 [007] 258545.000270: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
109 zsh-24842 [002] 258545.043929: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
110 zsh-24842 [004] 258547.046129: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
111
 112Each line shows that the probe was triggered for pid 24842, with the ip being
 1130x446421 and the contents of the ax register being 79.
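
The offset arithmetic above (0x446420 - 0x00400000 = 0x46420) can be scripted.
A minimal sh sketch, assuming a non-PIE executable whose symbol shows up in
"objdump -T"; the probe_offset helper is illustrative, not part of this patch:

	#!/bin/sh
	# probe_offset BINARY SYMBOL: print SYMBOL's offset for uprobe_events.
	probe_offset() {
		bin="$1"; sym="$2"
		# Link-time virtual address of the symbol.
		vaddr=$(objdump -T "$bin" | awk -v s="$sym" '$NF == s { print "0x" $1 }')
		# Link-time base: vaddr of the first LOAD segment.
		base=$(objdump -p "$bin" | awk '$1 == "LOAD" { print $5; exit }')
		printf '0x%x\n' $(( vaddr - base ))
	}
	probe_offset /bin/zsh zfree	# prints 0x46420 for the layout above

The printed value can be pasted directly into the uprobe_events definition
shown earlier.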
diff --git a/arch/Kconfig b/arch/Kconfig
index 1f9461b9cc89..e9a910876cda 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -76,6 +76,23 @@ config OPTPROBES
 	depends on KPROBES && HAVE_OPTPROBES
 	depends on !PREEMPT
 
+config UPROBES
+	bool "Transparent user-space probes (EXPERIMENTAL)"
+	depends on UPROBE_EVENT && PERF_EVENTS
+	default n
+	help
+	  Uprobes is the user-space counterpart to kprobes: they
+	  enable instrumentation applications (such as 'perf probe')
+	  to establish unintrusive probes in user-space binaries and
+	  libraries, by executing handler functions when the probes
+	  are hit by user-space applications.
+
+	  ( These probes come in the form of single-byte breakpoints,
+	    managed by the kernel and kept transparent to the probed
+	    application. )
+
+	  If in doubt, say "N".
+
 config HAVE_EFFICIENT_UNALIGNED_ACCESS
 	bool
 	help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d6168994e115..0b2d5f24c946 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -87,7 +87,7 @@ config X86
 	select BUILDTIME_EXTABLE_SORT
 
 config INSTRUCTION_DECODER
-	def_bool (KPROBES || PERF_EVENTS)
+	def_bool (KPROBES || PERF_EVENTS || UPROBES)
 
 config OUTPUT_FORMAT
 	string
@@ -243,6 +243,9 @@ config ARCH_CPU_PROBE_RELEASE
 	def_bool y
 	depends on HOTPLUG_CPU
 
+config ARCH_SUPPORTS_UPROBES
+	def_bool y
+
 source "init/Kconfig"
 source "kernel/Kconfig.freezer"
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 3c9aebc00d39..5c25de07cba8 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -85,6 +85,7 @@ struct thread_info {
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
 #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
+#define TIF_UPROBE		12	/* breakpointed or singlestepping */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* IA32 compatibility process */
 #define TIF_FORK		18	/* ret_from_fork */
@@ -109,6 +110,7 @@ struct thread_info {
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
 #define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
+#define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
new file mode 100644
index 000000000000..1e9bed14f7ae
--- /dev/null
+++ b/arch/x86/include/asm/uprobes.h
@@ -0,0 +1,57 @@
1#ifndef _ASM_UPROBES_H
2#define _ASM_UPROBES_H
3/*
4 * User-space Probes (UProbes) for x86
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 * Copyright (C) IBM Corporation, 2008-2011
21 * Authors:
22 * Srikar Dronamraju
23 * Jim Keniston
24 */
25
26#include <linux/notifier.h>
27
28typedef u8 uprobe_opcode_t;
29
30#define MAX_UINSN_BYTES 16
31#define UPROBE_XOL_SLOT_BYTES 128 /* to keep it cache aligned */
32
33#define UPROBE_SWBP_INSN 0xcc
34#define UPROBE_SWBP_INSN_SIZE 1
35
36struct arch_uprobe {
37 u16 fixups;
38 u8 insn[MAX_UINSN_BYTES];
39#ifdef CONFIG_X86_64
40 unsigned long rip_rela_target_address;
41#endif
42};
43
44struct arch_uprobe_task {
45 unsigned long saved_trap_nr;
46#ifdef CONFIG_X86_64
47 unsigned long saved_scratch_register;
48#endif
49};
50
51extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm);
52extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
53extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
54extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
55extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
56extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
57#endif /* _ASM_UPROBES_H */
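
UPROBE_SWBP_INSN above is the one-byte x86 int3 opcode (0xcc). A standalone
userspace sketch (illustrative, not patch code) of the trap it raises: the
saved IP is already past the breakpoint byte, so after the SIGTRAP handler
returns, execution resumes at the next instruction.

#include <signal.h>
#include <stdio.h>

static void on_trap(int sig)
{
	/* printf from a handler is not async-signal-safe; demo only */
	printf("SIGTRAP: breakpoint hit\n");
}

int main(void)
{
	signal(SIGTRAP, on_trap);
	__asm__ volatile(".byte 0xcc");	/* UPROBE_SWBP_INSN: int3 */
	printf("resumed at the next instruction\n");
	return 0;
}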
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index bb8529275aab..9bba5b79902b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
 
 obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb.o
 obj-$(CONFIG_OF)			+= devicetree.o
+obj-$(CONFIG_UPROBES)			+= uprobes.o
 
 ###
 # 64 bit specific files
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b68ccadd2ff4..965dfda0fd5e 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -18,6 +18,7 @@
 #include <linux/personality.h>
 #include <linux/uaccess.h>
 #include <linux/user-return-notifier.h>
+#include <linux/uprobes.h>
 
 #include <asm/processor.h>
 #include <asm/ucontext.h>
@@ -814,6 +815,11 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 		mce_notify_process();
 #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
 
+	if (thread_info_flags & _TIF_UPROBE) {
+		clear_thread_flag(TIF_UPROBE);
+		uprobe_notify_resume(regs);
+	}
+
 	/* deal with pending signal delivery */
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
new file mode 100644
index 000000000000..dc4e910a7d96
--- /dev/null
+++ b/arch/x86/kernel/uprobes.c
@@ -0,0 +1,674 @@
1/*
2 * User-space Probes (UProbes) for x86
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2008-2011
19 * Authors:
20 * Srikar Dronamraju
21 * Jim Keniston
22 */
23#include <linux/kernel.h>
24#include <linux/sched.h>
25#include <linux/ptrace.h>
26#include <linux/uprobes.h>
27#include <linux/uaccess.h>
28
29#include <linux/kdebug.h>
30#include <asm/processor.h>
31#include <asm/insn.h>
32
33/* Post-execution fixups. */
34
35/* No fixup needed */
36#define UPROBE_FIX_NONE 0x0
37
38/* Adjust IP back to vicinity of actual insn */
39#define UPROBE_FIX_IP 0x1
40
41/* Adjust the return address of a call insn */
42#define UPROBE_FIX_CALL 0x2
43
44#define UPROBE_FIX_RIP_AX 0x8000
45#define UPROBE_FIX_RIP_CX 0x4000
46
47#define UPROBE_TRAP_NR UINT_MAX
48
49/* Adaptations for mhiramat x86 decoder v14. */
50#define OPCODE1(insn) ((insn)->opcode.bytes[0])
51#define OPCODE2(insn) ((insn)->opcode.bytes[1])
52#define OPCODE3(insn) ((insn)->opcode.bytes[2])
53#define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value)
54
55#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
56 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
57 (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
58 (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
59 (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
60 << (row % 32))
61
62/*
63 * Good-instruction tables for 32-bit apps. This is non-const and volatile
64 * to keep gcc from statically optimizing it out, as variable_test_bit makes
 65 * some versions of gcc think only *(unsigned long*) is used.
66 */
67static volatile u32 good_insns_32[256 / 32] = {
68 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
69 /* ---------------------------------------------- */
70 W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */
71 W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
72 W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */
73 W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */
74 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
75 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
76 W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
77 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
78 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
79 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
80 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
81 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
82 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
83 W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
84 W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
85 W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
86 /* ---------------------------------------------- */
87 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
88};
89
90/* Using this for both 64-bit and 32-bit apps */
91static volatile u32 good_2byte_insns[256 / 32] = {
92 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
93 /* ---------------------------------------------- */
94 W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
95 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
96 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
97 W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
98 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
99 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
100 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
101 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
102 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
103 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
104 W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
105 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
106 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
107 W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
108 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
109 W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */
110 /* ---------------------------------------------- */
111 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
112};
113
114#ifdef CONFIG_X86_64
115/* Good-instruction tables for 64-bit apps */
116static volatile u32 good_insns_64[256 / 32] = {
117 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
118 /* ---------------------------------------------- */
119 W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
120 W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
121 W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
122 W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
123 W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
124 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
125 W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
126 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
127 W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
128 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
129 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
130 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
131 W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
132 W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
133 W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
134 W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
135 /* ---------------------------------------------- */
136 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
137};
138#endif
139#undef W
140
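As an aside, each W() row above packs one validity bit per opcode, and two
16-opcode rows share each 32-bit word. A standalone sketch of the encoding
and lookup (illustrative, not patch code):

#include <stdio.h>
#include <stdint.h>

#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
	 << (row % 32))

/* First word of good_insns_32: row 0x00 fills bits 0-15, row 0x10
 * (0x10 % 32 == 16) fills bits 16-31, hence the '|' joining the pair. */
static const uint32_t demo[1] = {
	W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) |
	W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0)
};

static int opcode_ok(uint8_t opc)	/* valid here for opc < 0x20 only */
{
	return (demo[opc / 32] >> (opc % 32)) & 1;
}

int main(void)
{
	printf("0x06 -> %d\n", opcode_ok(0x06));	/* 1: push %es is probeable */
	printf("0x07 -> %d\n", opcode_ok(0x07));	/* 0: pop %es is rejected */
	printf("0x17 -> %d\n", opcode_ok(0x17));	/* 0: pop %ss is rejected */
	return 0;
}
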
141/*
142 * opcodes we'll probably never support:
143 *
144 * 6c-6d, e4-e5, ec-ed - in
145 * 6e-6f, e6-e7, ee-ef - out
146 * cc, cd - int3, int
147 * cf - iret
148 * d6 - illegal instruction
149 * f1 - int1/icebp
150 * f4 - hlt
151 * fa, fb - cli, sti
152 * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
153 *
154 * invalid opcodes in 64-bit mode:
155 *
156 * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
157 * 63 - we support this opcode in x86_64 but not in i386.
158 *
159 * opcodes we may need to refine support for:
160 *
161 * 0f - 2-byte instructions: For many of these instructions, the validity
162 * depends on the prefix and/or the reg field. On such instructions, we
163 * just consider the opcode combination valid if it corresponds to any
164 * valid instruction.
165 *
166 * 8f - Group 1 - only reg = 0 is OK
167 * c6-c7 - Group 11 - only reg = 0 is OK
168 * d9-df - fpu insns with some illegal encodings
169 * f2, f3 - repnz, repz prefixes. These are also the first byte for
170 * certain floating-point instructions, such as addsd.
171 *
172 * fe - Group 4 - only reg = 0 or 1 is OK
173 * ff - Group 5 - only reg = 0-6 is OK
174 *
175 * others -- Do we need to support these?
176 *
177 * 0f - (floating-point?) prefetch instructions
178 * 07, 17, 1f - pop es, pop ss, pop ds
179 * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
180 * but 64 and 65 (fs: and gs:) seem to be used, so we support them
181 * 67 - addr16 prefix
182 * ce - into
183 * f0 - lock prefix
184 */
185
186/*
187 * TODO:
188 * - Where necessary, examine the modrm byte and allow only valid instructions
189 * in the different Groups and fpu instructions.
190 */
191
192static bool is_prefix_bad(struct insn *insn)
193{
194 int i;
195
196 for (i = 0; i < insn->prefixes.nbytes; i++) {
197 switch (insn->prefixes.bytes[i]) {
198 case 0x26: /* INAT_PFX_ES */
199 case 0x2E: /* INAT_PFX_CS */
200 case 0x36: /* INAT_PFX_DS */
201 case 0x3E: /* INAT_PFX_SS */
202 case 0xF0: /* INAT_PFX_LOCK */
203 return true;
204 }
205 }
206 return false;
207}
208
209static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
210{
211 insn_init(insn, auprobe->insn, false);
212
213 /* Skip good instruction prefixes; reject "bad" ones. */
214 insn_get_opcode(insn);
215 if (is_prefix_bad(insn))
216 return -ENOTSUPP;
217
218 if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
219 return 0;
220
221 if (insn->opcode.nbytes == 2) {
222 if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
223 return 0;
224 }
225
226 return -ENOTSUPP;
227}
228
229/*
230 * Figure out which fixups arch_uprobe_post_xol() will need to perform, and
231 * annotate arch_uprobe->fixups accordingly. To start with,
232 * arch_uprobe->fixups is either zero or it reflects rip-related fixups.
233 */
234static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
235{
236 bool fix_ip = true, fix_call = false; /* defaults */
237 int reg;
238
239 insn_get_opcode(insn); /* should be a nop */
240
241 switch (OPCODE1(insn)) {
242 case 0xc3: /* ret/lret */
243 case 0xcb:
244 case 0xc2:
245 case 0xca:
246 /* ip is correct */
247 fix_ip = false;
248 break;
249 case 0xe8: /* call relative - Fix return addr */
250 fix_call = true;
251 break;
252 case 0x9a: /* call absolute - Fix return addr, not ip */
253 fix_call = true;
254 fix_ip = false;
255 break;
256 case 0xff:
257 insn_get_modrm(insn);
258 reg = MODRM_REG(insn);
259 if (reg == 2 || reg == 3) {
260 /* call or lcall, indirect */
261 /* Fix return addr; ip is correct. */
262 fix_call = true;
263 fix_ip = false;
264 } else if (reg == 4 || reg == 5) {
265 /* jmp or ljmp, indirect */
266 /* ip is correct. */
267 fix_ip = false;
268 }
269 break;
270 case 0xea: /* jmp absolute -- ip is correct */
271 fix_ip = false;
272 break;
273 default:
274 break;
275 }
276 if (fix_ip)
277 auprobe->fixups |= UPROBE_FIX_IP;
278 if (fix_call)
279 auprobe->fixups |= UPROBE_FIX_CALL;
280}
281
282#ifdef CONFIG_X86_64
283/*
284 * If arch_uprobe->insn doesn't use rip-relative addressing, return
285 * immediately. Otherwise, rewrite the instruction so that it accesses
286 * its memory operand indirectly through a scratch register. Set
287 * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
288 * accordingly. (The contents of the scratch register will be saved
289 * before we single-step the modified instruction, and restored
290 * afterward.)
291 *
292 * We do this because a rip-relative instruction can access only a
293 * relatively small area (+/- 2 GB from the instruction), and the XOL
294 * area typically lies beyond that area. At least for instructions
295 * that store to memory, we can't execute the original instruction
296 * and "fix things up" later, because the misdirected store could be
297 * disastrous.
298 *
299 * Some useful facts about rip-relative instructions:
300 *
301 * - There's always a modrm byte.
302 * - There's never a SIB byte.
303 * - The displacement is always 4 bytes.
304 */
305static void
306handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
307{
308 u8 *cursor;
309 u8 reg;
310
311 if (mm->context.ia32_compat)
312 return;
313
314 auprobe->rip_rela_target_address = 0x0;
315 if (!insn_rip_relative(insn))
316 return;
317
318 /*
319 * insn_rip_relative() would have decoded rex_prefix, modrm.
320 * Clear REX.b bit (extension of MODRM.rm field):
321 * we want to encode rax/rcx, not r8/r9.
322 */
323 if (insn->rex_prefix.nbytes) {
324 cursor = auprobe->insn + insn_offset_rex_prefix(insn);
325 *cursor &= 0xfe; /* Clearing REX.B bit */
326 }
327
328 /*
329 * Point cursor at the modrm byte. The next 4 bytes are the
330 * displacement. Beyond the displacement, for some instructions,
331 * is the immediate operand.
332 */
333 cursor = auprobe->insn + insn_offset_modrm(insn);
334 insn_get_length(insn);
335
336 /*
337 * Convert from rip-relative addressing to indirect addressing
338 * via a scratch register. Change the r/m field from 0x5 (%rip)
339 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
340 */
341 reg = MODRM_REG(insn);
342 if (reg == 0) {
343 /*
344 * The register operand (if any) is either the A register
345 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
346 * REX prefix) %r8. In any case, we know the C register
347 * is NOT the register operand, so we use %rcx (register
348 * #1) for the scratch register.
349 */
350 auprobe->fixups = UPROBE_FIX_RIP_CX;
351 /* Change modrm from 00 000 101 to 00 000 001. */
352 *cursor = 0x1;
353 } else {
354 /* Use %rax (register #0) for the scratch register. */
355 auprobe->fixups = UPROBE_FIX_RIP_AX;
356 /* Change modrm from 00 xxx 101 to 00 xxx 000 */
357 *cursor = (reg << 3);
358 }
359
360 /* Target address = address of next instruction + (signed) offset */
361 auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
362
363 /* Displacement field is gone; slide immediate field (if any) over. */
364 if (insn->immediate.nbytes) {
365 cursor++;
366 memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
367 }
368 return;
369}
370
371static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
372{
373 insn_init(insn, auprobe->insn, true);
374
375 /* Skip good instruction prefixes; reject "bad" ones. */
376 insn_get_opcode(insn);
377 if (is_prefix_bad(insn))
378 return -ENOTSUPP;
379
380 if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
381 return 0;
382
383 if (insn->opcode.nbytes == 2) {
384 if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
385 return 0;
386 }
387 return -ENOTSUPP;
388}
389
390static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
391{
392 if (mm->context.ia32_compat)
393 return validate_insn_32bits(auprobe, insn);
394 return validate_insn_64bits(auprobe, insn);
395}
396#else /* 32-bit: */
397static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
398{
399 /* No RIP-relative addressing on 32-bit */
400}
401
402static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
403{
404 return validate_insn_32bits(auprobe, insn);
405}
406#endif /* CONFIG_X86_64 */
407
408/**
409 * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
410 * @mm: the probed address space.
 411 * @auprobe: the probepoint information.
 412 * Return 0 on success or a negative errno on error.
413 */
414int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm)
415{
416 int ret;
417 struct insn insn;
418
419 auprobe->fixups = 0;
420 ret = validate_insn_bits(auprobe, mm, &insn);
421 if (ret != 0)
422 return ret;
423
424 handle_riprel_insn(auprobe, mm, &insn);
425 prepare_fixups(auprobe, &insn);
426
427 return 0;
428}
429
430#ifdef CONFIG_X86_64
431/*
432 * If we're emulating a rip-relative instruction, save the contents
433 * of the scratch register and store the target address in that register.
434 */
435static void
436pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
437 struct arch_uprobe_task *autask)
438{
439 if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
440 autask->saved_scratch_register = regs->ax;
441 regs->ax = current->utask->vaddr;
442 regs->ax += auprobe->rip_rela_target_address;
443 } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
444 autask->saved_scratch_register = regs->cx;
445 regs->cx = current->utask->vaddr;
446 regs->cx += auprobe->rip_rela_target_address;
447 }
448}
449#else
450static void
451pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
452 struct arch_uprobe_task *autask)
453{
454 /* No RIP-relative addressing on 32-bit */
455}
456#endif
457
458/*
459 * arch_uprobe_pre_xol - prepare to execute out of line.
460 * @auprobe: the probepoint information.
461 * @regs: reflects the saved user state of current task.
462 */
463int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
464{
465 struct arch_uprobe_task *autask;
466
467 autask = &current->utask->autask;
468 autask->saved_trap_nr = current->thread.trap_nr;
469 current->thread.trap_nr = UPROBE_TRAP_NR;
470 regs->ip = current->utask->xol_vaddr;
471 pre_xol_rip_insn(auprobe, regs, autask);
472
473 return 0;
474}
475
476/*
477 * This function is called by arch_uprobe_post_xol() to adjust the return
478 * address pushed by a call instruction executed out of line.
479 */
480static int adjust_ret_addr(unsigned long sp, long correction)
481{
482 int rasize, ncopied;
483 long ra = 0;
484
485 if (is_ia32_task())
486 rasize = 4;
487 else
488 rasize = 8;
489
490 ncopied = copy_from_user(&ra, (void __user *)sp, rasize);
491 if (unlikely(ncopied))
492 return -EFAULT;
493
494 ra += correction;
495 ncopied = copy_to_user((void __user *)sp, &ra, rasize);
496 if (unlikely(ncopied))
497 return -EFAULT;
498
499 return 0;
500}
501
502#ifdef CONFIG_X86_64
503static bool is_riprel_insn(struct arch_uprobe *auprobe)
504{
505 return ((auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) != 0);
506}
507
508static void
509handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
510{
511 if (is_riprel_insn(auprobe)) {
512 struct arch_uprobe_task *autask;
513
514 autask = &current->utask->autask;
515 if (auprobe->fixups & UPROBE_FIX_RIP_AX)
516 regs->ax = autask->saved_scratch_register;
517 else
518 regs->cx = autask->saved_scratch_register;
519
520 /*
521 * The original instruction includes a displacement, and so
522 * is 4 bytes longer than what we've just single-stepped.
523 * Fall through to handle stuff like "jmpq *...(%rip)" and
524 * "callq *...(%rip)".
525 */
526 if (correction)
527 *correction += 4;
528 }
529}
530#else
531static void
532handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
533{
534 /* No RIP-relative addressing on 32-bit */
535}
536#endif
537
538/*
 539 * If the xol insn itself traps and generates a signal (say,
540 * SIGILL/SIGSEGV/etc), then detect the case where a singlestepped
541 * instruction jumps back to its own address. It is assumed that anything
542 * like do_page_fault/do_trap/etc sets thread.trap_nr != -1.
543 *
544 * arch_uprobe_pre_xol/arch_uprobe_post_xol save/restore thread.trap_nr,
545 * arch_uprobe_xol_was_trapped() simply checks that ->trap_nr is not equal to
 546 * UPROBE_TRAP_NR (UINT_MAX), the value set by arch_uprobe_pre_xol().
547 */
548bool arch_uprobe_xol_was_trapped(struct task_struct *t)
549{
550 if (t->thread.trap_nr != UPROBE_TRAP_NR)
551 return true;
552
553 return false;
554}
555
556/*
557 * Called after single-stepping. To avoid the SMP problems that can
558 * occur when we temporarily put back the original opcode to
559 * single-step, we single-stepped a copy of the instruction.
560 *
561 * This function prepares to resume execution after the single-step.
562 * We have to fix things up as follows:
563 *
564 * Typically, the new ip is relative to the copied instruction. We need
565 * to make it relative to the original instruction (FIX_IP). Exceptions
566 * are return instructions and absolute or indirect jump or call instructions.
567 *
568 * If the single-stepped instruction was a call, the return address that
569 * is atop the stack is the address following the copied instruction. We
570 * need to make it the address following the original instruction (FIX_CALL).
571 *
572 * If the original instruction was a rip-relative instruction such as
573 * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
574 * instruction using a scratch register -- e.g., "movl %edx,(%rax)".
575 * We need to restore the contents of the scratch register and adjust
576 * the ip, keeping in mind that the instruction we executed is 4 bytes
577 * shorter than the original instruction (since we squeezed out the offset
578 * field). (FIX_RIP_AX or FIX_RIP_CX)
579 */
580int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
581{
582 struct uprobe_task *utask;
583 long correction;
584 int result = 0;
585
586 WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
587
588 utask = current->utask;
589 current->thread.trap_nr = utask->autask.saved_trap_nr;
590 correction = (long)(utask->vaddr - utask->xol_vaddr);
591 handle_riprel_post_xol(auprobe, regs, &correction);
592 if (auprobe->fixups & UPROBE_FIX_IP)
593 regs->ip += correction;
594
595 if (auprobe->fixups & UPROBE_FIX_CALL)
596 result = adjust_ret_addr(regs->sp, correction);
597
598 return result;
599}
600
601/* callback routine for handling exceptions. */
602int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data)
603{
604 struct die_args *args = data;
605 struct pt_regs *regs = args->regs;
606 int ret = NOTIFY_DONE;
607
608 /* We are only interested in userspace traps */
609 if (regs && !user_mode_vm(regs))
610 return NOTIFY_DONE;
611
612 switch (val) {
613 case DIE_INT3:
614 if (uprobe_pre_sstep_notifier(regs))
615 ret = NOTIFY_STOP;
616
617 break;
618
619 case DIE_DEBUG:
620 if (uprobe_post_sstep_notifier(regs))
621 ret = NOTIFY_STOP;
622
623 default:
624 break;
625 }
626
627 return ret;
628}
629
630/*
631 * This function gets called when XOL instruction either gets trapped or
632 * the thread has a fatal signal, so reset the instruction pointer to its
633 * probed address.
634 */
635void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
636{
637 struct uprobe_task *utask = current->utask;
638
639 current->thread.trap_nr = utask->autask.saved_trap_nr;
640 handle_riprel_post_xol(auprobe, regs, NULL);
641 instruction_pointer_set(regs, utask->vaddr);
642}
643
644/*
645 * Skip these instructions as per the currently known x86 ISA.
646 * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
647 */
648bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
649{
650 int i;
651
652 for (i = 0; i < MAX_UINSN_BYTES; i++) {
653 if ((auprobe->insn[i] == 0x66))
654 continue;
655
656 if (auprobe->insn[i] == 0x90)
657 return true;
658
659 if (i == (MAX_UINSN_BYTES - 1))
660 break;
661
662 if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x1f))
663 return true;
664
665 if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x19))
666 return true;
667
668 if ((auprobe->insn[i] == 0x87) && (auprobe->insn[i+1] == 0xc0))
669 return true;
670
671 break;
672 }
673 return false;
674}
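
The ip-correction arithmetic in arch_uprobe_post_xol() above can be checked
standalone. A minimal sketch with made-up addresses (plain userspace C, not
kernel code):

#include <stdio.h>

int main(void)
{
	unsigned long vaddr     = 0x446420;		/* probed address */
	unsigned long xol_vaddr = 0x7f0000001080;	/* XOL slot of the copy */
	unsigned long insn_len  = 2;			/* length of the insn */

	/* Single-stepping the copy leaves ip just past the copy. */
	unsigned long ip = xol_vaddr + insn_len;

	/* UPROBE_FIX_IP: correction = vaddr - xol_vaddr shifts ip back. */
	long correction = (long)(vaddr - xol_vaddr);
	ip += correction;

	printf("resume at %#lx\n", ip);	/* 0x446422 = vaddr + insn_len */
	return 0;
}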
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3cc3062b3767..26574c726121 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -12,6 +12,7 @@
 #include <linux/completion.h>
 #include <linux/cpumask.h>
 #include <linux/page-debug-flags.h>
+#include <linux/uprobes.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
 
@@ -388,6 +389,7 @@ struct mm_struct {
 #ifdef CONFIG_CPUMASK_OFFSTACK
 	struct cpumask cpumask_allocation;
 #endif
+	struct uprobes_state uprobes_state;
 };
 
 static inline void mm_init_cpumask(struct mm_struct *mm)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5ea8baea9387..f45c0b280b5d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1572,6 +1572,10 @@ struct task_struct {
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	atomic_t ptrace_bp_refcnt;
 #endif
+#ifdef CONFIG_UPROBES
+	struct uprobe_task *utask;
+	int uprobe_srcu_id;
+#endif
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
new file mode 100644
index 000000000000..efe4b3308c74
--- /dev/null
+++ b/include/linux/uprobes.h
@@ -0,0 +1,165 @@
1#ifndef _LINUX_UPROBES_H
2#define _LINUX_UPROBES_H
3/*
4 * User-space Probes (UProbes)
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 * Copyright (C) IBM Corporation, 2008-2012
21 * Authors:
22 * Srikar Dronamraju
23 * Jim Keniston
24 * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
25 */
26
27#include <linux/errno.h>
28#include <linux/rbtree.h>
29
30struct vm_area_struct;
31struct mm_struct;
32struct inode;
33
34#ifdef CONFIG_ARCH_SUPPORTS_UPROBES
35# include <asm/uprobes.h>
36#endif
37
38/* flags that denote/change uprobes behaviour */
39
40/* Have a copy of original instruction */
41#define UPROBE_COPY_INSN 0x1
42
 43/* Don't run handlers when the first register / last unregister is in progress */
44#define UPROBE_RUN_HANDLER 0x2
45/* Can skip singlestep */
46#define UPROBE_SKIP_SSTEP 0x4
47
48struct uprobe_consumer {
49 int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
50 /*
 51 * filter is optional; if a filter exists, the handler is run
52 * if and only if filter returns true.
53 */
54 bool (*filter)(struct uprobe_consumer *self, struct task_struct *task);
55
56 struct uprobe_consumer *next;
57};
58
59#ifdef CONFIG_UPROBES
60enum uprobe_task_state {
61 UTASK_RUNNING,
62 UTASK_BP_HIT,
63 UTASK_SSTEP,
64 UTASK_SSTEP_ACK,
65 UTASK_SSTEP_TRAPPED,
66};
67
68/*
69 * uprobe_task: Metadata of a task while it singlesteps.
70 */
71struct uprobe_task {
72 enum uprobe_task_state state;
73 struct arch_uprobe_task autask;
74
75 struct uprobe *active_uprobe;
76
77 unsigned long xol_vaddr;
78 unsigned long vaddr;
79};
80
81/*
 82 * On a breakpoint hit, the thread contends for a slot. It frees the
83 * slot after singlestep. Currently a fixed number of slots are
84 * allocated.
85 */
86struct xol_area {
87 wait_queue_head_t wq; /* if all slots are busy */
88 atomic_t slot_count; /* number of in-use slots */
89 unsigned long *bitmap; /* 0 = free slot */
90 struct page *page;
91
92 /*
93 * We keep the vma's vm_start rather than a pointer to the vma
94 * itself. The probed process or a naughty kernel module could make
95 * the vma go away, and we must handle that reasonably gracefully.
96 */
97 unsigned long vaddr; /* Page(s) of instruction slots */
98};
99
100struct uprobes_state {
101 struct xol_area *xol_area;
102 atomic_t count;
103};
104extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
105extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify);
106extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
107extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
108extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
109extern int uprobe_mmap(struct vm_area_struct *vma);
110extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
111extern void uprobe_free_utask(struct task_struct *t);
112extern void uprobe_copy_process(struct task_struct *t);
113extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
114extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
115extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
116extern void uprobe_notify_resume(struct pt_regs *regs);
117extern bool uprobe_deny_signal(void);
118extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
119extern void uprobe_clear_state(struct mm_struct *mm);
120extern void uprobe_reset_state(struct mm_struct *mm);
121#else /* !CONFIG_UPROBES */
122struct uprobes_state {
123};
124static inline int
125uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
126{
127 return -ENOSYS;
128}
129static inline void
130uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
131{
132}
133static inline int uprobe_mmap(struct vm_area_struct *vma)
134{
135 return 0;
136}
137static inline void
138uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
139{
140}
141static inline void uprobe_notify_resume(struct pt_regs *regs)
142{
143}
144static inline bool uprobe_deny_signal(void)
145{
146 return false;
147}
148static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
149{
150 return 0;
151}
152static inline void uprobe_free_utask(struct task_struct *t)
153{
154}
155static inline void uprobe_copy_process(struct task_struct *t)
156{
157}
158static inline void uprobe_clear_state(struct mm_struct *mm)
159{
160}
161static inline void uprobe_reset_state(struct mm_struct *mm)
162{
163}
164#endif /* !CONFIG_UPROBES */
165#endif /* _LINUX_UPROBES_H */
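
A hypothetical module-side sketch of the consumer API declared above; the
path, offset, and demo_* names are illustrative, and it assumes
uprobe_register()/uprobe_unregister() are reachable from modules:

#include <linux/module.h>
#include <linux/fs.h>		/* igrab, iput */
#include <linux/namei.h>	/* kern_path */
#include <linux/ptrace.h>	/* instruction_pointer */
#include <linux/uprobes.h>

static int demo_handler(struct uprobe_consumer *self, struct pt_regs *regs)
{
	pr_info("uprobe hit, ip=%lx\n", instruction_pointer(regs));
	return 0;
}

static struct uprobe_consumer demo_consumer = {
	.handler = demo_handler,
};

static struct inode *demo_inode;
static const loff_t demo_offset = 0x46420;	/* e.g. zfree in /bin/zsh */

static int __init demo_init(void)
{
	struct path p;
	int ret = kern_path("/bin/zsh", LOOKUP_FOLLOW, &p);

	if (ret)
		return ret;
	demo_inode = igrab(p.dentry->d_inode);
	path_put(&p);

	ret = uprobe_register(demo_inode, demo_offset, &demo_consumer);
	if (ret)
		iput(demo_inode);
	return ret;
}

static void __exit demo_exit(void)
{
	uprobe_unregister(demo_inode, demo_offset, &demo_consumer);
	iput(demo_inode);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");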
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 22d901f9caf4..103f5d147b2f 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -3,4 +3,7 @@ CFLAGS_REMOVE_core.o = -pg
 endif
 
 obj-y := core.o ring_buffer.o callchain.o
+
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_UPROBES) += uprobes.o
+
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
new file mode 100644
index 000000000000..985be4d80fe8
--- /dev/null
+++ b/kernel/events/uprobes.c
@@ -0,0 +1,1667 @@
1/*
2 * User-space Probes (UProbes)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2008-2012
19 * Authors:
20 * Srikar Dronamraju
21 * Jim Keniston
22 * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
23 */
24
25#include <linux/kernel.h>
26#include <linux/highmem.h>
27#include <linux/pagemap.h> /* read_mapping_page */
28#include <linux/slab.h>
29#include <linux/sched.h>
30#include <linux/rmap.h> /* anon_vma_prepare */
31#include <linux/mmu_notifier.h> /* set_pte_at_notify */
32#include <linux/swap.h> /* try_to_free_swap */
33#include <linux/ptrace.h> /* user_enable_single_step */
34#include <linux/kdebug.h> /* notifier mechanism */
35
36#include <linux/uprobes.h>
37
38#define UINSNS_PER_PAGE (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES)
39#define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE
40
41static struct srcu_struct uprobes_srcu;
42static struct rb_root uprobes_tree = RB_ROOT;
43
44static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
45
46#define UPROBES_HASH_SZ 13
47
48/* serialize (un)register */
49static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
50
51#define uprobes_hash(v) (&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
52
53/* serialize uprobe->pending_list */
54static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
55#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
56
57/*
58 * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe
 59 * events active at this time. Probably a fine-grained per-inode count is
60 * better?
61 */
62static atomic_t uprobe_events = ATOMIC_INIT(0);
63
64/*
65 * Maintain a temporary per vma info that can be used to search if a vma
66 * has already been handled. This structure is introduced since extending
67 * vm_area_struct wasnt recommended.
68 */
69struct vma_info {
70 struct list_head probe_list;
71 struct mm_struct *mm;
72 loff_t vaddr;
73};
74
75struct uprobe {
76 struct rb_node rb_node; /* node in the rb tree */
77 atomic_t ref;
78 struct rw_semaphore consumer_rwsem;
79 struct list_head pending_list;
80 struct uprobe_consumer *consumers;
81 struct inode *inode; /* Also hold a ref to inode */
82 loff_t offset;
83 int flags;
84 struct arch_uprobe arch;
85};
86
87/*
88 * valid_vma: Verify if the specified vma is an executable vma
89 * Relax restrictions while unregistering: vm_flags might have
90 * changed after breakpoint was inserted.
91 * - is_register: indicates if we are in register context.
92 * - Return 1 if the specified virtual address is in an
93 * executable vma.
94 */
95static bool valid_vma(struct vm_area_struct *vma, bool is_register)
96{
97 if (!vma->vm_file)
98 return false;
99
100 if (!is_register)
101 return true;
102
103 if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC))
104 return true;
105
106 return false;
107}
108
109static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
110{
111 loff_t vaddr;
112
113 vaddr = vma->vm_start + offset;
114 vaddr -= vma->vm_pgoff << PAGE_SHIFT;
115
116 return vaddr;
117}
118
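A worked instance of vma_address() above, with the numbers from the
uprobetracer example (a sketch; the text mapping is assumed to start at file
offset zero, i.e. vm_pgoff == 0):

#include <stdio.h>

int main(void)
{
	unsigned long vm_start = 0x400000;	/* where the vma begins */
	unsigned long vm_pgoff = 0;		/* file page the vma starts at */
	unsigned long page_shift = 12;
	unsigned long offset = 0x46420;		/* uprobe->offset in the file */

	/* vma_address(): vaddr = vm_start + offset - (vm_pgoff << PAGE_SHIFT) */
	unsigned long vaddr = vm_start + offset - (vm_pgoff << page_shift);

	printf("%#lx\n", vaddr);		/* 0x446420, as probed earlier */
	return 0;
}
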
119/**
120 * __replace_page - replace page in vma by new page.
121 * based on replace_page in mm/ksm.c
122 *
123 * @vma: vma that holds the pte pointing to page
124 * @page: the cowed page we are replacing by kpage
125 * @kpage: the modified page we replace page by
126 *
127 * Returns 0 on success, -EFAULT on failure.
128 */
129static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
130{
131 struct mm_struct *mm = vma->vm_mm;
132 pgd_t *pgd;
133 pud_t *pud;
134 pmd_t *pmd;
135 pte_t *ptep;
136 spinlock_t *ptl;
137 unsigned long addr;
138 int err = -EFAULT;
139
140 addr = page_address_in_vma(page, vma);
141 if (addr == -EFAULT)
142 goto out;
143
144 pgd = pgd_offset(mm, addr);
145 if (!pgd_present(*pgd))
146 goto out;
147
148 pud = pud_offset(pgd, addr);
149 if (!pud_present(*pud))
150 goto out;
151
152 pmd = pmd_offset(pud, addr);
153 if (!pmd_present(*pmd))
154 goto out;
155
156 ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
157 if (!ptep)
158 goto out;
159
160 get_page(kpage);
161 page_add_new_anon_rmap(kpage, vma, addr);
162
163 if (!PageAnon(page)) {
164 dec_mm_counter(mm, MM_FILEPAGES);
165 inc_mm_counter(mm, MM_ANONPAGES);
166 }
167
168 flush_cache_page(vma, addr, pte_pfn(*ptep));
169 ptep_clear_flush(vma, addr, ptep);
170 set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
171
172 page_remove_rmap(page);
173 if (!page_mapped(page))
174 try_to_free_swap(page);
175 put_page(page);
176 pte_unmap_unlock(ptep, ptl);
177 err = 0;
178
179out:
180 return err;
181}
182
183/**
184 * is_swbp_insn - check if instruction is breakpoint instruction.
185 * @insn: instruction to be checked.
186 * Default implementation of is_swbp_insn
187 * Returns true if @insn is a breakpoint instruction.
188 */
189bool __weak is_swbp_insn(uprobe_opcode_t *insn)
190{
191 return *insn == UPROBE_SWBP_INSN;
192}
193
194/*
195 * NOTE:
196 * Expect the breakpoint instruction to be the smallest size instruction for
 197 * the architecture. If an arch has variable-length instructions and the
 198 * breakpoint instruction is not the smallest-length instruction
 199 * supported by that architecture, then we need to modify read_opcode /
 200 * write_opcode accordingly. This would never be a problem for archs that
 201 * have fixed-length instructions.
202 */
203
204/*
205 * write_opcode - write the opcode at a given virtual address.
206 * @auprobe: arch breakpointing information.
207 * @mm: the probed process address space.
208 * @vaddr: the virtual address to store the opcode.
209 * @opcode: opcode to be written at @vaddr.
210 *
211 * Called with mm->mmap_sem held (for read and with a reference to
212 * mm).
213 *
214 * For mm @mm, write the opcode at @vaddr.
215 * Return 0 (success) or a negative errno.
216 */
217static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
218 unsigned long vaddr, uprobe_opcode_t opcode)
219{
220 struct page *old_page, *new_page;
221 struct address_space *mapping;
222 void *vaddr_old, *vaddr_new;
223 struct vm_area_struct *vma;
224 struct uprobe *uprobe;
225 loff_t addr;
226 int ret;
227
228 /* Read the page with vaddr into memory */
229 ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
230 if (ret <= 0)
231 return ret;
232
233 ret = -EINVAL;
234
235 /*
236 * We are interested in text pages only. Our pages of interest
237 * should be mapped for read and execute only. We desist from
238 * adding probes in write mapped pages since the breakpoints
239 * might end up in the file copy.
240 */
241 if (!valid_vma(vma, is_swbp_insn(&opcode)))
242 goto put_out;
243
244 uprobe = container_of(auprobe, struct uprobe, arch);
245 mapping = uprobe->inode->i_mapping;
246 if (mapping != vma->vm_file->f_mapping)
247 goto put_out;
248
249 addr = vma_address(vma, uprobe->offset);
250 if (vaddr != (unsigned long)addr)
251 goto put_out;
252
253 ret = -ENOMEM;
254 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
255 if (!new_page)
256 goto put_out;
257
258 __SetPageUptodate(new_page);
259
260 /*
261 * lock page will serialize against do_wp_page()'s
262 * PageAnon() handling
263 */
264 lock_page(old_page);
265 /* copy the page now that we've got it stable */
266 vaddr_old = kmap_atomic(old_page);
267 vaddr_new = kmap_atomic(new_page);
268
269 memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
270
271 /* poke the new insn in, ASSUMES we don't cross page boundary */
272 vaddr &= ~PAGE_MASK;
273 BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
274 memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
275
276 kunmap_atomic(vaddr_new);
277 kunmap_atomic(vaddr_old);
278
279 ret = anon_vma_prepare(vma);
280 if (ret)
281 goto unlock_out;
282
283 lock_page(new_page);
284 ret = __replace_page(vma, old_page, new_page);
285 unlock_page(new_page);
286
287unlock_out:
288 unlock_page(old_page);
289 page_cache_release(new_page);
290
291put_out:
292 put_page(old_page);
293
294 return ret;
295}
296
297/**
298 * read_opcode - read the opcode at a given virtual address.
299 * @mm: the probed process address space.
300 * @vaddr: the virtual address to read the opcode.
301 * @opcode: location to store the read opcode.
302 *
303 * Called with mm->mmap_sem held (for read and with a reference to
 304 * mm).
305 *
306 * For mm @mm, read the opcode at @vaddr and store it in @opcode.
307 * Return 0 (success) or a negative errno.
308 */
309static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t *opcode)
310{
311 struct page *page;
312 void *vaddr_new;
313 int ret;
314
315 ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL);
316 if (ret <= 0)
317 return ret;
318
319 lock_page(page);
320 vaddr_new = kmap_atomic(page);
321 vaddr &= ~PAGE_MASK;
322 memcpy(opcode, vaddr_new + vaddr, UPROBE_SWBP_INSN_SIZE);
323 kunmap_atomic(vaddr_new);
324 unlock_page(page);
325
326 put_page(page);
327
328 return 0;
329}
330
331static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
332{
333 uprobe_opcode_t opcode;
334 int result;
335
336 result = read_opcode(mm, vaddr, &opcode);
337 if (result)
338 return result;
339
340 if (is_swbp_insn(&opcode))
341 return 1;
342
343 return 0;
344}
345
346/**
347 * set_swbp - store breakpoint at a given address.
348 * @auprobe: arch specific probepoint information.
349 * @mm: the probed process address space.
350 * @vaddr: the virtual address to insert the opcode.
351 *
352 * For mm @mm, store the breakpoint instruction at @vaddr.
353 * Return 0 (success) or a negative errno.
354 */
355int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
356{
357 int result;
358
359 result = is_swbp_at_addr(mm, vaddr);
360 if (result == 1)
361 return -EEXIST;
362
363 if (result)
364 return result;
365
366 return write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN);
367}
368
369/**
370 * set_orig_insn - Restore the original instruction.
371 * @mm: the probed process address space.
372 * @auprobe: arch specific probepoint information.
373 * @vaddr: the virtual address to insert the opcode.
 374 * @verify: if true, verify the existence of the breakpoint instruction.
375 *
376 * For mm @mm, restore the original opcode (opcode) at @vaddr.
377 * Return 0 (success) or a negative errno.
378 */
379int __weak
380set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, bool verify)
381{
382 if (verify) {
383 int result;
384
385 result = is_swbp_at_addr(mm, vaddr);
386 if (!result)
387 return -EINVAL;
388
389 if (result != 1)
390 return result;
391 }
392 return write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
393}
394
395static int match_uprobe(struct uprobe *l, struct uprobe *r)
396{
397 if (l->inode < r->inode)
398 return -1;
399
400 if (l->inode > r->inode)
401 return 1;
402
403 if (l->offset < r->offset)
404 return -1;
405
406 if (l->offset > r->offset)
407 return 1;
408
409 return 0;
410}
411
412static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
413{
414 struct uprobe u = { .inode = inode, .offset = offset };
415 struct rb_node *n = uprobes_tree.rb_node;
416 struct uprobe *uprobe;
417 int match;
418
419 while (n) {
420 uprobe = rb_entry(n, struct uprobe, rb_node);
421 match = match_uprobe(&u, uprobe);
422 if (!match) {
423 atomic_inc(&uprobe->ref);
424 return uprobe;
425 }
426
427 if (match < 0)
428 n = n->rb_left;
429 else
430 n = n->rb_right;
431 }
432 return NULL;
433}
434
435/*
436 * Find a uprobe corresponding to a given inode:offset
437 * Acquires uprobes_treelock
438 */
439static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
440{
441 struct uprobe *uprobe;
442 unsigned long flags;
443
444 spin_lock_irqsave(&uprobes_treelock, flags);
445 uprobe = __find_uprobe(inode, offset);
446 spin_unlock_irqrestore(&uprobes_treelock, flags);
447
448 return uprobe;
449}
450
451static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
452{
453 struct rb_node **p = &uprobes_tree.rb_node;
454 struct rb_node *parent = NULL;
455 struct uprobe *u;
456 int match;
457
458 while (*p) {
459 parent = *p;
460 u = rb_entry(parent, struct uprobe, rb_node);
461 match = match_uprobe(uprobe, u);
462 if (!match) {
463 atomic_inc(&u->ref);
464 return u;
465 }
466
467 if (match < 0)
468 p = &parent->rb_left;
469 else
470 p = &parent->rb_right;
471
472 }
473
474 u = NULL;
475 rb_link_node(&uprobe->rb_node, parent, p);
476 rb_insert_color(&uprobe->rb_node, &uprobes_tree);
477 /* get access + creation ref */
478 atomic_set(&uprobe->ref, 2);
479
480 return u;
481}
482
483/*
484 * Acquire uprobes_treelock.
485 * If a matching uprobe already exists in the rbtree,
486 * increment its (access) refcount and return the matching uprobe.
487 *
488 * If no matching uprobe exists, insert the uprobe into the rb_tree;
489 * take a double refcount (access + creation) and return NULL.
490 */
491static struct uprobe *insert_uprobe(struct uprobe *uprobe)
492{
493 unsigned long flags;
494 struct uprobe *u;
495
496 spin_lock_irqsave(&uprobes_treelock, flags);
497 u = __insert_uprobe(uprobe);
498 spin_unlock_irqrestore(&uprobes_treelock, flags);
499
500 /* For now assume that the instruction need not be single-stepped */
501 uprobe->flags |= UPROBE_SKIP_SSTEP;
502
503 return u;
504}
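
The two return paths of insert_uprobe() imply two different refcount pictures; a sketch of the assumed flow for a given inode:offset:

/*
 * Refcount sketch (illustration, not code from the patch):
 *
 *   first register:  insert_uprobe() returns NULL, ref == 2
 *                    (one "creation" ref held until the last consumer
 *                     unregisters, one "access" ref dropped by the
 *                     caller through put_uprobe())
 *
 *   later registers: insert_uprobe() returns the existing node with
 *                    its ref bumped by one (access ref only)
 */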
505
506static void put_uprobe(struct uprobe *uprobe)
507{
508 if (atomic_dec_and_test(&uprobe->ref))
509 kfree(uprobe);
510}
511
512static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
513{
514 struct uprobe *uprobe, *cur_uprobe;
515
516 uprobe = kzalloc(sizeof(struct uprobe), GFP_KERNEL);
517 if (!uprobe)
518 return NULL;
519
520 uprobe->inode = igrab(inode);
521 uprobe->offset = offset;
522 init_rwsem(&uprobe->consumer_rwsem);
523 INIT_LIST_HEAD(&uprobe->pending_list);
524
525 /* add to uprobes_tree, sorted on inode:offset */
526 cur_uprobe = insert_uprobe(uprobe);
527
528 /* a uprobe exists for this inode:offset combination */
529 if (cur_uprobe) {
530 kfree(uprobe);
531 uprobe = cur_uprobe;
532 iput(inode);
533 } else {
534 atomic_inc(&uprobe_events);
535 }
536
537 return uprobe;
538}
539
540static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
541{
542 struct uprobe_consumer *uc;
543
544 if (!(uprobe->flags & UPROBE_RUN_HANDLER))
545 return;
546
547 down_read(&uprobe->consumer_rwsem);
548 for (uc = uprobe->consumers; uc; uc = uc->next) {
549 if (!uc->filter || uc->filter(uc, current))
550 uc->handler(uc, regs);
551 }
552 up_read(&uprobe->consumer_rwsem);
553}
554
555/* Returns the previous consumer */
556static struct uprobe_consumer *
557consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
558{
559 down_write(&uprobe->consumer_rwsem);
560 uc->next = uprobe->consumers;
561 uprobe->consumers = uc;
562 up_write(&uprobe->consumer_rwsem);
563
564 return uc->next;
565}
566
567/*
568 * For uprobe @uprobe, delete the consumer @uc.
569 * Return true if @uc is deleted successfully,
570 * otherwise return false.
571 */
572static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
573{
574 struct uprobe_consumer **con;
575 bool ret = false;
576
577 down_write(&uprobe->consumer_rwsem);
578 for (con = &uprobe->consumers; *con; con = &(*con)->next) {
579 if (*con == uc) {
580 *con = uc->next;
581 ret = true;
582 break;
583 }
584 }
585 up_write(&uprobe->consumer_rwsem);
586
587 return ret;
588}
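
Consumers form a singly linked list headed at uprobe->consumers, which handler_chain() above walks under consumer_rwsem. A minimal consumer matching those callback signatures might look like this (the names, the filter condition, and the printk are illustrative only):

static pid_t sample_tgid;	/* hypothetical: the tgid we want to trace */

static int sample_handler(struct uprobe_consumer *self, struct pt_regs *regs)
{
	pr_info("uprobe hit, ip=%lx\n", instruction_pointer(regs));
	return 0;
}

/* Optional: a NULL ->filter means "all tasks" (see handler_chain()). */
static bool sample_filter(struct uprobe_consumer *self, struct task_struct *t)
{
	return t->tgid == sample_tgid;
}

static struct uprobe_consumer sample_consumer = {
	.handler = sample_handler,
	.filter  = sample_filter,
};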
589
590static int
591__copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn,
592 unsigned long nbytes, unsigned long offset)
593{
594 struct file *filp = vma->vm_file;
595 struct page *page;
596 void *vaddr;
597 unsigned long off1;
598 unsigned long idx;
599
600 if (!filp)
601 return -EINVAL;
602
603 idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT);
604 off1 = offset &= ~PAGE_MASK;
605
606 /*
607 * Ensure that the page that has the original instruction is
608 * populated and in page-cache.
609 */
610 page = read_mapping_page(mapping, idx, filp);
611 if (IS_ERR(page))
612 return PTR_ERR(page);
613
614 vaddr = kmap_atomic(page);
615 memcpy(insn, vaddr + off1, nbytes);
616 kunmap_atomic(vaddr);
617 page_cache_release(page);
618
619 return 0;
620}
621
622static int
623copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
624{
625 struct address_space *mapping;
626 unsigned long nbytes;
627 int bytes;
628
629 addr &= ~PAGE_MASK;
630 nbytes = PAGE_SIZE - addr;
631 mapping = uprobe->inode->i_mapping;
632
633 /* Instruction at end of binary; copy only available bytes */
634 if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size)
635 bytes = uprobe->inode->i_size - uprobe->offset;
636 else
637 bytes = MAX_UINSN_BYTES;
638
639 /* Instruction at the page-boundary; copy bytes in second page */
640 if (nbytes < bytes) {
641 if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes,
642 bytes - nbytes, uprobe->offset + nbytes))
643 return -ENOMEM;
644
645 bytes = nbytes;
646 }
647 return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset);
648}
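
A worked example of the page-boundary split, assuming x86 with MAX_UINSN_BYTES == 16 and PAGE_SIZE == 4096:

/*
 * Probe at page offset 0xff8, well inside the file:
 *
 *   addr  &= ~PAGE_MASK;        -> 0xff8
 *   nbytes = PAGE_SIZE - addr;  -> 8 bytes left in the first page
 *   bytes  = MAX_UINSN_BYTES;   -> 16
 *
 * nbytes < bytes, so the trailing 8 bytes are copied from the second
 * page into arch.insn + 8 first, and only then are the leading 8
 * bytes copied from the first page into arch.insn.
 */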
649
650/*
651 * How mm->uprobes_state.count gets updated:
652 * uprobe_mmap() increments the count if:
653 * - it successfully adds a breakpoint.
654 * - it cannot add a breakpoint, but sees that there is an underlying
655 *   breakpoint (via is_swbp_at_addr()).
656 *
657 * uprobe_munmap() decrements the count if:
658 * - it sees an underlying breakpoint (via is_swbp_at_addr).
659 *   (A subsequent uprobe_unregister wouldn't find the breakpoint
660 *   unless a uprobe_mmap kicks in, since the old vma would be
661 *   dropped just after uprobe_munmap.)
662 *
663 * uprobe_register increments the count if:
664 * - it successfully adds a breakpoint.
665 *
666 * uprobe_unregister decrements the count if:
667 * - it sees an underlying breakpoint and removes it successfully
668 *   (via is_swbp_at_addr).
669 *   (A subsequent uprobe_munmap wouldn't find the breakpoint
670 *   since there is no underlying breakpoint after the
671 *   breakpoint removal.)
672 */
673static int
674install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
675 struct vm_area_struct *vma, loff_t vaddr)
676{
677 unsigned long addr;
678 int ret;
679
680 /*
681 * If the probe is being deleted, the unregistering thread could
682 * already be done with its vma-rmap walk-through. Adding a probe now
683 * can be fatal since nobody will be able to clean it up. We could
684 * also be on the fork or mremap path, where the probe might have
685 * already been inserted. Hence behave as if the probe already existed.
686 */
687 if (!uprobe->consumers)
688 return -EEXIST;
689
690 addr = (unsigned long)vaddr;
691
692 if (!(uprobe->flags & UPROBE_COPY_INSN)) {
693 ret = copy_insn(uprobe, vma, addr);
694 if (ret)
695 return ret;
696
697 if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
698 return -EEXIST;
699
700 ret = arch_uprobe_analyze_insn(&uprobe->arch, mm);
701 if (ret)
702 return ret;
703
704 uprobe->flags |= UPROBE_COPY_INSN;
705 }
706
707 /*
708 * Ideally, we should update the probe count only after the
709 * breakpoint has been successfully inserted. However, a thread
710 * could hit the breakpoint we just inserted even before the probe
711 * count is incremented. If this is the first breakpoint placed,
712 * the breakpoint notifier might ignore uprobes and pass the trap
713 * to the thread. Hence increment before, and decrement on failure.
714 */
715 atomic_inc(&mm->uprobes_state.count);
716 ret = set_swbp(&uprobe->arch, mm, addr);
717 if (ret)
718 atomic_dec(&mm->uprobes_state.count);
719
720 return ret;
721}
722
723static void
724remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
725{
726 if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true))
727 atomic_dec(&mm->uprobes_state.count);
728}
729
730/*
731 * There could be threads that have hit the breakpoint and are entering the
732 * notifier code and trying to acquire the uprobes_treelock. The thread
733 * calling delete_uprobe(), which removes the uprobe from the rb_tree, can
734 * race with these threads and might acquire the uprobes_treelock ahead of
735 * some of the breakpoint-hit threads. In such a case, those threads would
736 * not find the uprobe. Hence the unregistering thread waits (via
737 * synchronize_srcu()) till all threads that have hit the breakpoint have
738 * acquired the uprobes_treelock, before the uprobe is removed from the rbtree.
739 */
740static void delete_uprobe(struct uprobe *uprobe)
741{
742 unsigned long flags;
743
744 synchronize_srcu(&uprobes_srcu);
745 spin_lock_irqsave(&uprobes_treelock, flags);
746 rb_erase(&uprobe->rb_node, &uprobes_tree);
747 spin_unlock_irqrestore(&uprobes_treelock, flags);
748 iput(uprobe->inode);
749 put_uprobe(uprobe);
750 atomic_dec(&uprobe_events);
751}
752
753static struct vma_info *
754__find_next_vma_info(struct address_space *mapping, struct list_head *head,
755 struct vma_info *vi, loff_t offset, bool is_register)
756{
757 struct prio_tree_iter iter;
758 struct vm_area_struct *vma;
759 struct vma_info *tmpvi;
760 unsigned long pgoff;
761 int existing_vma;
762 loff_t vaddr;
763
764 pgoff = offset >> PAGE_SHIFT;
765
766 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
767 if (!valid_vma(vma, is_register))
768 continue;
769
770 existing_vma = 0;
771 vaddr = vma_address(vma, offset);
772
773 list_for_each_entry(tmpvi, head, probe_list) {
774 if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) {
775 existing_vma = 1;
776 break;
777 }
778 }
779
780 /*
781 * Another vma needs a probe to be installed. However, skip
782 * installing the probe if the vma is about to be unlinked.
783 */
784 if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) {
785 vi->mm = vma->vm_mm;
786 vi->vaddr = vaddr;
787 list_add(&vi->probe_list, head);
788
789 return vi;
790 }
791 }
792
793 return NULL;
794}
795
796/*
797 * Iterate in the rmap prio tree and find a vma where a probe has not
798 * yet been inserted.
799 */
800static struct vma_info *
801find_next_vma_info(struct address_space *mapping, struct list_head *head,
802 loff_t offset, bool is_register)
803{
804 struct vma_info *vi, *retvi;
805
806 vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL);
807 if (!vi)
808 return ERR_PTR(-ENOMEM);
809
810 mutex_lock(&mapping->i_mmap_mutex);
811 retvi = __find_next_vma_info(mapping, head, vi, offset, is_register);
812 mutex_unlock(&mapping->i_mmap_mutex);
813
814 if (!retvi)
815 kfree(vi);
816
817 return retvi;
818}
819
820static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
821{
822 struct list_head try_list;
823 struct vm_area_struct *vma;
824 struct address_space *mapping;
825 struct vma_info *vi, *tmpvi;
826 struct mm_struct *mm;
827 loff_t vaddr;
828 int ret;
829
830 mapping = uprobe->inode->i_mapping;
831 INIT_LIST_HEAD(&try_list);
832
833 ret = 0;
834
835 for (;;) {
836 vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register);
837 if (!vi)
838 break;
839
840 if (IS_ERR(vi)) {
841 ret = PTR_ERR(vi);
842 break;
843 }
844
845 mm = vi->mm;
846 down_read(&mm->mmap_sem);
847 vma = find_vma(mm, (unsigned long)vi->vaddr);
848 if (!vma || !valid_vma(vma, is_register)) {
849 list_del(&vi->probe_list);
850 kfree(vi);
851 up_read(&mm->mmap_sem);
852 mmput(mm);
853 continue;
854 }
855 vaddr = vma_address(vma, uprobe->offset);
856 if (vma->vm_file->f_mapping->host != uprobe->inode ||
857 vaddr != vi->vaddr) {
858 list_del(&vi->probe_list);
859 kfree(vi);
860 up_read(&mm->mmap_sem);
861 mmput(mm);
862 continue;
863 }
864
865 if (is_register)
866 ret = install_breakpoint(uprobe, mm, vma, vi->vaddr);
867 else
868 remove_breakpoint(uprobe, mm, vi->vaddr);
869
870 up_read(&mm->mmap_sem);
871 mmput(mm);
872 if (is_register) {
873 if (ret && ret == -EEXIST)
874 ret = 0;
875 if (ret)
876 break;
877 }
878 }
879
880 list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) {
881 list_del(&vi->probe_list);
882 kfree(vi);
883 }
884
885 return ret;
886}
887
888static int __uprobe_register(struct uprobe *uprobe)
889{
890 return register_for_each_vma(uprobe, true);
891}
892
893static void __uprobe_unregister(struct uprobe *uprobe)
894{
895 if (!register_for_each_vma(uprobe, false))
896 delete_uprobe(uprobe);
897
898 /* TODO: can't unregister? schedule a worker thread */
899}
900
901/*
902 * uprobe_register - register a probe
903 * @inode: the file in which the probe has to be placed.
904 * @offset: offset from the start of the file.
905 * @uc: information on how to handle the probe.
906 *
907 * Apart from the access refcount, uprobe_register() takes a creation
908 * refcount (through alloc_uprobe) if and only if this @uprobe is getting
909 * inserted into the rbtree (i.e., the first consumer for an @inode:@offset
910 * tuple). The creation refcount stops uprobe_unregister from freeing the
911 * @uprobe even before the register operation is complete. The creation
912 * refcount is released when the last @uc for the @uprobe
913 * unregisters.
914 *
915 * Return a negative errno if it cannot successfully install probes,
916 * else return 0 (success).
917 */
918int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
919{
920 struct uprobe *uprobe;
921 int ret;
922
923 if (!inode || !uc || uc->next)
924 return -EINVAL;
925
926 if (offset > i_size_read(inode))
927 return -EINVAL;
928
929 ret = 0;
930 mutex_lock(uprobes_hash(inode));
931 uprobe = alloc_uprobe(inode, offset);
932
933 if (uprobe && !consumer_add(uprobe, uc)) {
934 ret = __uprobe_register(uprobe);
935 if (ret) {
936 uprobe->consumers = NULL;
937 __uprobe_unregister(uprobe);
938 } else {
939 uprobe->flags |= UPROBE_RUN_HANDLER;
940 }
941 }
942
943 mutex_unlock(uprobes_hash(inode));
944 put_uprobe(uprobe);
945
946 return ret;
947}
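
A minimal registration sketch for a hypothetical kernel-side caller; the inode is assumed to be pinned by the caller, and the offset is the file offset of the probed instruction (e.g. taken from objdump output):

static struct uprobe_consumer my_consumer = {
	.handler = sample_handler,	/* as sketched after consumer_del() */
};

static int probe_attach(struct inode *inode, loff_t offset)
{
	/* First register of this inode:offset also takes the creation ref. */
	return uprobe_register(inode, offset, &my_consumer);
}

static void probe_detach(struct inode *inode, loff_t offset)
{
	/* The creation ref is dropped when the last consumer goes away. */
	uprobe_unregister(inode, offset, &my_consumer);
}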
948
949/*
950 * uprobe_unregister - unregister an already registered probe.
951 * @inode: the file in which the probe has to be removed.
952 * @offset: offset from the start of the file.
953 * @uc: identify which probe if multiple probes are colocated.
954 */
955void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
956{
957 struct uprobe *uprobe;
958
959 if (!inode || !uc)
960 return;
961
962 uprobe = find_uprobe(inode, offset);
963 if (!uprobe)
964 return;
965
966 mutex_lock(uprobes_hash(inode));
967
968 if (consumer_del(uprobe, uc)) {
969 if (!uprobe->consumers) {
970 __uprobe_unregister(uprobe);
971 uprobe->flags &= ~UPROBE_RUN_HANDLER;
972 }
973 }
974
975 mutex_unlock(uprobes_hash(inode));
976 if (uprobe)
977 put_uprobe(uprobe);
978}
979
980/*
981 * Of all the nodes that correspond to the given inode, return the node
982 * with the least offset.
983 */
984static struct rb_node *find_least_offset_node(struct inode *inode)
985{
986 struct uprobe u = { .inode = inode, .offset = 0};
987 struct rb_node *n = uprobes_tree.rb_node;
988 struct rb_node *close_node = NULL;
989 struct uprobe *uprobe;
990 int match;
991
992 while (n) {
993 uprobe = rb_entry(n, struct uprobe, rb_node);
994 match = match_uprobe(&u, uprobe);
995
996 if (uprobe->inode == inode)
997 close_node = n;
998
999 if (!match)
1000 return close_node;
1001
1002 if (match < 0)
1003 n = n->rb_left;
1004 else
1005 n = n->rb_right;
1006 }
1007
1008 return close_node;
1009}
1010
1011/*
1012 * For a given inode, build a list of probes that need to be inserted.
1013 */
1014static void build_probe_list(struct inode *inode, struct list_head *head)
1015{
1016 struct uprobe *uprobe;
1017 unsigned long flags;
1018 struct rb_node *n;
1019
1020 spin_lock_irqsave(&uprobes_treelock, flags);
1021
1022 n = find_least_offset_node(inode);
1023
1024 for (; n; n = rb_next(n)) {
1025 uprobe = rb_entry(n, struct uprobe, rb_node);
1026 if (uprobe->inode != inode)
1027 break;
1028
1029 list_add(&uprobe->pending_list, head);
1030 atomic_inc(&uprobe->ref);
1031 }
1032
1033 spin_unlock_irqrestore(&uprobes_treelock, flags);
1034}
1035
1036/*
1037 * Called from mmap_region.
1038 * Called with mm->mmap_sem acquired.
1039 *
1040 * Return a negative errno if we fail to insert probes and we cannot
1041 * bail out.
1042 * Return 0 otherwise, i.e. on:
1043 *
1044 * - successful insertion of probes
1045 * - (or) no possible probes to be inserted
1046 * - (or) insertion of probes failed but we can bail out.
1047 */
1048int uprobe_mmap(struct vm_area_struct *vma)
1049{
1050 struct list_head tmp_list;
1051 struct uprobe *uprobe, *u;
1052 struct inode *inode;
1053 int ret, count;
1054
1055 if (!atomic_read(&uprobe_events) || !valid_vma(vma, true))
1056 return 0;
1057
1058 inode = vma->vm_file->f_mapping->host;
1059 if (!inode)
1060 return 0;
1061
1062 INIT_LIST_HEAD(&tmp_list);
1063 mutex_lock(uprobes_mmap_hash(inode));
1064 build_probe_list(inode, &tmp_list);
1065
1066 ret = 0;
1067 count = 0;
1068
1069 list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
1070 loff_t vaddr;
1071
1072 list_del(&uprobe->pending_list);
1073 if (!ret) {
1074 vaddr = vma_address(vma, uprobe->offset);
1075
1076 if (vaddr < vma->vm_start || vaddr >= vma->vm_end) {
1077 put_uprobe(uprobe);
1078 continue;
1079 }
1080
1081 ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
1082
1083 /* Ignore double add: */
1084 if (ret == -EEXIST) {
1085 ret = 0;
1086
1087 if (!is_swbp_at_addr(vma->vm_mm, vaddr))
1088 continue;
1089
1090 /*
1091 * Unable to insert a breakpoint, but
1092 * a breakpoint lies underneath. Increment the
1093 * probe count.
1094 */
1095 atomic_inc(&vma->vm_mm->uprobes_state.count);
1096 }
1097
1098 if (!ret)
1099 count++;
1100 }
1101 put_uprobe(uprobe);
1102 }
1103
1104 mutex_unlock(uprobes_mmap_hash(inode));
1105
1106 if (ret)
1107 atomic_sub(count, &vma->vm_mm->uprobes_state.count);
1108
1109 return ret;
1110}
1111
1112/*
1113 * Called in context of a munmap of a vma.
1114 */
1115void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
1116{
1117 struct list_head tmp_list;
1118 struct uprobe *uprobe, *u;
1119 struct inode *inode;
1120
1121 if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
1122 return;
1123
1124 if (!atomic_read(&vma->vm_mm->uprobes_state.count))
1125 return;
1126
1127 inode = vma->vm_file->f_mapping->host;
1128 if (!inode)
1129 return;
1130
1131 INIT_LIST_HEAD(&tmp_list);
1132 mutex_lock(uprobes_mmap_hash(inode));
1133 build_probe_list(inode, &tmp_list);
1134
1135 list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
1136 loff_t vaddr;
1137
1138 list_del(&uprobe->pending_list);
1139 vaddr = vma_address(vma, uprobe->offset);
1140
1141 if (vaddr >= start && vaddr < end) {
1142 /*
1143 * An unregister could have removed the probe before
1144 * unmap. So check before we decrement the count.
1145 */
1146 if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1)
1147 atomic_dec(&vma->vm_mm->uprobes_state.count);
1148 }
1149 put_uprobe(uprobe);
1150 }
1151 mutex_unlock(uprobes_mmap_hash(inode));
1152}
1153
1154/* Slot allocation for XOL */
1155static int xol_add_vma(struct xol_area *area)
1156{
1157 struct mm_struct *mm;
1158 int ret;
1159
1160 area->page = alloc_page(GFP_HIGHUSER);
1161 if (!area->page)
1162 return -ENOMEM;
1163
1164 ret = -EALREADY;
1165 mm = current->mm;
1166
1167 down_write(&mm->mmap_sem);
1168 if (mm->uprobes_state.xol_area)
1169 goto fail;
1170
1171 ret = -ENOMEM;
1172
1173 /* Try to map as high as possible, this is only a hint. */
1174 area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
1175 if (area->vaddr & ~PAGE_MASK) {
1176 ret = area->vaddr;
1177 goto fail;
1178 }
1179
1180 ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE,
1181 VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO, &area->page);
1182 if (ret)
1183 goto fail;
1184
1185 smp_wmb(); /* pairs with get_xol_area() */
1186 mm->uprobes_state.xol_area = area;
1187 ret = 0;
1188
1189fail:
1190 up_write(&mm->mmap_sem);
1191 if (ret)
1192 __free_page(area->page);
1193
1194 return ret;
1195}
1196
1197static struct xol_area *get_xol_area(struct mm_struct *mm)
1198{
1199 struct xol_area *area;
1200
1201 area = mm->uprobes_state.xol_area;
1202 smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */
1203
1204 return area;
1205}
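
The barriers in xol_add_vma() and get_xol_area() pair up as the usual publish/consume pattern; a sketch of the ordering:

/*
 *   writer: xol_add_vma()           reader: get_xol_area()
 *   -----------------------         -------------------------------
 *   initialize area->page,          area = mm->...xol_area;
 *     area->bitmap, area->wq        smp_read_barrier_depends();
 *   smp_wmb();                      if (area)
 *   mm->...xol_area = area;                 use area->page et al.
 */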
1206
1207/*
1208 * xol_alloc_area - Allocate the process's xol_area.
1209 * This area will be used for storing instructions for execution out of
1210 * line.
1211 *
1212 * Returns the allocated area or NULL.
1213 */
1214static struct xol_area *xol_alloc_area(void)
1215{
1216 struct xol_area *area;
1217
1218 area = kzalloc(sizeof(*area), GFP_KERNEL);
1219 if (unlikely(!area))
1220 return NULL;
1221
1222 area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL);
1223
1224 if (!area->bitmap)
1225 goto fail;
1226
1227 init_waitqueue_head(&area->wq);
1228 if (!xol_add_vma(area))
1229 return area;
1230
1231fail:
1232 kfree(area->bitmap);
1233 kfree(area);
1234
1235 return get_xol_area(current->mm);
1236}
1237
1238/*
1239 * uprobe_clear_state - Free the area allocated for slots.
1240 */
1241void uprobe_clear_state(struct mm_struct *mm)
1242{
1243 struct xol_area *area = mm->uprobes_state.xol_area;
1244
1245 if (!area)
1246 return;
1247
1248 put_page(area->page);
1249 kfree(area->bitmap);
1250 kfree(area);
1251}
1252
1253/*
1254 * uprobe_reset_state - Reset the uprobes state for a newly duplicated mm.
1255 */
1256void uprobe_reset_state(struct mm_struct *mm)
1257{
1258 mm->uprobes_state.xol_area = NULL;
1259 atomic_set(&mm->uprobes_state.count, 0);
1260}
1261
1262/*
1263 * Search for a free slot.
1264 */
1265static unsigned long xol_take_insn_slot(struct xol_area *area)
1266{
1267 unsigned long slot_addr;
1268 int slot_nr;
1269
1270 do {
1271 slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE);
1272 if (slot_nr < UINSNS_PER_PAGE) {
1273 if (!test_and_set_bit(slot_nr, area->bitmap))
1274 break;
1275
1276 slot_nr = UINSNS_PER_PAGE;
1277 continue;
1278 }
1279 wait_event(area->wq, (atomic_read(&area->slot_count) < UINSNS_PER_PAGE));
1280 } while (slot_nr >= UINSNS_PER_PAGE);
1281
1282 slot_addr = area->vaddr + (slot_nr * UPROBE_XOL_SLOT_BYTES);
1283 atomic_inc(&area->slot_count);
1284
1285 return slot_addr;
1286}
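
Slot bookkeeping is a bitmap over a single page. Assuming a slot size (UPROBE_XOL_SLOT_BYTES) of 128 bytes and a 4096-byte page, UINSNS_PER_PAGE is 32 and the address arithmetic works out as:

/*
 *   slot_nr   = 3
 *   slot_addr = area->vaddr + 3 * 128 = area->vaddr + 0x180
 *
 * xol_free_insn_slot() inverts this:
 *   slot_nr = (slot_addr - area->vaddr) / 128 = 3
 */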
1287
1288/*
1289 * xol_get_insn_slot - allocate an instruction slot for the current
1290 * task if it does not already have one.
1291 * Returns the allocated slot address or 0.
1292 */
1293static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot_addr)
1294{
1295 struct xol_area *area;
1296 unsigned long offset;
1297 void *vaddr;
1298
1299 area = get_xol_area(current->mm);
1300 if (!area) {
1301 area = xol_alloc_area();
1302 if (!area)
1303 return 0;
1304 }
1305 current->utask->xol_vaddr = xol_take_insn_slot(area);
1306
1307 /*
1308 * Initialize the slot only if xol_vaddr points to a valid
1309 * instruction slot.
1310 */
1311 if (unlikely(!current->utask->xol_vaddr))
1312 return 0;
1313
1314 current->utask->vaddr = slot_addr;
1315 offset = current->utask->xol_vaddr & ~PAGE_MASK;
1316 vaddr = kmap_atomic(area->page);
1317 memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
1318 kunmap_atomic(vaddr);
1319
1320 return current->utask->xol_vaddr;
1321}
1322
1323/*
1324 * xol_free_insn_slot - If a slot was earlier allocated by
1325 * xol_get_insn_slot(), make the slot available for
1326 * subsequent requests.
1327 */
1328static void xol_free_insn_slot(struct task_struct *tsk)
1329{
1330 struct xol_area *area;
1331 unsigned long vma_end;
1332 unsigned long slot_addr;
1333
1334 if (!tsk->mm || !tsk->mm->uprobes_state.xol_area || !tsk->utask)
1335 return;
1336
1337 slot_addr = tsk->utask->xol_vaddr;
1338
1339 if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr)))
1340 return;
1341
1342 area = tsk->mm->uprobes_state.xol_area;
1343 vma_end = area->vaddr + PAGE_SIZE;
1344 if (area->vaddr <= slot_addr && slot_addr < vma_end) {
1345 unsigned long offset;
1346 int slot_nr;
1347
1348 offset = slot_addr - area->vaddr;
1349 slot_nr = offset / UPROBE_XOL_SLOT_BYTES;
1350 if (slot_nr >= UINSNS_PER_PAGE)
1351 return;
1352
1353 clear_bit(slot_nr, area->bitmap);
1354 atomic_dec(&area->slot_count);
1355 if (waitqueue_active(&area->wq))
1356 wake_up(&area->wq);
1357
1358 tsk->utask->xol_vaddr = 0;
1359 }
1360}
1361
1362/**
1363 * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs
1364 * @regs: Reflects the saved state of the task after it has hit a breakpoint
1365 * instruction.
1366 * Return the address of the breakpoint instruction.
1367 */
1368unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
1369{
1370 return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
1371}
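
On x86, for instance, UPROBE_SWBP_INSN is the one-byte int3 (0xcc), so UPROBE_SWBP_INSN_SIZE is 1:

/*
 * x86 example: a trap on the breakpoint at vaddr leaves
 * regs->ip == vaddr + 1, so uprobe_get_swbp_addr() returns
 * ip - 1 == vaddr.
 */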
1372
1373/*
1374 * Called with no locks held.
1375 * Called in the context of an exiting or an exec-ing thread.
1376 */
1377void uprobe_free_utask(struct task_struct *t)
1378{
1379 struct uprobe_task *utask = t->utask;
1380
1381 if (t->uprobe_srcu_id != -1)
1382 srcu_read_unlock_raw(&uprobes_srcu, t->uprobe_srcu_id);
1383
1384 if (!utask)
1385 return;
1386
1387 if (utask->active_uprobe)
1388 put_uprobe(utask->active_uprobe);
1389
1390 xol_free_insn_slot(t);
1391 kfree(utask);
1392 t->utask = NULL;
1393}
1394
1395/*
1396 * Called in the context of a new clone/fork from copy_process().
1397 */
1398void uprobe_copy_process(struct task_struct *t)
1399{
1400 t->utask = NULL;
1401 t->uprobe_srcu_id = -1;
1402}
1403
1404/*
1405 * Allocate a uprobe_task object for the task.
1406 * Called when the thread hits a breakpoint for the first time.
1407 *
1408 * Returns:
1409 * - pointer to new uprobe_task on success
1410 * - NULL otherwise
1411 */
1412static struct uprobe_task *add_utask(void)
1413{
1414 struct uprobe_task *utask;
1415
1416 utask = kzalloc(sizeof *utask, GFP_KERNEL);
1417 if (unlikely(!utask))
1418 return NULL;
1419
1420 utask->active_uprobe = NULL;
1421 current->utask = utask;
1422 return utask;
1423}
1424
1425/* Prepare to single-step probed instruction out of line. */
1426static int
1427pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr)
1428{
1429 if (xol_get_insn_slot(uprobe, vaddr) && !arch_uprobe_pre_xol(&uprobe->arch, regs))
1430 return 0;
1431
1432 return -EFAULT;
1433}
1434
1435/*
1436 * If we are singlestepping, then ensure this thread is not connected to
1437 * non-fatal signals until completion of singlestep. When xol insn itself
1438 * triggers the signal, restart the original insn even if the task is
1439 * already SIGKILL'ed (since the coredump should report the correct ip). This
1440 * is even more important if the task has a handler for SIGSEGV/etc.: the
1441 * _same_ instruction would be repeated again after return from the signal
1442 * handler, and SSTEP could never finish in this case.
1443 */
1444bool uprobe_deny_signal(void)
1445{
1446 struct task_struct *t = current;
1447 struct uprobe_task *utask = t->utask;
1448
1449 if (likely(!utask || !utask->active_uprobe))
1450 return false;
1451
1452 WARN_ON_ONCE(utask->state != UTASK_SSTEP);
1453
1454 if (signal_pending(t)) {
1455 spin_lock_irq(&t->sighand->siglock);
1456 clear_tsk_thread_flag(t, TIF_SIGPENDING);
1457 spin_unlock_irq(&t->sighand->siglock);
1458
1459 if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) {
1460 utask->state = UTASK_SSTEP_TRAPPED;
1461 set_tsk_thread_flag(t, TIF_UPROBE);
1462 set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
1463 }
1464 }
1465
1466 return true;
1467}
1468
1469/*
1470 * Avoid singlestepping the original instruction if the original instruction
1471 * is a NOP or can be emulated.
1472 */
1473static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
1474{
1475 if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
1476 return true;
1477
1478 uprobe->flags &= ~UPROBE_SKIP_SSTEP;
1479 return false;
1480}
1481
1482/*
1483 * Run the handler and ask the thread to singlestep.
1484 * Ensure all non-fatal signals cannot interrupt the thread while it singlesteps.
1485 */
1486static void handle_swbp(struct pt_regs *regs)
1487{
1488 struct vm_area_struct *vma;
1489 struct uprobe_task *utask;
1490 struct uprobe *uprobe;
1491 struct mm_struct *mm;
1492 unsigned long bp_vaddr;
1493
1494 uprobe = NULL;
1495 bp_vaddr = uprobe_get_swbp_addr(regs);
1496 mm = current->mm;
1497 down_read(&mm->mmap_sem);
1498 vma = find_vma(mm, bp_vaddr);
1499
1500 if (vma && vma->vm_start <= bp_vaddr && valid_vma(vma, false)) {
1501 struct inode *inode;
1502 loff_t offset;
1503
1504 inode = vma->vm_file->f_mapping->host;
1505 offset = bp_vaddr - vma->vm_start;
1506 offset += (vma->vm_pgoff << PAGE_SHIFT);
1507 uprobe = find_uprobe(inode, offset);
1508 }
1509
1510 srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id);
1511 current->uprobe_srcu_id = -1;
1512 up_read(&mm->mmap_sem);
1513
1514 if (!uprobe) {
1515 /* No matching uprobe; signal SIGTRAP. */
1516 send_sig(SIGTRAP, current, 0);
1517 return;
1518 }
1519
1520 utask = current->utask;
1521 if (!utask) {
1522 utask = add_utask();
1523 /* Cannot allocate; re-execute the instruction. */
1524 if (!utask)
1525 goto cleanup_ret;
1526 }
1527 utask->active_uprobe = uprobe;
1528 handler_chain(uprobe, regs);
1529 if (uprobe->flags & UPROBE_SKIP_SSTEP && can_skip_sstep(uprobe, regs))
1530 goto cleanup_ret;
1531
1532 utask->state = UTASK_SSTEP;
1533 if (!pre_ssout(uprobe, regs, bp_vaddr)) {
1534 user_enable_single_step(current);
1535 return;
1536 }
1537
1538cleanup_ret:
1539 if (utask) {
1540 utask->active_uprobe = NULL;
1541 utask->state = UTASK_RUNNING;
1542 }
1543 if (uprobe) {
1544 if (!(uprobe->flags & UPROBE_SKIP_SSTEP))
1545
1546 /*
1547 * cannot singlestep; cannot skip instruction;
1548 * re-execute the instruction.
1549 */
1550 instruction_pointer_set(regs, bp_vaddr);
1551
1552 put_uprobe(uprobe);
1553 }
1554}
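
The vaddr-to-file-offset translation in handle_swbp() is the inverse of vma_address(). A worked example with illustrative numbers:

/*
 * Text segment mapped at vm_start == 0x400000 with vm_pgoff == 0;
 * a breakpoint trap at bp_vaddr == 0x42b0c0 gives:
 *
 *   offset  = 0x42b0c0 - 0x400000;       -> 0x2b0c0
 *   offset += vm_pgoff << PAGE_SHIFT;    -> 0x2b0c0
 *
 * find_uprobe(inode, 0x2b0c0) then looks up the rbtree node that
 * uprobe_register() created for this inode:offset.
 */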
1555
1556/*
1557 * Perform required fix-ups and disable singlestep.
1558 * Allow pending signals to take effect.
1559 */
1560static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
1561{
1562 struct uprobe *uprobe;
1563
1564 uprobe = utask->active_uprobe;
1565 if (utask->state == UTASK_SSTEP_ACK)
1566 arch_uprobe_post_xol(&uprobe->arch, regs);
1567 else if (utask->state == UTASK_SSTEP_TRAPPED)
1568 arch_uprobe_abort_xol(&uprobe->arch, regs);
1569 else
1570 WARN_ON_ONCE(1);
1571
1572 put_uprobe(uprobe);
1573 utask->active_uprobe = NULL;
1574 utask->state = UTASK_RUNNING;
1575 user_disable_single_step(current);
1576 xol_free_insn_slot(current);
1577
1578 spin_lock_irq(&current->sighand->siglock);
1579 recalc_sigpending(); /* see uprobe_deny_signal() */
1580 spin_unlock_irq(&current->sighand->siglock);
1581}
1582
1583/*
1584 * On breakpoint hit, the breakpoint notifier sets the TIF_UPROBE flag (and,
1585 * on subsequent probe hits on the thread, sets the state to UTASK_BP_HIT) and
1586 * allows the thread to return from the interrupt.
1587 *
1588 * On a singlestep exception, the singlestep notifier sets the TIF_UPROBE flag,
1589 * sets the state to UTASK_SSTEP_ACK, and allows the thread to return from the
1590 * interrupt.
1591 *
1592 * While returning to userspace, the thread notices the TIF_UPROBE flag and calls
1593 * uprobe_notify_resume().
1594 */
1595void uprobe_notify_resume(struct pt_regs *regs)
1596{
1597 struct uprobe_task *utask;
1598
1599 utask = current->utask;
1600 if (!utask || utask->state == UTASK_BP_HIT)
1601 handle_swbp(regs);
1602 else
1603 handle_singlestep(utask, regs);
1604}
1605
1606/*
1607 * uprobe_pre_sstep_notifier gets called from interrupt context as part of
1608 * the notifier mechanism. Set the TIF_UPROBE flag and indicate a breakpoint hit.
1609 */
1610int uprobe_pre_sstep_notifier(struct pt_regs *regs)
1611{
1612 struct uprobe_task *utask;
1613
1614 if (!current->mm || !atomic_read(&current->mm->uprobes_state.count))
1615 /* task is currently not uprobed */
1616 return 0;
1617
1618 utask = current->utask;
1619 if (utask)
1620 utask->state = UTASK_BP_HIT;
1621
1622 set_thread_flag(TIF_UPROBE);
1623 current->uprobe_srcu_id = srcu_read_lock_raw(&uprobes_srcu);
1624
1625 return 1;
1626}
1627
1628/*
1629 * uprobe_post_sstep_notifier gets called in interrupt context as part of the
1630 * notifier mechanism. Set the TIF_UPROBE flag and indicate completion of singlestep.
1631 */
1632int uprobe_post_sstep_notifier(struct pt_regs *regs)
1633{
1634 struct uprobe_task *utask = current->utask;
1635
1636 if (!current->mm || !utask || !utask->active_uprobe)
1637 /* task is currently not uprobed */
1638 return 0;
1639
1640 utask->state = UTASK_SSTEP_ACK;
1641 set_thread_flag(TIF_UPROBE);
1642 return 1;
1643}
1644
1645static struct notifier_block uprobe_exception_nb = {
1646 .notifier_call = arch_uprobe_exception_notify,
1647 .priority = INT_MAX-1, /* notified after kprobes, kgdb */
1648};
1649
1650static int __init init_uprobes(void)
1651{
1652 int i;
1653
1654 for (i = 0; i < UPROBES_HASH_SZ; i++) {
1655 mutex_init(&uprobes_mutex[i]);
1656 mutex_init(&uprobes_mmap_mutex[i]);
1657 }
1658 init_srcu_struct(&uprobes_srcu);
1659
1660 return register_die_notifier(&uprobe_exception_nb);
1661}
1662module_init(init_uprobes);
1663
1664static void __exit exit_uprobes(void)
1665{
1666}
1667module_exit(exit_uprobes);
diff --git a/kernel/fork.c b/kernel/fork.c
index 05c813dc9ecc..47b4e4f379f9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -69,6 +69,7 @@
69#include <linux/oom.h> 69#include <linux/oom.h>
70#include <linux/khugepaged.h> 70#include <linux/khugepaged.h>
71#include <linux/signalfd.h> 71#include <linux/signalfd.h>
72#include <linux/uprobes.h>
72 73
73#include <asm/pgtable.h> 74#include <asm/pgtable.h>
74#include <asm/pgalloc.h> 75#include <asm/pgalloc.h>
@@ -451,6 +452,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
451 452
452 if (retval) 453 if (retval)
453 goto out; 454 goto out;
455
456 if (file && uprobe_mmap(tmp))
457 goto out;
454 } 458 }
455 /* a new mm has just been created */ 459 /* a new mm has just been created */
456 arch_dup_mmap(oldmm, mm); 460 arch_dup_mmap(oldmm, mm);
@@ -599,6 +603,7 @@ void mmput(struct mm_struct *mm)
599 might_sleep(); 603 might_sleep();
600 604
601 if (atomic_dec_and_test(&mm->mm_users)) { 605 if (atomic_dec_and_test(&mm->mm_users)) {
606 uprobe_clear_state(mm);
602 exit_aio(mm); 607 exit_aio(mm);
603 ksm_exit(mm); 608 ksm_exit(mm);
604 khugepaged_exit(mm); /* must run before exit_mmap */ 609 khugepaged_exit(mm); /* must run before exit_mmap */
@@ -777,6 +782,8 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
777 exit_pi_state_list(tsk); 782 exit_pi_state_list(tsk);
778#endif 783#endif
779 784
785 uprobe_free_utask(tsk);
786
780 /* Get rid of any cached register state */ 787 /* Get rid of any cached register state */
781 deactivate_mm(tsk, mm); 788 deactivate_mm(tsk, mm);
782 789
@@ -831,6 +838,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
831#ifdef CONFIG_TRANSPARENT_HUGEPAGE 838#ifdef CONFIG_TRANSPARENT_HUGEPAGE
832 mm->pmd_huge_pte = NULL; 839 mm->pmd_huge_pte = NULL;
833#endif 840#endif
841 uprobe_reset_state(mm);
834 842
835 if (!mm_init(mm, tsk)) 843 if (!mm_init(mm, tsk))
836 goto fail_nomem; 844 goto fail_nomem;
@@ -1373,6 +1381,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1373 INIT_LIST_HEAD(&p->pi_state_list); 1381 INIT_LIST_HEAD(&p->pi_state_list);
1374 p->pi_state_cache = NULL; 1382 p->pi_state_cache = NULL;
1375#endif 1383#endif
1384 uprobe_copy_process(p);
1376 /* 1385 /*
1377 * sigaltstack should be cleared when sharing the same VM 1386 * sigaltstack should be cleared when sharing the same VM
1378 */ 1387 */
diff --git a/kernel/signal.c b/kernel/signal.c
index 4dbf00dfb359..f7b418217633 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -29,6 +29,7 @@
29#include <linux/pid_namespace.h> 29#include <linux/pid_namespace.h>
30#include <linux/nsproxy.h> 30#include <linux/nsproxy.h>
31#include <linux/user_namespace.h> 31#include <linux/user_namespace.h>
32#include <linux/uprobes.h>
32#define CREATE_TRACE_POINTS 33#define CREATE_TRACE_POINTS
33#include <trace/events/signal.h> 34#include <trace/events/signal.h>
34 35
@@ -2191,6 +2192,9 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
2191 struct signal_struct *signal = current->signal; 2192 struct signal_struct *signal = current->signal;
2192 int signr; 2193 int signr;
2193 2194
2195 if (unlikely(uprobe_deny_signal()))
2196 return 0;
2197
2194relock: 2198relock:
2195 /* 2199 /*
2196 * We'll jump back here after any time we were stopped in TASK_STOPPED. 2200 * We'll jump back here after any time we were stopped in TASK_STOPPED.
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index f347ac91292d..8c4c07071cc5 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -372,6 +372,7 @@ config KPROBE_EVENT
372 depends on HAVE_REGS_AND_STACK_ACCESS_API 372 depends on HAVE_REGS_AND_STACK_ACCESS_API
373 bool "Enable kprobes-based dynamic events" 373 bool "Enable kprobes-based dynamic events"
374 select TRACING 374 select TRACING
375 select PROBE_EVENTS
375 default y 376 default y
376 help 377 help
377 This allows the user to add tracing events (similar to tracepoints) 378 This allows the user to add tracing events (similar to tracepoints)
@@ -384,6 +385,25 @@ config KPROBE_EVENT
384 This option is also required by perf-probe subcommand of perf tools. 385 This option is also required by perf-probe subcommand of perf tools.
385 If you want to use perf tools, this option is strongly recommended. 386 If you want to use perf tools, this option is strongly recommended.
386 387
388config UPROBE_EVENT
389 bool "Enable uprobes-based dynamic events"
390 depends on ARCH_SUPPORTS_UPROBES
391 depends on MMU
392 select UPROBES
393 select PROBE_EVENTS
394 select TRACING
395 default n
396 help
397 This allows the user to add tracing events on top of userspace
398 dynamic events (similar to tracepoints) on the fly via the trace
399 events interface. Those events can be inserted wherever uprobes
400 can probe, and record various registers.
401 This option is required if you plan to use perf-probe subcommand
402 of perf tools on user space applications.
403
404config PROBE_EVENTS
405 def_bool n
406
387config DYNAMIC_FTRACE 407config DYNAMIC_FTRACE
388 bool "enable/disable ftrace tracepoints dynamically" 408 bool "enable/disable ftrace tracepoints dynamically"
389 depends on FUNCTION_TRACER 409 depends on FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index b3afe0e76f79..b831087c8200 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -60,5 +60,7 @@ endif
60ifeq ($(CONFIG_TRACING),y) 60ifeq ($(CONFIG_TRACING),y)
61obj-$(CONFIG_KGDB_KDB) += trace_kdb.o 61obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
62endif 62endif
63obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
64obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
63 65
64libftrace-y := ftrace.o 66libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6c6f7933eede..5aec220d2de0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -103,6 +103,11 @@ struct kretprobe_trace_entry_head {
103 unsigned long ret_ip; 103 unsigned long ret_ip;
104}; 104};
105 105
106struct uprobe_trace_entry_head {
107 struct trace_entry ent;
108 unsigned long ip;
109};
110
106/* 111/*
107 * trace_flag_type is an enumeration that holds different 112 * trace_flag_type is an enumeration that holds different
108 * states when a trace occurs. These are: 113 * states when a trace occurs. These are:
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 580a05ec926b..b31d3d5699fe 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -19,547 +19,15 @@
19 19
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32#include <linux/stringify.h>
33#include <linux/limits.h>
34#include <asm/bitsperlong.h>
35
36#include "trace.h"
37#include "trace_output.h"
38
39#define MAX_TRACE_ARGS 128
40#define MAX_ARGSTR_LEN 63
41#define MAX_EVENT_NAME_LEN 64
42#define MAX_STRING_SIZE PATH_MAX
43#define KPROBE_EVENT_SYSTEM "kprobes"
44
45/* Reserved field names */
46#define FIELD_STRING_IP "__probe_ip"
47#define FIELD_STRING_RETIP "__probe_ret_ip"
48#define FIELD_STRING_FUNC "__probe_func"
49
50const char *reserved_field_names[] = {
51 "common_type",
52 "common_flags",
53 "common_preempt_count",
54 "common_pid",
55 "common_tgid",
56 FIELD_STRING_IP,
57 FIELD_STRING_RETIP,
58 FIELD_STRING_FUNC,
59};
60
61/* Printing function type */
62typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *,
63 void *);
64#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
65#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
66
67/* Printing in basic type function template */
68#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
69static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
70 const char *name, \
71 void *data, void *ent)\
72{ \
73 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
74} \
75static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
76
77DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
78DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
79DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
80DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
81DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
82DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
83DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
84DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
85
86/* data_rloc: data relative location, compatible with u32 */
87#define make_data_rloc(len, roffs) \
88 (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
89#define get_rloc_len(dl) ((u32)(dl) >> 16)
90#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
91
92static inline void *get_rloc_data(u32 *dl)
93{
94 return (u8 *)dl + get_rloc_offs(*dl);
95}
96
97/* For data_loc conversion */
98static inline void *get_loc_data(u32 *dl, void *ent)
99{
100 return (u8 *)ent + get_rloc_offs(*dl);
101}
102
103/*
104 * Convert data_rloc to data_loc:
105 * data_rloc stores the offset from data_rloc itself, but data_loc
106 * stores the offset from event entry.
107 */
108#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
109
110/* For defining macros, define string/string_size types */
111typedef u32 string;
112typedef u32 string_size;
113
114/* Print type function for string type */
115static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
116 const char *name,
117 void *data, void *ent)
118{
119 int len = *(u32 *)data >> 16;
120
121 if (!len)
122 return trace_seq_printf(s, " %s=(fault)", name);
123 else
124 return trace_seq_printf(s, " %s=\"%s\"", name,
125 (const char *)get_loc_data(data, ent));
126}
127static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
128
129/* Data fetch function type */
130typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
131
132struct fetch_param {
133 fetch_func_t fn;
134 void *data;
135};
136
137static __kprobes void call_fetch(struct fetch_param *fprm,
138 struct pt_regs *regs, void *dest)
139{
140 return fprm->fn(regs, fprm->data, dest);
141}
142
143#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
144/*
145 * Define macro for basic types - we don't need to define s* types, because
146 * we have to care only about bitwidth at recording time.
147 */
148#define DEFINE_BASIC_FETCH_FUNCS(method) \
149DEFINE_FETCH_##method(u8) \
150DEFINE_FETCH_##method(u16) \
151DEFINE_FETCH_##method(u32) \
152DEFINE_FETCH_##method(u64)
153
154#define CHECK_FETCH_FUNCS(method, fn) \
155 (((FETCH_FUNC_NAME(method, u8) == fn) || \
156 (FETCH_FUNC_NAME(method, u16) == fn) || \
157 (FETCH_FUNC_NAME(method, u32) == fn) || \
158 (FETCH_FUNC_NAME(method, u64) == fn) || \
159 (FETCH_FUNC_NAME(method, string) == fn) || \
160 (FETCH_FUNC_NAME(method, string_size) == fn)) \
161 && (fn != NULL))
162
163/* Data fetch function templates */
164#define DEFINE_FETCH_reg(type) \
165static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
166 void *offset, void *dest) \
167{ \
168 *(type *)dest = (type)regs_get_register(regs, \
169 (unsigned int)((unsigned long)offset)); \
170}
171DEFINE_BASIC_FETCH_FUNCS(reg)
172/* No string on the register */
173#define fetch_reg_string NULL
174#define fetch_reg_string_size NULL
175
176#define DEFINE_FETCH_stack(type) \
177static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
178 void *offset, void *dest) \
179{ \
180 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
181 (unsigned int)((unsigned long)offset)); \
182}
183DEFINE_BASIC_FETCH_FUNCS(stack)
184/* No string on the stack entry */
185#define fetch_stack_string NULL
186#define fetch_stack_string_size NULL
187
188#define DEFINE_FETCH_retval(type) \
189static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
190 void *dummy, void *dest) \
191{ \
192 *(type *)dest = (type)regs_return_value(regs); \
193}
194DEFINE_BASIC_FETCH_FUNCS(retval)
195/* No string on the retval */
196#define fetch_retval_string NULL
197#define fetch_retval_string_size NULL
198
199#define DEFINE_FETCH_memory(type) \
200static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
201 void *addr, void *dest) \
202{ \
203 type retval; \
204 if (probe_kernel_address(addr, retval)) \
205 *(type *)dest = 0; \
206 else \
207 *(type *)dest = retval; \
208}
209DEFINE_BASIC_FETCH_FUNCS(memory)
210/*
211 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
212 * length and relative data location.
213 */
214static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
215 void *addr, void *dest)
216{
217 long ret;
218 int maxlen = get_rloc_len(*(u32 *)dest);
219 u8 *dst = get_rloc_data(dest);
220 u8 *src = addr;
221 mm_segment_t old_fs = get_fs();
222 if (!maxlen)
223 return;
224 /*
225 * Try to get string again, since the string can be changed while
226 * probing.
227 */
228 set_fs(KERNEL_DS);
229 pagefault_disable();
230 do
231 ret = __copy_from_user_inatomic(dst++, src++, 1);
232 while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
233 dst[-1] = '\0';
234 pagefault_enable();
235 set_fs(old_fs);
236
237 if (ret < 0) { /* Failed to fetch string */
238 ((u8 *)get_rloc_data(dest))[0] = '\0';
239 *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
240 } else
241 *(u32 *)dest = make_data_rloc(src - (u8 *)addr,
242 get_rloc_offs(*(u32 *)dest));
243}
244/* Return the length of string -- including null terminal byte */
245static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
246 void *addr, void *dest)
247{
248 int ret, len = 0;
249 u8 c;
250 mm_segment_t old_fs = get_fs();
251
252 set_fs(KERNEL_DS);
253 pagefault_disable();
254 do {
255 ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
256 len++;
257 } while (c && ret == 0 && len < MAX_STRING_SIZE);
258 pagefault_enable();
259 set_fs(old_fs);
260
261 if (ret < 0) /* Failed to check the length */
262 *(u32 *)dest = 0;
263 else
264 *(u32 *)dest = len;
265}
266
267/* Memory fetching by symbol */
268struct symbol_cache {
269 char *symbol;
270 long offset;
271 unsigned long addr;
272};
273
274static unsigned long update_symbol_cache(struct symbol_cache *sc)
275{
276 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
277 if (sc->addr)
278 sc->addr += sc->offset;
279 return sc->addr;
280}
281
282static void free_symbol_cache(struct symbol_cache *sc)
283{
284 kfree(sc->symbol);
285 kfree(sc);
286}
287
288static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
289{
290 struct symbol_cache *sc;
291
292 if (!sym || strlen(sym) == 0)
293 return NULL;
294 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
295 if (!sc)
296 return NULL;
297
298 sc->symbol = kstrdup(sym, GFP_KERNEL);
299 if (!sc->symbol) {
300 kfree(sc);
301 return NULL;
302 }
303 sc->offset = offset;
304 22
305 update_symbol_cache(sc); 23#include "trace_probe.h"
306 return sc;
307}
308
309#define DEFINE_FETCH_symbol(type) \
310static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
311 void *data, void *dest) \
312{ \
313 struct symbol_cache *sc = data; \
314 if (sc->addr) \
315 fetch_memory_##type(regs, (void *)sc->addr, dest); \
316 else \
317 *(type *)dest = 0; \
318}
319DEFINE_BASIC_FETCH_FUNCS(symbol)
320DEFINE_FETCH_symbol(string)
321DEFINE_FETCH_symbol(string_size)
322
323/* Dereference memory access function */
324struct deref_fetch_param {
325 struct fetch_param orig;
326 long offset;
327};
328
329#define DEFINE_FETCH_deref(type) \
330static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
331 void *data, void *dest) \
332{ \
333 struct deref_fetch_param *dprm = data; \
334 unsigned long addr; \
335 call_fetch(&dprm->orig, regs, &addr); \
336 if (addr) { \
337 addr += dprm->offset; \
338 fetch_memory_##type(regs, (void *)addr, dest); \
339 } else \
340 *(type *)dest = 0; \
341}
342DEFINE_BASIC_FETCH_FUNCS(deref)
343DEFINE_FETCH_deref(string)
344DEFINE_FETCH_deref(string_size)
345
346static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
347{
348 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
349 update_deref_fetch_param(data->orig.data);
350 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
351 update_symbol_cache(data->orig.data);
352}
353
354static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
355{
356 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
357 free_deref_fetch_param(data->orig.data);
358 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
359 free_symbol_cache(data->orig.data);
360 kfree(data);
361}
362
363/* Bitfield fetch function */
364struct bitfield_fetch_param {
365 struct fetch_param orig;
366 unsigned char hi_shift;
367 unsigned char low_shift;
368};
369 24
370#define DEFINE_FETCH_bitfield(type) \ 25#define KPROBE_EVENT_SYSTEM "kprobes"
371static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
372 void *data, void *dest) \
373{ \
374 struct bitfield_fetch_param *bprm = data; \
375 type buf = 0; \
376 call_fetch(&bprm->orig, regs, &buf); \
377 if (buf) { \
378 buf <<= bprm->hi_shift; \
379 buf >>= bprm->low_shift; \
380 } \
381 *(type *)dest = buf; \
382}
383DEFINE_BASIC_FETCH_FUNCS(bitfield)
384#define fetch_bitfield_string NULL
385#define fetch_bitfield_string_size NULL
386
387static __kprobes void
388update_bitfield_fetch_param(struct bitfield_fetch_param *data)
389{
390 /*
391 * Don't check the bitfield itself, because this must be the
392 * last fetch function.
393 */
394 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
395 update_deref_fetch_param(data->orig.data);
396 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
397 update_symbol_cache(data->orig.data);
398}
399
400static __kprobes void
401free_bitfield_fetch_param(struct bitfield_fetch_param *data)
402{
403 /*
404 * Don't check the bitfield itself, because this must be the
405 * last fetch function.
406 */
407 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
408 free_deref_fetch_param(data->orig.data);
409 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
410 free_symbol_cache(data->orig.data);
411 kfree(data);
412}
413
414/* Default (unsigned long) fetch type */
415#define __DEFAULT_FETCH_TYPE(t) u##t
416#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
417#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
418#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
419
420/* Fetch types */
421enum {
422 FETCH_MTD_reg = 0,
423 FETCH_MTD_stack,
424 FETCH_MTD_retval,
425 FETCH_MTD_memory,
426 FETCH_MTD_symbol,
427 FETCH_MTD_deref,
428 FETCH_MTD_bitfield,
429 FETCH_MTD_END,
430};
431
432#define ASSIGN_FETCH_FUNC(method, type) \
433 [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
434
435#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
436 {.name = _name, \
437 .size = _size, \
438 .is_signed = sign, \
439 .print = PRINT_TYPE_FUNC_NAME(ptype), \
440 .fmt = PRINT_TYPE_FMT_NAME(ptype), \
441 .fmttype = _fmttype, \
442 .fetch = { \
443ASSIGN_FETCH_FUNC(reg, ftype), \
444ASSIGN_FETCH_FUNC(stack, ftype), \
445ASSIGN_FETCH_FUNC(retval, ftype), \
446ASSIGN_FETCH_FUNC(memory, ftype), \
447ASSIGN_FETCH_FUNC(symbol, ftype), \
448ASSIGN_FETCH_FUNC(deref, ftype), \
449ASSIGN_FETCH_FUNC(bitfield, ftype), \
450 } \
451 }
452
453#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
454 __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
455
456#define FETCH_TYPE_STRING 0
457#define FETCH_TYPE_STRSIZE 1
458
459/* Fetch type information table */
460static const struct fetch_type {
461 const char *name; /* Name of type */
462 size_t size; /* Byte size of type */
463 int is_signed; /* Signed flag */
464 print_type_func_t print; /* Print functions */
465 const char *fmt; /* Fromat string */
466 const char *fmttype; /* Name in format file */
467 /* Fetch functions */
468 fetch_func_t fetch[FETCH_MTD_END];
469} fetch_type_table[] = {
470 /* Special types */
471 [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
472 sizeof(u32), 1, "__data_loc char[]"),
473 [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
474 string_size, sizeof(u32), 0, "u32"),
475 /* Basic types */
476 ASSIGN_FETCH_TYPE(u8, u8, 0),
477 ASSIGN_FETCH_TYPE(u16, u16, 0),
478 ASSIGN_FETCH_TYPE(u32, u32, 0),
479 ASSIGN_FETCH_TYPE(u64, u64, 0),
480 ASSIGN_FETCH_TYPE(s8, u8, 1),
481 ASSIGN_FETCH_TYPE(s16, u16, 1),
482 ASSIGN_FETCH_TYPE(s32, u32, 1),
483 ASSIGN_FETCH_TYPE(s64, u64, 1),
484};
485
486static const struct fetch_type *find_fetch_type(const char *type)
487{
488 int i;
489
490 if (!type)
491 type = DEFAULT_FETCH_TYPE_STR;
492
493 /* Special case: bitfield */
494 if (*type == 'b') {
495 unsigned long bs;
496 type = strchr(type, '/');
497 if (!type)
498 goto fail;
499 type++;
500 if (strict_strtoul(type, 0, &bs))
501 goto fail;
502 switch (bs) {
503 case 8:
504 return find_fetch_type("u8");
505 case 16:
506 return find_fetch_type("u16");
507 case 32:
508 return find_fetch_type("u32");
509 case 64:
510 return find_fetch_type("u64");
511 default:
512 goto fail;
513 }
514 }
515
516 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
517 if (strcmp(type, fetch_type_table[i].name) == 0)
518 return &fetch_type_table[i];
519fail:
520 return NULL;
521}
522
523/* Special function : only accept unsigned long */
524static __kprobes void fetch_stack_address(struct pt_regs *regs,
525 void *dummy, void *dest)
526{
527 *(unsigned long *)dest = kernel_stack_pointer(regs);
528}
529
530static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
531 fetch_func_t orig_fn)
532{
533 int i;
534
535 if (type != &fetch_type_table[FETCH_TYPE_STRING])
536 return NULL; /* Only string type needs size function */
537 for (i = 0; i < FETCH_MTD_END; i++)
538 if (type->fetch[i] == orig_fn)
539 return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
540
541 WARN_ON(1); /* This should not happen */
542 return NULL;
543}
544 26
545/** 27/**
546 * Kprobe event core functions 28 * Kprobe event core functions
547 */ 29 */
548 30
549struct probe_arg {
550 struct fetch_param fetch;
551 struct fetch_param fetch_size;
552 unsigned int offset; /* Offset from argument entry */
553 const char *name; /* Name of this argument */
554 const char *comm; /* Command of this argument */
555 const struct fetch_type *type; /* Type of this argument */
556};
557
558/* Flags for trace_probe */
559#define TP_FLAG_TRACE 1
560#define TP_FLAG_PROFILE 2
561#define TP_FLAG_REGISTERED 4
562
563struct trace_probe { 31struct trace_probe {
564 struct list_head list; 32 struct list_head list;
565 struct kretprobe rp; /* Use rp.kp for kprobe use */ 33 struct kretprobe rp; /* Use rp.kp for kprobe use */
@@ -631,18 +99,6 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
631static int kretprobe_dispatcher(struct kretprobe_instance *ri, 99static int kretprobe_dispatcher(struct kretprobe_instance *ri,
632 struct pt_regs *regs); 100 struct pt_regs *regs);
633 101
634/* Check the name is good for event/group/fields */
635static int is_good_name(const char *name)
636{
637 if (!isalpha(*name) && *name != '_')
638 return 0;
639 while (*++name != '\0') {
640 if (!isalpha(*name) && !isdigit(*name) && *name != '_')
641 return 0;
642 }
643 return 1;
644}
645
646/* 102/*
647 * Allocate new trace_probe and initialize it (including kprobes). 103 * Allocate new trace_probe and initialize it (including kprobes).
648 */ 104 */
@@ -651,7 +107,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
651 void *addr, 107 void *addr,
652 const char *symbol, 108 const char *symbol,
653 unsigned long offs, 109 unsigned long offs,
654 int nargs, int is_return) 110 int nargs, bool is_return)
655{ 111{
656 struct trace_probe *tp; 112 struct trace_probe *tp;
657 int ret = -ENOMEM; 113 int ret = -ENOMEM;
@@ -702,34 +158,12 @@ error:
702 return ERR_PTR(ret); 158 return ERR_PTR(ret);
703} 159}
704 160
705static void update_probe_arg(struct probe_arg *arg)
706{
707 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
708 update_bitfield_fetch_param(arg->fetch.data);
709 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
710 update_deref_fetch_param(arg->fetch.data);
711 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
712 update_symbol_cache(arg->fetch.data);
713}
714
715static void free_probe_arg(struct probe_arg *arg)
716{
717 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
718 free_bitfield_fetch_param(arg->fetch.data);
719 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
720 free_deref_fetch_param(arg->fetch.data);
721 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
722 free_symbol_cache(arg->fetch.data);
723 kfree(arg->name);
724 kfree(arg->comm);
725}
726
727static void free_trace_probe(struct trace_probe *tp) 161static void free_trace_probe(struct trace_probe *tp)
728{ 162{
729 int i; 163 int i;
730 164
731 for (i = 0; i < tp->nr_args; i++) 165 for (i = 0; i < tp->nr_args; i++)
732 free_probe_arg(&tp->args[i]); 166 traceprobe_free_probe_arg(&tp->args[i]);
733 167
734 kfree(tp->call.class->system); 168 kfree(tp->call.class->system);
735 kfree(tp->call.name); 169 kfree(tp->call.name);
@@ -787,7 +221,7 @@ static int __register_trace_probe(struct trace_probe *tp)
787 return -EINVAL; 221 return -EINVAL;
788 222
789 for (i = 0; i < tp->nr_args; i++) 223 for (i = 0; i < tp->nr_args; i++)
790 update_probe_arg(&tp->args[i]); 224 traceprobe_update_arg(&tp->args[i]);
791 225
792 /* Set/clear disabled flag according to tp->flag */ 226 /* Set/clear disabled flag according to tp->flag */
793 if (trace_probe_is_enabled(tp)) 227 if (trace_probe_is_enabled(tp))
@@ -919,227 +353,6 @@ static struct notifier_block trace_probe_module_nb = {
919 .priority = 1 /* Invoked after kprobe module callback */ 353 .priority = 1 /* Invoked after kprobe module callback */
920}; 354};
921 355
922/* Split symbol and offset. */
923static int split_symbol_offset(char *symbol, unsigned long *offset)
924{
925 char *tmp;
926 int ret;
927
928 if (!offset)
929 return -EINVAL;
930
931 tmp = strchr(symbol, '+');
932 if (tmp) {
 933 /* skip the '+' sign because strict_strtoul() doesn't accept it */
934 ret = strict_strtoul(tmp + 1, 0, offset);
935 if (ret)
936 return ret;
937 *tmp = '\0';
938 } else
939 *offset = 0;
940 return 0;
941}
942
943#define PARAM_MAX_ARGS 16
944#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
945
946static int parse_probe_vars(char *arg, const struct fetch_type *t,
947 struct fetch_param *f, int is_return)
948{
949 int ret = 0;
950 unsigned long param;
951
952 if (strcmp(arg, "retval") == 0) {
953 if (is_return)
954 f->fn = t->fetch[FETCH_MTD_retval];
955 else
956 ret = -EINVAL;
957 } else if (strncmp(arg, "stack", 5) == 0) {
958 if (arg[5] == '\0') {
959 if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
960 f->fn = fetch_stack_address;
961 else
962 ret = -EINVAL;
963 } else if (isdigit(arg[5])) {
964 ret = strict_strtoul(arg + 5, 10, &param);
965 if (ret || param > PARAM_MAX_STACK)
966 ret = -EINVAL;
967 else {
968 f->fn = t->fetch[FETCH_MTD_stack];
969 f->data = (void *)param;
970 }
971 } else
972 ret = -EINVAL;
973 } else
974 ret = -EINVAL;
975 return ret;
976}
977
978/* Recursive argument parser */
979static int __parse_probe_arg(char *arg, const struct fetch_type *t,
980 struct fetch_param *f, int is_return)
981{
982 int ret = 0;
983 unsigned long param;
984 long offset;
985 char *tmp;
986
987 switch (arg[0]) {
988 case '$':
989 ret = parse_probe_vars(arg + 1, t, f, is_return);
990 break;
991 case '%': /* named register */
992 ret = regs_query_register_offset(arg + 1);
993 if (ret >= 0) {
994 f->fn = t->fetch[FETCH_MTD_reg];
995 f->data = (void *)(unsigned long)ret;
996 ret = 0;
997 }
998 break;
999 case '@': /* memory or symbol */
1000 if (isdigit(arg[1])) {
1001 ret = strict_strtoul(arg + 1, 0, &param);
1002 if (ret)
1003 break;
1004 f->fn = t->fetch[FETCH_MTD_memory];
1005 f->data = (void *)param;
1006 } else {
1007 ret = split_symbol_offset(arg + 1, &offset);
1008 if (ret)
1009 break;
1010 f->data = alloc_symbol_cache(arg + 1, offset);
1011 if (f->data)
1012 f->fn = t->fetch[FETCH_MTD_symbol];
1013 }
1014 break;
1015 case '+': /* deref memory */
1016 arg++; /* Skip '+', because strict_strtol() rejects it. */
1017 case '-':
1018 tmp = strchr(arg, '(');
1019 if (!tmp)
1020 break;
1021 *tmp = '\0';
1022 ret = strict_strtol(arg, 0, &offset);
1023 if (ret)
1024 break;
1025 arg = tmp + 1;
1026 tmp = strrchr(arg, ')');
1027 if (tmp) {
1028 struct deref_fetch_param *dprm;
1029 const struct fetch_type *t2 = find_fetch_type(NULL);
1030 *tmp = '\0';
1031 dprm = kzalloc(sizeof(struct deref_fetch_param),
1032 GFP_KERNEL);
1033 if (!dprm)
1034 return -ENOMEM;
1035 dprm->offset = offset;
1036 ret = __parse_probe_arg(arg, t2, &dprm->orig,
1037 is_return);
1038 if (ret)
1039 kfree(dprm);
1040 else {
1041 f->fn = t->fetch[FETCH_MTD_deref];
1042 f->data = (void *)dprm;
1043 }
1044 }
1045 break;
1046 }
1047 if (!ret && !f->fn) { /* Parsed, but no fetch method found */
1048 pr_info("%s type has no corresponding fetch method.\n",
1049 t->name);
1050 ret = -EINVAL;
1051 }
1052 return ret;
1053}
1054
1055#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long))
1056
1057/* Bitfield type needs to be parsed into a fetch function */
1058static int __parse_bitfield_probe_arg(const char *bf,
1059 const struct fetch_type *t,
1060 struct fetch_param *f)
1061{
1062 struct bitfield_fetch_param *bprm;
1063 unsigned long bw, bo;
1064 char *tail;
1065
1066 if (*bf != 'b')
1067 return 0;
1068
1069 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1070 if (!bprm)
1071 return -ENOMEM;
1072 bprm->orig = *f;
1073 f->fn = t->fetch[FETCH_MTD_bitfield];
1074 f->data = (void *)bprm;
1075
1076 bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
1077 if (bw == 0 || *tail != '@')
1078 return -EINVAL;
1079
1080 bf = tail + 1;
1081 bo = simple_strtoul(bf, &tail, 0);
1082 if (tail == bf || *tail != '/')
1083 return -EINVAL;
1084
1085 bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
1086 bprm->low_shift = bprm->hi_shift + bo;
1087 return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
1088}
1089
1090/* String length checking wrapper */
1091static int parse_probe_arg(char *arg, struct trace_probe *tp,
1092 struct probe_arg *parg, int is_return)
1093{
1094 const char *t;
1095 int ret;
1096
1097 if (strlen(arg) > MAX_ARGSTR_LEN) {
1098 pr_info("Argument is too long: %s\n", arg);
1099 return -ENOSPC;
1100 }
1101 parg->comm = kstrdup(arg, GFP_KERNEL);
1102 if (!parg->comm) {
1103 pr_info("Failed to allocate memory for command '%s'.\n", arg);
1104 return -ENOMEM;
1105 }
1106 t = strchr(parg->comm, ':');
1107 if (t) {
1108 arg[t - parg->comm] = '\0';
1109 t++;
1110 }
1111 parg->type = find_fetch_type(t);
1112 if (!parg->type) {
1113 pr_info("Unsupported type: %s\n", t);
1114 return -EINVAL;
1115 }
1116 parg->offset = tp->size;
1117 tp->size += parg->type->size;
1118 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
1119 if (ret >= 0 && t != NULL)
1120 ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
1121 if (ret >= 0) {
1122 parg->fetch_size.fn = get_fetch_size_function(parg->type,
1123 parg->fetch.fn);
1124 parg->fetch_size.data = parg->fetch.data;
1125 }
1126 return ret;
1127}
1128
1129/* Return 1 if name is reserved or already used by another argument */
1130static int conflict_field_name(const char *name,
1131 struct probe_arg *args, int narg)
1132{
1133 int i;
1134 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
1135 if (strcmp(reserved_field_names[i], name) == 0)
1136 return 1;
1137 for (i = 0; i < narg; i++)
1138 if (strcmp(args[i].name, name) == 0)
1139 return 1;
1140 return 0;
1141}
1142
1143static int create_trace_probe(int argc, char **argv) 356static int create_trace_probe(int argc, char **argv)
1144{ 357{
1145 /* 358 /*
@@ -1162,7 +375,7 @@ static int create_trace_probe(int argc, char **argv)
1162 */ 375 */
1163 struct trace_probe *tp; 376 struct trace_probe *tp;
1164 int i, ret = 0; 377 int i, ret = 0;
1165 int is_return = 0, is_delete = 0; 378 bool is_return = false, is_delete = false;
1166 char *symbol = NULL, *event = NULL, *group = NULL; 379 char *symbol = NULL, *event = NULL, *group = NULL;
1167 char *arg; 380 char *arg;
1168 unsigned long offset = 0; 381 unsigned long offset = 0;
@@ -1171,11 +384,11 @@ static int create_trace_probe(int argc, char **argv)
1171 384
1172 /* argc must be >= 1 */ 385 /* argc must be >= 1 */
1173 if (argv[0][0] == 'p') 386 if (argv[0][0] == 'p')
1174 is_return = 0; 387 is_return = false;
1175 else if (argv[0][0] == 'r') 388 else if (argv[0][0] == 'r')
1176 is_return = 1; 389 is_return = true;
1177 else if (argv[0][0] == '-') 390 else if (argv[0][0] == '-')
1178 is_delete = 1; 391 is_delete = true;
1179 else { 392 else {
1180 pr_info("Probe definition must start with 'p', 'r' or" 393
1181 " '-'.\n"); 394 " '-'.\n");
@@ -1240,7 +453,7 @@ static int create_trace_probe(int argc, char **argv)
1240 /* a symbol specified */ 453 /* a symbol specified */
1241 symbol = argv[1]; 454 symbol = argv[1];
1242 /* TODO: support .init module functions */ 455 /* TODO: support .init module functions */
1243 ret = split_symbol_offset(symbol, &offset); 456 ret = traceprobe_split_symbol_offset(symbol, &offset);
1244 if (ret) { 457 if (ret) {
1245 pr_info("Failed to parse symbol.\n"); 458 pr_info("Failed to parse symbol.\n");
1246 return ret; 459 return ret;
@@ -1302,7 +515,8 @@ static int create_trace_probe(int argc, char **argv)
1302 goto error; 515 goto error;
1303 } 516 }
1304 517
1305 if (conflict_field_name(tp->args[i].name, tp->args, i)) { 518 if (traceprobe_conflict_field_name(tp->args[i].name,
519 tp->args, i)) {
1306 pr_info("Argument[%d] name '%s' conflicts with " 520 pr_info("Argument[%d] name '%s' conflicts with "
1307 "another field.\n", i, argv[i]); 521 "another field.\n", i, argv[i]);
1308 ret = -EINVAL; 522 ret = -EINVAL;
@@ -1310,7 +524,8 @@ static int create_trace_probe(int argc, char **argv)
1310 } 524 }
1311 525
1312 /* Parse fetch argument */ 526 /* Parse fetch argument */
1313 ret = parse_probe_arg(arg, tp, &tp->args[i], is_return); 527 ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i],
528 is_return, true);
1314 if (ret) { 529 if (ret) {
1315 pr_info("Parse error at argument[%d]. (%d)\n", i, ret); 530 pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
1316 goto error; 531 goto error;
@@ -1412,70 +627,11 @@ static int probes_open(struct inode *inode, struct file *file)
1412 return seq_open(file, &probes_seq_op); 627 return seq_open(file, &probes_seq_op);
1413} 628}
1414 629
1415static int command_trace_probe(const char *buf)
1416{
1417 char **argv;
1418 int argc = 0, ret = 0;
1419
1420 argv = argv_split(GFP_KERNEL, buf, &argc);
1421 if (!argv)
1422 return -ENOMEM;
1423
1424 if (argc)
1425 ret = create_trace_probe(argc, argv);
1426
1427 argv_free(argv);
1428 return ret;
1429}
1430
1431#define WRITE_BUFSIZE 4096
1432
1433static ssize_t probes_write(struct file *file, const char __user *buffer, 630static ssize_t probes_write(struct file *file, const char __user *buffer,
1434 size_t count, loff_t *ppos) 631 size_t count, loff_t *ppos)
1435{ 632{
1436 char *kbuf, *tmp; 633 return traceprobe_probes_write(file, buffer, count, ppos,
1437 int ret; 634 create_trace_probe);
1438 size_t done;
1439 size_t size;
1440
1441 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
1442 if (!kbuf)
1443 return -ENOMEM;
1444
1445 ret = done = 0;
1446 while (done < count) {
1447 size = count - done;
1448 if (size >= WRITE_BUFSIZE)
1449 size = WRITE_BUFSIZE - 1;
1450 if (copy_from_user(kbuf, buffer + done, size)) {
1451 ret = -EFAULT;
1452 goto out;
1453 }
1454 kbuf[size] = '\0';
1455 tmp = strchr(kbuf, '\n');
1456 if (tmp) {
1457 *tmp = '\0';
1458 size = tmp - kbuf + 1;
1459 } else if (done + size < count) {
1460 pr_warning("Line is too long: "
1461 "should be less than %d.", WRITE_BUFSIZE);
1462 ret = -EINVAL;
1463 goto out;
1464 }
1465 done += size;
1466 /* Remove comments */
1467 tmp = strchr(kbuf, '#');
1468 if (tmp)
1469 *tmp = '\0';
1470
1471 ret = command_trace_probe(kbuf);
1472 if (ret)
1473 goto out;
1474 }
1475 ret = done;
1476out:
1477 kfree(kbuf);
1478 return ret;
1479} 635}
1480 636
1481static const struct file_operations kprobe_events_ops = { 637static const struct file_operations kprobe_events_ops = {
@@ -1711,16 +867,6 @@ partial:
1711 return TRACE_TYPE_PARTIAL_LINE; 867 return TRACE_TYPE_PARTIAL_LINE;
1712} 868}
1713 869
1714#undef DEFINE_FIELD
1715#define DEFINE_FIELD(type, item, name, is_signed) \
1716 do { \
1717 ret = trace_define_field(event_call, #type, name, \
1718 offsetof(typeof(field), item), \
1719 sizeof(field.item), is_signed, \
1720 FILTER_OTHER); \
1721 if (ret) \
1722 return ret; \
1723 } while (0)
1724 870
1725static int kprobe_event_define_fields(struct ftrace_event_call *event_call) 871static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1726{ 872{
@@ -2051,8 +1197,9 @@ static __init int kprobe_trace_self_tests_init(void)
2051 1197
2052 pr_info("Testing kprobe tracing: "); 1198 pr_info("Testing kprobe tracing: ");
2053 1199
2054 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " 1200 ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
2055 "$stack $stack0 +0($stack)"); 1201 "$stack $stack0 +0($stack)",
1202 create_trace_probe);
2056 if (WARN_ON_ONCE(ret)) { 1203 if (WARN_ON_ONCE(ret)) {
2057 pr_warning("error on probing function entry.\n"); 1204 pr_warning("error on probing function entry.\n");
2058 warn++; 1205 warn++;
@@ -2066,8 +1213,8 @@ static __init int kprobe_trace_self_tests_init(void)
2066 enable_trace_probe(tp, TP_FLAG_TRACE); 1213 enable_trace_probe(tp, TP_FLAG_TRACE);
2067 } 1214 }
2068 1215
2069 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " 1216 ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
2070 "$retval"); 1217 "$retval", create_trace_probe);
2071 if (WARN_ON_ONCE(ret)) { 1218 if (WARN_ON_ONCE(ret)) {
2072 pr_warning("error on probing function return.\n"); 1219 pr_warning("error on probing function return.\n");
2073 warn++; 1220 warn++;
@@ -2101,13 +1248,13 @@ static __init int kprobe_trace_self_tests_init(void)
2101 } else 1248 } else
2102 disable_trace_probe(tp, TP_FLAG_TRACE); 1249 disable_trace_probe(tp, TP_FLAG_TRACE);
2103 1250
2104 ret = command_trace_probe("-:testprobe"); 1251 ret = traceprobe_command("-:testprobe", create_trace_probe);
2105 if (WARN_ON_ONCE(ret)) { 1252 if (WARN_ON_ONCE(ret)) {
2106 pr_warning("error on deleting a probe.\n"); 1253 pr_warning("error on deleting a probe.\n");
2107 warn++; 1254 warn++;
2108 } 1255 }
2109 1256
2110 ret = command_trace_probe("-:testprobe2"); 1257 ret = traceprobe_command("-:testprobe2", create_trace_probe);
2111 if (WARN_ON_ONCE(ret)) { 1258 if (WARN_ON_ONCE(ret)) {
2112 pr_warning("error on deleting a probe.\n"); 1259 pr_warning("error on deleting a probe.\n");
2113 warn++; 1260 warn++;
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
new file mode 100644
index 000000000000..daa9980153af
--- /dev/null
+++ b/kernel/trace/trace_probe.c
@@ -0,0 +1,839 @@
1/*
2 * Common code for probe-based Dynamic events.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 *
17 * This code was copied from kernel/trace/trace_kprobe.c written by
18 * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
19 *
20 * Updates to make this generic:
21 * Copyright (C) IBM Corporation, 2010-2011
22 * Author: Srikar Dronamraju
23 */
24
25#include "trace_probe.h"
26
27const char *reserved_field_names[] = {
28 "common_type",
29 "common_flags",
30 "common_preempt_count",
31 "common_pid",
32 "common_tgid",
33 FIELD_STRING_IP,
34 FIELD_STRING_RETIP,
35 FIELD_STRING_FUNC,
36};
37
38/* Printing function type */
39#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
40#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
41
42/* Printing in basic type function template */
43#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
44static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
45 const char *name, \
46 void *data, void *ent)\
47{ \
48 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
49} \
50static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
51
52DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
53DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
54DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
55DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
56DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
57DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
58DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
59DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
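
For reference, the u8 instantiation above expands to roughly the following
(a hand-expanded sketch, not literal preprocessor output):

	static __kprobes int print_type_u8(struct trace_seq *s, const char *name,
					   void *data, void *ent)
	{
		return trace_seq_printf(s, " %s=%x", name,
					(unsigned int)*(u8 *)data);
	}
	static const char print_type_format_u8[] = "%x";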
60
61static inline void *get_rloc_data(u32 *dl)
62{
63 return (u8 *)dl + get_rloc_offs(*dl);
64}
65
66/* For data_loc conversion */
67static inline void *get_loc_data(u32 *dl, void *ent)
68{
69 return (u8 *)ent + get_rloc_offs(*dl);
70}
71
72/* For defining macros, define string/string_size types */
73typedef u32 string;
74typedef u32 string_size;
75
76/* Print type function for string type */
77static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
78 const char *name,
79 void *data, void *ent)
80{
81 int len = *(u32 *)data >> 16;
82
83 if (!len)
84 return trace_seq_printf(s, " %s=(fault)", name);
85 else
86 return trace_seq_printf(s, " %s=\"%s\"", name,
87 (const char *)get_loc_data(data, ent));
88}
89
90static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
91
92#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
93/*
94 * Define macro for basic types - we don't need to define s* types, because
95 * we have to care only about bitwidth at recording time.
96 */
97#define DEFINE_BASIC_FETCH_FUNCS(method) \
98DEFINE_FETCH_##method(u8) \
99DEFINE_FETCH_##method(u16) \
100DEFINE_FETCH_##method(u32) \
101DEFINE_FETCH_##method(u64)
102
103#define CHECK_FETCH_FUNCS(method, fn) \
104 (((FETCH_FUNC_NAME(method, u8) == fn) || \
105 (FETCH_FUNC_NAME(method, u16) == fn) || \
106 (FETCH_FUNC_NAME(method, u32) == fn) || \
107 (FETCH_FUNC_NAME(method, u64) == fn) || \
108 (FETCH_FUNC_NAME(method, string) == fn) || \
109 (FETCH_FUNC_NAME(method, string_size) == fn)) \
110 && (fn != NULL))
111
112/* Data fetch function templates */
113#define DEFINE_FETCH_reg(type) \
114static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
115 void *offset, void *dest) \
116{ \
117 *(type *)dest = (type)regs_get_register(regs, \
118 (unsigned int)((unsigned long)offset)); \
119}
120DEFINE_BASIC_FETCH_FUNCS(reg)
121/* No string on the register */
122#define fetch_reg_string NULL
123#define fetch_reg_string_size NULL
124
125#define DEFINE_FETCH_stack(type) \
126static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
127 void *offset, void *dest) \
128{ \
129 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
130 (unsigned int)((unsigned long)offset)); \
131}
132DEFINE_BASIC_FETCH_FUNCS(stack)
133/* No string on the stack entry */
134#define fetch_stack_string NULL
135#define fetch_stack_string_size NULL
136
137#define DEFINE_FETCH_retval(type) \
138static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
139 void *dummy, void *dest) \
140{ \
141 *(type *)dest = (type)regs_return_value(regs); \
142}
143DEFINE_BASIC_FETCH_FUNCS(retval)
144/* No string on the retval */
145#define fetch_retval_string NULL
146#define fetch_retval_string_size NULL
147
148#define DEFINE_FETCH_memory(type) \
149static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
150 void *addr, void *dest) \
151{ \
152 type retval; \
153 if (probe_kernel_address(addr, retval)) \
154 *(type *)dest = 0; \
155 else \
156 *(type *)dest = retval; \
157}
158DEFINE_BASIC_FETCH_FUNCS(memory)
159/*
160 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
161 * length and relative data location.
162 */
163static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
164 void *addr, void *dest)
165{
166 long ret;
167 int maxlen = get_rloc_len(*(u32 *)dest);
168 u8 *dst = get_rloc_data(dest);
169 u8 *src = addr;
170 mm_segment_t old_fs = get_fs();
171
172 if (!maxlen)
173 return;
174
175 /*
176 * Try to get string again, since the string can be changed while
177 * probing.
178 */
179 set_fs(KERNEL_DS);
180 pagefault_disable();
181
182 do
183 ret = __copy_from_user_inatomic(dst++, src++, 1);
184 while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
185
186 dst[-1] = '\0';
187 pagefault_enable();
188 set_fs(old_fs);
189
190 if (ret < 0) { /* Failed to fetch string */
191 ((u8 *)get_rloc_data(dest))[0] = '\0';
192 *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
193 } else {
194 *(u32 *)dest = make_data_rloc(src - (u8 *)addr,
195 get_rloc_offs(*(u32 *)dest));
196 }
197}
198
 199/* Return the length of the string, including the terminating NUL byte */
200static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
201 void *addr, void *dest)
202{
203 mm_segment_t old_fs;
204 int ret, len = 0;
205 u8 c;
206
207 old_fs = get_fs();
208 set_fs(KERNEL_DS);
209 pagefault_disable();
210
211 do {
212 ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
213 len++;
214 } while (c && ret == 0 && len < MAX_STRING_SIZE);
215
216 pagefault_enable();
217 set_fs(old_fs);
218
219 if (ret < 0) /* Failed to check the length */
220 *(u32 *)dest = 0;
221 else
222 *(u32 *)dest = len;
223}
224
225/* Memory fetching by symbol */
226struct symbol_cache {
227 char *symbol;
228 long offset;
229 unsigned long addr;
230};
231
232static unsigned long update_symbol_cache(struct symbol_cache *sc)
233{
234 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
235
236 if (sc->addr)
237 sc->addr += sc->offset;
238
239 return sc->addr;
240}
241
242static void free_symbol_cache(struct symbol_cache *sc)
243{
244 kfree(sc->symbol);
245 kfree(sc);
246}
247
248static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
249{
250 struct symbol_cache *sc;
251
252 if (!sym || strlen(sym) == 0)
253 return NULL;
254
255 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
256 if (!sc)
257 return NULL;
258
259 sc->symbol = kstrdup(sym, GFP_KERNEL);
260 if (!sc->symbol) {
261 kfree(sc);
262 return NULL;
263 }
264 sc->offset = offset;
265 update_symbol_cache(sc);
266
267 return sc;
268}
269
270#define DEFINE_FETCH_symbol(type) \
271static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
272 void *data, void *dest) \
273{ \
274 struct symbol_cache *sc = data; \
275 if (sc->addr) \
276 fetch_memory_##type(regs, (void *)sc->addr, dest); \
277 else \
278 *(type *)dest = 0; \
279}
280DEFINE_BASIC_FETCH_FUNCS(symbol)
281DEFINE_FETCH_symbol(string)
282DEFINE_FETCH_symbol(string_size)
283
284/* Dereference memory access function */
285struct deref_fetch_param {
286 struct fetch_param orig;
287 long offset;
288};
289
290#define DEFINE_FETCH_deref(type) \
291static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
292 void *data, void *dest) \
293{ \
294 struct deref_fetch_param *dprm = data; \
295 unsigned long addr; \
296 call_fetch(&dprm->orig, regs, &addr); \
297 if (addr) { \
298 addr += dprm->offset; \
299 fetch_memory_##type(regs, (void *)addr, dest); \
300 } else \
301 *(type *)dest = 0; \
302}
303DEFINE_BASIC_FETCH_FUNCS(deref)
304DEFINE_FETCH_deref(string)
305DEFINE_FETCH_deref(string_size)
306
307static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
308{
309 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
310 update_deref_fetch_param(data->orig.data);
311 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
312 update_symbol_cache(data->orig.data);
313}
314
315static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
316{
317 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
318 free_deref_fetch_param(data->orig.data);
319 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
320 free_symbol_cache(data->orig.data);
321 kfree(data);
322}
323
324/* Bitfield fetch function */
325struct bitfield_fetch_param {
326 struct fetch_param orig;
327 unsigned char hi_shift;
328 unsigned char low_shift;
329};
330
331#define DEFINE_FETCH_bitfield(type) \
332static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
333 void *data, void *dest) \
334{ \
335 struct bitfield_fetch_param *bprm = data; \
336 type buf = 0; \
337 call_fetch(&bprm->orig, regs, &buf); \
338 if (buf) { \
339 buf <<= bprm->hi_shift; \
340 buf >>= bprm->low_shift; \
341 } \
342 *(type *)dest = buf; \
343}
344
345DEFINE_BASIC_FETCH_FUNCS(bitfield)
346#define fetch_bitfield_string NULL
347#define fetch_bitfield_string_size NULL
348
349static __kprobes void
350update_bitfield_fetch_param(struct bitfield_fetch_param *data)
351{
352 /*
353 * Don't check the bitfield itself, because this must be the
354 * last fetch function.
355 */
356 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
357 update_deref_fetch_param(data->orig.data);
358 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
359 update_symbol_cache(data->orig.data);
360}
361
362static __kprobes void
363free_bitfield_fetch_param(struct bitfield_fetch_param *data)
364{
365 /*
366 * Don't check the bitfield itself, because this must be the
367 * last fetch function.
368 */
369 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
370 free_deref_fetch_param(data->orig.data);
371 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
372 free_symbol_cache(data->orig.data);
373
374 kfree(data);
375}
376
377/* Default (unsigned long) fetch type */
378#define __DEFAULT_FETCH_TYPE(t) u##t
379#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
380#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
381#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
382
383#define ASSIGN_FETCH_FUNC(method, type) \
384 [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
385
386#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
387 {.name = _name, \
388 .size = _size, \
389 .is_signed = sign, \
390 .print = PRINT_TYPE_FUNC_NAME(ptype), \
391 .fmt = PRINT_TYPE_FMT_NAME(ptype), \
392 .fmttype = _fmttype, \
393 .fetch = { \
394ASSIGN_FETCH_FUNC(reg, ftype), \
395ASSIGN_FETCH_FUNC(stack, ftype), \
396ASSIGN_FETCH_FUNC(retval, ftype), \
397ASSIGN_FETCH_FUNC(memory, ftype), \
398ASSIGN_FETCH_FUNC(symbol, ftype), \
399ASSIGN_FETCH_FUNC(deref, ftype), \
400ASSIGN_FETCH_FUNC(bitfield, ftype), \
401 } \
402 }
403
404#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
405 __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
406
407#define FETCH_TYPE_STRING 0
408#define FETCH_TYPE_STRSIZE 1
409
410/* Fetch type information table */
411static const struct fetch_type fetch_type_table[] = {
412 /* Special types */
413 [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
414 sizeof(u32), 1, "__data_loc char[]"),
415 [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
416 string_size, sizeof(u32), 0, "u32"),
417 /* Basic types */
418 ASSIGN_FETCH_TYPE(u8, u8, 0),
419 ASSIGN_FETCH_TYPE(u16, u16, 0),
420 ASSIGN_FETCH_TYPE(u32, u32, 0),
421 ASSIGN_FETCH_TYPE(u64, u64, 0),
422 ASSIGN_FETCH_TYPE(s8, u8, 1),
423 ASSIGN_FETCH_TYPE(s16, u16, 1),
424 ASSIGN_FETCH_TYPE(s32, u32, 1),
425 ASSIGN_FETCH_TYPE(s64, u64, 1),
426};
427
428static const struct fetch_type *find_fetch_type(const char *type)
429{
430 int i;
431
432 if (!type)
433 type = DEFAULT_FETCH_TYPE_STR;
434
435 /* Special case: bitfield */
436 if (*type == 'b') {
437 unsigned long bs;
438
439 type = strchr(type, '/');
440 if (!type)
441 goto fail;
442
443 type++;
444 if (strict_strtoul(type, 0, &bs))
445 goto fail;
446
447 switch (bs) {
448 case 8:
449 return find_fetch_type("u8");
450 case 16:
451 return find_fetch_type("u16");
452 case 32:
453 return find_fetch_type("u32");
454 case 64:
455 return find_fetch_type("u64");
456 default:
457 goto fail;
458 }
459 }
460
461 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
462 if (strcmp(type, fetch_type_table[i].name) == 0)
463 return &fetch_type_table[i];
464
465fail:
466 return NULL;
467}
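
For example, given a hypothetical bitfield type string "b3@5/32", strchr()
locates the '/', strict_strtoul() parses the container size 32, and the lookup
returns the "u32" fetch type; the width/offset part "b3@5" is consumed later
by __parse_bitfield_probe_arg().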
468
 469/* Special function: only accepts unsigned long */
470static __kprobes void fetch_stack_address(struct pt_regs *regs,
471 void *dummy, void *dest)
472{
473 *(unsigned long *)dest = kernel_stack_pointer(regs);
474}
475
476static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
477 fetch_func_t orig_fn)
478{
479 int i;
480
481 if (type != &fetch_type_table[FETCH_TYPE_STRING])
482 return NULL; /* Only string type needs size function */
483
484 for (i = 0; i < FETCH_MTD_END; i++)
485 if (type->fetch[i] == orig_fn)
486 return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
487
488 WARN_ON(1); /* This should not happen */
489
490 return NULL;
491}
492
493/* Split symbol and offset. */
494int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset)
495{
496 char *tmp;
497 int ret;
498
499 if (!offset)
500 return -EINVAL;
501
502 tmp = strchr(symbol, '+');
503 if (tmp) {
 504 /* skip the '+' sign because strict_strtoul() doesn't accept it */
505 ret = strict_strtoul(tmp + 1, 0, offset);
506 if (ret)
507 return ret;
508
509 *tmp = '\0';
510 } else
511 *offset = 0;
512
513 return 0;
514}
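
For instance, passing a hypothetical "vfs_read+0x10" leaves symbol holding
"vfs_read" and sets *offset to 0x10; a plain "vfs_read" yields *offset == 0.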
515
516#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
517
518static int parse_probe_vars(char *arg, const struct fetch_type *t,
519 struct fetch_param *f, bool is_return)
520{
521 int ret = 0;
522 unsigned long param;
523
524 if (strcmp(arg, "retval") == 0) {
525 if (is_return)
526 f->fn = t->fetch[FETCH_MTD_retval];
527 else
528 ret = -EINVAL;
529 } else if (strncmp(arg, "stack", 5) == 0) {
530 if (arg[5] == '\0') {
531 if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
532 f->fn = fetch_stack_address;
533 else
534 ret = -EINVAL;
535 } else if (isdigit(arg[5])) {
536 ret = strict_strtoul(arg + 5, 10, &param);
537 if (ret || param > PARAM_MAX_STACK)
538 ret = -EINVAL;
539 else {
540 f->fn = t->fetch[FETCH_MTD_stack];
541 f->data = (void *)param;
542 }
543 } else
544 ret = -EINVAL;
545 } else
546 ret = -EINVAL;
547
548 return ret;
549}
550
551/* Recursive argument parser */
552static int parse_probe_arg(char *arg, const struct fetch_type *t,
553 struct fetch_param *f, bool is_return, bool is_kprobe)
554{
555 unsigned long param;
556 long offset;
557 char *tmp;
558 int ret;
559
560 ret = 0;
561
 562 /* For now, uprobe_events supports only register arguments */
563 if (!is_kprobe && arg[0] != '%')
564 return -EINVAL;
565
566 switch (arg[0]) {
567 case '$':
568 ret = parse_probe_vars(arg + 1, t, f, is_return);
569 break;
570
571 case '%': /* named register */
572 ret = regs_query_register_offset(arg + 1);
573 if (ret >= 0) {
574 f->fn = t->fetch[FETCH_MTD_reg];
575 f->data = (void *)(unsigned long)ret;
576 ret = 0;
577 }
578 break;
579
580 case '@': /* memory or symbol */
581 if (isdigit(arg[1])) {
582 ret = strict_strtoul(arg + 1, 0, &param);
583 if (ret)
584 break;
585
586 f->fn = t->fetch[FETCH_MTD_memory];
587 f->data = (void *)param;
588 } else {
589 ret = traceprobe_split_symbol_offset(arg + 1, &offset);
590 if (ret)
591 break;
592
593 f->data = alloc_symbol_cache(arg + 1, offset);
594 if (f->data)
595 f->fn = t->fetch[FETCH_MTD_symbol];
596 }
597 break;
598
599 case '+': /* deref memory */
600 arg++; /* Skip '+', because strict_strtol() rejects it. */
601 case '-':
602 tmp = strchr(arg, '(');
603 if (!tmp)
604 break;
605
606 *tmp = '\0';
607 ret = strict_strtol(arg, 0, &offset);
608
609 if (ret)
610 break;
611
612 arg = tmp + 1;
613 tmp = strrchr(arg, ')');
614
615 if (tmp) {
616 struct deref_fetch_param *dprm;
617 const struct fetch_type *t2;
618
619 t2 = find_fetch_type(NULL);
620 *tmp = '\0';
621 dprm = kzalloc(sizeof(struct deref_fetch_param), GFP_KERNEL);
622
623 if (!dprm)
624 return -ENOMEM;
625
626 dprm->offset = offset;
627 ret = parse_probe_arg(arg, t2, &dprm->orig, is_return,
628 is_kprobe);
629 if (ret)
630 kfree(dprm);
631 else {
632 f->fn = t->fetch[FETCH_MTD_deref];
633 f->data = (void *)dprm;
634 }
635 }
636 break;
637 }
 638 if (!ret && !f->fn) { /* Parsed, but no fetch method found */
639 pr_info("%s type has no corresponding fetch method.\n", t->name);
640 ret = -EINVAL;
641 }
642
643 return ret;
644}
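
For example, a hypothetical argument "+4(%ax)" (x86 register naming) takes the
'+'/'-' branch: strict_strtol() reads the offset 4, the parser recurses on
"%ax" to fill dprm->orig, and at probe hit the deref fetch reads the register,
adds 4, and dereferences the result through fetch_memory_*().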
645
646#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long))
647
648/* Bitfield type needs to be parsed into a fetch function */
649static int __parse_bitfield_probe_arg(const char *bf,
650 const struct fetch_type *t,
651 struct fetch_param *f)
652{
653 struct bitfield_fetch_param *bprm;
654 unsigned long bw, bo;
655 char *tail;
656
657 if (*bf != 'b')
658 return 0;
659
660 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
661 if (!bprm)
662 return -ENOMEM;
663
664 bprm->orig = *f;
665 f->fn = t->fetch[FETCH_MTD_bitfield];
666 f->data = (void *)bprm;
667 bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
668
669 if (bw == 0 || *tail != '@')
670 return -EINVAL;
671
672 bf = tail + 1;
673 bo = simple_strtoul(bf, &tail, 0);
674
675 if (tail == bf || *tail != '/')
676 return -EINVAL;
677
678 bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
679 bprm->low_shift = bprm->hi_shift + bo;
680
681 return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
682}
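
As a worked example (values chosen for illustration): for "b3@5/32" the
container type is u32, so BYTES_TO_BITS(t->size) is 32; with bw = 3 and
bo = 5 this yields hi_shift = 32 - (3 + 5) = 24 and low_shift = 24 + 5 = 29,
so the bitfield fetch computes (buf << 24) >> 29, extracting bits 5..7 of the
fetched word.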
683
684/* String length checking wrapper */
685int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
686 struct probe_arg *parg, bool is_return, bool is_kprobe)
687{
688 const char *t;
689 int ret;
690
691 if (strlen(arg) > MAX_ARGSTR_LEN) {
 692 pr_info("Argument is too long: %s\n", arg);
693 return -ENOSPC;
694 }
695 parg->comm = kstrdup(arg, GFP_KERNEL);
696 if (!parg->comm) {
697 pr_info("Failed to allocate memory for command '%s'.\n", arg);
698 return -ENOMEM;
699 }
700 t = strchr(parg->comm, ':');
701 if (t) {
702 arg[t - parg->comm] = '\0';
703 t++;
704 }
705 parg->type = find_fetch_type(t);
706 if (!parg->type) {
707 pr_info("Unsupported type: %s\n", t);
708 return -EINVAL;
709 }
710 parg->offset = *size;
711 *size += parg->type->size;
712 ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, is_kprobe);
713
714 if (ret >= 0 && t != NULL)
715 ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
716
717 if (ret >= 0) {
718 parg->fetch_size.fn = get_fetch_size_function(parg->type,
719 parg->fetch.fn);
720 parg->fetch_size.data = parg->fetch.data;
721 }
722
723 return ret;
724}
725
726/* Return 1 if name is reserved or already used by another argument */
727int traceprobe_conflict_field_name(const char *name,
728 struct probe_arg *args, int narg)
729{
730 int i;
731
732 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
733 if (strcmp(reserved_field_names[i], name) == 0)
734 return 1;
735
736 for (i = 0; i < narg; i++)
737 if (strcmp(args[i].name, name) == 0)
738 return 1;
739
740 return 0;
741}
742
743void traceprobe_update_arg(struct probe_arg *arg)
744{
745 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
746 update_bitfield_fetch_param(arg->fetch.data);
747 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
748 update_deref_fetch_param(arg->fetch.data);
749 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
750 update_symbol_cache(arg->fetch.data);
751}
752
753void traceprobe_free_probe_arg(struct probe_arg *arg)
754{
755 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
756 free_bitfield_fetch_param(arg->fetch.data);
757 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
758 free_deref_fetch_param(arg->fetch.data);
759 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
760 free_symbol_cache(arg->fetch.data);
761
762 kfree(arg->name);
763 kfree(arg->comm);
764}
765
766int traceprobe_command(const char *buf, int (*createfn)(int, char **))
767{
768 char **argv;
769 int argc, ret;
770
771 argc = 0;
772 ret = 0;
773 argv = argv_split(GFP_KERNEL, buf, &argc);
774 if (!argv)
775 return -ENOMEM;
776
777 if (argc)
778 ret = createfn(argc, argv);
779
780 argv_free(argv);
781
782 return ret;
783}
784
785#define WRITE_BUFSIZE 4096
786
787ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer,
788 size_t count, loff_t *ppos,
789 int (*createfn)(int, char **))
790{
791 char *kbuf, *tmp;
792 int ret = 0;
793 size_t done = 0;
794 size_t size;
795
796 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
797 if (!kbuf)
798 return -ENOMEM;
799
800 while (done < count) {
801 size = count - done;
802
803 if (size >= WRITE_BUFSIZE)
804 size = WRITE_BUFSIZE - 1;
805
806 if (copy_from_user(kbuf, buffer + done, size)) {
807 ret = -EFAULT;
808 goto out;
809 }
810 kbuf[size] = '\0';
811 tmp = strchr(kbuf, '\n');
812
813 if (tmp) {
814 *tmp = '\0';
815 size = tmp - kbuf + 1;
816 } else if (done + size < count) {
 817 pr_warning("Line is too long: "
 818 "should be less than %d.", WRITE_BUFSIZE);
819 ret = -EINVAL;
820 goto out;
821 }
822 done += size;
823 /* Remove comments */
824 tmp = strchr(kbuf, '#');
825
826 if (tmp)
827 *tmp = '\0';
828
829 ret = traceprobe_command(kbuf, createfn);
830 if (ret)
831 goto out;
832 }
833 ret = done;
834
835out:
836 kfree(kbuf);
837
838 return ret;
839}
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
new file mode 100644
index 000000000000..933708677814
--- /dev/null
+++ b/kernel/trace/trace_probe.h
@@ -0,0 +1,161 @@
1/*
2 * Common header file for probe-based Dynamic events.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 *
17 * This code was copied from kernel/trace/trace_kprobe.h written by
18 * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
19 *
20 * Updates to make this generic:
21 * Copyright (C) IBM Corporation, 2010-2011
22 * Author: Srikar Dronamraju
23 */
24
25#include <linux/seq_file.h>
26#include <linux/slab.h>
27#include <linux/smp.h>
28#include <linux/debugfs.h>
29#include <linux/types.h>
30#include <linux/string.h>
31#include <linux/ctype.h>
32#include <linux/ptrace.h>
33#include <linux/perf_event.h>
34#include <linux/kprobes.h>
35#include <linux/stringify.h>
36#include <linux/limits.h>
37#include <linux/uaccess.h>
38#include <asm/bitsperlong.h>
39
40#include "trace.h"
41#include "trace_output.h"
42
43#define MAX_TRACE_ARGS 128
44#define MAX_ARGSTR_LEN 63
45#define MAX_EVENT_NAME_LEN 64
46#define MAX_STRING_SIZE PATH_MAX
47
48/* Reserved field names */
49#define FIELD_STRING_IP "__probe_ip"
50#define FIELD_STRING_RETIP "__probe_ret_ip"
51#define FIELD_STRING_FUNC "__probe_func"
52
53#undef DEFINE_FIELD
54#define DEFINE_FIELD(type, item, name, is_signed) \
55 do { \
56 ret = trace_define_field(event_call, #type, name, \
57 offsetof(typeof(field), item), \
58 sizeof(field.item), is_signed, \
59 FILTER_OTHER); \
60 if (ret) \
61 return ret; \
62 } while (0)
63
64
65/* Flags for trace_probe */
66#define TP_FLAG_TRACE 1
67#define TP_FLAG_PROFILE 2
68#define TP_FLAG_REGISTERED 4
69#define TP_FLAG_UPROBE 8
70
71
72/* data_rloc: data relative location, compatible with u32 */
73#define make_data_rloc(len, roffs) \
74 (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
75#define get_rloc_len(dl) ((u32)(dl) >> 16)
76#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
77
78/*
79 * Convert data_rloc to data_loc:
80 * data_rloc stores the offset from data_rloc itself, but data_loc
81 * stores the offset from event entry.
82 */
83#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
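
A quick numeric sketch of the packing (values are illustrative):

	make_data_rloc(5, 24) == (5 << 16) | 24 == 0x00050018
	get_rloc_len(0x00050018)  == 5
	get_rloc_offs(0x00050018) == 24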
84
85/* Data fetch function type */
86typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
87/* Printing function type */
88typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, void *);
89
90/* Fetch types */
91enum {
92 FETCH_MTD_reg = 0,
93 FETCH_MTD_stack,
94 FETCH_MTD_retval,
95 FETCH_MTD_memory,
96 FETCH_MTD_symbol,
97 FETCH_MTD_deref,
98 FETCH_MTD_bitfield,
99 FETCH_MTD_END,
100};
101
102/* Fetch type information table */
103struct fetch_type {
104 const char *name; /* Name of type */
105 size_t size; /* Byte size of type */
106 int is_signed; /* Signed flag */
107 print_type_func_t print; /* Print functions */
 108 const char *fmt; /* Format string */
109 const char *fmttype; /* Name in format file */
110 /* Fetch functions */
111 fetch_func_t fetch[FETCH_MTD_END];
112};
113
114struct fetch_param {
115 fetch_func_t fn;
116 void *data;
117};
118
119struct probe_arg {
120 struct fetch_param fetch;
121 struct fetch_param fetch_size;
122 unsigned int offset; /* Offset from argument entry */
123 const char *name; /* Name of this argument */
124 const char *comm; /* Command of this argument */
125 const struct fetch_type *type; /* Type of this argument */
126};
127
128static inline __kprobes void call_fetch(struct fetch_param *fprm,
129 struct pt_regs *regs, void *dest)
130{
131 return fprm->fn(regs, fprm->data, dest);
132}
133
 134/* Check whether the name is good for event/group/fields */
135static inline int is_good_name(const char *name)
136{
137 if (!isalpha(*name) && *name != '_')
138 return 0;
139 while (*++name != '\0') {
140 if (!isalpha(*name) && !isdigit(*name) && *name != '_')
141 return 0;
142 }
143 return 1;
144}
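
For example, "my_event2" and "_tmp" pass this check, while "2nd" (leading
digit) and "foo-bar" ('-' is neither alphanumeric nor '_') are rejected.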
145
146extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
147 struct probe_arg *parg, bool is_return, bool is_kprobe);
148
149extern int traceprobe_conflict_field_name(const char *name,
150 struct probe_arg *args, int narg);
151
152extern void traceprobe_update_arg(struct probe_arg *arg);
153extern void traceprobe_free_probe_arg(struct probe_arg *arg);
154
155extern int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset);
156
157extern ssize_t traceprobe_probes_write(struct file *file,
158 const char __user *buffer, size_t count, loff_t *ppos,
159 int (*createfn)(int, char**));
160
161extern int traceprobe_command(const char *buf, int (*createfn)(int, char**));
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
new file mode 100644
index 000000000000..2b36ac68549e
--- /dev/null
+++ b/kernel/trace/trace_uprobe.c
@@ -0,0 +1,788 @@
1/*
2 * uprobes-based tracing events
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 *
17 * Copyright (C) IBM Corporation, 2010-2012
18 * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
19 */
20
21#include <linux/module.h>
22#include <linux/uaccess.h>
23#include <linux/uprobes.h>
24#include <linux/namei.h>
25
26#include "trace_probe.h"
27
28#define UPROBE_EVENT_SYSTEM "uprobes"
29
30/*
31 * uprobe event core functions
32 */
33struct trace_uprobe;
34struct uprobe_trace_consumer {
35 struct uprobe_consumer cons;
36 struct trace_uprobe *tu;
37};
38
39struct trace_uprobe {
40 struct list_head list;
41 struct ftrace_event_class class;
42 struct ftrace_event_call call;
43 struct uprobe_trace_consumer *consumer;
44 struct inode *inode;
45 char *filename;
46 unsigned long offset;
47 unsigned long nhit;
48 unsigned int flags; /* For TP_FLAG_* */
49 ssize_t size; /* trace entry size */
50 unsigned int nr_args;
51 struct probe_arg args[];
52};
53
54#define SIZEOF_TRACE_UPROBE(n) \
55 (offsetof(struct trace_uprobe, args) + \
56 (sizeof(struct probe_arg) * (n)))
57
58static int register_uprobe_event(struct trace_uprobe *tu);
59static void unregister_uprobe_event(struct trace_uprobe *tu);
60
61static DEFINE_MUTEX(uprobe_lock);
62static LIST_HEAD(uprobe_list);
63
64static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
65
66/*
67 * Allocate new trace_uprobe and initialize it (including uprobes).
68 */
69static struct trace_uprobe *
70alloc_trace_uprobe(const char *group, const char *event, int nargs)
71{
72 struct trace_uprobe *tu;
73
74 if (!event || !is_good_name(event))
75 return ERR_PTR(-EINVAL);
76
77 if (!group || !is_good_name(group))
78 return ERR_PTR(-EINVAL);
79
80 tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL);
81 if (!tu)
82 return ERR_PTR(-ENOMEM);
83
84 tu->call.class = &tu->class;
85 tu->call.name = kstrdup(event, GFP_KERNEL);
86 if (!tu->call.name)
87 goto error;
88
89 tu->class.system = kstrdup(group, GFP_KERNEL);
90 if (!tu->class.system)
91 goto error;
92
93 INIT_LIST_HEAD(&tu->list);
94 return tu;
95
96error:
97 kfree(tu->call.name);
98 kfree(tu);
99
100 return ERR_PTR(-ENOMEM);
101}
102
103static void free_trace_uprobe(struct trace_uprobe *tu)
104{
105 int i;
106
107 for (i = 0; i < tu->nr_args; i++)
108 traceprobe_free_probe_arg(&tu->args[i]);
109
110 iput(tu->inode);
111 kfree(tu->call.class->system);
112 kfree(tu->call.name);
113 kfree(tu->filename);
114 kfree(tu);
115}
116
117static struct trace_uprobe *find_probe_event(const char *event, const char *group)
118{
119 struct trace_uprobe *tu;
120
121 list_for_each_entry(tu, &uprobe_list, list)
122 if (strcmp(tu->call.name, event) == 0 &&
123 strcmp(tu->call.class->system, group) == 0)
124 return tu;
125
126 return NULL;
127}
128
 129/* Unregister a trace_uprobe and probe_event: call with uprobe_lock held */
130static void unregister_trace_uprobe(struct trace_uprobe *tu)
131{
132 list_del(&tu->list);
133 unregister_uprobe_event(tu);
134 free_trace_uprobe(tu);
135}
136
137/* Register a trace_uprobe and probe_event */
138static int register_trace_uprobe(struct trace_uprobe *tu)
139{
140 struct trace_uprobe *old_tp;
141 int ret;
142
143 mutex_lock(&uprobe_lock);
144
145 /* register as an event */
146 old_tp = find_probe_event(tu->call.name, tu->call.class->system);
147 if (old_tp)
148 /* delete old event */
149 unregister_trace_uprobe(old_tp);
150
151 ret = register_uprobe_event(tu);
152 if (ret) {
 153 pr_warning("Failed to register probe event (%d)\n", ret);
154 goto end;
155 }
156
157 list_add_tail(&tu->list, &uprobe_list);
158
159end:
160 mutex_unlock(&uprobe_lock);
161
162 return ret;
163}
164
165/*
166 * Argument syntax:
167 * - Add uprobe: p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS]
168 *
169 * - Remove uprobe: -:[GRP/]EVENT
170 */
171static int create_trace_uprobe(int argc, char **argv)
172{
173 struct trace_uprobe *tu;
174 struct inode *inode;
175 char *arg, *event, *group, *filename;
176 char buf[MAX_EVENT_NAME_LEN];
177 struct path path;
178 unsigned long offset;
179 bool is_delete;
180 int i, ret;
181
182 inode = NULL;
183 ret = 0;
184 is_delete = false;
185 event = NULL;
186 group = NULL;
187
188 /* argc must be >= 1 */
189 if (argv[0][0] == '-')
190 is_delete = true;
191 else if (argv[0][0] != 'p') {
 192 pr_info("Probe definition must start with 'p' or '-'.\n");
193 return -EINVAL;
194 }
195
196 if (argv[0][1] == ':') {
197 event = &argv[0][2];
198 arg = strchr(event, '/');
199
200 if (arg) {
201 group = event;
202 event = arg + 1;
203 event[-1] = '\0';
204
205 if (strlen(group) == 0) {
206 pr_info("Group name is not specified\n");
207 return -EINVAL;
208 }
209 }
210 if (strlen(event) == 0) {
211 pr_info("Event name is not specified\n");
212 return -EINVAL;
213 }
214 }
215 if (!group)
216 group = UPROBE_EVENT_SYSTEM;
217
218 if (is_delete) {
219 if (!event) {
220 pr_info("Delete command needs an event name.\n");
221 return -EINVAL;
222 }
223 mutex_lock(&uprobe_lock);
224 tu = find_probe_event(event, group);
225
226 if (!tu) {
227 mutex_unlock(&uprobe_lock);
228 pr_info("Event %s/%s doesn't exist.\n", group, event);
229 return -ENOENT;
230 }
231 /* delete an event */
232 unregister_trace_uprobe(tu);
233 mutex_unlock(&uprobe_lock);
234 return 0;
235 }
236
237 if (argc < 2) {
238 pr_info("Probe point is not specified.\n");
239 return -EINVAL;
240 }
241 if (isdigit(argv[1][0])) {
 242 pr_info("Probe point must have a filename.\n");
243 return -EINVAL;
244 }
245 arg = strchr(argv[1], ':');
246 if (!arg)
247 goto fail_address_parse;
248
249 *arg++ = '\0';
250 filename = argv[1];
251 ret = kern_path(filename, LOOKUP_FOLLOW, &path);
252 if (ret)
253 goto fail_address_parse;
254
255 ret = strict_strtoul(arg, 0, &offset);
256 if (ret)
257 goto fail_address_parse;
258
259 inode = igrab(path.dentry->d_inode);
260
261 argc -= 2;
262 argv += 2;
263
264 /* setup a probe */
265 if (!event) {
266 char *tail = strrchr(filename, '/');
267 char *ptr;
268
269 ptr = kstrdup((tail ? tail + 1 : filename), GFP_KERNEL);
270 if (!ptr) {
271 ret = -ENOMEM;
272 goto fail_address_parse;
273 }
274
275 tail = ptr;
276 ptr = strpbrk(tail, ".-_");
277 if (ptr)
278 *ptr = '\0';
279
280 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset);
281 event = buf;
282 kfree(tail);
283 }
284
285 tu = alloc_trace_uprobe(group, event, argc);
286 if (IS_ERR(tu)) {
 287 pr_info("Failed to allocate trace_uprobe. (%d)\n", (int)PTR_ERR(tu));
288 ret = PTR_ERR(tu);
289 goto fail_address_parse;
290 }
291 tu->offset = offset;
292 tu->inode = inode;
293 tu->filename = kstrdup(filename, GFP_KERNEL);
294
295 if (!tu->filename) {
296 pr_info("Failed to allocate filename.\n");
297 ret = -ENOMEM;
298 goto error;
299 }
300
301 /* parse arguments */
302 ret = 0;
303 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
304 /* Increment count for freeing args in error case */
305 tu->nr_args++;
306
307 /* Parse argument name */
308 arg = strchr(argv[i], '=');
309 if (arg) {
310 *arg++ = '\0';
311 tu->args[i].name = kstrdup(argv[i], GFP_KERNEL);
312 } else {
313 arg = argv[i];
314 /* If argument name is omitted, set "argN" */
315 snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
316 tu->args[i].name = kstrdup(buf, GFP_KERNEL);
317 }
318
319 if (!tu->args[i].name) {
320 pr_info("Failed to allocate argument[%d] name.\n", i);
321 ret = -ENOMEM;
322 goto error;
323 }
324
325 if (!is_good_name(tu->args[i].name)) {
326 pr_info("Invalid argument[%d] name: %s\n", i, tu->args[i].name);
327 ret = -EINVAL;
328 goto error;
329 }
330
331 if (traceprobe_conflict_field_name(tu->args[i].name, tu->args, i)) {
332 pr_info("Argument[%d] name '%s' conflicts with "
333 "another field.\n", i, argv[i]);
334 ret = -EINVAL;
335 goto error;
336 }
337
338 /* Parse fetch argument */
339 ret = traceprobe_parse_probe_arg(arg, &tu->size, &tu->args[i], false, false);
340 if (ret) {
341 pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
342 goto error;
343 }
344 }
345
346 ret = register_trace_uprobe(tu);
347 if (ret)
348 goto error;
349 return 0;
350
351error:
352 free_trace_uprobe(tu);
353 return ret;
354
355fail_address_parse:
356 if (inode)
357 iput(inode);
358
359 pr_info("Failed to parse address.\n");
360
361 return ret;
362}
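
To illustrate the parse (path, event and register names are hypothetical):
for the definition "p:myprobe /bin/app:0x4710 %ax", argv[0] yields event
"myprobe" with the default "uprobes" group, argv[1] is split at the ':' into
filename "/bin/app" and offset 0x4710 via strict_strtoul(), and "%ax" is
handed to traceprobe_parse_probe_arg() with is_kprobe == false, so only
register arguments are accepted.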
363
364static void cleanup_all_probes(void)
365{
366 struct trace_uprobe *tu;
367
368 mutex_lock(&uprobe_lock);
369 while (!list_empty(&uprobe_list)) {
370 tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
371 unregister_trace_uprobe(tu);
372 }
373 mutex_unlock(&uprobe_lock);
374}
375
376/* Probes listing interfaces */
377static void *probes_seq_start(struct seq_file *m, loff_t *pos)
378{
379 mutex_lock(&uprobe_lock);
380 return seq_list_start(&uprobe_list, *pos);
381}
382
383static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
384{
385 return seq_list_next(v, &uprobe_list, pos);
386}
387
388static void probes_seq_stop(struct seq_file *m, void *v)
389{
390 mutex_unlock(&uprobe_lock);
391}
392
393static int probes_seq_show(struct seq_file *m, void *v)
394{
395 struct trace_uprobe *tu = v;
396 int i;
397
398 seq_printf(m, "p:%s/%s", tu->call.class->system, tu->call.name);
399 seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset);
400
401 for (i = 0; i < tu->nr_args; i++)
402 seq_printf(m, " %s=%s", tu->args[i].name, tu->args[i].comm);
403
404 seq_printf(m, "\n");
405 return 0;
406}
407
408static const struct seq_operations probes_seq_op = {
409 .start = probes_seq_start,
410 .next = probes_seq_next,
411 .stop = probes_seq_stop,
412 .show = probes_seq_show
413};
414
415static int probes_open(struct inode *inode, struct file *file)
416{
417 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
418 cleanup_all_probes();
419
420 return seq_open(file, &probes_seq_op);
421}
422
423static ssize_t probes_write(struct file *file, const char __user *buffer,
424 size_t count, loff_t *ppos)
425{
426 return traceprobe_probes_write(file, buffer, count, ppos, create_trace_uprobe);
427}
428
429static const struct file_operations uprobe_events_ops = {
430 .owner = THIS_MODULE,
431 .open = probes_open,
432 .read = seq_read,
433 .llseek = seq_lseek,
434 .release = seq_release,
435 .write = probes_write,
436};
437
438/* Probes profiling interfaces */
439static int probes_profile_seq_show(struct seq_file *m, void *v)
440{
441 struct trace_uprobe *tu = v;
442
443 seq_printf(m, " %s %-44s %15lu\n", tu->filename, tu->call.name, tu->nhit);
444 return 0;
445}
446
447static const struct seq_operations profile_seq_op = {
448 .start = probes_seq_start,
449 .next = probes_seq_next,
450 .stop = probes_seq_stop,
451 .show = probes_profile_seq_show
452};
453
454static int profile_open(struct inode *inode, struct file *file)
455{
456 return seq_open(file, &profile_seq_op);
457}
458
459static const struct file_operations uprobe_profile_ops = {
460 .owner = THIS_MODULE,
461 .open = profile_open,
462 .read = seq_read,
463 .llseek = seq_lseek,
464 .release = seq_release,
465};
466
467/* uprobe handler */
468static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
469{
470 struct uprobe_trace_entry_head *entry;
471 struct ring_buffer_event *event;
472 struct ring_buffer *buffer;
473 u8 *data;
474 int size, i, pc;
475 unsigned long irq_flags;
476 struct ftrace_event_call *call = &tu->call;
477
478 tu->nhit++;
479
480 local_save_flags(irq_flags);
481 pc = preempt_count();
482
483 size = sizeof(*entry) + tu->size;
484
485 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
486 size, irq_flags, pc);
487 if (!event)
488 return;
489
490 entry = ring_buffer_event_data(event);
491 entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
492 data = (u8 *)&entry[1];
493 for (i = 0; i < tu->nr_args; i++)
494 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
495
496 if (!filter_current_check_discard(buffer, call, entry, event))
497 trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
498}
499
500/* Event entry printers */
501static enum print_line_t
502print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
503{
504 struct uprobe_trace_entry_head *field;
505 struct trace_seq *s = &iter->seq;
506 struct trace_uprobe *tu;
507 u8 *data;
508 int i;
509
510 field = (struct uprobe_trace_entry_head *)iter->ent;
511 tu = container_of(event, struct trace_uprobe, call.event);
512
513 if (!trace_seq_printf(s, "%s: (", tu->call.name))
514 goto partial;
515
516 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
517 goto partial;
518
519 if (!trace_seq_puts(s, ")"))
520 goto partial;
521
522 data = (u8 *)&field[1];
523 for (i = 0; i < tu->nr_args; i++) {
524 if (!tu->args[i].type->print(s, tu->args[i].name,
525 data + tu->args[i].offset, field))
526 goto partial;
527 }
528
529 if (trace_seq_puts(s, "\n"))
530 return TRACE_TYPE_HANDLED;
531
532partial:
533 return TRACE_TYPE_PARTIAL_LINE;
534}
535
536static int probe_event_enable(struct trace_uprobe *tu, int flag)
537{
538 struct uprobe_trace_consumer *utc;
539 int ret = 0;
540
541 if (!tu->inode || tu->consumer)
542 return -EINTR;
543
544 utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL);
545 if (!utc)
 546 return -ENOMEM;
547
548 utc->cons.handler = uprobe_dispatcher;
549 utc->cons.filter = NULL;
550 ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
551 if (ret) {
552 kfree(utc);
553 return ret;
554 }
555
556 tu->flags |= flag;
557 utc->tu = tu;
558 tu->consumer = utc;
559
560 return 0;
561}
562
563static void probe_event_disable(struct trace_uprobe *tu, int flag)
564{
565 if (!tu->inode || !tu->consumer)
566 return;
567
568 uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons);
569 tu->flags &= ~flag;
570 kfree(tu->consumer);
571 tu->consumer = NULL;
572}
573
574static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
575{
576 int ret, i;
577 struct uprobe_trace_entry_head field;
578 struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data;
579
580 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
581 /* Set argument names as fields */
582 for (i = 0; i < tu->nr_args; i++) {
583 ret = trace_define_field(event_call, tu->args[i].type->fmttype,
584 tu->args[i].name,
585 sizeof(field) + tu->args[i].offset,
586 tu->args[i].type->size,
587 tu->args[i].type->is_signed,
588 FILTER_OTHER);
589
590 if (ret)
591 return ret;
592 }
593 return 0;
594}
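
Each argument field is registered at sizeof(field) + tu->args[i].offset,
i.e. relative to the start of the record, just past the entry header.
Assuming an 8-byte header and two 4-byte arguments (numbers illustrative):

    ip      -> offset 0   (the entry header)
    args[0] -> offset 8   (sizeof(field) + 0)
    args[1] -> offset 12  (sizeof(field) + 4)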
595
596#define LEN_OR_ZERO (len ? len - pos : 0)
597static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len)
598{
599 const char *fmt, *arg;
600 int i;
601 int pos = 0;
602
603 fmt = "(%lx)";
604 arg = "REC->" FIELD_STRING_IP;
605
606 /* When len=0, we just calculate the needed length */
607
608 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
609
610 for (i = 0; i < tu->nr_args; i++) {
611 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
612 tu->args[i].name, tu->args[i].type->fmt);
613 }
614
615 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
616
617 for (i = 0; i < tu->nr_args; i++) {
618 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
619 tu->args[i].name);
620 }
621
622 return pos; /* return the length of print_fmt */
623}
624#undef LEN_OR_ZERO
625
626static int set_print_fmt(struct trace_uprobe *tu)
627{
628 char *print_fmt;
629 int len;
630
631 /* First: called with 0 length to calculate the needed length */
632 len = __set_print_fmt(tu, NULL, 0);
633 print_fmt = kmalloc(len + 1, GFP_KERNEL);
634 if (!print_fmt)
635 return -ENOMEM;
636
637 /* Second: actually write the @print_fmt */
638 __set_print_fmt(tu, print_fmt, len + 1);
639 tu->call.print_fmt = print_fmt;
640
641 return 0;
642}
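
The measure-then-fill pattern above relies on the C99 guarantee that
snprintf() with a zero size writes nothing but still returns the length the
output would need. A self-contained userspace sketch of the same idiom
(names hypothetical, not from the patch):

    #include <stdio.h>
    #include <stdlib.h>

    static int format_msg(char *buf, int len, int id)
    {
            /* len == 0: measure only; buf may be NULL */
            return snprintf(buf, len, "probe-%d", id);
    }

    static char *alloc_msg(int id)
    {
            int len = format_msg(NULL, 0, id);      /* first pass: measure */
            char *buf = malloc(len + 1);

            if (!buf)
                    return NULL;
            format_msg(buf, len + 1, id);           /* second pass: fill */
            return buf;
    }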
643
644#ifdef CONFIG_PERF_EVENTS
645/* uprobe profile handler */
646static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
647{
648 struct ftrace_event_call *call = &tu->call;
649 struct uprobe_trace_entry_head *entry;
650 struct hlist_head *head;
651 u8 *data;
652 int size, __size, i;
653 int rctx;
654
655 __size = sizeof(*entry) + tu->size;
656 size = ALIGN(__size + sizeof(u32), sizeof(u64));
657 size -= sizeof(u32);
658 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
659 return;
660
661 preempt_disable();
662
663 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
664 if (!entry)
665 goto out;
666
667 entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
668 data = (u8 *)&entry[1];
669 for (i = 0; i < tu->nr_args; i++)
670 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
671
672 head = this_cpu_ptr(call->perf_events);
673 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
674
675 out:
676 preempt_enable();
677}
678#endif /* CONFIG_PERF_EVENTS */
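
The size arithmetic at the top of uprobe_perf_func() keeps the perf sample
64-bit aligned: perf prepends a u32 size header to the raw data, so the code
aligns header-plus-data rather than the data alone. Assuming __size = 16
(an 8-byte entry header plus two 4-byte arguments):

    ALIGN(16 + sizeof(u32), sizeof(u64)) = ALIGN(20, 8) = 24
    size = 24 - sizeof(u32) = 20

so the 4-byte header plus 20 bytes of data total 24, a multiple of 8.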
679
680static
681int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
682{
683 struct trace_uprobe *tu = (struct trace_uprobe *)event->data;
684
685 switch (type) {
686 case TRACE_REG_REGISTER:
687 return probe_event_enable(tu, TP_FLAG_TRACE);
688
689 case TRACE_REG_UNREGISTER:
690 probe_event_disable(tu, TP_FLAG_TRACE);
691 return 0;
692
693#ifdef CONFIG_PERF_EVENTS
694 case TRACE_REG_PERF_REGISTER:
695 return probe_event_enable(tu, TP_FLAG_PROFILE);
696
697 case TRACE_REG_PERF_UNREGISTER:
698 probe_event_disable(tu, TP_FLAG_PROFILE);
699 return 0;
700#endif
701 default:
702 return 0;
703 }
704 return 0;
705}
706
707static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
708{
709 struct uprobe_trace_consumer *utc;
710 struct trace_uprobe *tu;
711
712 utc = container_of(con, struct uprobe_trace_consumer, cons);
713 tu = utc->tu;
714 if (!tu || tu->consumer != utc)
715 return 0;
716
717 if (tu->flags & TP_FLAG_TRACE)
718 uprobe_trace_func(tu, regs);
719
720#ifdef CONFIG_PERF_EVENTS
721 if (tu->flags & TP_FLAG_PROFILE)
722 uprobe_perf_func(tu, regs);
723#endif
724 return 0;
725}
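
The consumer contract that uprobe_dispatcher() implements is small:
uprobe_register() takes an inode, an offset within the file, and a
struct uprobe_consumer whose handler runs in task context on every hit.
A minimal sketch of a standalone consumer, mirroring how
probe_event_enable()/probe_event_disable() use the API above (illustrative;
error handling elided):

    static int my_handler(struct uprobe_consumer *uc, struct pt_regs *regs)
    {
            /* runs each time the breakpoint at inode:offset fires */
            return 0;
    }

    static struct uprobe_consumer my_consumer = {
            .handler = my_handler,
            .filter  = NULL,        /* NULL: fire for every task */
    };

    /* uprobe_register(inode, offset, &my_consumer);   */
    /* ...                                             */
    /* uprobe_unregister(inode, offset, &my_consumer); */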
726
727static struct trace_event_functions uprobe_funcs = {
728 .trace = print_uprobe_event
729};
730
731static int register_uprobe_event(struct trace_uprobe *tu)
732{
733 struct ftrace_event_call *call = &tu->call;
734 int ret;
735
736 /* Initialize ftrace_event_call */
737 INIT_LIST_HEAD(&call->class->fields);
738 call->event.funcs = &uprobe_funcs;
739 call->class->define_fields = uprobe_event_define_fields;
740
741 if (set_print_fmt(tu) < 0)
742 return -ENOMEM;
743
744 ret = register_ftrace_event(&call->event);
745 if (!ret) {
746 kfree(call->print_fmt);
747 return -ENODEV;
748 }
749 call->flags = 0;
750 call->class->reg = trace_uprobe_register;
751 call->data = tu;
752 ret = trace_add_event_call(call);
753
754 if (ret) {
755 pr_info("Failed to register uprobe event: %s\n", call->name);
756 kfree(call->print_fmt);
757 unregister_ftrace_event(&call->event);
758 }
759
760 return ret;
761}
762
763static void unregister_uprobe_event(struct trace_uprobe *tu)
764{
765 /* tu->event is unregistered in trace_remove_event_call() */
766 trace_remove_event_call(&tu->call);
767 kfree(tu->call.print_fmt);
768 tu->call.print_fmt = NULL;
769}
770
771/* Make a trace interface for controlling probe points */
772static __init int init_uprobe_trace(void)
773{
774 struct dentry *d_tracer;
775
776 d_tracer = tracing_init_dentry();
777 if (!d_tracer)
778 return 0;
779
780 trace_create_file("uprobe_events", 0644, d_tracer,
781 NULL, &uprobe_events_ops);
782 /* Profile interface */
783 trace_create_file("uprobe_profile", 0444, d_tracer,
784 NULL, &uprobe_profile_ops);
785 return 0;
786}
787
788fs_initcall(init_uprobe_trace);
diff --git a/mm/memory.c b/mm/memory.c
index 1e77da6d82c1..e40f6759ba98 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1307,6 +1307,9 @@ static void unmap_single_vma(struct mmu_gather *tlb,
1307 if (end <= vma->vm_start) 1307 if (end <= vma->vm_start)
1308 return; 1308 return;
1309 1309
1310 if (vma->vm_file)
1311 uprobe_munmap(vma, start, end);
1312
1310 if (unlikely(is_pfn_mapping(vma))) 1313 if (unlikely(is_pfn_mapping(vma)))
1311 untrack_pfn_vma(vma, 0, 0); 1314 untrack_pfn_vma(vma, 0, 0);
1312 1315
diff --git a/mm/mmap.c b/mm/mmap.c
index 69a1889f3790..e8dcfc7de866 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -30,6 +30,7 @@
30#include <linux/perf_event.h> 30#include <linux/perf_event.h>
31#include <linux/audit.h> 31#include <linux/audit.h>
32#include <linux/khugepaged.h> 32#include <linux/khugepaged.h>
33#include <linux/uprobes.h>
33 34
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
35#include <asm/cacheflush.h> 36#include <asm/cacheflush.h>
@@ -546,8 +547,15 @@ again: remove_next = 1 + (end > next->vm_end);
546 547
547 if (file) { 548 if (file) {
548 mapping = file->f_mapping; 549 mapping = file->f_mapping;
549 if (!(vma->vm_flags & VM_NONLINEAR)) 550 if (!(vma->vm_flags & VM_NONLINEAR)) {
550 root = &mapping->i_mmap; 551 root = &mapping->i_mmap;
552 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
553
554 if (adjust_next)
555 uprobe_munmap(next, next->vm_start,
556 next->vm_end);
557 }
558
551 mutex_lock(&mapping->i_mmap_mutex); 559 mutex_lock(&mapping->i_mmap_mutex);
552 if (insert) { 560 if (insert) {
553 /* 561 /*
@@ -617,8 +625,16 @@ again: remove_next = 1 + (end > next->vm_end);
617 if (mapping) 625 if (mapping)
618 mutex_unlock(&mapping->i_mmap_mutex); 626 mutex_unlock(&mapping->i_mmap_mutex);
619 627
628 if (root) {
629 uprobe_mmap(vma);
630
631 if (adjust_next)
632 uprobe_mmap(next);
633 }
634
620 if (remove_next) { 635 if (remove_next) {
621 if (file) { 636 if (file) {
637 uprobe_munmap(next, next->vm_start, next->vm_end);
622 fput(file); 638 fput(file);
623 if (next->vm_flags & VM_EXECUTABLE) 639 if (next->vm_flags & VM_EXECUTABLE)
624 removed_exe_file_vma(mm); 640 removed_exe_file_vma(mm);
@@ -638,6 +654,8 @@ again: remove_next = 1 + (end > next->vm_end);
638 goto again; 654 goto again;
639 } 655 }
640 } 656 }
657 if (insert && file)
658 uprobe_mmap(insert);
641 659
642 validate_mm(mm); 660 validate_mm(mm);
643 661
@@ -1371,6 +1389,11 @@ out:
1371 mm->locked_vm += (len >> PAGE_SHIFT); 1389 mm->locked_vm += (len >> PAGE_SHIFT);
1372 } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) 1390 } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
1373 make_pages_present(addr, addr + len); 1391 make_pages_present(addr, addr + len);
1392
1393 if (file && uprobe_mmap(vma))
1394 /* matching probes but cannot insert */
1395 goto unmap_and_free_vma;
1396
1374 return addr; 1397 return addr;
1375 1398
1376unmap_and_free_vma: 1399unmap_and_free_vma:
@@ -2358,6 +2381,10 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
2358 if ((vma->vm_flags & VM_ACCOUNT) && 2381 if ((vma->vm_flags & VM_ACCOUNT) &&
2359 security_vm_enough_memory_mm(mm, vma_pages(vma))) 2382 security_vm_enough_memory_mm(mm, vma_pages(vma)))
2360 return -ENOMEM; 2383 return -ENOMEM;
2384
2385 if (vma->vm_file && uprobe_mmap(vma))
2386 return -EINVAL;
2387
2361 vma_link(mm, vma, prev, rb_link, rb_parent); 2388 vma_link(mm, vma, prev, rb_link, rb_parent);
2362 return 0; 2389 return 0;
2363} 2390}
@@ -2427,6 +2454,10 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2427 new_vma->vm_pgoff = pgoff; 2454 new_vma->vm_pgoff = pgoff;
2428 if (new_vma->vm_file) { 2455 if (new_vma->vm_file) {
2429 get_file(new_vma->vm_file); 2456 get_file(new_vma->vm_file);
2457
2458 if (uprobe_mmap(new_vma))
2459 goto out_free_mempol;
2460
2430 if (vma->vm_flags & VM_EXECUTABLE) 2461 if (vma->vm_flags & VM_EXECUTABLE)
2431 added_exe_file_vma(mm); 2462 added_exe_file_vma(mm);
2432 } 2463 }
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 2780d9ce48bf..b715cb71592b 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -77,7 +77,8 @@ OPTIONS
77 77
78-F:: 78-F::
79--funcs:: 79--funcs::
80 Show available functions in given module or kernel. 80 Show available functions in given module or kernel. With -x/--exec,
81 can also list functions in a user space executable / shared library.
81 82
82--filter=FILTER:: 83--filter=FILTER::
83 (Only for --vars and --funcs) Set filter. FILTER is a combination of glob 84 (Only for --vars and --funcs) Set filter. FILTER is a combination of glob
@@ -98,6 +99,15 @@ OPTIONS
98--max-probes:: 99--max-probes::
99 Set the maximum number of probe points for an event. Default is 128. 100 Set the maximum number of probe points for an event. Default is 128.
100 101
102-x::
103--exec=PATH::
104 Specify path to the executable or shared library file for user
105 space tracing. Can also be used with --funcs option.
106
107In the absence of the -m/-x options, perf probe checks whether the first
108argument after the options is an absolute path name. If it is, perf probe
109uses it as the target module or target user space binary to probe.
110
101PROBE SYNTAX 111PROBE SYNTAX
102------------ 112------------
103Probe points are defined by following syntax. 113Probe points are defined by following syntax.
@@ -182,6 +192,13 @@ Delete all probes on schedule().
182 192
183 ./perf probe --del='schedule*' 193 ./perf probe --del='schedule*'
184 194
195Add a probe at the zfree() function in /bin/zsh
196
197 ./perf probe -x /bin/zsh zfree or ./perf probe /bin/zsh zfree
198
199Add a probe at the malloc() function in libc
200
201 ./perf probe -x /lib/libc.so.6 malloc or ./perf probe /lib/libc.so.6 malloc
185 202
186SEE ALSO 203SEE ALSO
187-------- 204--------
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 4935c09dd5b5..e215ae61b2ae 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -54,6 +54,7 @@ static struct {
54 bool show_ext_vars; 54 bool show_ext_vars;
55 bool show_funcs; 55 bool show_funcs;
56 bool mod_events; 56 bool mod_events;
57 bool uprobes;
57 int nevents; 58 int nevents;
58 struct perf_probe_event events[MAX_PROBES]; 59 struct perf_probe_event events[MAX_PROBES];
59 struct strlist *dellist; 60 struct strlist *dellist;
@@ -75,6 +76,8 @@ static int parse_probe_event(const char *str)
75 return -1; 76 return -1;
76 } 77 }
77 78
79 pev->uprobes = params.uprobes;
80
78 /* Parse a perf-probe command into event */ 81 /* Parse a perf-probe command into event */
79 ret = parse_perf_probe_command(str, pev); 82 ret = parse_perf_probe_command(str, pev);
80 pr_debug("%d arguments\n", pev->nargs); 83 pr_debug("%d arguments\n", pev->nargs);
@@ -82,21 +85,58 @@ static int parse_probe_event(const char *str)
82 return ret; 85 return ret;
83} 86}
84 87
88static int set_target(const char *ptr)
89{
90 int found = 0;
91 const char *buf;
92
93 /*
94 * The first argument after options can be an absolute path
95 * to an executable / library or kernel module.
96 *
97 * TODO: Support relative path, and $PATH, $LD_LIBRARY_PATH,
98 * short module name.
99 */
100 if (!params.target && ptr && *ptr == '/') {
101 params.target = ptr;
102 found = 1;
103 buf = ptr + (strlen(ptr) - 3);
104
105 if (strcmp(buf, ".ko"))
106 params.uprobes = true;
107
108 }
109
110 return found;
111}
112
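The heuristic in set_target() is purely suffix based: an absolute path
ending in ".ko" is treated as an offline kernel module (kprobes), any other
absolute path as a user space binary (uprobes). Expected outcomes, with
hypothetical paths:

    set_target("/lib/modules/.../ext4.ko") -> target set, uprobes stays false
    set_target("/bin/zsh")                 -> target set, uprobes = true
    set_target("zfree")                    -> not an absolute path, returns 0
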
85static int parse_probe_event_argv(int argc, const char **argv) 113static int parse_probe_event_argv(int argc, const char **argv)
86{ 114{
87 int i, len, ret; 115 int i, len, ret, found_target;
88 char *buf; 116 char *buf;
89 117
118 found_target = set_target(argv[0]);
119 if (found_target && argc == 1)
120 return 0;
121
90 /* Bind up rest arguments */ 122 /* Bind up rest arguments */
91 len = 0; 123 len = 0;
92 for (i = 0; i < argc; i++) 124 for (i = 0; i < argc; i++) {
125 if (i == 0 && found_target)
126 continue;
127
93 len += strlen(argv[i]) + 1; 128 len += strlen(argv[i]) + 1;
129 }
94 buf = zalloc(len + 1); 130 buf = zalloc(len + 1);
95 if (buf == NULL) 131 if (buf == NULL)
96 return -ENOMEM; 132 return -ENOMEM;
97 len = 0; 133 len = 0;
98 for (i = 0; i < argc; i++) 134 for (i = 0; i < argc; i++) {
135 if (i == 0 && found_target)
136 continue;
137
99 len += sprintf(&buf[len], "%s ", argv[i]); 138 len += sprintf(&buf[len], "%s ", argv[i]);
139 }
100 params.mod_events = true; 140 params.mod_events = true;
101 ret = parse_probe_event(buf); 141 ret = parse_probe_event(buf);
102 free(buf); 142 free(buf);
@@ -125,6 +165,28 @@ static int opt_del_probe_event(const struct option *opt __used,
125 return 0; 165 return 0;
126} 166}
127 167
168static int opt_set_target(const struct option *opt, const char *str,
169 int unset __used)
170{
171 int ret = -ENOENT;
172
173 if (str && !params.target) {
174 if (!strcmp(opt->long_name, "exec"))
175 params.uprobes = true;
176#ifdef DWARF_SUPPORT
177 else if (!strcmp(opt->long_name, "module"))
178 params.uprobes = false;
179#endif
180 else
181 return ret;
182
183 params.target = str;
184 ret = 0;
185 }
186
187 return ret;
188}
189
128#ifdef DWARF_SUPPORT 190#ifdef DWARF_SUPPORT
129static int opt_show_lines(const struct option *opt __used, 191static int opt_show_lines(const struct option *opt __used,
130 const char *str, int unset __used) 192 const char *str, int unset __used)
@@ -246,9 +308,9 @@ static const struct option options[] = {
246 "file", "vmlinux pathname"), 308 "file", "vmlinux pathname"),
247 OPT_STRING('s', "source", &symbol_conf.source_prefix, 309 OPT_STRING('s', "source", &symbol_conf.source_prefix,
248 "directory", "path to kernel source"), 310 "directory", "path to kernel source"),
249 OPT_STRING('m', "module", &params.target, 311 OPT_CALLBACK('m', "module", NULL, "modname|path",
250 "modname|path", 312 "target module name (for online) or path (for offline)",
251 "target module name (for online) or path (for offline)"), 313 opt_set_target),
252#endif 314#endif
253 OPT__DRY_RUN(&probe_event_dry_run), 315 OPT__DRY_RUN(&probe_event_dry_run),
254 OPT_INTEGER('\0', "max-probes", &params.max_probe_points, 316 OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
@@ -260,6 +322,8 @@ static const struct option options[] = {
260 "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n" 322 "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n"
261 "\t\t\t \"" DEFAULT_FUNC_FILTER "\" for --funcs)", 323 "\t\t\t \"" DEFAULT_FUNC_FILTER "\" for --funcs)",
262 opt_set_filter), 324 opt_set_filter),
325 OPT_CALLBACK('x', "exec", NULL, "executable|path",
326 "target executable name or path", opt_set_target),
263 OPT_END() 327 OPT_END()
264}; 328};
265 329
@@ -310,6 +374,10 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
310 pr_err(" Error: Don't use --list with --funcs.\n"); 374 pr_err(" Error: Don't use --list with --funcs.\n");
311 usage_with_options(probe_usage, options); 375 usage_with_options(probe_usage, options);
312 } 376 }
377 if (params.uprobes) {
378 pr_warning(" Error: Don't use --list with --exec.\n");
379 usage_with_options(probe_usage, options);
380 }
313 ret = show_perf_probe_events(); 381 ret = show_perf_probe_events();
314 if (ret < 0) 382 if (ret < 0)
315 pr_err(" Error: Failed to show event list. (%d)\n", 383 pr_err(" Error: Failed to show event list. (%d)\n",
@@ -333,8 +401,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
333 if (!params.filter) 401 if (!params.filter)
334 params.filter = strfilter__new(DEFAULT_FUNC_FILTER, 402 params.filter = strfilter__new(DEFAULT_FUNC_FILTER,
335 NULL); 403 NULL);
336 ret = show_available_funcs(params.target, 404 ret = show_available_funcs(params.target, params.filter,
337 params.filter); 405 params.uprobes);
338 strfilter__delete(params.filter); 406 strfilter__delete(params.filter);
339 if (ret < 0) 407 if (ret < 0)
340 pr_err(" Error: Failed to show functions." 408 pr_err(" Error: Failed to show functions."
@@ -343,7 +411,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
343 } 411 }
344 412
345#ifdef DWARF_SUPPORT 413#ifdef DWARF_SUPPORT
346 if (params.show_lines) { 414 if (params.show_lines && !params.uprobes) {
347 if (params.mod_events) { 415 if (params.mod_events) {
348 pr_err(" Error: Don't use --line with" 416 pr_err(" Error: Don't use --line with"
349 " --add/--del.\n"); 417 " --add/--del.\n");
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 8a8ee64e72d1..59dccc98b554 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -44,6 +44,7 @@
44#include "trace-event.h" /* For __unused */ 44#include "trace-event.h" /* For __unused */
45#include "probe-event.h" 45#include "probe-event.h"
46#include "probe-finder.h" 46#include "probe-finder.h"
47#include "session.h"
47 48
48#define MAX_CMDLEN 256 49#define MAX_CMDLEN 256
49#define MAX_PROBE_ARGS 128 50#define MAX_PROBE_ARGS 128
@@ -70,6 +71,8 @@ static int e_snprintf(char *str, size_t size, const char *format, ...)
70} 71}
71 72
72static char *synthesize_perf_probe_point(struct perf_probe_point *pp); 73static char *synthesize_perf_probe_point(struct perf_probe_point *pp);
74static int convert_name_to_addr(struct perf_probe_event *pev,
75 const char *exec);
73static struct machine machine; 76static struct machine machine;
74 77
75/* Initialize symbol maps and path of vmlinux/modules */ 78/* Initialize symbol maps and path of vmlinux/modules */
@@ -170,6 +173,34 @@ const char *kernel_get_module_path(const char *module)
170 return (dso) ? dso->long_name : NULL; 173 return (dso) ? dso->long_name : NULL;
171} 174}
172 175
176static int init_user_exec(void)
177{
178 int ret = 0;
179
180 symbol_conf.try_vmlinux_path = false;
181 symbol_conf.sort_by_name = true;
182 ret = symbol__init();
183
184 if (ret < 0)
185 pr_debug("Failed to init symbol map.\n");
186
187 return ret;
188}
189
190static int convert_to_perf_probe_point(struct probe_trace_point *tp,
191 struct perf_probe_point *pp)
192{
193 pp->function = strdup(tp->symbol);
194
195 if (pp->function == NULL)
196 return -ENOMEM;
197
198 pp->offset = tp->offset;
199 pp->retprobe = tp->retprobe;
200
201 return 0;
202}
203
173#ifdef DWARF_SUPPORT 204#ifdef DWARF_SUPPORT
174/* Open new debuginfo of given module */ 205/* Open new debuginfo of given module */
175static struct debuginfo *open_debuginfo(const char *module) 206static struct debuginfo *open_debuginfo(const char *module)
@@ -224,10 +255,7 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
224 if (ret <= 0) { 255 if (ret <= 0) {
225 pr_debug("Failed to find corresponding probes from " 256 pr_debug("Failed to find corresponding probes from "
226 "debuginfo. Use kprobe event information.\n"); 257 "debuginfo. Use kprobe event information.\n");
227 pp->function = strdup(tp->symbol); 258 return convert_to_perf_probe_point(tp, pp);
228 if (pp->function == NULL)
229 return -ENOMEM;
230 pp->offset = tp->offset;
231 } 259 }
232 pp->retprobe = tp->retprobe; 260 pp->retprobe = tp->retprobe;
233 261
@@ -275,9 +303,20 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
275 int max_tevs, const char *target) 303 int max_tevs, const char *target)
276{ 304{
277 bool need_dwarf = perf_probe_event_need_dwarf(pev); 305 bool need_dwarf = perf_probe_event_need_dwarf(pev);
278 struct debuginfo *dinfo = open_debuginfo(target); 306 struct debuginfo *dinfo;
279 int ntevs, ret = 0; 307 int ntevs, ret = 0;
280 308
309 if (pev->uprobes) {
310 if (need_dwarf) {
311 pr_warning("Debuginfo-analysis is not yet supported"
312 " with -x/--exec option.\n");
313 return -ENOSYS;
314 }
315 return convert_name_to_addr(pev, target);
316 }
317
318 dinfo = open_debuginfo(target);
319
281 if (!dinfo) { 320 if (!dinfo) {
282 if (need_dwarf) { 321 if (need_dwarf) {
283 pr_warning("Failed to open debuginfo file.\n"); 322 pr_warning("Failed to open debuginfo file.\n");
@@ -603,23 +642,22 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
603 pr_err("Failed to find symbol %s in kernel.\n", tp->symbol); 642 pr_err("Failed to find symbol %s in kernel.\n", tp->symbol);
604 return -ENOENT; 643 return -ENOENT;
605 } 644 }
606 pp->function = strdup(tp->symbol);
607 if (pp->function == NULL)
608 return -ENOMEM;
609 pp->offset = tp->offset;
610 pp->retprobe = tp->retprobe;
611 645
612 return 0; 646 return convert_to_perf_probe_point(tp, pp);
613} 647}
614 648
615static int try_to_find_probe_trace_events(struct perf_probe_event *pev, 649static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
616 struct probe_trace_event **tevs __unused, 650 struct probe_trace_event **tevs __unused,
617 int max_tevs __unused, const char *mod __unused) 651 int max_tevs __unused, const char *target)
618{ 652{
619 if (perf_probe_event_need_dwarf(pev)) { 653 if (perf_probe_event_need_dwarf(pev)) {
620 pr_warning("Debuginfo-analysis is not supported.\n"); 654 pr_warning("Debuginfo-analysis is not supported.\n");
621 return -ENOSYS; 655 return -ENOSYS;
622 } 656 }
657
658 if (pev->uprobes)
659 return convert_name_to_addr(pev, target);
660
623 return 0; 661 return 0;
624} 662}
625 663
@@ -1341,11 +1379,18 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev)
1341 if (buf == NULL) 1379 if (buf == NULL)
1342 return NULL; 1380 return NULL;
1343 1381
1344 len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s%s%s+%lu", 1382 if (tev->uprobes)
1345 tp->retprobe ? 'r' : 'p', 1383 len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s:%s",
1346 tev->group, tev->event, 1384 tp->retprobe ? 'r' : 'p',
1347 tp->module ?: "", tp->module ? ":" : "", 1385 tev->group, tev->event,
1348 tp->symbol, tp->offset); 1386 tp->module, tp->symbol);
1387 else
1388 len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s%s%s+%lu",
1389 tp->retprobe ? 'r' : 'p',
1390 tev->group, tev->event,
1391 tp->module ?: "", tp->module ? ":" : "",
1392 tp->symbol, tp->offset);
1393
1349 if (len <= 0) 1394 if (len <= 0)
1350 goto error; 1395 goto error;
1351 1396
@@ -1364,7 +1409,7 @@ error:
1364} 1409}
1365 1410
1366static int convert_to_perf_probe_event(struct probe_trace_event *tev, 1411static int convert_to_perf_probe_event(struct probe_trace_event *tev,
1367 struct perf_probe_event *pev) 1412 struct perf_probe_event *pev, bool is_kprobe)
1368{ 1413{
1369 char buf[64] = ""; 1414 char buf[64] = "";
1370 int i, ret; 1415 int i, ret;
@@ -1376,7 +1421,11 @@ static int convert_to_perf_probe_event(struct probe_trace_event *tev,
1376 return -ENOMEM; 1421 return -ENOMEM;
1377 1422
1378 /* Convert trace_point to probe_point */ 1423 /* Convert trace_point to probe_point */
1379 ret = kprobe_convert_to_perf_probe(&tev->point, &pev->point); 1424 if (is_kprobe)
1425 ret = kprobe_convert_to_perf_probe(&tev->point, &pev->point);
1426 else
1427 ret = convert_to_perf_probe_point(&tev->point, &pev->point);
1428
1380 if (ret < 0) 1429 if (ret < 0)
1381 return ret; 1430 return ret;
1382 1431
@@ -1472,7 +1521,26 @@ static void clear_probe_trace_event(struct probe_trace_event *tev)
1472 memset(tev, 0, sizeof(*tev)); 1521 memset(tev, 0, sizeof(*tev));
1473} 1522}
1474 1523
1475static int open_kprobe_events(bool readwrite) 1524static void print_warn_msg(const char *file, bool is_kprobe)
1525{
1526
1527 if (errno == ENOENT) {
1528 const char *config;
1529
1530 if (!is_kprobe)
1531 config = "CONFIG_UPROBE_EVENTS";
1532 else
1533 config = "CONFIG_KPROBE_EVENTS";
1534
1535 pr_warning("%s file does not exist - please rebuild kernel"
1536 " with %s.\n", file, config);
1537 } else
1538 pr_warning("Failed to open %s file: %s\n", file,
1539 strerror(errno));
1540}
1541
1542static int open_probe_events(const char *trace_file, bool readwrite,
1543 bool is_kprobe)
1476{ 1544{
1477 char buf[PATH_MAX]; 1545 char buf[PATH_MAX];
1478 const char *__debugfs; 1546 const char *__debugfs;
@@ -1484,27 +1552,31 @@ static int open_kprobe_events(bool readwrite)
1484 return -ENOENT; 1552 return -ENOENT;
1485 } 1553 }
1486 1554
1487 ret = e_snprintf(buf, PATH_MAX, "%stracing/kprobe_events", __debugfs); 1555 ret = e_snprintf(buf, PATH_MAX, "%s/%s", __debugfs, trace_file);
1488 if (ret >= 0) { 1556 if (ret >= 0) {
1489 pr_debug("Opening %s write=%d\n", buf, readwrite); 1557 pr_debug("Opening %s write=%d\n", buf, readwrite);
1490 if (readwrite && !probe_event_dry_run) 1558 if (readwrite && !probe_event_dry_run)
1491 ret = open(buf, O_RDWR, O_APPEND); 1559 ret = open(buf, O_RDWR, O_APPEND);
1492 else 1560 else
1493 ret = open(buf, O_RDONLY, 0); 1561 ret = open(buf, O_RDONLY, 0);
1494 }
1495 1562
1496 if (ret < 0) { 1563 if (ret < 0)
1497 if (errno == ENOENT) 1564 print_warn_msg(buf, is_kprobe);
1498 pr_warning("kprobe_events file does not exist - please"
1499 " rebuild kernel with CONFIG_KPROBE_EVENT.\n");
1500 else
1501 pr_warning("Failed to open kprobe_events file: %s\n",
1502 strerror(errno));
1503 } 1565 }
1504 return ret; 1566 return ret;
1505} 1567}
1506 1568
1507/* Get raw string list of current kprobe_events */ 1569static int open_kprobe_events(bool readwrite)
1570{
1571 return open_probe_events("tracing/kprobe_events", readwrite, true);
1572}
1573
1574static int open_uprobe_events(bool readwrite)
1575{
1576 return open_probe_events("tracing/uprobe_events", readwrite, false);
1577}
1578
1579/* Get raw string list of current kprobe_events or uprobe_events */
1508static struct strlist *get_probe_trace_command_rawlist(int fd) 1580static struct strlist *get_probe_trace_command_rawlist(int fd)
1509{ 1581{
1510 int ret, idx; 1582 int ret, idx;
@@ -1569,36 +1641,26 @@ static int show_perf_probe_event(struct perf_probe_event *pev)
1569 return ret; 1641 return ret;
1570} 1642}
1571 1643
1572/* List up current perf-probe events */ 1644static int __show_perf_probe_events(int fd, bool is_kprobe)
1573int show_perf_probe_events(void)
1574{ 1645{
1575 int fd, ret; 1646 int ret = 0;
1576 struct probe_trace_event tev; 1647 struct probe_trace_event tev;
1577 struct perf_probe_event pev; 1648 struct perf_probe_event pev;
1578 struct strlist *rawlist; 1649 struct strlist *rawlist;
1579 struct str_node *ent; 1650 struct str_node *ent;
1580 1651
1581 setup_pager();
1582 ret = init_vmlinux();
1583 if (ret < 0)
1584 return ret;
1585
1586 memset(&tev, 0, sizeof(tev)); 1652 memset(&tev, 0, sizeof(tev));
1587 memset(&pev, 0, sizeof(pev)); 1653 memset(&pev, 0, sizeof(pev));
1588 1654
1589 fd = open_kprobe_events(false);
1590 if (fd < 0)
1591 return fd;
1592
1593 rawlist = get_probe_trace_command_rawlist(fd); 1655 rawlist = get_probe_trace_command_rawlist(fd);
1594 close(fd);
1595 if (!rawlist) 1656 if (!rawlist)
1596 return -ENOENT; 1657 return -ENOENT;
1597 1658
1598 strlist__for_each(ent, rawlist) { 1659 strlist__for_each(ent, rawlist) {
1599 ret = parse_probe_trace_command(ent->s, &tev); 1660 ret = parse_probe_trace_command(ent->s, &tev);
1600 if (ret >= 0) { 1661 if (ret >= 0) {
1601 ret = convert_to_perf_probe_event(&tev, &pev); 1662 ret = convert_to_perf_probe_event(&tev, &pev,
1663 is_kprobe);
1602 if (ret >= 0) 1664 if (ret >= 0)
1603 ret = show_perf_probe_event(&pev); 1665 ret = show_perf_probe_event(&pev);
1604 } 1666 }
@@ -1612,6 +1674,33 @@ int show_perf_probe_events(void)
1612 return ret; 1674 return ret;
1613} 1675}
1614 1676
1677/* List up current perf-probe events */
1678int show_perf_probe_events(void)
1679{
1680 int fd, ret;
1681
1682 setup_pager();
1683 fd = open_kprobe_events(false);
1684
1685 if (fd < 0)
1686 return fd;
1687
1688 ret = init_vmlinux();
1689 if (ret < 0)
1690 return ret;
1691
1692 ret = __show_perf_probe_events(fd, true);
1693 close(fd);
1694
1695 fd = open_uprobe_events(false);
1696 if (fd >= 0) {
1697 ret = __show_perf_probe_events(fd, false);
1698 close(fd);
1699 }
1700
1701 return ret;
1702}
1703
1615/* Get current perf-probe event names */ 1704/* Get current perf-probe event names */
1616static struct strlist *get_probe_trace_event_names(int fd, bool include_group) 1705static struct strlist *get_probe_trace_event_names(int fd, bool include_group)
1617{ 1706{
@@ -1717,7 +1806,11 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
1717 const char *event, *group; 1806 const char *event, *group;
1718 struct strlist *namelist; 1807 struct strlist *namelist;
1719 1808
1720 fd = open_kprobe_events(true); 1809 if (pev->uprobes)
1810 fd = open_uprobe_events(true);
1811 else
1812 fd = open_kprobe_events(true);
1813
1721 if (fd < 0) 1814 if (fd < 0)
1722 return fd; 1815 return fd;
1723 /* Get current event names */ 1816 /* Get current event names */
@@ -1829,6 +1922,8 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
1829 tev->point.offset = pev->point.offset; 1922 tev->point.offset = pev->point.offset;
1830 tev->point.retprobe = pev->point.retprobe; 1923 tev->point.retprobe = pev->point.retprobe;
1831 tev->nargs = pev->nargs; 1924 tev->nargs = pev->nargs;
1925 tev->uprobes = pev->uprobes;
1926
1832 if (tev->nargs) { 1927 if (tev->nargs) {
1833 tev->args = zalloc(sizeof(struct probe_trace_arg) 1928 tev->args = zalloc(sizeof(struct probe_trace_arg)
1834 * tev->nargs); 1929 * tev->nargs);
@@ -1859,6 +1954,9 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
1859 } 1954 }
1860 } 1955 }
1861 1956
1957 if (pev->uprobes)
1958 return 1;
1959
1862 /* Currently just checking function name from symbol map */ 1960 /* Currently just checking function name from symbol map */
1863 sym = __find_kernel_function_by_name(tev->point.symbol, NULL); 1961 sym = __find_kernel_function_by_name(tev->point.symbol, NULL);
1864 if (!sym) { 1962 if (!sym) {
@@ -1894,12 +1992,18 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
1894 int i, j, ret; 1992 int i, j, ret;
1895 struct __event_package *pkgs; 1993 struct __event_package *pkgs;
1896 1994
1995 ret = 0;
1897 pkgs = zalloc(sizeof(struct __event_package) * npevs); 1996 pkgs = zalloc(sizeof(struct __event_package) * npevs);
1997
1898 if (pkgs == NULL) 1998 if (pkgs == NULL)
1899 return -ENOMEM; 1999 return -ENOMEM;
1900 2000
1901 /* Init vmlinux path */ 2001 if (!pevs->uprobes)
1902 ret = init_vmlinux(); 2002 /* Init vmlinux path */
2003 ret = init_vmlinux();
2004 else
2005 ret = init_user_exec();
2006
1903 if (ret < 0) { 2007 if (ret < 0) {
1904 free(pkgs); 2008 free(pkgs);
1905 return ret; 2009 return ret;
@@ -1971,23 +2075,15 @@ error:
1971 return ret; 2075 return ret;
1972} 2076}
1973 2077
1974static int del_trace_probe_event(int fd, const char *group, 2078static int del_trace_probe_event(int fd, const char *buf,
1975 const char *event, struct strlist *namelist) 2079 struct strlist *namelist)
1976{ 2080{
1977 char buf[128];
1978 struct str_node *ent, *n; 2081 struct str_node *ent, *n;
1979 int found = 0, ret = 0; 2082 int ret = -1;
1980
1981 ret = e_snprintf(buf, 128, "%s:%s", group, event);
1982 if (ret < 0) {
1983 pr_err("Failed to copy event.\n");
1984 return ret;
1985 }
1986 2083
1987 if (strpbrk(buf, "*?")) { /* Glob-exp */ 2084 if (strpbrk(buf, "*?")) { /* Glob-exp */
1988 strlist__for_each_safe(ent, n, namelist) 2085 strlist__for_each_safe(ent, n, namelist)
1989 if (strglobmatch(ent->s, buf)) { 2086 if (strglobmatch(ent->s, buf)) {
1990 found++;
1991 ret = __del_trace_probe_event(fd, ent); 2087 ret = __del_trace_probe_event(fd, ent);
1992 if (ret < 0) 2088 if (ret < 0)
1993 break; 2089 break;
@@ -1996,40 +2092,43 @@ static int del_trace_probe_event(int fd, const char *group,
1996 } else { 2092 } else {
1997 ent = strlist__find(namelist, buf); 2093 ent = strlist__find(namelist, buf);
1998 if (ent) { 2094 if (ent) {
1999 found++;
2000 ret = __del_trace_probe_event(fd, ent); 2095 ret = __del_trace_probe_event(fd, ent);
2001 if (ret >= 0) 2096 if (ret >= 0)
2002 strlist__remove(namelist, ent); 2097 strlist__remove(namelist, ent);
2003 } 2098 }
2004 } 2099 }
2005 if (found == 0 && ret >= 0)
2006 pr_info("Info: Event \"%s\" does not exist.\n", buf);
2007 2100
2008 return ret; 2101 return ret;
2009} 2102}
2010 2103
2011int del_perf_probe_events(struct strlist *dellist) 2104int del_perf_probe_events(struct strlist *dellist)
2012{ 2105{
2013 int fd, ret = 0; 2106 int ret = -1, ufd = -1, kfd = -1;
2107 char buf[128];
2014 const char *group, *event; 2108 const char *group, *event;
2015 char *p, *str; 2109 char *p, *str;
2016 struct str_node *ent; 2110 struct str_node *ent;
2017 struct strlist *namelist; 2111 struct strlist *namelist = NULL, *unamelist = NULL;
2018
2019 fd = open_kprobe_events(true);
2020 if (fd < 0)
2021 return fd;
2022 2112
2023 /* Get current event names */ 2113 /* Get current event names */
2024 namelist = get_probe_trace_event_names(fd, true); 2114 kfd = open_kprobe_events(true);
2025 if (namelist == NULL) 2115 if (kfd < 0)
2026 return -EINVAL; 2116 return kfd;
2117
2118 namelist = get_probe_trace_event_names(kfd, true);
2119 ufd = open_uprobe_events(true);
2120
2121 if (ufd >= 0)
2122 unamelist = get_probe_trace_event_names(ufd, true);
2123
2124 if (namelist == NULL && unamelist == NULL)
2125 goto error;
2027 2126
2028 strlist__for_each(ent, dellist) { 2127 strlist__for_each(ent, dellist) {
2029 str = strdup(ent->s); 2128 str = strdup(ent->s);
2030 if (str == NULL) { 2129 if (str == NULL) {
2031 ret = -ENOMEM; 2130 ret = -ENOMEM;
2032 break; 2131 goto error;
2033 } 2132 }
2034 pr_debug("Parsing: %s\n", str); 2133 pr_debug("Parsing: %s\n", str);
2035 p = strchr(str, ':'); 2134 p = strchr(str, ':');
@@ -2041,17 +2140,46 @@ int del_perf_probe_events(struct strlist *dellist)
2041 group = "*"; 2140 group = "*";
2042 event = str; 2141 event = str;
2043 } 2142 }
2143
2144 ret = e_snprintf(buf, 128, "%s:%s", group, event);
2145 if (ret < 0) {
2146 pr_err("Failed to copy event.");
2147 free(str);
2148 goto error;
2149 }
2150
2044 pr_debug("Group: %s, Event: %s\n", group, event); 2151 pr_debug("Group: %s, Event: %s\n", group, event);
2045 ret = del_trace_probe_event(fd, group, event, namelist); 2152
2153 if (namelist)
2154 ret = del_trace_probe_event(kfd, buf, namelist);
2155
2156 if (unamelist && ret != 0)
2157 ret = del_trace_probe_event(ufd, buf, unamelist);
2158
2159 if (ret != 0)
2160 pr_info("Info: Event \"%s\" does not exist.\n", buf);
2161
2046 free(str); 2162 free(str);
2047 if (ret < 0)
2048 break;
2049 } 2163 }
2050 strlist__delete(namelist); 2164
2051 close(fd); 2165error:
2166 if (kfd >= 0) {
2167 if (namelist)
2168 strlist__delete(namelist);
2169
2170 close(kfd);
2171 }
2172
2173 if (ufd >= 0) {
2174 if (unamelist)
2175 strlist__delete(unamelist);
2176
2177 close(ufd);
2178 }
2052 2179
2053 return ret; 2180 return ret;
2054} 2181}
2182
2055/* TODO: don't use a global variable for filter ... */ 2183/* TODO: don't use a global variable for filter ... */
2056static struct strfilter *available_func_filter; 2184static struct strfilter *available_func_filter;
2057 2185
@@ -2068,30 +2196,152 @@ static int filter_available_functions(struct map *map __unused,
2068 return 1; 2196 return 1;
2069} 2197}
2070 2198
2071int show_available_funcs(const char *target, struct strfilter *_filter) 2199static int __show_available_funcs(struct map *map)
2200{
2201 if (map__load(map, filter_available_functions)) {
2202 pr_err("Failed to load map.\n");
2203 return -EINVAL;
2204 }
2205 if (!dso__sorted_by_name(map->dso, map->type))
2206 dso__sort_by_name(map->dso, map->type);
2207
2208 dso__fprintf_symbols_by_name(map->dso, map->type, stdout);
2209 return 0;
2210}
2211
2212static int available_kernel_funcs(const char *module)
2072{ 2213{
2073 struct map *map; 2214 struct map *map;
2074 int ret; 2215 int ret;
2075 2216
2076 setup_pager();
2077
2078 ret = init_vmlinux(); 2217 ret = init_vmlinux();
2079 if (ret < 0) 2218 if (ret < 0)
2080 return ret; 2219 return ret;
2081 2220
2082 map = kernel_get_module_map(target); 2221 map = kernel_get_module_map(module);
2083 if (!map) { 2222 if (!map) {
2084 pr_err("Failed to find %s map.\n", (target) ? : "kernel"); 2223 pr_err("Failed to find %s map.\n", (module) ? : "kernel");
2085 return -EINVAL; 2224 return -EINVAL;
2086 } 2225 }
2226 return __show_available_funcs(map);
2227}
2228
2229static int available_user_funcs(const char *target)
2230{
2231 struct map *map;
2232 int ret;
2233
2234 ret = init_user_exec();
2235 if (ret < 0)
2236 return ret;
2237
2238 map = dso__new_map(target);
2239 ret = __show_available_funcs(map);
2240 dso__delete(map->dso);
2241 map__delete(map);
2242 return ret;
2243}
2244
2245int show_available_funcs(const char *target, struct strfilter *_filter,
2246 bool user)
2247{
2248 setup_pager();
2087 available_func_filter = _filter; 2249 available_func_filter = _filter;
2250
2251 if (!user)
2252 return available_kernel_funcs(target);
2253
2254 return available_user_funcs(target);
2255}
2256
2257/*
2258 * uprobe_events accepts only an address:
2259 * convert the function name plus any offset to an address.
2260 */
2261static int convert_name_to_addr(struct perf_probe_event *pev, const char *exec)
2262{
2263 struct perf_probe_point *pp = &pev->point;
2264 struct symbol *sym;
2265 struct map *map = NULL;
2266 char *function = NULL, *name = NULL;
2267 int ret = -EINVAL;
2268 unsigned long long vaddr = 0;
2269
2270 if (!pp->function) {
2271 pr_warning("No function specified for uprobes");
2272 goto out;
2273 }
2274
2275 function = strdup(pp->function);
2276 if (!function) {
2277 pr_warning("Failed to allocate memory by strdup.\n");
2278 ret = -ENOMEM;
2279 goto out;
2280 }
2281
2282 name = realpath(exec, NULL);
2283 if (!name) {
2284 pr_warning("Cannot find realpath for %s.\n", exec);
2285 goto out;
2286 }
2287 map = dso__new_map(name);
2288 if (!map) {
2289 pr_warning("Cannot find appropriate DSO for %s.\n", exec);
2290 goto out;
2291 }
2292 available_func_filter = strfilter__new(function, NULL);
2088 if (map__load(map, filter_available_functions)) { 2293 if (map__load(map, filter_available_functions)) {
2089 pr_err("Failed to load map.\n"); 2294 pr_err("Failed to load map.\n");
2090 return -EINVAL; 2295 goto out;
2091 } 2296 }
2092 if (!dso__sorted_by_name(map->dso, map->type))
2093 dso__sort_by_name(map->dso, map->type);
2094 2297
2095 dso__fprintf_symbols_by_name(map->dso, map->type, stdout); 2298 sym = map__find_symbol_by_name(map, function, NULL);
2096 return 0; 2299 if (!sym) {
2300 pr_warning("Cannot find %s in DSO %s\n", function, exec);
2301 goto out;
2302 }
2303
2304 if (map->start > sym->start)
2305 vaddr = map->start;
2306 vaddr += sym->start + pp->offset + map->pgoff;
2307 pp->offset = 0;
2308
2309 if (!pev->event) {
2310 pev->event = function;
2311 function = NULL;
2312 }
2313 if (!pev->group) {
2314 char *ptr1, *ptr2;
2315
2316 pev->group = zalloc(sizeof(char *) * 64);
2317 ptr1 = strdup(basename(exec));
2318 if (ptr1) {
2319 ptr2 = strpbrk(ptr1, "-._");
2320 if (ptr2)
2321 *ptr2 = '\0';
2322 e_snprintf(pev->group, 64, "%s_%s", PERFPROBE_GROUP,
2323 ptr1);
2324 free(ptr1);
2325 }
2326 }
2327 free(pp->function);
2328 pp->function = zalloc(sizeof(char *) * MAX_PROBE_ARGS);
2329 if (!pp->function) {
2330 ret = -ENOMEM;
2331 pr_warning("Failed to allocate memory by zalloc.\n");
2332 goto out;
2333 }
2334 e_snprintf(pp->function, MAX_PROBE_ARGS, "0x%llx", vaddr);
2335 ret = 0;
2336
2337out:
2338 if (map) {
2339 dso__delete(map->dso);
2340 map__delete(map);
2341 }
2342 if (function)
2343 free(function);
2344 if (name)
2345 free(name);
2346 return ret;
2097} 2347}
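
To make the conversion concrete: convert_name_to_addr() replaces the
function name with the virtual address that uprobe_events expects. Assuming
zfree resolves to sym->start = 0x45600 in /bin/zsh, a requested offset of
0x10, and map->pgoff = 0 (all numbers illustrative):

    vaddr = 0x45600 + 0x10 + 0 = 0x45610

after which synthesize_probe_trace_command() emits a command roughly of the
form:

    p:probe_zsh/zfree /bin/zsh:0x45610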
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index a7dee835f49c..f9f3de8b4220 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -7,7 +7,7 @@
7 7
8extern bool probe_event_dry_run; 8extern bool probe_event_dry_run;
9 9
10/* kprobe-tracer tracing point */ 10/* kprobe-tracer and uprobe-tracer tracing point */
11struct probe_trace_point { 11struct probe_trace_point {
12 char *symbol; /* Base symbol */ 12 char *symbol; /* Base symbol */
13 char *module; /* Module name */ 13 char *module; /* Module name */
@@ -21,7 +21,7 @@ struct probe_trace_arg_ref {
21 long offset; /* Offset value */ 21 long offset; /* Offset value */
22}; 22};
23 23
24/* kprobe-tracer tracing argument */ 24/* kprobe-tracer and uprobe-tracer tracing argument */
25struct probe_trace_arg { 25struct probe_trace_arg {
26 char *name; /* Argument name */ 26 char *name; /* Argument name */
27 char *value; /* Base value */ 27 char *value; /* Base value */
@@ -29,12 +29,13 @@ struct probe_trace_arg {
29 struct probe_trace_arg_ref *ref; /* Referencing offset */ 29 struct probe_trace_arg_ref *ref; /* Referencing offset */
30}; 30};
31 31
32/* kprobe-tracer tracing event (point + arg) */ 32/* kprobe-tracer and uprobe-tracer tracing event (point + arg) */
33struct probe_trace_event { 33struct probe_trace_event {
34 char *event; /* Event name */ 34 char *event; /* Event name */
35 char *group; /* Group name */ 35 char *group; /* Group name */
36 struct probe_trace_point point; /* Trace point */ 36 struct probe_trace_point point; /* Trace point */
37 int nargs; /* Number of args */ 37 int nargs; /* Number of args */
38 bool uprobes; /* uprobes only */
38 struct probe_trace_arg *args; /* Arguments */ 39 struct probe_trace_arg *args; /* Arguments */
39}; 40};
40 41
@@ -70,6 +71,7 @@ struct perf_probe_event {
70 char *group; /* Group name */ 71 char *group; /* Group name */
71 struct perf_probe_point point; /* Probe point */ 72 struct perf_probe_point point; /* Probe point */
72 int nargs; /* Number of arguments */ 73 int nargs; /* Number of arguments */
74 bool uprobes;
73 struct perf_probe_arg *args; /* Arguments */ 75 struct perf_probe_arg *args; /* Arguments */
74}; 76};
75 77
@@ -129,8 +131,8 @@ extern int show_line_range(struct line_range *lr, const char *module);
129extern int show_available_vars(struct perf_probe_event *pevs, int npevs, 131extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
130 int max_probe_points, const char *module, 132 int max_probe_points, const char *module,
131 struct strfilter *filter, bool externs); 133 struct strfilter *filter, bool externs);
132extern int show_available_funcs(const char *module, struct strfilter *filter); 134extern int show_available_funcs(const char *module, struct strfilter *filter,
133 135 bool user);
134 136
135/* Maximum index number of event-name postfix */ 137/* Maximum index number of event-name postfix */
136#define MAX_EVENT_INDEX 1024 138#define MAX_EVENT_INDEX 1024
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index ab9867b2b433..e2ba8858f3e1 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -2783,3 +2783,11 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
2783 2783
2784 return ret; 2784 return ret;
2785} 2785}
2786
2787struct map *dso__new_map(const char *name)
2788{
2789 struct dso *dso = dso__new(name);
2790 struct map *map = map__new2(0, dso, MAP__FUNCTION);
2791
2792 return map;
2793}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 1f003884f1ab..5649d63798cb 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -242,6 +242,7 @@ void dso__set_long_name(struct dso *dso, char *name);
242void dso__set_build_id(struct dso *dso, void *build_id); 242void dso__set_build_id(struct dso *dso, void *build_id);
243void dso__read_running_kernel_build_id(struct dso *dso, 243void dso__read_running_kernel_build_id(struct dso *dso,
244 struct machine *machine); 244 struct machine *machine);
245struct map *dso__new_map(const char *name);
245struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, 246struct symbol *dso__find_symbol(struct dso *dso, enum map_type type,
246 u64 addr); 247 u64 addr);
247struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, 248struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,