authorIngo Molnar <mingo@kernel.org>2013-04-16 05:04:10 -0400
committerIngo Molnar <mingo@kernel.org>2013-04-16 05:04:10 -0400
commitb5210b2a34bae35fc00675462333af45676d727c (patch)
tree43f67fb370571aedb0ec8a1ad7e8da61834d19b2
parentf8378f5259647710f0b4ecb814b0a1b0d9040de0 (diff)
parent515619f209114697fabd21eed1623bfa69746815 (diff)
Merge branch 'uprobes/core' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc into perf/core
Pull uprobes updates from Oleg Nesterov:

 - "uretprobes" - an optimization to uprobes, like kretprobes are an
   optimization to kprobes. "perf probe -x file sym%return" now works
   like kretprobes.

 - PowerPC fixes plus a couple of cleanups/optimizations in uprobes and
   trace_uprobes.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  Documentation/trace/uprobetracer.txt  114
-rw-r--r--  arch/powerpc/include/asm/uprobes.h      1
-rw-r--r--  arch/powerpc/kernel/uprobes.c          29
-rw-r--r--  arch/x86/include/asm/uprobes.h          1
-rw-r--r--  arch/x86/kernel/uprobes.c              29
-rw-r--r--  include/linux/uprobes.h                 8
-rw-r--r--  kernel/events/uprobes.c               300
-rw-r--r--  kernel/trace/trace.h                    5
-rw-r--r--  kernel/trace/trace_uprobe.c           203
9 files changed, 526 insertions(+), 164 deletions(-)
diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt
index 24ce6823a09e..d9c3e682312c 100644
--- a/Documentation/trace/uprobetracer.txt
+++ b/Documentation/trace/uprobetracer.txt
@@ -1,6 +1,8 @@
 Uprobe-tracer: Uprobe-based Event Tracing
 =========================================
-Documentation written by Srikar Dronamraju
+
+Documentation written by Srikar Dronamraju
+
 
 Overview
 --------
@@ -13,78 +15,94 @@ current_tracer. Instead of that, add probe points via
 /sys/kernel/debug/tracing/events/uprobes/<EVENT>/enabled.
 
 However unlike kprobe-event tracer, the uprobe event interface expects the
-user to calculate the offset of the probepoint in the object
+user to calculate the offset of the probepoint in the object.
 
 Synopsis of uprobe_tracer
 -------------------------
- p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a probe
+ p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a uprobe
+ r[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a return uprobe (uretprobe)
+ -:[GRP/]EVENT                                  : Clear uprobe or uretprobe event
 
- GRP           : Group name. If omitted, use "uprobes" for it.
- EVENT         : Event name. If omitted, the event name is generated
-                 based on SYMBOL+offs.
- PATH          : path to an executable or a library.
+ GRP           : Group name. If omitted, "uprobes" is the default value.
+ EVENT         : Event name. If omitted, the event name is generated based
+                 on SYMBOL+offs.
+ PATH          : Path to an executable or a library.
  SYMBOL[+offs] : Symbol+offset where the probe is inserted.
 
  FETCHARGS     : Arguments. Each probe can have up to 128 args.
   %REG         : Fetch register REG
 
 Event Profiling
 ---------------
- You can check the total number of probe hits and probe miss-hits via
+You can check the total number of probe hits and probe miss-hits via
 /sys/kernel/debug/tracing/uprobe_profile.
- The first column is event name, the second is the number of probe hits,
+The first column is event name, the second is the number of probe hits,
 the third is the number of probe miss-hits.
 
 Usage examples
 --------------
-To add a probe as a new event, write a new definition to uprobe_events
-as below.
+ * Add a probe as a new uprobe event, write a new definition to uprobe_events
+as below: (sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash)
+
+   echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
+
+ * Add a probe as a new uretprobe event:
+
+   echo 'r: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
+
+ * Unset registered event:
 
-   echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
+   echo '-:bash_0x4245c0' >> /sys/kernel/debug/tracing/uprobe_events
 
- This sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash
+ * Print out the events that are registered:
 
-   echo > /sys/kernel/debug/tracing/uprobe_events
+   cat /sys/kernel/debug/tracing/uprobe_events
 
- This clears all probe points.
+ * Clear all events:
 
-The following example shows how to dump the instruction pointer and %ax
-a register at the probed text address. Here we are trying to probe
-function zfree in /bin/zsh
+   echo > /sys/kernel/debug/tracing/uprobe_events
+
+Following example shows how to dump the instruction pointer and %ax register
+at the probed text address. Probe zfree function in /bin/zsh:
 
    # cd /sys/kernel/debug/tracing/
    # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp
    00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh
    # objdump -T /bin/zsh | grep -w zfree
    0000000000446420 g    DF .text  0000000000000012  Base        zfree
 
-0x46420 is the offset of zfree in object /bin/zsh that is loaded at
-0x00400000. Hence the command to probe would be :
+  0x46420 is the offset of zfree in object /bin/zsh that is loaded at
+  0x00400000. Hence the command to uprobe would be:
+
+   # echo 'p:zfree_entry /bin/zsh:0x46420 %ip %ax' > uprobe_events
+
+  And the same for the uretprobe would be:
 
-   # echo 'p /bin/zsh:0x46420 %ip %ax' > uprobe_events
+   # echo 'r:zfree_exit /bin/zsh:0x46420 %ip %ax' >> uprobe_events
 
-Please note: User has to explicitly calculate the offset of the probepoint
+Please note: User has to explicitly calculate the offset of the probe-point
 in the object. We can see the events that are registered by looking at the
 uprobe_events file.
 
    # cat uprobe_events
-   p:uprobes/p_zsh_0x46420 /bin/zsh:0x00046420 arg1=%ip arg2=%ax
+   p:uprobes/zfree_entry /bin/zsh:0x00046420 arg1=%ip arg2=%ax
+   r:uprobes/zfree_exit /bin/zsh:0x00046420 arg1=%ip arg2=%ax
 
-The format of events can be seen by viewing the file events/uprobes/p_zsh_0x46420/format
+Format of events can be seen by viewing the file events/uprobes/zfree_entry/format
 
-   # cat events/uprobes/p_zsh_0x46420/format
-   name: p_zsh_0x46420
+   # cat events/uprobes/zfree_entry/format
+   name: zfree_entry
    ID: 922
    format:
 	field:unsigned short common_type;         offset:0;  size:2; signed:0;
 	field:unsigned char common_flags;         offset:2;  size:1; signed:0;
 	field:unsigned char common_preempt_count; offset:3;  size:1; signed:0;
 	field:int common_pid;                     offset:4;  size:4; signed:1;
 	field:int common_padding;                 offset:8;  size:4; signed:1;
 
 	field:unsigned long __probe_ip;           offset:12; size:4; signed:0;
 	field:u32 arg1;                           offset:16; size:4; signed:0;
 	field:u32 arg2;                           offset:20; size:4; signed:0;
 
 	print fmt: "(%lx) arg1=%lx arg2=%lx", REC->__probe_ip, REC->arg1, REC->arg2
 
@@ -94,6 +112,7 @@ events, you need to enable it by:
   # echo 1 > events/uprobes/enable
 
 Lets disable the event after sleeping for some time.
+
   # sleep 20
   # echo 0 > events/uprobes/enable
 
@@ -104,10 +123,11 @@ And you can see the traced information via /sys/kernel/debug/tracing/trace.
 #
 #           TASK-PID      CPU#   TIMESTAMP        FUNCTION
 #              | |          |        |              |
-   zsh-24842 [006] 258544.995456: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
-   zsh-24842 [007] 258545.000270: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
-   zsh-24842 [002] 258545.043929: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
-   zsh-24842 [004] 258547.046129: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
+   zsh-24842 [006] 258544.995456: zfree_entry: (0x446420) arg1=446420 arg2=79
+   zsh-24842 [007] 258545.000270: zfree_exit: (0x446540 <- 0x446420) arg1=446540 arg2=0
+   zsh-24842 [002] 258545.043929: zfree_entry: (0x446420) arg1=446420 arg2=79
+   zsh-24842 [004] 258547.046129: zfree_exit: (0x446540 <- 0x446420) arg1=446540 arg2=0
 
-Each line shows us probes were triggered for a pid 24842 with ip being
-0x446421 and contents of ax register being 79.
+Output shows us uprobe was triggered for a pid 24842 with ip being 0x446420
+and contents of ax register being 79. And uretprobe was triggered with ip at
+0x446540 with counterpart function entry at 0x446420.
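The offset arithmetic the documentation above asks the user to do by hand is simply the symbol's address from objdump minus the base of the object's executable mapping. A minimal user-space sketch of that calculation, reusing the zfree numbers from the example (the helper and its names are illustrative, not part of this patch):

	#include <stdio.h>

	/*
	 * Illustrative helper: derive the uprobe offset for the zfree
	 * example above. The probe offset is the symbol's address (from
	 * objdump -T) minus the base address the object is loaded at
	 * (from /proc/<pid>/maps).
	 */
	int main(void)
	{
		unsigned long sym_addr  = 0x446420UL; /* zfree, from objdump -T  */
		unsigned long map_start = 0x400000UL; /* /bin/zsh mapping base   */
		unsigned long offset    = sym_addr - map_start;

		/* Print ready-to-use definitions for uprobe_events */
		printf("p:zfree_entry /bin/zsh:0x%lx %%ip %%ax\n", offset);
		printf("r:zfree_exit /bin/zsh:0x%lx %%ip %%ax\n", offset);
		return 0;
	}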
diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
index b532060d0916..23016020915e 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -51,4 +51,5 @@ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
 extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
 extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #endif /* _ASM_UPROBES_H */
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index bc77834dbf43..59f419b935f2 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -31,6 +31,16 @@
 #define UPROBE_TRAP_NR	UINT_MAX
 
 /**
+ * is_trap_insn - check if the instruction is a trap variant
+ * @insn: instruction to be checked.
+ * Returns true if @insn is a trap variant.
+ */
+bool is_trap_insn(uprobe_opcode_t *insn)
+{
+	return (is_trap(*insn));
+}
+
+/**
  * arch_uprobe_analyze_insn
  * @mm: the probed address space.
  * @arch_uprobe: the probepoint information.
@@ -43,12 +53,6 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
 	if (addr & 0x03)
 		return -EINVAL;
 
-	/*
-	 * We currently don't support a uprobe on an already
-	 * existing breakpoint instruction underneath
-	 */
-	if (is_trap(auprobe->ainsn))
-		return -ENOTSUPP;
 	return 0;
 }
 
@@ -188,3 +192,16 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 
 	return false;
 }
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
+{
+	unsigned long orig_ret_vaddr;
+
+	orig_ret_vaddr = regs->link;
+
+	/* Replace the return addr with trampoline addr */
+	regs->link = trampoline_vaddr;
+
+	return orig_ret_vaddr;
+}
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 8ff8be7835ab..6e5197910fd8 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -55,4 +55,5 @@ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
 extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
 extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #endif /* _ASM_UPROBES_H */
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 0ba4cfb4f412..2ed845928b5f 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -697,3 +697,32 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	send_sig(SIGTRAP, current, 0);
 	return ret;
 }
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
+{
+	int rasize, ncopied;
+	unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */
+
+	rasize = is_ia32_task() ? 4 : 8;
+	ncopied = copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize);
+	if (unlikely(ncopied))
+		return -1;
+
+	/* check whether address has been already hijacked */
+	if (orig_ret_vaddr == trampoline_vaddr)
+		return orig_ret_vaddr;
+
+	ncopied = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize);
+	if (likely(!ncopied))
+		return orig_ret_vaddr;
+
+	if (ncopied != rasize) {
+		pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, "
+			"%%ip=%#lx\n", current->pid, regs->sp, regs->ip);
+
+		force_sig_info(SIGSEGV, SEND_SIG_FORCED, current);
+	}
+
+	return -1;
+}
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 02b83db8e2c5..06f28beed7c2 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -38,6 +38,8 @@ struct inode;
 #define UPROBE_HANDLER_REMOVE		1
 #define UPROBE_HANDLER_MASK		1
 
+#define MAX_URETPROBE_DEPTH		64
+
 enum uprobe_filter_ctx {
 	UPROBE_FILTER_REGISTER,
 	UPROBE_FILTER_UNREGISTER,
@@ -46,6 +48,9 @@ enum uprobe_filter_ctx {
 
 struct uprobe_consumer {
 	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
+	int (*ret_handler)(struct uprobe_consumer *self,
+				unsigned long func,
+				struct pt_regs *regs);
 	bool (*filter)(struct uprobe_consumer *self,
 				enum uprobe_filter_ctx ctx,
 				struct mm_struct *mm);
@@ -68,6 +73,8 @@ struct uprobe_task {
 	enum uprobe_task_state		state;
 	struct arch_uprobe_task		autask;
 
+	struct return_instance		*return_instances;
+	unsigned int			depth;
 	struct uprobe			*active_uprobe;
 
 	unsigned long			xol_vaddr;
@@ -100,6 +107,7 @@ struct uprobes_state {
 extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
 extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
+extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
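With ret_handler in place, any in-kernel user can observe both entry and return of a user-space function through this consumer interface, not just trace_uprobe. A minimal sketch of a hypothetical module built against the API shown above (the path, offset, and all demo_* names are placeholders; error handling is abbreviated):

	#include <linux/module.h>
	#include <linux/namei.h>
	#include <linux/fs.h>
	#include <linux/ptrace.h>
	#include <linux/uprobes.h>

	static int demo_handler(struct uprobe_consumer *self, struct pt_regs *regs)
	{
		pr_info("entered probed function, ip=%lx\n", instruction_pointer(regs));
		return 0;	/* keep the probe; UPROBE_HANDLER_REMOVE would drop it */
	}

	static int demo_ret_handler(struct uprobe_consumer *self,
				    unsigned long func, struct pt_regs *regs)
	{
		pr_info("returned from %lx to %lx\n", func, instruction_pointer(regs));
		return 0;
	}

	static struct uprobe_consumer demo_consumer = {
		.handler	= demo_handler,		/* either callback may be NULL, */
		.ret_handler	= demo_ret_handler,	/* but not both (-EINVAL)       */
	};

	static struct inode *demo_inode;
	static const loff_t demo_offset = 0x46420;	/* placeholder file offset */

	static int __init demo_init(void)
	{
		struct path path;
		int ret;

		ret = kern_path("/bin/zsh", LOOKUP_FOLLOW, &path);
		if (ret)
			return ret;

		demo_inode = igrab(path.dentry->d_inode);
		path_put(&path);

		return uprobe_register(demo_inode, demo_offset, &demo_consumer);
	}

	static void __exit demo_exit(void)
	{
		uprobe_unregister(demo_inode, demo_offset, &demo_consumer);
		iput(demo_inode);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");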
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a567c8c7ef31..f3569747d629 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -75,6 +75,15 @@ struct uprobe {
 	struct arch_uprobe	arch;
 };
 
+struct return_instance {
+	struct uprobe		*uprobe;
+	unsigned long		func;
+	unsigned long		orig_ret_vaddr; /* original return address */
+	bool			chained;	/* true, if instance is nested */
+
+	struct return_instance	*next;		/* keep as stack */
+};
+
 /*
  * valid_vma: Verify if the specified vma is an executable vma
  * Relax restrictions while unregistering: vm_flags might have
@@ -173,10 +182,31 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn)
 	return *insn == UPROBE_SWBP_INSN;
 }
 
-static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode)
+/**
+ * is_trap_insn - check if instruction is breakpoint instruction.
+ * @insn: instruction to be checked.
+ * Default implementation of is_trap_insn
+ * Returns true if @insn is a breakpoint instruction.
+ *
+ * This function is needed for the case where an architecture has multiple
+ * trap instructions (like powerpc).
+ */
+bool __weak is_trap_insn(uprobe_opcode_t *insn)
+{
+	return is_swbp_insn(insn);
+}
+
+static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len)
 {
 	void *kaddr = kmap_atomic(page);
-	memcpy(opcode, kaddr + (vaddr & ~PAGE_MASK), UPROBE_SWBP_INSN_SIZE);
+	memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len);
+	kunmap_atomic(kaddr);
+}
+
+static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len)
+{
+	void *kaddr = kmap_atomic(page);
+	memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len);
 	kunmap_atomic(kaddr);
 }
 
@@ -185,7 +215,16 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
 	uprobe_opcode_t old_opcode;
 	bool is_swbp;
 
-	copy_opcode(page, vaddr, &old_opcode);
+	/*
+	 * Note: We only check if the old_opcode is UPROBE_SWBP_INSN here.
+	 * We do not check if it is any other 'trap variant' which could
+	 * be conditional trap instruction such as the one powerpc supports.
+	 *
+	 * The logic is that we do not care if the underlying instruction
+	 * is a trap variant; uprobes always wins over any other (gdb)
+	 * breakpoint.
+	 */
+	copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE);
 	is_swbp = is_swbp_insn(&old_opcode);
 
 	if (is_swbp_insn(new_opcode)) {
@@ -204,7 +243,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
  * Expect the breakpoint instruction to be the smallest size instruction for
  * the architecture. If an arch has variable length instruction and the
  * breakpoint instruction is not of the smallest length instruction
- * supported by that architecture then we need to modify is_swbp_at_addr and
+ * supported by that architecture then we need to modify is_trap_at_addr and
  * write_opcode accordingly. This would never be a problem for archs that
  * have fixed length instructions.
  */
@@ -225,7 +264,6 @@ static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
 			uprobe_opcode_t opcode)
 {
 	struct page *old_page, *new_page;
-	void *vaddr_old, *vaddr_new;
 	struct vm_area_struct *vma;
 	int ret;
 
@@ -246,15 +284,8 @@ retry:
 
 	__SetPageUptodate(new_page);
 
-	/* copy the page now that we've got it stable */
-	vaddr_old = kmap_atomic(old_page);
-	vaddr_new = kmap_atomic(new_page);
-
-	memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
-	memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);
-
-	kunmap_atomic(vaddr_new);
-	kunmap_atomic(vaddr_old);
+	copy_highpage(new_page, old_page);
+	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
 	ret = anon_vma_prepare(vma);
 	if (ret)
@@ -477,30 +508,18 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn,
 		unsigned long nbytes, loff_t offset)
 {
 	struct page *page;
-	void *vaddr;
-	unsigned long off;
-	pgoff_t idx;
-
-	if (!filp)
-		return -EINVAL;
 
 	if (!mapping->a_ops->readpage)
 		return -EIO;
-
-	idx = offset >> PAGE_CACHE_SHIFT;
-	off = offset & ~PAGE_MASK;
-
 	/*
 	 * Ensure that the page that has the original instruction is
 	 * populated and in page-cache.
 	 */
-	page = read_mapping_page(mapping, idx, filp);
+	page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
 	if (IS_ERR(page))
 		return PTR_ERR(page);
 
-	vaddr = kmap_atomic(page);
-	memcpy(insn, vaddr + off, nbytes);
-	kunmap_atomic(vaddr);
+	copy_from_page(page, offset, insn, nbytes);
 	page_cache_release(page);
 
 	return 0;
@@ -550,7 +569,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 		goto out;
 
 	ret = -ENOTSUPP;
-	if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
+	if (is_trap_insn((uprobe_opcode_t *)uprobe->arch.insn))
 		goto out;
 
 	ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
@@ -758,7 +777,7 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
 		down_write(&mm->mmap_sem);
 		vma = find_vma(mm, info->vaddr);
 		if (!vma || !valid_vma(vma, is_register) ||
-		    vma->vm_file->f_mapping->host != uprobe->inode)
+		    file_inode(vma->vm_file) != uprobe->inode)
 			goto unlock;
 
 		if (vma->vm_start > info->vaddr ||
@@ -828,6 +847,10 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
 	struct uprobe *uprobe;
 	int ret;
 
+	/* Uprobe must have at least one set consumer */
+	if (!uc->handler && !uc->ret_handler)
+		return -EINVAL;
+
 	/* Racy, just to catch the obvious mistakes */
 	if (offset > i_size_read(inode))
 		return -EINVAL;
@@ -917,7 +940,7 @@ static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
 		loff_t offset;
 
 		if (!valid_vma(vma, false) ||
-		    vma->vm_file->f_mapping->host != uprobe->inode)
+		    file_inode(vma->vm_file) != uprobe->inode)
 			continue;
 
 		offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
@@ -1010,7 +1033,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	if (no_uprobe_events() || !valid_vma(vma, true))
 		return 0;
 
-	inode = vma->vm_file->f_mapping->host;
+	inode = file_inode(vma->vm_file);
 	if (!inode)
 		return 0;
 
@@ -1041,7 +1064,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e
 	struct inode *inode;
 	struct rb_node *n;
 
-	inode = vma->vm_file->f_mapping->host;
+	inode = file_inode(vma->vm_file);
 
 	min = vaddr_to_offset(vma, start);
 	max = min + (end - start) - 1;
@@ -1114,6 +1137,7 @@ static struct xol_area *get_xol_area(void)
 {
 	struct mm_struct *mm = current->mm;
 	struct xol_area *area;
+	uprobe_opcode_t insn = UPROBE_SWBP_INSN;
 
 	area = mm->uprobes_state.xol_area;
 	if (area)
@@ -1131,7 +1155,12 @@ static struct xol_area *get_xol_area(void)
 	if (!area->page)
 		goto free_bitmap;
 
+	/* allocate first slot of task's xol_area for the return probes */
+	set_bit(0, area->bitmap);
+	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
+	atomic_set(&area->slot_count, 1);
 	init_waitqueue_head(&area->wq);
+
 	if (!xol_add_vma(area))
 		return area;
 
@@ -1216,9 +1245,7 @@ static unsigned long xol_take_insn_slot(struct xol_area *area)
 static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 {
 	struct xol_area *area;
-	unsigned long offset;
 	unsigned long xol_vaddr;
-	void *vaddr;
 
 	area = get_xol_area();
 	if (!area)
@@ -1229,10 +1256,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 		return 0;
 
 	/* Initialize the slot */
-	offset = xol_vaddr & ~PAGE_MASK;
-	vaddr = kmap_atomic(area->page);
-	memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
-	kunmap_atomic(vaddr);
+	copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES);
 	/*
 	 * We probably need flush_icache_user_range() but it needs vma.
 	 * This should work on supported architectures too.
@@ -1298,6 +1322,7 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
 void uprobe_free_utask(struct task_struct *t)
 {
 	struct uprobe_task *utask = t->utask;
+	struct return_instance *ri, *tmp;
 
 	if (!utask)
 		return;
@@ -1305,6 +1330,15 @@ void uprobe_free_utask(struct task_struct *t)
 	if (utask->active_uprobe)
 		put_uprobe(utask->active_uprobe);
 
+	ri = utask->return_instances;
+	while (ri) {
+		tmp = ri;
+		ri = ri->next;
+
+		put_uprobe(tmp->uprobe);
+		kfree(tmp);
+	}
+
 	xol_free_insn_slot(t);
 	kfree(utask);
 	t->utask = NULL;
@@ -1333,6 +1367,93 @@ static struct uprobe_task *get_utask(void)
 	return current->utask;
 }
 
+/*
+ * Current area->vaddr notion assume the trampoline address is always
+ * equal area->vaddr.
+ *
+ * Returns -1 in case the xol_area is not allocated.
+ */
+static unsigned long get_trampoline_vaddr(void)
+{
+	struct xol_area *area;
+	unsigned long trampoline_vaddr = -1;
+
+	area = current->mm->uprobes_state.xol_area;
+	smp_read_barrier_depends();
+	if (area)
+		trampoline_vaddr = area->vaddr;
+
+	return trampoline_vaddr;
+}
+
+static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
+{
+	struct return_instance *ri;
+	struct uprobe_task *utask;
+	unsigned long orig_ret_vaddr, trampoline_vaddr;
+	bool chained = false;
+
+	if (!get_xol_area())
+		return;
+
+	utask = get_utask();
+	if (!utask)
+		return;
+
+	if (utask->depth >= MAX_URETPROBE_DEPTH) {
+		printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
+				" nestedness limit pid/tgid=%d/%d\n",
+				current->pid, current->tgid);
+		return;
+	}
+
+	ri = kzalloc(sizeof(struct return_instance), GFP_KERNEL);
+	if (!ri)
+		goto fail;
+
+	trampoline_vaddr = get_trampoline_vaddr();
+	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
+	if (orig_ret_vaddr == -1)
+		goto fail;
+
+	/*
+	 * We don't want to keep trampoline address in stack, rather keep the
+	 * original return address of first caller thru all the consequent
+	 * instances. This also makes breakpoint unwrapping easier.
+	 */
+	if (orig_ret_vaddr == trampoline_vaddr) {
+		if (!utask->return_instances) {
+			/*
+			 * This situation is not possible. Likely we have an
+			 * attack from user-space.
+			 */
+			pr_warn("uprobe: unable to set uretprobe pid/tgid=%d/%d\n",
+						current->pid, current->tgid);
+			goto fail;
+		}
+
+		chained = true;
+		orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
+	}
+
+	atomic_inc(&uprobe->ref);
+	ri->uprobe = uprobe;
+	ri->func = instruction_pointer(regs);
+	ri->orig_ret_vaddr = orig_ret_vaddr;
+	ri->chained = chained;
+
+	utask->depth++;
+
+	/* add instance to the stack */
+	ri->next = utask->return_instances;
+	utask->return_instances = ri;
+
+	return;
+
+ fail:
+	kfree(ri);
+}
+
 /* Prepare to single-step probed instruction out of line. */
 static int
 pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
@@ -1431,7 +1552,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)
 	clear_bit(MMF_HAS_UPROBES, &mm->flags);
 }
 
-static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
+static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
 {
 	struct page *page;
 	uprobe_opcode_t opcode;
@@ -1449,10 +1570,11 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
 	if (result < 0)
 		return result;
 
-	copy_opcode(page, vaddr, &opcode);
+	copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 	put_page(page);
  out:
-	return is_swbp_insn(&opcode);
+	/* This needs to return true for any variant of the trap insn */
+	return is_trap_insn(&opcode);
 }
 
 static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
@@ -1465,14 +1587,14 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 	vma = find_vma(mm, bp_vaddr);
 	if (vma && vma->vm_start <= bp_vaddr) {
 		if (valid_vma(vma, false)) {
-			struct inode *inode = vma->vm_file->f_mapping->host;
+			struct inode *inode = file_inode(vma->vm_file);
 			loff_t offset = vaddr_to_offset(vma, bp_vaddr);
 
 			uprobe = find_uprobe(inode, offset);
 		}
 
 		if (!uprobe)
-			*is_swbp = is_swbp_at_addr(mm, bp_vaddr);
+			*is_swbp = is_trap_at_addr(mm, bp_vaddr);
 	} else {
 		*is_swbp = -EFAULT;
 	}
@@ -1488,16 +1610,27 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 {
 	struct uprobe_consumer *uc;
 	int remove = UPROBE_HANDLER_REMOVE;
+	bool need_prep = false; /* prepare return uprobe, when needed */
 
 	down_read(&uprobe->register_rwsem);
 	for (uc = uprobe->consumers; uc; uc = uc->next) {
-		int rc = uc->handler(uc, regs);
+		int rc = 0;
+
+		if (uc->handler) {
+			rc = uc->handler(uc, regs);
+			WARN(rc & ~UPROBE_HANDLER_MASK,
+				"bad rc=0x%x from %pf()\n", rc, uc->handler);
+		}
+
+		if (uc->ret_handler)
+			need_prep = true;
 
-		WARN(rc & ~UPROBE_HANDLER_MASK,
-			"bad rc=0x%x from %pf()\n", rc, uc->handler);
 		remove &= rc;
 	}
 
+	if (need_prep && !remove)
+		prepare_uretprobe(uprobe, regs); /* put bp at return */
+
 	if (remove && uprobe->consumers) {
 		WARN_ON(!uprobe_is_active(uprobe));
 		unapply_uprobe(uprobe, current->mm);
@@ -1505,6 +1638,64 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 	up_read(&uprobe->register_rwsem);
 }
 
+static void
+handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
+{
+	struct uprobe *uprobe = ri->uprobe;
+	struct uprobe_consumer *uc;
+
+	down_read(&uprobe->register_rwsem);
+	for (uc = uprobe->consumers; uc; uc = uc->next) {
+		if (uc->ret_handler)
+			uc->ret_handler(uc, ri->func, regs);
+	}
+	up_read(&uprobe->register_rwsem);
+}
+
+static bool handle_trampoline(struct pt_regs *regs)
+{
+	struct uprobe_task *utask;
+	struct return_instance *ri, *tmp;
+	bool chained;
+
+	utask = current->utask;
+	if (!utask)
+		return false;
+
+	ri = utask->return_instances;
+	if (!ri)
+		return false;
+
+	/*
+	 * TODO: we should throw out return_instance's invalidated by
+	 * longjmp(), currently we assume that the probed function always
+	 * returns.
+	 */
+	instruction_pointer_set(regs, ri->orig_ret_vaddr);
+
+	for (;;) {
+		handle_uretprobe_chain(ri, regs);
+
+		chained = ri->chained;
+		put_uprobe(ri->uprobe);
+
+		tmp = ri;
+		ri = ri->next;
+		kfree(tmp);
+
+		if (!chained)
+			break;
+
+		utask->depth--;
+
+		BUG_ON(!ri);
+	}
+
+	utask->return_instances = ri;
+
+	return true;
+}
+
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
@@ -1516,8 +1707,15 @@ static void handle_swbp(struct pt_regs *regs)
 	int uninitialized_var(is_swbp);
 
 	bp_vaddr = uprobe_get_swbp_addr(regs);
-	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
+	if (bp_vaddr == get_trampoline_vaddr()) {
+		if (handle_trampoline(regs))
+			return;
+
+		pr_warn("uprobe: unable to handle uretprobe pid/tgid=%d/%d\n",
+				current->pid, current->tgid);
+	}
 
+	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
 	if (!uprobe) {
 		if (is_swbp > 0) {
 			/* No matching uprobe; signal SIGTRAP. */
@@ -1616,7 +1814,11 @@ void uprobe_notify_resume(struct pt_regs *regs)
  */
 int uprobe_pre_sstep_notifier(struct pt_regs *regs)
 {
-	if (!current->mm || !test_bit(MMF_HAS_UPROBES, &current->mm->flags))
+	if (!current->mm)
+		return 0;
+
+	if (!test_bit(MMF_HAS_UPROBES, &current->mm->flags) &&
+	    (!current->utask || !current->utask->return_instances))
 		return 0;
 
 	set_thread_flag(TIF_UPROBE);
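The subtle piece of prepare_uretprobe()/handle_trampoline() above is the chained flag: when the return address read by arch_uretprobe_hijack_return_addr() is already the trampoline, the new instance borrows the real return address of the instance below it, and a single trampoline hit later pops every chained instance in a row. A standalone user-space sketch of just that push/pop discipline (addresses and names are made up for illustration; allocation checks elided):

	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define TRAMPOLINE 0xf000UL	/* stand-in for the xol_area trampoline */

	struct instance {
		unsigned long func;	/* probed function's entry address      */
		unsigned long orig_ret;	/* return address that was displaced    */
		bool chained;		/* borrowed orig_ret from instance below */
		struct instance *next;
	};

	static struct instance *stack;	/* models utask->return_instances */

	/* Mirrors the bookkeeping in prepare_uretprobe(). */
	static void push(unsigned long func, unsigned long hijacked_ret)
	{
		struct instance *ri = malloc(sizeof(*ri));

		if (hijacked_ret == TRAMPOLINE) {
			/* Already hijacked: don't stack the trampoline address,
			 * borrow the real return address from the instance below.
			 * (The kernel treats an empty stack here as an attack.) */
			ri->chained = true;
			ri->orig_ret = stack->orig_ret;
		} else {
			ri->chained = false;
			ri->orig_ret = hijacked_ret;
		}
		ri->func = func;
		ri->next = stack;
		stack = ri;
	}

	/* Mirrors the pop loop in handle_trampoline(). */
	static unsigned long pop_on_trampoline_hit(void)
	{
		unsigned long resume_at = stack->orig_ret;
		bool chained;

		do {
			struct instance *ri = stack;

			printf("ret_handler: func=0x%lx\n", ri->func);
			chained = ri->chained;
			stack = ri->next;
			free(ri);
		} while (chained);	/* chained instances share one real return */

		return resume_at;
	}

	int main(void)
	{
		push(0x400100, 0x400042);	/* A, called from an unprobed caller */
		push(0x400200, TRAMPOLINE);	/* B entered with A's slot hijacked  */

		printf("resume at 0x%lx\n", pop_on_trampoline_hit());
		return 0;
	}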
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2081971367ea..8bed1dfcb938 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -103,11 +103,6 @@ struct kretprobe_trace_entry_head {
 	unsigned long		ret_ip;
 };
 
-struct uprobe_trace_entry_head {
-	struct trace_entry	ent;
-	unsigned long		ip;
-};
-
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 8dad2a92dee9..32494fb0ee64 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -28,6 +28,18 @@
 
 #define UPROBE_EVENT_SYSTEM	"uprobes"
 
+struct uprobe_trace_entry_head {
+	struct trace_entry	ent;
+	unsigned long		vaddr[];
+};
+
+#define SIZEOF_TRACE_ENTRY(is_return)			\
+	(sizeof(struct uprobe_trace_entry_head) +	\
+	 sizeof(unsigned long) * (is_return ? 2 : 1))
+
+#define DATAOF_TRACE_ENTRY(entry, is_return)		\
+	((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return))
+
 struct trace_uprobe_filter {
 	rwlock_t		rwlock;
 	int			nr_systemwide;
@@ -64,6 +76,8 @@ static DEFINE_MUTEX(uprobe_lock);
 static LIST_HEAD(uprobe_list);
 
 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
+static int uretprobe_dispatcher(struct uprobe_consumer *con,
+				unsigned long func, struct pt_regs *regs);
 
 static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
 {
@@ -77,11 +91,16 @@ static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
 	return !filter->nr_systemwide && list_empty(&filter->perf_events);
 }
 
+static inline bool is_ret_probe(struct trace_uprobe *tu)
+{
+	return tu->consumer.ret_handler != NULL;
+}
+
 /*
  * Allocate new trace_uprobe and initialize it (including uprobes).
  */
 static struct trace_uprobe *
-alloc_trace_uprobe(const char *group, const char *event, int nargs)
+alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
 {
 	struct trace_uprobe *tu;
 
@@ -106,6 +125,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs)
 
 	INIT_LIST_HEAD(&tu->list);
 	tu->consumer.handler = uprobe_dispatcher;
+	if (is_ret)
+		tu->consumer.ret_handler = uretprobe_dispatcher;
 	init_trace_uprobe_filter(&tu->filter);
 	return tu;
 
@@ -180,7 +201,7 @@ end:
 
 /*
  * Argument syntax:
- *  - Add uprobe: p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS]
+ *  - Add uprobe: p|r[:[GRP/]EVENT] PATH:SYMBOL [FETCHARGS]
  *
  *  - Remove uprobe: -:[GRP/]EVENT
  */
@@ -192,20 +213,23 @@ static int create_trace_uprobe(int argc, char **argv)
 	char buf[MAX_EVENT_NAME_LEN];
 	struct path path;
 	unsigned long offset;
-	bool is_delete;
+	bool is_delete, is_return;
 	int i, ret;
 
 	inode = NULL;
 	ret = 0;
 	is_delete = false;
+	is_return = false;
 	event = NULL;
 	group = NULL;
 
 	/* argc must be >= 1 */
 	if (argv[0][0] == '-')
 		is_delete = true;
+	else if (argv[0][0] == 'r')
+		is_return = true;
 	else if (argv[0][0] != 'p') {
-		pr_info("Probe definition must be started with 'p' or '-'.\n");
+		pr_info("Probe definition must be started with 'p', 'r' or '-'.\n");
 		return -EINVAL;
 	}
 
@@ -303,7 +327,7 @@ static int create_trace_uprobe(int argc, char **argv)
 		kfree(tail);
 	}
 
-	tu = alloc_trace_uprobe(group, event, argc);
+	tu = alloc_trace_uprobe(group, event, argc, is_return);
 	if (IS_ERR(tu)) {
 		pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
 		ret = PTR_ERR(tu);
@@ -414,9 +438,10 @@ static void probes_seq_stop(struct seq_file *m, void *v)
 static int probes_seq_show(struct seq_file *m, void *v)
 {
 	struct trace_uprobe *tu = v;
+	char c = is_ret_probe(tu) ? 'r' : 'p';
 	int i;
 
-	seq_printf(m, "p:%s/%s", tu->call.class->system, tu->call.name);
+	seq_printf(m, "%c:%s/%s", c, tu->call.class->system, tu->call.name);
 	seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset);
 
 	for (i = 0; i < tu->nr_args; i++)
@@ -485,65 +510,81 @@ static const struct file_operations uprobe_profile_ops = {
 	.release	= seq_release,
 };
 
-/* uprobe handler */
-static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static void uprobe_trace_print(struct trace_uprobe *tu,
+				unsigned long func, struct pt_regs *regs)
 {
 	struct uprobe_trace_entry_head *entry;
 	struct ring_buffer_event *event;
 	struct ring_buffer *buffer;
-	u8 *data;
-	int size, i, pc;
-	unsigned long irq_flags;
+	void *data;
+	int size, i;
 	struct ftrace_event_call *call = &tu->call;
 
-	local_save_flags(irq_flags);
-	pc = preempt_count();
-
-	size = sizeof(*entry) + tu->size;
-
+	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
 	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
-						  size, irq_flags, pc);
+						  size + tu->size, 0, 0);
 	if (!event)
-		return 0;
+		return;
 
 	entry = ring_buffer_event_data(event);
-	entry->ip = instruction_pointer(task_pt_regs(current));
-	data = (u8 *)&entry[1];
+	if (is_ret_probe(tu)) {
+		entry->vaddr[0] = func;
+		entry->vaddr[1] = instruction_pointer(regs);
+		data = DATAOF_TRACE_ENTRY(entry, true);
+	} else {
+		entry->vaddr[0] = instruction_pointer(regs);
+		data = DATAOF_TRACE_ENTRY(entry, false);
+	}
+
 	for (i = 0; i < tu->nr_args; i++)
 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
 
 	if (!filter_current_check_discard(buffer, call, entry, event))
-		trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
+		trace_buffer_unlock_commit(buffer, event, 0, 0);
+}
 
+/* uprobe handler */
+static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+{
+	if (!is_ret_probe(tu))
+		uprobe_trace_print(tu, 0, regs);
 	return 0;
 }
 
+static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
+				 struct pt_regs *regs)
+{
+	uprobe_trace_print(tu, func, regs);
+}
+
 /* Event entry printers */
 static enum print_line_t
 print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
 {
-	struct uprobe_trace_entry_head *field;
+	struct uprobe_trace_entry_head *entry;
 	struct trace_seq *s = &iter->seq;
 	struct trace_uprobe *tu;
 	u8 *data;
 	int i;
 
-	field = (struct uprobe_trace_entry_head *)iter->ent;
+	entry = (struct uprobe_trace_entry_head *)iter->ent;
 	tu = container_of(event, struct trace_uprobe, call.event);
 
-	if (!trace_seq_printf(s, "%s: (", tu->call.name))
-		goto partial;
-
-	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
-		goto partial;
-
-	if (!trace_seq_puts(s, ")"))
-		goto partial;
+	if (is_ret_probe(tu)) {
+		if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", tu->call.name,
+					entry->vaddr[1], entry->vaddr[0]))
+			goto partial;
+		data = DATAOF_TRACE_ENTRY(entry, true);
+	} else {
+		if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name,
+					entry->vaddr[0]))
+			goto partial;
+		data = DATAOF_TRACE_ENTRY(entry, false);
+	}
 
-	data = (u8 *)&field[1];
 	for (i = 0; i < tu->nr_args; i++) {
 		if (!tu->args[i].type->print(s, tu->args[i].name,
-					     data + tu->args[i].offset, field))
+					     data + tu->args[i].offset, entry))
 			goto partial;
 	}
 
@@ -595,16 +636,23 @@ static void probe_event_disable(struct trace_uprobe *tu, int flag)
 
 static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
 {
-	int ret, i;
+	int ret, i, size;
 	struct uprobe_trace_entry_head field;
-	struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data;
+	struct trace_uprobe *tu = event_call->data;
 
-	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
+	if (is_ret_probe(tu)) {
+		DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0);
+		DEFINE_FIELD(unsigned long, vaddr[1], FIELD_STRING_RETIP, 0);
+		size = SIZEOF_TRACE_ENTRY(true);
+	} else {
+		DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0);
+		size = SIZEOF_TRACE_ENTRY(false);
+	}
 	/* Set argument names as fields */
 	for (i = 0; i < tu->nr_args; i++) {
 		ret = trace_define_field(event_call, tu->args[i].type->fmttype,
 					 tu->args[i].name,
-					 sizeof(field) + tu->args[i].offset,
+					 size + tu->args[i].offset,
 					 tu->args[i].type->size,
 					 tu->args[i].type->is_signed,
 					 FILTER_OTHER);
@@ -622,8 +670,13 @@ static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len)
 	int i;
 	int pos = 0;
 
-	fmt = "(%lx)";
-	arg = "REC->" FIELD_STRING_IP;
+	if (is_ret_probe(tu)) {
+		fmt = "(%lx <- %lx)";
+		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
+	} else {
+		fmt = "(%lx)";
+		arg = "REC->" FIELD_STRING_IP;
+	}
 
 	/* When len=0, we just calculate the needed length */
 
@@ -752,49 +805,68 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc,
 	return ret;
 }
 
-/* uprobe profile handler */
-static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static void uprobe_perf_print(struct trace_uprobe *tu,
+				unsigned long func, struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tu->call;
 	struct uprobe_trace_entry_head *entry;
 	struct hlist_head *head;
-	u8 *data;
-	int size, __size, i;
-	int rctx;
+	void *data;
+	int size, rctx, i;
 
-	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
-		return UPROBE_HANDLER_REMOVE;
-
-	__size = sizeof(*entry) + tu->size;
-	size = ALIGN(__size + sizeof(u32), sizeof(u64));
-	size -= sizeof(u32);
+	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+	size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32);
 	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
-		return 0;
+		return;
 
 	preempt_disable();
+	head = this_cpu_ptr(call->perf_events);
+	if (hlist_empty(head))
+		goto out;
 
 	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
 	if (!entry)
 		goto out;
 
-	entry->ip = instruction_pointer(task_pt_regs(current));
-	data = (u8 *)&entry[1];
+	if (is_ret_probe(tu)) {
+		entry->vaddr[0] = func;
+		entry->vaddr[1] = instruction_pointer(regs);
+		data = DATAOF_TRACE_ENTRY(entry, true);
+	} else {
+		entry->vaddr[0] = instruction_pointer(regs);
+		data = DATAOF_TRACE_ENTRY(entry, false);
+	}
+
 	for (i = 0; i < tu->nr_args; i++)
 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
 
-	head = this_cpu_ptr(call->perf_events);
-	perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL);
-
+	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
  out:
 	preempt_enable();
+}
+
+/* uprobe profile handler */
+static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+{
+	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
+		return UPROBE_HANDLER_REMOVE;
+
+	if (!is_ret_probe(tu))
+		uprobe_perf_print(tu, 0, regs);
 	return 0;
 }
+
+static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
+				struct pt_regs *regs)
+{
+	uprobe_perf_print(tu, func, regs);
+}
 #endif /* CONFIG_PERF_EVENTS */
 
 static
 int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
 {
-	struct trace_uprobe *tu = (struct trace_uprobe *)event->data;
+	struct trace_uprobe *tu = event->data;
 
 	switch (type) {
 	case TRACE_REG_REGISTER:
@@ -843,6 +915,23 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
 	return ret;
 }
 
+static int uretprobe_dispatcher(struct uprobe_consumer *con,
+				unsigned long func, struct pt_regs *regs)
+{
+	struct trace_uprobe *tu;
+
+	tu = container_of(con, struct trace_uprobe, consumer);
+
+	if (tu->flags & TP_FLAG_TRACE)
+		uretprobe_trace_func(tu, func, regs);
+
+#ifdef CONFIG_PERF_EVENTS
+	if (tu->flags & TP_FLAG_PROFILE)
+		uretprobe_perf_func(tu, func, regs);
+#endif
+	return 0;
+}
+
 static struct trace_event_functions uprobe_funcs = {
 	.trace		= print_uprobe_event
 };
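The vaddr[] flexible array introduced above replaces the old fixed ip field: a 'p' event stores one address, an 'r' event stores the function entry plus the return ip, and fetched args begin immediately after. A user-space sketch of the same size arithmetic, with a mock trace_entry whose layout is assumed here purely for illustration:

	#include <stdio.h>

	/* Mock of the kernel's common trace_entry header (layout assumed). */
	struct trace_entry {
		unsigned short	type;
		unsigned char	flags;
		unsigned char	preempt_count;
		int		pid;
	};

	struct uprobe_trace_entry_head {
		struct trace_entry	ent;
		unsigned long		vaddr[];
	};

	#define SIZEOF_TRACE_ENTRY(is_return)			\
		(sizeof(struct uprobe_trace_entry_head) +	\
		 sizeof(unsigned long) * ((is_return) ? 2 : 1))

	#define DATAOF_TRACE_ENTRY(entry, is_return)		\
		((void *)(entry) + SIZEOF_TRACE_ENTRY(is_return))

	int main(void)
	{
		unsigned char buf[64] = { 0 };
		struct uprobe_trace_entry_head *entry = (void *)buf;

		/* One vaddr slot for a 'p' probe, two for an 'r' probe */
		printf("p-probe header: %zu bytes\n", SIZEOF_TRACE_ENTRY(0));
		printf("r-probe header: %zu bytes\n", SIZEOF_TRACE_ENTRY(1));

		/* Fetched arguments start right behind the vaddr slots */
		printf("r-probe args start at offset %td\n",
		       (unsigned char *)DATAOF_TRACE_ENTRY(entry, 1) - buf);
		return 0;
	}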