author		Ingo Molnar <mingo@kernel.org>	2013-04-16 05:04:10 -0400
committer	Ingo Molnar <mingo@kernel.org>	2013-04-16 05:04:10 -0400
commit		b5210b2a34bae35fc00675462333af45676d727c (patch)
tree		43f67fb370571aedb0ec8a1ad7e8da61834d19b2
parent		f8378f5259647710f0b4ecb814b0a1b0d9040de0 (diff)
parent		515619f209114697fabd21eed1623bfa69746815 (diff)
Merge branch 'uprobes/core' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc into perf/core
Pull uprobes updates from Oleg Nesterov:
- "uretprobes" - an optimization to uprobes, like kretprobes are an optimization
to kprobes. "perf probe -x file sym%return" now works like kretprobes.
- PowerPC fixes plus a couple of cleanups/optimizations in uprobes and trace_uprobes.
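
For example (illustrative only: the binary and symbol are arbitrary, and the
probe_zsh:zfree__return event name assumes perf keeps the kretprobes
"__return" naming convention for return probes):

    # perf probe -x /bin/zsh zfree%return
    # perf record -e probe_zsh:zfree__return -aR sleep 5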
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--	Documentation/trace/uprobetracer.txt	114
-rw-r--r--	arch/powerpc/include/asm/uprobes.h	1
-rw-r--r--	arch/powerpc/kernel/uprobes.c	29
-rw-r--r--	arch/x86/include/asm/uprobes.h	1
-rw-r--r--	arch/x86/kernel/uprobes.c	29
-rw-r--r--	include/linux/uprobes.h	8
-rw-r--r--	kernel/events/uprobes.c	300
-rw-r--r--	kernel/trace/trace.h	5
-rw-r--r--	kernel/trace/trace_uprobe.c	203
9 files changed, 526 insertions(+), 164 deletions(-)
diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt
index 24ce6823a09e..d9c3e682312c 100644
--- a/Documentation/trace/uprobetracer.txt
+++ b/Documentation/trace/uprobetracer.txt
@@ -1,6 +1,8 @@
 Uprobe-tracer: Uprobe-based Event Tracing
 =========================================
-Documentation written by Srikar Dronamraju
+
+Documentation written by Srikar Dronamraju
+
 
 Overview
 --------
@@ -13,78 +15,94 @@ current_tracer. Instead of that, add probe points via
 /sys/kernel/debug/tracing/events/uprobes/<EVENT>/enabled.
 
 However unlike kprobe-event tracer, the uprobe event interface expects the
-user to calculate the offset of the probepoint in the object
+user to calculate the offset of the probepoint in the object.
 
 Synopsis of uprobe_tracer
 -------------------------
-p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a probe
+p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a uprobe
+r[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a return uprobe (uretprobe)
+-:[GRP/]EVENT                                  : Clear uprobe or uretprobe event
 
-GRP : Group name. If omitted, use "uprobes" for it.
-EVENT : Event name. If omitted, the event name is generated
-based on SYMBOL+offs.
-PATH : path to an executable or a library.
+GRP : Group name. If omitted, "uprobes" is the default value.
+EVENT : Event name. If omitted, the event name is generated based
+on SYMBOL+offs.
+PATH : Path to an executable or a library.
 SYMBOL[+offs] : Symbol+offset where the probe is inserted.
 
 FETCHARGS : Arguments. Each probe can have up to 128 args.
  %REG : Fetch register REG
 
 Event Profiling
 ---------------
 You can check the total number of probe hits and probe miss-hits via
 /sys/kernel/debug/tracing/uprobe_profile.
 The first column is event name, the second is the number of probe hits,
 the third is the number of probe miss-hits.
 
 Usage examples
 --------------
-To add a probe as a new event, write a new definition to uprobe_events
-as below.
+ * Add a probe as a new uprobe event, write a new definition to uprobe_events
+as below: (sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash)
+
+   echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
+
+ * Add a probe as a new uretprobe event:
+
+   echo 'r: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
+
+ * Unset registered event:
 
-echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
+   echo '-:bash_0x4245c0' >> /sys/kernel/debug/tracing/uprobe_events
 
-This sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash
+ * Print out the events that are registered:
 
-echo > /sys/kernel/debug/tracing/uprobe_events
+   cat /sys/kernel/debug/tracing/uprobe_events
 
-This clears all probe points.
+ * Clear all events:
 
-The following example shows how to dump the instruction pointer and %ax
-a register at the probed text address. Here we are trying to probe
-function zfree in /bin/zsh
+   echo > /sys/kernel/debug/tracing/uprobe_events
+
+Following example shows how to dump the instruction pointer and %ax register
+at the probed text address. Probe zfree function in /bin/zsh:
 
     # cd /sys/kernel/debug/tracing/
    # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp
    00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh
    # objdump -T /bin/zsh | grep -w zfree
    0000000000446420 g    DF .text  0000000000000012  Base        zfree
 
 0x46420 is the offset of zfree in object /bin/zsh that is loaded at
-0x00400000. Hence the command to probe would be :
+0x00400000. Hence the command to uprobe would be:
+
+    # echo 'p:zfree_entry /bin/zsh:0x46420 %ip %ax' > uprobe_events
+
+And the same for the uretprobe would be:
 
-    # echo 'p /bin/zsh:0x46420 %ip %ax' > uprobe_events
+    # echo 'r:zfree_exit /bin/zsh:0x46420 %ip %ax' >> uprobe_events
 
-Please note: User has to explicitly calculate the offset of the probepoint
+Please note: User has to explicitly calculate the offset of the probe-point
 in the object. We can see the events that are registered by looking at the
 uprobe_events file.
 
     # cat uprobe_events
-    p:uprobes/p_zsh_0x46420 /bin/zsh:0x00046420 arg1=%ip arg2=%ax
+    p:uprobes/zfree_entry /bin/zsh:0x00046420 arg1=%ip arg2=%ax
+    r:uprobes/zfree_exit /bin/zsh:0x00046420 arg1=%ip arg2=%ax
 
-The format of events can be seen by viewing the file events/uprobes/p_zsh_0x46420/format
+Format of events can be seen by viewing the file events/uprobes/zfree_entry/format
 
-    # cat events/uprobes/p_zsh_0x46420/format
-    name: p_zsh_0x46420
+    # cat events/uprobes/zfree_entry/format
+    name: zfree_entry
     ID: 922
     format:
         field:unsigned short common_type;         offset:0;  size:2; signed:0;
         field:unsigned char common_flags;         offset:2;  size:1; signed:0;
         field:unsigned char common_preempt_count; offset:3;  size:1; signed:0;
         field:int common_pid;                     offset:4;  size:4; signed:1;
         field:int common_padding;                 offset:8;  size:4; signed:1;
 
         field:unsigned long __probe_ip;           offset:12; size:4; signed:0;
         field:u32 arg1;                           offset:16; size:4; signed:0;
         field:u32 arg2;                           offset:20; size:4; signed:0;
 
     print fmt: "(%lx) arg1=%lx arg2=%lx", REC->__probe_ip, REC->arg1, REC->arg2
 
@@ -94,6 +112,7 @@ events, you need to enable it by:
     # echo 1 > events/uprobes/enable
 
 Lets disable the event after sleeping for some time.
+
     # sleep 20
     # echo 0 > events/uprobes/enable
 
@@ -104,10 +123,11 @@ And you can see the traced information via /sys/kernel/debug/tracing/trace.
     #
     #           TASK-PID    CPU#    TIMESTAMP  FUNCTION
     #              | |       |          |         |
-             zsh-24842 [006] 258544.995456: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
-             zsh-24842 [007] 258545.000270: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
-             zsh-24842 [002] 258545.043929: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
-             zsh-24842 [004] 258547.046129: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
+             zsh-24842 [006] 258544.995456: zfree_entry: (0x446420) arg1=446420 arg2=79
+             zsh-24842 [007] 258545.000270: zfree_exit: (0x446540 <- 0x446420) arg1=446540 arg2=0
+             zsh-24842 [002] 258545.043929: zfree_entry: (0x446420) arg1=446420 arg2=79
+             zsh-24842 [004] 258547.046129: zfree_exit: (0x446540 <- 0x446420) arg1=446540 arg2=0
 
-Each line shows us probes were triggered for a pid 24842 with ip being
-0x446421 and contents of ax register being 79.
+Output shows us uprobe was triggered for a pid 24842 with ip being 0x446420
+and contents of ax register being 79. And uretprobe was triggered with ip at
+0x446540 with counterpart function entry at 0x446420.
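
Reading the sample trace above: when an entry/exit pair belongs to the same
zfree invocation (which the trace alone does not guarantee), the TIMESTAMP
delta bounds the call, e.g. 258545.000270 - 258544.995456 = 0.004814, i.e.
roughly 4.8 ms between the entry on CPU 006 and the following exit.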
diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
index b532060d0916..23016020915e 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -51,4 +51,5 @@ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
 extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
 extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #endif	/* _ASM_UPROBES_H */
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index bc77834dbf43..59f419b935f2 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -31,6 +31,16 @@
 #define UPROBE_TRAP_NR	UINT_MAX
 
 /**
+ * is_trap_insn - check if the instruction is a trap variant
+ * @insn: instruction to be checked.
+ * Returns true if @insn is a trap variant.
+ */
+bool is_trap_insn(uprobe_opcode_t *insn)
+{
+	return (is_trap(*insn));
+}
+
+/**
  * arch_uprobe_analyze_insn
  * @mm: the probed address space.
  * @arch_uprobe: the probepoint information.
@@ -43,12 +53,6 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
 	if (addr & 0x03)
 		return -EINVAL;
 
-	/*
-	 * We currently don't support a uprobe on an already
-	 * existing breakpoint instruction underneath
-	 */
-	if (is_trap(auprobe->ainsn))
-		return -ENOTSUPP;
 	return 0;
 }
 
@@ -188,3 +192,16 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 
 	return false;
 }
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
+{
+	unsigned long orig_ret_vaddr;
+
+	orig_ret_vaddr = regs->link;
+
+	/* Replace the return addr with trampoline addr */
+	regs->link = trampoline_vaddr;
+
+	return orig_ret_vaddr;
+}
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 8ff8be7835ab..6e5197910fd8 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -55,4 +55,5 @@ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
 extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
 extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #endif	/* _ASM_UPROBES_H */
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 0ba4cfb4f412..2ed845928b5f 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -697,3 +697,32 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 		send_sig(SIGTRAP, current, 0);
 	return ret;
 }
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
+{
+	int rasize, ncopied;
+	unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */
+
+	rasize = is_ia32_task() ? 4 : 8;
+	ncopied = copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize);
+	if (unlikely(ncopied))
+		return -1;
+
+	/* check whether address has been already hijacked */
+	if (orig_ret_vaddr == trampoline_vaddr)
+		return orig_ret_vaddr;
+
+	ncopied = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize);
+	if (likely(!ncopied))
+		return orig_ret_vaddr;
+
+	if (ncopied != rasize) {
+		pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, "
+			"%%ip=%#lx\n", current->pid, regs->sp, regs->ip);
+
+		force_sig_info(SIGSEGV, SEND_SIG_FORCED, current);
+	}
+
+	return -1;
+}
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 02b83db8e2c5..06f28beed7c2 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -38,6 +38,8 @@ struct inode;
 #define UPROBE_HANDLER_REMOVE		1
 #define UPROBE_HANDLER_MASK		1
 
+#define MAX_URETPROBE_DEPTH		64
+
 enum uprobe_filter_ctx {
 	UPROBE_FILTER_REGISTER,
 	UPROBE_FILTER_UNREGISTER,
@@ -46,6 +48,9 @@ enum uprobe_filter_ctx {
 
 struct uprobe_consumer {
 	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
+	int (*ret_handler)(struct uprobe_consumer *self,
+				unsigned long func,
+				struct pt_regs *regs);
 	bool (*filter)(struct uprobe_consumer *self,
 				enum uprobe_filter_ctx ctx,
 				struct mm_struct *mm);
@@ -68,6 +73,8 @@ struct uprobe_task {
 	enum uprobe_task_state		state;
 	struct arch_uprobe_task		autask;
 
+	struct return_instance		*return_instances;
+	unsigned int			depth;
 	struct uprobe			*active_uprobe;
 
 	unsigned long			xol_vaddr;
@@ -100,6 +107,7 @@ struct uprobes_state {
 extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
 extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
+extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
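
The consumer-side contract added above is easiest to see from a minimal
kernel-module sketch (not part of this series; the target path, the 0x46420
offset and all module boilerplate are illustrative only). It registers one
consumer with both the existing entry handler and the new ret_handler:

	#include <linux/module.h>
	#include <linux/namei.h>
	#include <linux/ptrace.h>
	#include <linux/uprobes.h>

	static int sample_handler(struct uprobe_consumer *self, struct pt_regs *regs)
	{
		pr_info("entry: ip=0x%lx\n", instruction_pointer(regs));
		return 0;			/* keep the probe installed */
	}

	static int sample_ret_handler(struct uprobe_consumer *self,
				      unsigned long func, struct pt_regs *regs)
	{
		/* @func is the probed function's entry address, saved at call time */
		pr_info("return from 0x%lx to 0x%lx\n", func, instruction_pointer(regs));
		return 0;
	}

	static struct uprobe_consumer sample_consumer = {
		.handler	= sample_handler,
		.ret_handler	= sample_ret_handler,
	};

	static struct inode *sample_inode;

	static int __init sample_init(void)
	{
		struct path path;
		int ret;

		/* hypothetical target: offset 0x46420 (zfree) in /bin/zsh */
		ret = kern_path("/bin/zsh", LOOKUP_FOLLOW, &path);
		if (ret)
			return ret;

		sample_inode = igrab(path.dentry->d_inode);
		path_put(&path);

		/* fails with -EINVAL if neither handler nor ret_handler is set */
		return uprobe_register(sample_inode, 0x46420, &sample_consumer);
	}

	static void __exit sample_exit(void)
	{
		uprobe_unregister(sample_inode, 0x46420, &sample_consumer);
		iput(sample_inode);
	}

	module_init(sample_init);
	module_exit(sample_exit);
	MODULE_LICENSE("GPL");

A consumer that sets only ret_handler acts as a pure return probe:
handler_chain() below sees handler == NULL, leaves rc at 0 (so the probe is
kept), and only arms the return trampoline via prepare_uretprobe().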
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a567c8c7ef31..f3569747d629 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -75,6 +75,15 @@ struct uprobe {
 	struct arch_uprobe	arch;
 };
 
+struct return_instance {
+	struct uprobe		*uprobe;
+	unsigned long		func;
+	unsigned long		orig_ret_vaddr; /* original return address */
+	bool			chained;	/* true, if instance is nested */
+
+	struct return_instance	*next;		/* keep as stack */
+};
+
 /*
  * valid_vma: Verify if the specified vma is an executable vma
  * Relax restrictions while unregistering: vm_flags might have
@@ -173,10 +182,31 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn)
 	return *insn == UPROBE_SWBP_INSN;
 }
 
-static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode)
+/**
+ * is_trap_insn - check if instruction is breakpoint instruction.
+ * @insn: instruction to be checked.
+ * Default implementation of is_trap_insn
+ * Returns true if @insn is a breakpoint instruction.
+ *
+ * This function is needed for the case where an architecture has multiple
+ * trap instructions (like powerpc).
+ */
+bool __weak is_trap_insn(uprobe_opcode_t *insn)
+{
+	return is_swbp_insn(insn);
+}
+
+static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len)
 {
 	void *kaddr = kmap_atomic(page);
-	memcpy(opcode, kaddr + (vaddr & ~PAGE_MASK), UPROBE_SWBP_INSN_SIZE);
+	memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len);
+	kunmap_atomic(kaddr);
+}
+
+static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len)
+{
+	void *kaddr = kmap_atomic(page);
+	memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len);
 	kunmap_atomic(kaddr);
 }
 
@@ -185,7 +215,16 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
 	uprobe_opcode_t old_opcode;
 	bool is_swbp;
 
-	copy_opcode(page, vaddr, &old_opcode);
+	/*
+	 * Note: We only check if the old_opcode is UPROBE_SWBP_INSN here.
+	 * We do not check if it is any other 'trap variant' which could
+	 * be conditional trap instruction such as the one powerpc supports.
+	 *
+	 * The logic is that we do not care if the underlying instruction
+	 * is a trap variant; uprobes always wins over any other (gdb)
+	 * breakpoint.
+	 */
+	copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE);
 	is_swbp = is_swbp_insn(&old_opcode);
 
 	if (is_swbp_insn(new_opcode)) {
@@ -204,7 +243,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
  * Expect the breakpoint instruction to be the smallest size instruction for
  * the architecture. If an arch has variable length instruction and the
  * breakpoint instruction is not of the smallest length instruction
- * supported by that architecture then we need to modify is_swbp_at_addr and
+ * supported by that architecture then we need to modify is_trap_at_addr and
  * write_opcode accordingly. This would never be a problem for archs that
  * have fixed length instructions.
  */
@@ -225,7 +264,6 @@ static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
 			uprobe_opcode_t opcode)
 {
 	struct page *old_page, *new_page;
-	void *vaddr_old, *vaddr_new;
 	struct vm_area_struct *vma;
 	int ret;
 
@@ -246,15 +284,8 @@ retry:
 
 	__SetPageUptodate(new_page);
 
-	/* copy the page now that we've got it stable */
-	vaddr_old = kmap_atomic(old_page);
-	vaddr_new = kmap_atomic(new_page);
-
-	memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
-	memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);
-
-	kunmap_atomic(vaddr_new);
-	kunmap_atomic(vaddr_old);
+	copy_highpage(new_page, old_page);
+	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
 	ret = anon_vma_prepare(vma);
 	if (ret)
@@ -477,30 +508,18 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn,
 		unsigned long nbytes, loff_t offset)
 {
 	struct page *page;
-	void *vaddr;
-	unsigned long off;
-	pgoff_t idx;
-
-	if (!filp)
-		return -EINVAL;
 
 	if (!mapping->a_ops->readpage)
 		return -EIO;
-
-	idx = offset >> PAGE_CACHE_SHIFT;
-	off = offset & ~PAGE_MASK;
-
 	/*
 	 * Ensure that the page that has the original instruction is
 	 * populated and in page-cache.
	 */
-	page = read_mapping_page(mapping, idx, filp);
+	page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
 	if (IS_ERR(page))
 		return PTR_ERR(page);
 
-	vaddr = kmap_atomic(page);
-	memcpy(insn, vaddr + off, nbytes);
-	kunmap_atomic(vaddr);
+	copy_from_page(page, offset, insn, nbytes);
 	page_cache_release(page);
 
 	return 0;
@@ -550,7 +569,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 		goto out;
 
 	ret = -ENOTSUPP;
-	if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
+	if (is_trap_insn((uprobe_opcode_t *)uprobe->arch.insn))
 		goto out;
 
 	ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
@@ -758,7 +777,7 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
 		down_write(&mm->mmap_sem);
 		vma = find_vma(mm, info->vaddr);
 		if (!vma || !valid_vma(vma, is_register) ||
-		    vma->vm_file->f_mapping->host != uprobe->inode)
+		    file_inode(vma->vm_file) != uprobe->inode)
 			goto unlock;
 
 		if (vma->vm_start > info->vaddr ||
@@ -828,6 +847,10 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
 	struct uprobe *uprobe;
 	int ret;
 
+	/* Uprobe must have at least one set consumer */
+	if (!uc->handler && !uc->ret_handler)
+		return -EINVAL;
+
 	/* Racy, just to catch the obvious mistakes */
 	if (offset > i_size_read(inode))
 		return -EINVAL;
@@ -917,7 +940,7 @@ static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
 		loff_t offset;
 
 		if (!valid_vma(vma, false) ||
-		    vma->vm_file->f_mapping->host != uprobe->inode)
+		    file_inode(vma->vm_file) != uprobe->inode)
 			continue;
 
 		offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
@@ -1010,7 +1033,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	if (no_uprobe_events() || !valid_vma(vma, true))
 		return 0;
 
-	inode = vma->vm_file->f_mapping->host;
+	inode = file_inode(vma->vm_file);
 	if (!inode)
 		return 0;
 
@@ -1041,7 +1064,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e
 	struct inode *inode;
 	struct rb_node *n;
 
-	inode = vma->vm_file->f_mapping->host;
+	inode = file_inode(vma->vm_file);
 
 	min = vaddr_to_offset(vma, start);
 	max = min + (end - start) - 1;
@@ -1114,6 +1137,7 @@ static struct xol_area *get_xol_area(void)
 {
 	struct mm_struct *mm = current->mm;
 	struct xol_area *area;
+	uprobe_opcode_t insn = UPROBE_SWBP_INSN;
 
 	area = mm->uprobes_state.xol_area;
 	if (area)
@@ -1131,7 +1155,12 @@
 	if (!area->page)
 		goto free_bitmap;
 
+	/* allocate first slot of task's xol_area for the return probes */
+	set_bit(0, area->bitmap);
+	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
+	atomic_set(&area->slot_count, 1);
 	init_waitqueue_head(&area->wq);
+
 	if (!xol_add_vma(area))
 		return area;
 
@@ -1216,9 +1245,7 @@ static unsigned long xol_take_insn_slot(struct xol_area *area)
 static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 {
 	struct xol_area *area;
-	unsigned long offset;
 	unsigned long xol_vaddr;
-	void *vaddr;
 
 	area = get_xol_area();
 	if (!area)
@@ -1229,10 +1256,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 		return 0;
 
 	/* Initialize the slot */
-	offset = xol_vaddr & ~PAGE_MASK;
-	vaddr = kmap_atomic(area->page);
-	memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
-	kunmap_atomic(vaddr);
+	copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES);
 	/*
 	 * We probably need flush_icache_user_range() but it needs vma.
 	 * This should work on supported architectures too.
@@ -1298,6 +1322,7 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
 void uprobe_free_utask(struct task_struct *t)
 {
 	struct uprobe_task *utask = t->utask;
+	struct return_instance *ri, *tmp;
 
 	if (!utask)
 		return;
@@ -1305,6 +1330,15 @@ void uprobe_free_utask(struct task_struct *t)
 	if (utask->active_uprobe)
 		put_uprobe(utask->active_uprobe);
 
+	ri = utask->return_instances;
+	while (ri) {
+		tmp = ri;
+		ri = ri->next;
+
+		put_uprobe(tmp->uprobe);
+		kfree(tmp);
+	}
+
 	xol_free_insn_slot(t);
 	kfree(utask);
 	t->utask = NULL;
@@ -1333,6 +1367,93 @@ static struct uprobe_task *get_utask(void)
 	return current->utask;
 }
 
+/*
+ * Current area->vaddr notion assume the trampoline address is always
+ * equal area->vaddr.
+ *
+ * Returns -1 in case the xol_area is not allocated.
+ */
+static unsigned long get_trampoline_vaddr(void)
+{
+	struct xol_area *area;
+	unsigned long trampoline_vaddr = -1;
+
+	area = current->mm->uprobes_state.xol_area;
+	smp_read_barrier_depends();
+	if (area)
+		trampoline_vaddr = area->vaddr;
+
+	return trampoline_vaddr;
+}
+
+static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
+{
+	struct return_instance *ri;
+	struct uprobe_task *utask;
+	unsigned long orig_ret_vaddr, trampoline_vaddr;
+	bool chained = false;
+
+	if (!get_xol_area())
+		return;
+
+	utask = get_utask();
+	if (!utask)
+		return;
+
+	if (utask->depth >= MAX_URETPROBE_DEPTH) {
+		printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
+				" nestedness limit pid/tgid=%d/%d\n",
+				current->pid, current->tgid);
+		return;
+	}
+
+	ri = kzalloc(sizeof(struct return_instance), GFP_KERNEL);
+	if (!ri)
+		goto fail;
+
+	trampoline_vaddr = get_trampoline_vaddr();
+	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
+	if (orig_ret_vaddr == -1)
+		goto fail;
+
+	/*
+	 * We don't want to keep trampoline address in stack, rather keep the
+	 * original return address of first caller thru all the consequent
+	 * instances. This also makes breakpoint unwrapping easier.
+	 */
+	if (orig_ret_vaddr == trampoline_vaddr) {
+		if (!utask->return_instances) {
+			/*
+			 * This situation is not possible. Likely we have an
+			 * attack from user-space.
+			 */
+			pr_warn("uprobe: unable to set uretprobe pid/tgid=%d/%d\n",
+				current->pid, current->tgid);
+			goto fail;
+		}
+
+		chained = true;
+		orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
+	}
+
+	atomic_inc(&uprobe->ref);
+	ri->uprobe = uprobe;
+	ri->func = instruction_pointer(regs);
+	ri->orig_ret_vaddr = orig_ret_vaddr;
+	ri->chained = chained;
+
+	utask->depth++;
+
+	/* add instance to the stack */
+	ri->next = utask->return_instances;
+	utask->return_instances = ri;
+
+	return;
+
+ fail:
+	kfree(ri);
+}
+
 /* Prepare to single-step probed instruction out of line. */
 static int
 pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
@@ -1431,7 +1552,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)
 	clear_bit(MMF_HAS_UPROBES, &mm->flags);
 }
 
-static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
+static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
 {
 	struct page *page;
 	uprobe_opcode_t opcode;
@@ -1449,10 +1570,11 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
 	if (result < 0)
 		return result;
 
-	copy_opcode(page, vaddr, &opcode);
+	copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 	put_page(page);
  out:
-	return is_swbp_insn(&opcode);
+	/* This needs to return true for any variant of the trap insn */
+	return is_trap_insn(&opcode);
 }
 
 static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
@@ -1465,14 +1587,14 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 	vma = find_vma(mm, bp_vaddr);
 	if (vma && vma->vm_start <= bp_vaddr) {
 		if (valid_vma(vma, false)) {
-			struct inode *inode = vma->vm_file->f_mapping->host;
+			struct inode *inode = file_inode(vma->vm_file);
 			loff_t offset = vaddr_to_offset(vma, bp_vaddr);
 
 			uprobe = find_uprobe(inode, offset);
 		}
 
 		if (!uprobe)
-			*is_swbp = is_swbp_at_addr(mm, bp_vaddr);
+			*is_swbp = is_trap_at_addr(mm, bp_vaddr);
 	} else {
 		*is_swbp = -EFAULT;
 	}
@@ -1488,16 +1610,27 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 {
 	struct uprobe_consumer *uc;
 	int remove = UPROBE_HANDLER_REMOVE;
+	bool need_prep = false; /* prepare return uprobe, when needed */
 
 	down_read(&uprobe->register_rwsem);
 	for (uc = uprobe->consumers; uc; uc = uc->next) {
-		int rc = uc->handler(uc, regs);
+		int rc = 0;
+
+		if (uc->handler) {
+			rc = uc->handler(uc, regs);
+			WARN(rc & ~UPROBE_HANDLER_MASK,
+				"bad rc=0x%x from %pf()\n", rc, uc->handler);
+		}
+
+		if (uc->ret_handler)
+			need_prep = true;
 
-		WARN(rc & ~UPROBE_HANDLER_MASK,
-			"bad rc=0x%x from %pf()\n", rc, uc->handler);
 		remove &= rc;
 	}
 
+	if (need_prep && !remove)
+		prepare_uretprobe(uprobe, regs); /* put bp at return */
+
 	if (remove && uprobe->consumers) {
 		WARN_ON(!uprobe_is_active(uprobe));
 		unapply_uprobe(uprobe, current->mm);
@@ -1505,6 +1638,64 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 	up_read(&uprobe->register_rwsem);
 }
 
+static void
+handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
+{
+	struct uprobe *uprobe = ri->uprobe;
+	struct uprobe_consumer *uc;
+
+	down_read(&uprobe->register_rwsem);
+	for (uc = uprobe->consumers; uc; uc = uc->next) {
+		if (uc->ret_handler)
+			uc->ret_handler(uc, ri->func, regs);
+	}
+	up_read(&uprobe->register_rwsem);
+}
+
+static bool handle_trampoline(struct pt_regs *regs)
+{
+	struct uprobe_task *utask;
+	struct return_instance *ri, *tmp;
+	bool chained;
+
+	utask = current->utask;
+	if (!utask)
+		return false;
+
+	ri = utask->return_instances;
+	if (!ri)
+		return false;
+
+	/*
+	 * TODO: we should throw out return_instance's invalidated by
+	 * longjmp(), currently we assume that the probed function always
+	 * returns.
+	 */
+	instruction_pointer_set(regs, ri->orig_ret_vaddr);
+
+	for (;;) {
+		handle_uretprobe_chain(ri, regs);
+
+		chained = ri->chained;
+		put_uprobe(ri->uprobe);
+
+		tmp = ri;
+		ri = ri->next;
+		kfree(tmp);
+
+		if (!chained)
+			break;
+
+		utask->depth--;
+
+		BUG_ON(!ri);
+	}
+
+	utask->return_instances = ri;
+
+	return true;
+}
+
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
@@ -1516,8 +1707,15 @@ static void handle_swbp(struct pt_regs *regs)
 	int uninitialized_var(is_swbp);
 
 	bp_vaddr = uprobe_get_swbp_addr(regs);
-	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
+	if (bp_vaddr == get_trampoline_vaddr()) {
+		if (handle_trampoline(regs))
+			return;
+
+		pr_warn("uprobe: unable to handle uretprobe pid/tgid=%d/%d\n",
+				current->pid, current->tgid);
+	}
 
+	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
 	if (!uprobe) {
 		if (is_swbp > 0) {
 			/* No matching uprobe; signal SIGTRAP. */
@@ -1616,7 +1814,11 @@
  */
 int uprobe_pre_sstep_notifier(struct pt_regs *regs)
 {
-	if (!current->mm || !test_bit(MMF_HAS_UPROBES, &current->mm->flags))
+	if (!current->mm)
+		return 0;
+
+	if (!test_bit(MMF_HAS_UPROBES, &current->mm->flags) &&
+	    (!current->utask || !current->utask->return_instances))
 		return 0;
 
 	set_thread_flag(TIF_UPROBE);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2081971367ea..8bed1dfcb938 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -103,11 +103,6 @@ struct kretprobe_trace_entry_head {
 	unsigned long		ret_ip;
 };
 
-struct uprobe_trace_entry_head {
-	struct trace_entry	ent;
-	unsigned long		ip;
-};
-
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 8dad2a92dee9..32494fb0ee64 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -28,6 +28,18 @@
 
 #define UPROBE_EVENT_SYSTEM	"uprobes"
 
+struct uprobe_trace_entry_head {
+	struct trace_entry	ent;
+	unsigned long		vaddr[];
+};
+
+#define SIZEOF_TRACE_ENTRY(is_return)			\
+	(sizeof(struct uprobe_trace_entry_head) +	\
+	 sizeof(unsigned long) * (is_return ? 2 : 1))
+
+#define DATAOF_TRACE_ENTRY(entry, is_return)		\
+	((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return))
+
 struct trace_uprobe_filter {
 	rwlock_t		rwlock;
 	int			nr_systemwide;
@@ -64,6 +76,8 @@ static DEFINE_MUTEX(uprobe_lock);
 static LIST_HEAD(uprobe_list);
 
 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
+static int uretprobe_dispatcher(struct uprobe_consumer *con,
+				unsigned long func, struct pt_regs *regs);
 
 static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
 {
@@ -77,11 +91,16 @@ static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
 	return !filter->nr_systemwide && list_empty(&filter->perf_events);
 }
 
+static inline bool is_ret_probe(struct trace_uprobe *tu)
+{
+	return tu->consumer.ret_handler != NULL;
+}
+
 /*
  * Allocate new trace_uprobe and initialize it (including uprobes).
 */
 static struct trace_uprobe *
-alloc_trace_uprobe(const char *group, const char *event, int nargs)
+alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
 {
 	struct trace_uprobe *tu;
 
@@ -106,6 +125,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs)
 
 	INIT_LIST_HEAD(&tu->list);
 	tu->consumer.handler = uprobe_dispatcher;
+	if (is_ret)
+		tu->consumer.ret_handler = uretprobe_dispatcher;
 	init_trace_uprobe_filter(&tu->filter);
 	return tu;
 
@@ -180,7 +201,7 @@ end:
 
 /*
  * Argument syntax:
- *  - Add uprobe: p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS]
+ *  - Add uprobe: p|r[:[GRP/]EVENT] PATH:SYMBOL [FETCHARGS]
 *
 *  - Remove uprobe: -:[GRP/]EVENT
 */
@@ -192,20 +213,23 @@ static int create_trace_uprobe(int argc, char **argv)
 	char buf[MAX_EVENT_NAME_LEN];
 	struct path path;
 	unsigned long offset;
-	bool is_delete;
+	bool is_delete, is_return;
 	int i, ret;
 
 	inode = NULL;
 	ret = 0;
 	is_delete = false;
+	is_return = false;
 	event = NULL;
 	group = NULL;
 
 	/* argc must be >= 1 */
 	if (argv[0][0] == '-')
 		is_delete = true;
+	else if (argv[0][0] == 'r')
+		is_return = true;
 	else if (argv[0][0] != 'p') {
-		pr_info("Probe definition must be started with 'p' or '-'.\n");
+		pr_info("Probe definition must be started with 'p', 'r' or '-'.\n");
 		return -EINVAL;
 	}
 
@@ -303,7 +327,7 @@ static int create_trace_uprobe(int argc, char **argv)
 		kfree(tail);
 	}
 
-	tu = alloc_trace_uprobe(group, event, argc);
+	tu = alloc_trace_uprobe(group, event, argc, is_return);
 	if (IS_ERR(tu)) {
 		pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
 		ret = PTR_ERR(tu);
@@ -414,9 +438,10 @@ static void probes_seq_stop(struct seq_file *m, void *v)
 static int probes_seq_show(struct seq_file *m, void *v)
 {
 	struct trace_uprobe *tu = v;
+	char c = is_ret_probe(tu) ? 'r' : 'p';
 	int i;
 
-	seq_printf(m, "p:%s/%s", tu->call.class->system, tu->call.name);
+	seq_printf(m, "%c:%s/%s", c, tu->call.class->system, tu->call.name);
 	seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset);
 
 	for (i = 0; i < tu->nr_args; i++)
@@ -485,65 +510,81 @@ static const struct file_operations uprobe_profile_ops = {
 	.release	= seq_release,
 };
 
-/* uprobe handler */
-static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static void uprobe_trace_print(struct trace_uprobe *tu,
+				unsigned long func, struct pt_regs *regs)
 {
 	struct uprobe_trace_entry_head *entry;
 	struct ring_buffer_event *event;
 	struct ring_buffer *buffer;
-	u8 *data;
-	int size, i, pc;
-	unsigned long irq_flags;
+	void *data;
+	int size, i;
 	struct ftrace_event_call *call = &tu->call;
 
-	local_save_flags(irq_flags);
-	pc = preempt_count();
-
-	size = sizeof(*entry) + tu->size;
-
+	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
 	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
-						  size, irq_flags, pc);
+						  size + tu->size, 0, 0);
 	if (!event)
-		return 0;
+		return;
 
 	entry = ring_buffer_event_data(event);
-	entry->ip = instruction_pointer(task_pt_regs(current));
-	data = (u8 *)&entry[1];
+	if (is_ret_probe(tu)) {
+		entry->vaddr[0] = func;
+		entry->vaddr[1] = instruction_pointer(regs);
+		data = DATAOF_TRACE_ENTRY(entry, true);
+	} else {
+		entry->vaddr[0] = instruction_pointer(regs);
+		data = DATAOF_TRACE_ENTRY(entry, false);
+	}
+
 	for (i = 0; i < tu->nr_args; i++)
 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
 
 	if (!filter_current_check_discard(buffer, call, entry, event))
-		trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
+		trace_buffer_unlock_commit(buffer, event, 0, 0);
+}
 
+/* uprobe handler */
+static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+{
+	if (!is_ret_probe(tu))
+		uprobe_trace_print(tu, 0, regs);
 	return 0;
 }
 
+static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
+				 struct pt_regs *regs)
+{
+	uprobe_trace_print(tu, func, regs);
+}
+
 /* Event entry printers */
 static enum print_line_t
 print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
 {
-	struct uprobe_trace_entry_head *field;
+	struct uprobe_trace_entry_head *entry;
 	struct trace_seq *s = &iter->seq;
 	struct trace_uprobe *tu;
 	u8 *data;
 	int i;
 
-	field = (struct uprobe_trace_entry_head *)iter->ent;
+	entry = (struct uprobe_trace_entry_head *)iter->ent;
 	tu = container_of(event, struct trace_uprobe, call.event);
 
-	if (!trace_seq_printf(s, "%s: (", tu->call.name))
-		goto partial;
-
-	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
-		goto partial;
-
-	if (!trace_seq_puts(s, ")"))
-		goto partial;
+	if (is_ret_probe(tu)) {
+		if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", tu->call.name,
+					entry->vaddr[1], entry->vaddr[0]))
+			goto partial;
+		data = DATAOF_TRACE_ENTRY(entry, true);
+	} else {
+		if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name,
+					entry->vaddr[0]))
+			goto partial;
+		data = DATAOF_TRACE_ENTRY(entry, false);
+	}
 
-	data = (u8 *)&field[1];
 	for (i = 0; i < tu->nr_args; i++) {
 		if (!tu->args[i].type->print(s, tu->args[i].name,
-						data + tu->args[i].offset, field))
+						data + tu->args[i].offset, entry))
 			goto partial;
 	}
 
@@ -595,16 +636,23 @@ static void probe_event_disable(struct trace_uprobe *tu, int flag)
 
 static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
 {
-	int ret, i;
+	int ret, i, size;
 	struct uprobe_trace_entry_head field;
-	struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data;
+	struct trace_uprobe *tu = event_call->data;
 
-	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
+	if (is_ret_probe(tu)) {
+		DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0);
+		DEFINE_FIELD(unsigned long, vaddr[1], FIELD_STRING_RETIP, 0);
+		size = SIZEOF_TRACE_ENTRY(true);
+	} else {
+		DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0);
+		size = SIZEOF_TRACE_ENTRY(false);
+	}
 	/* Set argument names as fields */
 	for (i = 0; i < tu->nr_args; i++) {
 		ret = trace_define_field(event_call, tu->args[i].type->fmttype,
 					 tu->args[i].name,
-					 sizeof(field) + tu->args[i].offset,
+					 size + tu->args[i].offset,
 					 tu->args[i].type->size,
 					 tu->args[i].type->is_signed,
 					 FILTER_OTHER);
@@ -622,8 +670,13 @@ static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len)
 	int i;
 	int pos = 0;
 
-	fmt = "(%lx)";
-	arg = "REC->" FIELD_STRING_IP;
+	if (is_ret_probe(tu)) {
+		fmt = "(%lx <- %lx)";
+		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
+	} else {
+		fmt = "(%lx)";
+		arg = "REC->" FIELD_STRING_IP;
+	}
 
 	/* When len=0, we just calculate the needed length */
 
@@ -752,49 +805,68 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc,
 	return ret;
 }
 
-/* uprobe profile handler */
-static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static void uprobe_perf_print(struct trace_uprobe *tu,
+				unsigned long func, struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tu->call;
 	struct uprobe_trace_entry_head *entry;
 	struct hlist_head *head;
-	u8 *data;
-	int size, __size, i;
-	int rctx;
+	void *data;
+	int size, rctx, i;
 
-	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
-		return UPROBE_HANDLER_REMOVE;
-
-	__size = sizeof(*entry) + tu->size;
-	size = ALIGN(__size + sizeof(u32), sizeof(u64));
-	size -= sizeof(u32);
+	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+	size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32);
 	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
-		return 0;
+		return;
 
 	preempt_disable();
+	head = this_cpu_ptr(call->perf_events);
+	if (hlist_empty(head))
+		goto out;
 
 	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
 	if (!entry)
 		goto out;
 
-	entry->ip = instruction_pointer(task_pt_regs(current));
-	data = (u8 *)&entry[1];
+	if (is_ret_probe(tu)) {
+		entry->vaddr[0] = func;
+		entry->vaddr[1] = instruction_pointer(regs);
+		data = DATAOF_TRACE_ENTRY(entry, true);
+	} else {
+		entry->vaddr[0] = instruction_pointer(regs);
+		data = DATAOF_TRACE_ENTRY(entry, false);
+	}
+
 	for (i = 0; i < tu->nr_args; i++)
 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
 
-	head = this_cpu_ptr(call->perf_events);
-	perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL);
-
+	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
 out:
 	preempt_enable();
+}
+
+/* uprobe profile handler */
+static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+{
+	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
+		return UPROBE_HANDLER_REMOVE;
+
+	if (!is_ret_probe(tu))
+		uprobe_perf_print(tu, 0, regs);
 	return 0;
 }
+
+static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
+				struct pt_regs *regs)
+{
+	uprobe_perf_print(tu, func, regs);
+}
 #endif	/* CONFIG_PERF_EVENTS */
 
 static
 int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
 {
-	struct trace_uprobe *tu = (struct trace_uprobe *)event->data;
+	struct trace_uprobe *tu = event->data;
 
 	switch (type) {
 	case TRACE_REG_REGISTER:
@@ -843,6 +915,23 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
 	return ret;
 }
 
+static int uretprobe_dispatcher(struct uprobe_consumer *con,
+				unsigned long func, struct pt_regs *regs)
+{
+	struct trace_uprobe *tu;
+
+	tu = container_of(con, struct trace_uprobe, consumer);
+
+	if (tu->flags & TP_FLAG_TRACE)
+		uretprobe_trace_func(tu, func, regs);
+
+#ifdef CONFIG_PERF_EVENTS
+	if (tu->flags & TP_FLAG_PROFILE)
+		uretprobe_perf_func(tu, func, regs);
+#endif
+	return 0;
+}
+
 static struct trace_event_functions uprobe_funcs = {
 	.trace		= print_uprobe_event
 };
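
End result, as seen from tracefs: a 'r:' event created through uprobe_events
exposes the two vaddr[] slots under the pre-existing field-string names from
trace_probe.h. A sketch of the resulting format file (the __probe_func and
__probe_ret_ip field names are real; the ID, sizes and offsets shown are
illustrative and depend on the build):

	# cat events/uprobes/zfree_exit/format
	name: zfree_exit
	ID: 923
	format:
		field:unsigned long __probe_func;	offset:12;	size:4;	signed:0;
		field:unsigned long __probe_ret_ip;	offset:16;	size:4;	signed:0;

	print fmt: "(%lx <- %lx)", REC->__probe_func, REC->__probe_ret_ip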