diff options
author | Srikar Dronamraju <srikar@linux.vnet.ibm.com> | 2012-04-11 06:30:43 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2012-05-07 08:30:17 -0400 |
commit | f3f096cfedf8113380c56fc855275cc75cd8cf55 (patch) | |
tree | b8d0553afc8cebf6dd320d094206e93df5d95794 /kernel/trace | |
parent | 8ab83f56475ec9151645a888dfe1941f4a92091d (diff) |
tracing: Provide trace events interface for uprobes
Implements trace_event support for uprobes. In its current form
it can be used to put probes at a specified offset in a file and
dump the required registers when the code flow reaches the
probed address.
The following example shows how to dump the instruction pointer
and the %ax register at the probed text address. Here we are
trying to probe zfree in /bin/zsh:
# cd /sys/kernel/debug/tracing/
# cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp
00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh
# objdump -T /bin/zsh | grep -w zfree
0000000000446420 g DF .text 0000000000000012 Base zfree
# echo 'p /bin/zsh:0x46420 %ip %ax' > uprobe_events
# cat uprobe_events
p:uprobes/p_zsh_0x46420 /bin/zsh:0x0000000000046420
# echo 1 > events/uprobes/enable
# sleep 20
# echo 0 > events/uprobes/enable
# cat trace
# tracer: nop
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
zsh-24842 [006] 258544.995456: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
zsh-24842 [007] 258545.000270: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
zsh-24842 [002] 258545.043929: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
zsh-24842 [004] 258547.046129: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jim Keniston <jkenisto@linux.vnet.ibm.com>
Cc: Linux-mm <linux-mm@kvack.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120411103043.GB29437@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/Kconfig | 16 | ||||
-rw-r--r-- | kernel/trace/Makefile | 1 | ||||
-rw-r--r-- | kernel/trace/trace.h | 5 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_probe.c | 14 | ||||
-rw-r--r-- | kernel/trace/trace_probe.h | 3 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 788 |
7 files changed, 823 insertions, 6 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index ce5a5c54ac8f..ea4bff6295fc 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -386,6 +386,22 @@ config KPROBE_EVENT | |||
386 | This option is also required by perf-probe subcommand of perf tools. | 386 | This option is also required by perf-probe subcommand of perf tools. |
387 | If you want to use perf tools, this option is strongly recommended. | 387 | If you want to use perf tools, this option is strongly recommended. |
388 | 388 | ||
389 | config UPROBE_EVENT | ||
390 | bool "Enable uprobes-based dynamic events" | ||
391 | depends on ARCH_SUPPORTS_UPROBES | ||
392 | depends on MMU | ||
393 | select UPROBES | ||
394 | select PROBE_EVENTS | ||
395 | select TRACING | ||
396 | default n | ||
397 | help | ||
398 | This allows the user to add tracing events on top of userspace | ||
399 | dynamic events (similar to tracepoints) on the fly via the trace | ||
400 | events interface. Those events can be inserted wherever uprobes | ||
401 | can probe, and record various registers. | ||
402 | This option is required if you plan to use perf-probe subcommand | ||
403 | of perf tools on user space applications. | ||
404 | |||
389 | config PROBE_EVENTS | 405 | config PROBE_EVENTS |
390 | def_bool n | 406 | def_bool n |
391 | 407 | ||
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index fa10d5ca5ab1..1734c03e048b 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -62,5 +62,6 @@ ifeq ($(CONFIG_TRACING),y) | |||
62 | obj-$(CONFIG_KGDB_KDB) += trace_kdb.o | 62 | obj-$(CONFIG_KGDB_KDB) += trace_kdb.o |
63 | endif | 63 | endif |
64 | obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o | 64 | obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o |
65 | obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o | ||
65 | 66 | ||
66 | libftrace-y := ftrace.o | 67 | libftrace-y := ftrace.o |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 95059f091a24..1bcdbec95a11 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -103,6 +103,11 @@ struct kretprobe_trace_entry_head { | |||
103 | unsigned long ret_ip; | 103 | unsigned long ret_ip; |
104 | }; | 104 | }; |
105 | 105 | ||
106 | struct uprobe_trace_entry_head { | ||
107 | struct trace_entry ent; | ||
108 | unsigned long ip; | ||
109 | }; | ||
110 | |||
106 | /* | 111 | /* |
107 | * trace_flag_type is an enumeration that holds different | 112 | * trace_flag_type is an enumeration that holds different |
108 | * states when a trace occurs. These are: | 113 | * states when a trace occurs. These are: |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f8b777367d8e..b31d3d5699fe 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -525,7 +525,7 @@ static int create_trace_probe(int argc, char **argv) | |||
525 | 525 | ||
526 | /* Parse fetch argument */ | 526 | /* Parse fetch argument */ |
527 | ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i], | 527 | ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i], |
528 | is_return); | 528 | is_return, true); |
529 | if (ret) { | 529 | if (ret) { |
530 | pr_info("Parse error at argument[%d]. (%d)\n", i, ret); | 530 | pr_info("Parse error at argument[%d]. (%d)\n", i, ret); |
531 | goto error; | 531 | goto error; |
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 8e526b9286e9..daa9980153af 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c | |||
@@ -550,7 +550,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, | |||
550 | 550 | ||
551 | /* Recursive argument parser */ | 551 | /* Recursive argument parser */ |
552 | static int parse_probe_arg(char *arg, const struct fetch_type *t, | 552 | static int parse_probe_arg(char *arg, const struct fetch_type *t, |
553 | struct fetch_param *f, bool is_return) | 553 | struct fetch_param *f, bool is_return, bool is_kprobe) |
554 | { | 554 | { |
555 | unsigned long param; | 555 | unsigned long param; |
556 | long offset; | 556 | long offset; |
@@ -558,6 +558,11 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, | |||
558 | int ret; | 558 | int ret; |
559 | 559 | ||
560 | ret = 0; | 560 | ret = 0; |
561 | |||
562 | /* For now, uprobe_events supports only register arguments */ | ||
563 | if (!is_kprobe && arg[0] != '%') | ||
564 | return -EINVAL; | ||
565 | |||
561 | switch (arg[0]) { | 566 | switch (arg[0]) { |
562 | case '$': | 567 | case '$': |
563 | ret = parse_probe_vars(arg + 1, t, f, is_return); | 568 | ret = parse_probe_vars(arg + 1, t, f, is_return); |
@@ -619,7 +624,8 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, | |||
619 | return -ENOMEM; | 624 | return -ENOMEM; |
620 | 625 | ||
621 | dprm->offset = offset; | 626 | dprm->offset = offset; |
622 | ret = parse_probe_arg(arg, t2, &dprm->orig, is_return); | 627 | ret = parse_probe_arg(arg, t2, &dprm->orig, is_return, |
628 | is_kprobe); | ||
623 | if (ret) | 629 | if (ret) |
624 | kfree(dprm); | 630 | kfree(dprm); |
625 | else { | 631 | else { |
@@ -677,7 +683,7 @@ static int __parse_bitfield_probe_arg(const char *bf, | |||
677 | 683 | ||
678 | /* String length checking wrapper */ | 684 | /* String length checking wrapper */ |
679 | int traceprobe_parse_probe_arg(char *arg, ssize_t *size, | 685 | int traceprobe_parse_probe_arg(char *arg, ssize_t *size, |
680 | struct probe_arg *parg, bool is_return) | 686 | struct probe_arg *parg, bool is_return, bool is_kprobe) |
681 | { | 687 | { |
682 | const char *t; | 688 | const char *t; |
683 | int ret; | 689 | int ret; |
@@ -703,7 +709,7 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, | |||
703 | } | 709 | } |
704 | parg->offset = *size; | 710 | parg->offset = *size; |
705 | *size += parg->type->size; | 711 | *size += parg->type->size; |
706 | ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return); | 712 | ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, is_kprobe); |
707 | 713 | ||
708 | if (ret >= 0 && t != NULL) | 714 | if (ret >= 0 && t != NULL) |
709 | ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch); | 715 | ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch); |
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 2df9a18e0252..933708677814 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h | |||
@@ -66,6 +66,7 @@ | |||
66 | #define TP_FLAG_TRACE 1 | 66 | #define TP_FLAG_TRACE 1 |
67 | #define TP_FLAG_PROFILE 2 | 67 | #define TP_FLAG_PROFILE 2 |
68 | #define TP_FLAG_REGISTERED 4 | 68 | #define TP_FLAG_REGISTERED 4 |
69 | #define TP_FLAG_UPROBE 8 | ||
69 | 70 | ||
70 | 71 | ||
71 | /* data_rloc: data relative location, compatible with u32 */ | 72 | /* data_rloc: data relative location, compatible with u32 */ |
@@ -143,7 +144,7 @@ static inline int is_good_name(const char *name) | |||
143 | } | 144 | } |
144 | 145 | ||
145 | extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size, | 146 | extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size, |
146 | struct probe_arg *parg, bool is_return); | 147 | struct probe_arg *parg, bool is_return, bool is_kprobe); |
147 | 148 | ||
148 | extern int traceprobe_conflict_field_name(const char *name, | 149 | extern int traceprobe_conflict_field_name(const char *name, |
149 | struct probe_arg *args, int narg); | 150 | struct probe_arg *args, int narg); |
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c new file mode 100644 index 000000000000..2b36ac68549e --- /dev/null +++ b/kernel/trace/trace_uprobe.c | |||
@@ -0,0 +1,788 @@ | |||
1 | /* | ||
2 | * uprobes-based tracing events | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; if not, write to the Free Software | ||
15 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
16 | * | ||
17 | * Copyright (C) IBM Corporation, 2010-2012 | ||
18 | * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com> | ||
19 | */ | ||
20 | |||
21 | #include <linux/module.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <linux/uprobes.h> | ||
24 | #include <linux/namei.h> | ||
25 | |||
26 | #include "trace_probe.h" | ||
27 | |||
28 | #define UPROBE_EVENT_SYSTEM "uprobes" | ||
29 | |||
30 | /* | ||
31 | * uprobe event core functions | ||
32 | */ | ||
33 | struct trace_uprobe; | ||
34 | struct uprobe_trace_consumer { | ||
35 | struct uprobe_consumer cons; | ||
36 | struct trace_uprobe *tu; | ||
37 | }; | ||
38 | |||
39 | struct trace_uprobe { | ||
40 | struct list_head list; | ||
41 | struct ftrace_event_class class; | ||
42 | struct ftrace_event_call call; | ||
43 | struct uprobe_trace_consumer *consumer; | ||
44 | struct inode *inode; | ||
45 | char *filename; | ||
46 | unsigned long offset; | ||
47 | unsigned long nhit; | ||
48 | unsigned int flags; /* For TP_FLAG_* */ | ||
49 | ssize_t size; /* trace entry size */ | ||
50 | unsigned int nr_args; | ||
51 | struct probe_arg args[]; | ||
52 | }; | ||
53 | |||
/* Bytes needed for a trace_uprobe that carries @n probe arguments. */
#define SIZEOF_TRACE_UPROBE(n)					\
	(offsetof(struct trace_uprobe, args) +			\
	 (sizeof(struct probe_arg) * (n)))

/* All registered probes; the list is manipulated under uprobe_lock. */
static LIST_HEAD(uprobe_list);
static DEFINE_MUTEX(uprobe_lock);

/* Defined further down in this file. */
static int register_uprobe_event(struct trace_uprobe *tu);
static void unregister_uprobe_event(struct trace_uprobe *tu);
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
65 | |||
66 | /* | ||
67 | * Allocate new trace_uprobe and initialize it (including uprobes). | ||
68 | */ | ||
69 | static struct trace_uprobe * | ||
70 | alloc_trace_uprobe(const char *group, const char *event, int nargs) | ||
71 | { | ||
72 | struct trace_uprobe *tu; | ||
73 | |||
74 | if (!event || !is_good_name(event)) | ||
75 | return ERR_PTR(-EINVAL); | ||
76 | |||
77 | if (!group || !is_good_name(group)) | ||
78 | return ERR_PTR(-EINVAL); | ||
79 | |||
80 | tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL); | ||
81 | if (!tu) | ||
82 | return ERR_PTR(-ENOMEM); | ||
83 | |||
84 | tu->call.class = &tu->class; | ||
85 | tu->call.name = kstrdup(event, GFP_KERNEL); | ||
86 | if (!tu->call.name) | ||
87 | goto error; | ||
88 | |||
89 | tu->class.system = kstrdup(group, GFP_KERNEL); | ||
90 | if (!tu->class.system) | ||
91 | goto error; | ||
92 | |||
93 | INIT_LIST_HEAD(&tu->list); | ||
94 | return tu; | ||
95 | |||
96 | error: | ||
97 | kfree(tu->call.name); | ||
98 | kfree(tu); | ||
99 | |||
100 | return ERR_PTR(-ENOMEM); | ||
101 | } | ||
102 | |||
103 | static void free_trace_uprobe(struct trace_uprobe *tu) | ||
104 | { | ||
105 | int i; | ||
106 | |||
107 | for (i = 0; i < tu->nr_args; i++) | ||
108 | traceprobe_free_probe_arg(&tu->args[i]); | ||
109 | |||
110 | iput(tu->inode); | ||
111 | kfree(tu->call.class->system); | ||
112 | kfree(tu->call.name); | ||
113 | kfree(tu->filename); | ||
114 | kfree(tu); | ||
115 | } | ||
116 | |||
117 | static struct trace_uprobe *find_probe_event(const char *event, const char *group) | ||
118 | { | ||
119 | struct trace_uprobe *tu; | ||
120 | |||
121 | list_for_each_entry(tu, &uprobe_list, list) | ||
122 | if (strcmp(tu->call.name, event) == 0 && | ||
123 | strcmp(tu->call.class->system, group) == 0) | ||
124 | return tu; | ||
125 | |||
126 | return NULL; | ||
127 | } | ||
128 | |||
129 | /* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */ | ||
130 | static void unregister_trace_uprobe(struct trace_uprobe *tu) | ||
131 | { | ||
132 | list_del(&tu->list); | ||
133 | unregister_uprobe_event(tu); | ||
134 | free_trace_uprobe(tu); | ||
135 | } | ||
136 | |||
137 | /* Register a trace_uprobe and probe_event */ | ||
138 | static int register_trace_uprobe(struct trace_uprobe *tu) | ||
139 | { | ||
140 | struct trace_uprobe *old_tp; | ||
141 | int ret; | ||
142 | |||
143 | mutex_lock(&uprobe_lock); | ||
144 | |||
145 | /* register as an event */ | ||
146 | old_tp = find_probe_event(tu->call.name, tu->call.class->system); | ||
147 | if (old_tp) | ||
148 | /* delete old event */ | ||
149 | unregister_trace_uprobe(old_tp); | ||
150 | |||
151 | ret = register_uprobe_event(tu); | ||
152 | if (ret) { | ||
153 | pr_warning("Failed to register probe event(%d)\n", ret); | ||
154 | goto end; | ||
155 | } | ||
156 | |||
157 | list_add_tail(&tu->list, &uprobe_list); | ||
158 | |||
159 | end: | ||
160 | mutex_unlock(&uprobe_lock); | ||
161 | |||
162 | return ret; | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * Argument syntax: | ||
167 | * - Add uprobe: p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] | ||
168 | * | ||
169 | * - Remove uprobe: -:[GRP/]EVENT | ||
170 | */ | ||
171 | static int create_trace_uprobe(int argc, char **argv) | ||
172 | { | ||
173 | struct trace_uprobe *tu; | ||
174 | struct inode *inode; | ||
175 | char *arg, *event, *group, *filename; | ||
176 | char buf[MAX_EVENT_NAME_LEN]; | ||
177 | struct path path; | ||
178 | unsigned long offset; | ||
179 | bool is_delete; | ||
180 | int i, ret; | ||
181 | |||
182 | inode = NULL; | ||
183 | ret = 0; | ||
184 | is_delete = false; | ||
185 | event = NULL; | ||
186 | group = NULL; | ||
187 | |||
188 | /* argc must be >= 1 */ | ||
189 | if (argv[0][0] == '-') | ||
190 | is_delete = true; | ||
191 | else if (argv[0][0] != 'p') { | ||
192 | pr_info("Probe definition must be started with 'p', 'r' or" " '-'.\n"); | ||
193 | return -EINVAL; | ||
194 | } | ||
195 | |||
196 | if (argv[0][1] == ':') { | ||
197 | event = &argv[0][2]; | ||
198 | arg = strchr(event, '/'); | ||
199 | |||
200 | if (arg) { | ||
201 | group = event; | ||
202 | event = arg + 1; | ||
203 | event[-1] = '\0'; | ||
204 | |||
205 | if (strlen(group) == 0) { | ||
206 | pr_info("Group name is not specified\n"); | ||
207 | return -EINVAL; | ||
208 | } | ||
209 | } | ||
210 | if (strlen(event) == 0) { | ||
211 | pr_info("Event name is not specified\n"); | ||
212 | return -EINVAL; | ||
213 | } | ||
214 | } | ||
215 | if (!group) | ||
216 | group = UPROBE_EVENT_SYSTEM; | ||
217 | |||
218 | if (is_delete) { | ||
219 | if (!event) { | ||
220 | pr_info("Delete command needs an event name.\n"); | ||
221 | return -EINVAL; | ||
222 | } | ||
223 | mutex_lock(&uprobe_lock); | ||
224 | tu = find_probe_event(event, group); | ||
225 | |||
226 | if (!tu) { | ||
227 | mutex_unlock(&uprobe_lock); | ||
228 | pr_info("Event %s/%s doesn't exist.\n", group, event); | ||
229 | return -ENOENT; | ||
230 | } | ||
231 | /* delete an event */ | ||
232 | unregister_trace_uprobe(tu); | ||
233 | mutex_unlock(&uprobe_lock); | ||
234 | return 0; | ||
235 | } | ||
236 | |||
237 | if (argc < 2) { | ||
238 | pr_info("Probe point is not specified.\n"); | ||
239 | return -EINVAL; | ||
240 | } | ||
241 | if (isdigit(argv[1][0])) { | ||
242 | pr_info("probe point must be have a filename.\n"); | ||
243 | return -EINVAL; | ||
244 | } | ||
245 | arg = strchr(argv[1], ':'); | ||
246 | if (!arg) | ||
247 | goto fail_address_parse; | ||
248 | |||
249 | *arg++ = '\0'; | ||
250 | filename = argv[1]; | ||
251 | ret = kern_path(filename, LOOKUP_FOLLOW, &path); | ||
252 | if (ret) | ||
253 | goto fail_address_parse; | ||
254 | |||
255 | ret = strict_strtoul(arg, 0, &offset); | ||
256 | if (ret) | ||
257 | goto fail_address_parse; | ||
258 | |||
259 | inode = igrab(path.dentry->d_inode); | ||
260 | |||
261 | argc -= 2; | ||
262 | argv += 2; | ||
263 | |||
264 | /* setup a probe */ | ||
265 | if (!event) { | ||
266 | char *tail = strrchr(filename, '/'); | ||
267 | char *ptr; | ||
268 | |||
269 | ptr = kstrdup((tail ? tail + 1 : filename), GFP_KERNEL); | ||
270 | if (!ptr) { | ||
271 | ret = -ENOMEM; | ||
272 | goto fail_address_parse; | ||
273 | } | ||
274 | |||
275 | tail = ptr; | ||
276 | ptr = strpbrk(tail, ".-_"); | ||
277 | if (ptr) | ||
278 | *ptr = '\0'; | ||
279 | |||
280 | snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset); | ||
281 | event = buf; | ||
282 | kfree(tail); | ||
283 | } | ||
284 | |||
285 | tu = alloc_trace_uprobe(group, event, argc); | ||
286 | if (IS_ERR(tu)) { | ||
287 | pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu)); | ||
288 | ret = PTR_ERR(tu); | ||
289 | goto fail_address_parse; | ||
290 | } | ||
291 | tu->offset = offset; | ||
292 | tu->inode = inode; | ||
293 | tu->filename = kstrdup(filename, GFP_KERNEL); | ||
294 | |||
295 | if (!tu->filename) { | ||
296 | pr_info("Failed to allocate filename.\n"); | ||
297 | ret = -ENOMEM; | ||
298 | goto error; | ||
299 | } | ||
300 | |||
301 | /* parse arguments */ | ||
302 | ret = 0; | ||
303 | for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { | ||
304 | /* Increment count for freeing args in error case */ | ||
305 | tu->nr_args++; | ||
306 | |||
307 | /* Parse argument name */ | ||
308 | arg = strchr(argv[i], '='); | ||
309 | if (arg) { | ||
310 | *arg++ = '\0'; | ||
311 | tu->args[i].name = kstrdup(argv[i], GFP_KERNEL); | ||
312 | } else { | ||
313 | arg = argv[i]; | ||
314 | /* If argument name is omitted, set "argN" */ | ||
315 | snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1); | ||
316 | tu->args[i].name = kstrdup(buf, GFP_KERNEL); | ||
317 | } | ||
318 | |||
319 | if (!tu->args[i].name) { | ||
320 | pr_info("Failed to allocate argument[%d] name.\n", i); | ||
321 | ret = -ENOMEM; | ||
322 | goto error; | ||
323 | } | ||
324 | |||
325 | if (!is_good_name(tu->args[i].name)) { | ||
326 | pr_info("Invalid argument[%d] name: %s\n", i, tu->args[i].name); | ||
327 | ret = -EINVAL; | ||
328 | goto error; | ||
329 | } | ||
330 | |||
331 | if (traceprobe_conflict_field_name(tu->args[i].name, tu->args, i)) { | ||
332 | pr_info("Argument[%d] name '%s' conflicts with " | ||
333 | "another field.\n", i, argv[i]); | ||
334 | ret = -EINVAL; | ||
335 | goto error; | ||
336 | } | ||
337 | |||
338 | /* Parse fetch argument */ | ||
339 | ret = traceprobe_parse_probe_arg(arg, &tu->size, &tu->args[i], false, false); | ||
340 | if (ret) { | ||
341 | pr_info("Parse error at argument[%d]. (%d)\n", i, ret); | ||
342 | goto error; | ||
343 | } | ||
344 | } | ||
345 | |||
346 | ret = register_trace_uprobe(tu); | ||
347 | if (ret) | ||
348 | goto error; | ||
349 | return 0; | ||
350 | |||
351 | error: | ||
352 | free_trace_uprobe(tu); | ||
353 | return ret; | ||
354 | |||
355 | fail_address_parse: | ||
356 | if (inode) | ||
357 | iput(inode); | ||
358 | |||
359 | pr_info("Failed to parse address.\n"); | ||
360 | |||
361 | return ret; | ||
362 | } | ||
363 | |||
364 | static void cleanup_all_probes(void) | ||
365 | { | ||
366 | struct trace_uprobe *tu; | ||
367 | |||
368 | mutex_lock(&uprobe_lock); | ||
369 | while (!list_empty(&uprobe_list)) { | ||
370 | tu = list_entry(uprobe_list.next, struct trace_uprobe, list); | ||
371 | unregister_trace_uprobe(tu); | ||
372 | } | ||
373 | mutex_unlock(&uprobe_lock); | ||
374 | } | ||
375 | |||
376 | /* Probes listing interfaces */ | ||
377 | static void *probes_seq_start(struct seq_file *m, loff_t *pos) | ||
378 | { | ||
379 | mutex_lock(&uprobe_lock); | ||
380 | return seq_list_start(&uprobe_list, *pos); | ||
381 | } | ||
382 | |||
383 | static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos) | ||
384 | { | ||
385 | return seq_list_next(v, &uprobe_list, pos); | ||
386 | } | ||
387 | |||
388 | static void probes_seq_stop(struct seq_file *m, void *v) | ||
389 | { | ||
390 | mutex_unlock(&uprobe_lock); | ||
391 | } | ||
392 | |||
393 | static int probes_seq_show(struct seq_file *m, void *v) | ||
394 | { | ||
395 | struct trace_uprobe *tu = v; | ||
396 | int i; | ||
397 | |||
398 | seq_printf(m, "p:%s/%s", tu->call.class->system, tu->call.name); | ||
399 | seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset); | ||
400 | |||
401 | for (i = 0; i < tu->nr_args; i++) | ||
402 | seq_printf(m, " %s=%s", tu->args[i].name, tu->args[i].comm); | ||
403 | |||
404 | seq_printf(m, "\n"); | ||
405 | return 0; | ||
406 | } | ||
407 | |||
408 | static const struct seq_operations probes_seq_op = { | ||
409 | .start = probes_seq_start, | ||
410 | .next = probes_seq_next, | ||
411 | .stop = probes_seq_stop, | ||
412 | .show = probes_seq_show | ||
413 | }; | ||
414 | |||
415 | static int probes_open(struct inode *inode, struct file *file) | ||
416 | { | ||
417 | if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) | ||
418 | cleanup_all_probes(); | ||
419 | |||
420 | return seq_open(file, &probes_seq_op); | ||
421 | } | ||
422 | |||
423 | static ssize_t probes_write(struct file *file, const char __user *buffer, | ||
424 | size_t count, loff_t *ppos) | ||
425 | { | ||
426 | return traceprobe_probes_write(file, buffer, count, ppos, create_trace_uprobe); | ||
427 | } | ||
428 | |||
429 | static const struct file_operations uprobe_events_ops = { | ||
430 | .owner = THIS_MODULE, | ||
431 | .open = probes_open, | ||
432 | .read = seq_read, | ||
433 | .llseek = seq_lseek, | ||
434 | .release = seq_release, | ||
435 | .write = probes_write, | ||
436 | }; | ||
437 | |||
438 | /* Probes profiling interfaces */ | ||
439 | static int probes_profile_seq_show(struct seq_file *m, void *v) | ||
440 | { | ||
441 | struct trace_uprobe *tu = v; | ||
442 | |||
443 | seq_printf(m, " %s %-44s %15lu\n", tu->filename, tu->call.name, tu->nhit); | ||
444 | return 0; | ||
445 | } | ||
446 | |||
447 | static const struct seq_operations profile_seq_op = { | ||
448 | .start = probes_seq_start, | ||
449 | .next = probes_seq_next, | ||
450 | .stop = probes_seq_stop, | ||
451 | .show = probes_profile_seq_show | ||
452 | }; | ||
453 | |||
454 | static int profile_open(struct inode *inode, struct file *file) | ||
455 | { | ||
456 | return seq_open(file, &profile_seq_op); | ||
457 | } | ||
458 | |||
459 | static const struct file_operations uprobe_profile_ops = { | ||
460 | .owner = THIS_MODULE, | ||
461 | .open = profile_open, | ||
462 | .read = seq_read, | ||
463 | .llseek = seq_lseek, | ||
464 | .release = seq_release, | ||
465 | }; | ||
466 | |||
467 | /* uprobe handler */ | ||
468 | static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) | ||
469 | { | ||
470 | struct uprobe_trace_entry_head *entry; | ||
471 | struct ring_buffer_event *event; | ||
472 | struct ring_buffer *buffer; | ||
473 | u8 *data; | ||
474 | int size, i, pc; | ||
475 | unsigned long irq_flags; | ||
476 | struct ftrace_event_call *call = &tu->call; | ||
477 | |||
478 | tu->nhit++; | ||
479 | |||
480 | local_save_flags(irq_flags); | ||
481 | pc = preempt_count(); | ||
482 | |||
483 | size = sizeof(*entry) + tu->size; | ||
484 | |||
485 | event = trace_current_buffer_lock_reserve(&buffer, call->event.type, | ||
486 | size, irq_flags, pc); | ||
487 | if (!event) | ||
488 | return; | ||
489 | |||
490 | entry = ring_buffer_event_data(event); | ||
491 | entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); | ||
492 | data = (u8 *)&entry[1]; | ||
493 | for (i = 0; i < tu->nr_args; i++) | ||
494 | call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); | ||
495 | |||
496 | if (!filter_current_check_discard(buffer, call, entry, event)) | ||
497 | trace_buffer_unlock_commit(buffer, event, irq_flags, pc); | ||
498 | } | ||
499 | |||
500 | /* Event entry printers */ | ||
501 | static enum print_line_t | ||
502 | print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event) | ||
503 | { | ||
504 | struct uprobe_trace_entry_head *field; | ||
505 | struct trace_seq *s = &iter->seq; | ||
506 | struct trace_uprobe *tu; | ||
507 | u8 *data; | ||
508 | int i; | ||
509 | |||
510 | field = (struct uprobe_trace_entry_head *)iter->ent; | ||
511 | tu = container_of(event, struct trace_uprobe, call.event); | ||
512 | |||
513 | if (!trace_seq_printf(s, "%s: (", tu->call.name)) | ||
514 | goto partial; | ||
515 | |||
516 | if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) | ||
517 | goto partial; | ||
518 | |||
519 | if (!trace_seq_puts(s, ")")) | ||
520 | goto partial; | ||
521 | |||
522 | data = (u8 *)&field[1]; | ||
523 | for (i = 0; i < tu->nr_args; i++) { | ||
524 | if (!tu->args[i].type->print(s, tu->args[i].name, | ||
525 | data + tu->args[i].offset, field)) | ||
526 | goto partial; | ||
527 | } | ||
528 | |||
529 | if (trace_seq_puts(s, "\n")) | ||
530 | return TRACE_TYPE_HANDLED; | ||
531 | |||
532 | partial: | ||
533 | return TRACE_TYPE_PARTIAL_LINE; | ||
534 | } | ||
535 | |||
536 | static int probe_event_enable(struct trace_uprobe *tu, int flag) | ||
537 | { | ||
538 | struct uprobe_trace_consumer *utc; | ||
539 | int ret = 0; | ||
540 | |||
541 | if (!tu->inode || tu->consumer) | ||
542 | return -EINTR; | ||
543 | |||
544 | utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL); | ||
545 | if (!utc) | ||
546 | return -EINTR; | ||
547 | |||
548 | utc->cons.handler = uprobe_dispatcher; | ||
549 | utc->cons.filter = NULL; | ||
550 | ret = uprobe_register(tu->inode, tu->offset, &utc->cons); | ||
551 | if (ret) { | ||
552 | kfree(utc); | ||
553 | return ret; | ||
554 | } | ||
555 | |||
556 | tu->flags |= flag; | ||
557 | utc->tu = tu; | ||
558 | tu->consumer = utc; | ||
559 | |||
560 | return 0; | ||
561 | } | ||
562 | |||
563 | static void probe_event_disable(struct trace_uprobe *tu, int flag) | ||
564 | { | ||
565 | if (!tu->inode || !tu->consumer) | ||
566 | return; | ||
567 | |||
568 | uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons); | ||
569 | tu->flags &= ~flag; | ||
570 | kfree(tu->consumer); | ||
571 | tu->consumer = NULL; | ||
572 | } | ||
573 | |||
574 | static int uprobe_event_define_fields(struct ftrace_event_call *event_call) | ||
575 | { | ||
576 | int ret, i; | ||
577 | struct uprobe_trace_entry_head field; | ||
578 | struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data; | ||
579 | |||
580 | DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); | ||
581 | /* Set argument names as fields */ | ||
582 | for (i = 0; i < tu->nr_args; i++) { | ||
583 | ret = trace_define_field(event_call, tu->args[i].type->fmttype, | ||
584 | tu->args[i].name, | ||
585 | sizeof(field) + tu->args[i].offset, | ||
586 | tu->args[i].type->size, | ||
587 | tu->args[i].type->is_signed, | ||
588 | FILTER_OTHER); | ||
589 | |||
590 | if (ret) | ||
591 | return ret; | ||
592 | } | ||
593 | return 0; | ||
594 | } | ||
595 | |||
596 | #define LEN_OR_ZERO (len ? len - pos : 0) | ||
597 | static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len) | ||
598 | { | ||
599 | const char *fmt, *arg; | ||
600 | int i; | ||
601 | int pos = 0; | ||
602 | |||
603 | fmt = "(%lx)"; | ||
604 | arg = "REC->" FIELD_STRING_IP; | ||
605 | |||
606 | /* When len=0, we just calculate the needed length */ | ||
607 | |||
608 | pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); | ||
609 | |||
610 | for (i = 0; i < tu->nr_args; i++) { | ||
611 | pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", | ||
612 | tu->args[i].name, tu->args[i].type->fmt); | ||
613 | } | ||
614 | |||
615 | pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); | ||
616 | |||
617 | for (i = 0; i < tu->nr_args; i++) { | ||
618 | pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", | ||
619 | tu->args[i].name); | ||
620 | } | ||
621 | |||
622 | return pos; /* return the length of print_fmt */ | ||
623 | } | ||
624 | #undef LEN_OR_ZERO | ||
625 | |||
626 | static int set_print_fmt(struct trace_uprobe *tu) | ||
627 | { | ||
628 | char *print_fmt; | ||
629 | int len; | ||
630 | |||
631 | /* First: called with 0 length to calculate the needed length */ | ||
632 | len = __set_print_fmt(tu, NULL, 0); | ||
633 | print_fmt = kmalloc(len + 1, GFP_KERNEL); | ||
634 | if (!print_fmt) | ||
635 | return -ENOMEM; | ||
636 | |||
637 | /* Second: actually write the @print_fmt */ | ||
638 | __set_print_fmt(tu, print_fmt, len + 1); | ||
639 | tu->call.print_fmt = print_fmt; | ||
640 | |||
641 | return 0; | ||
642 | } | ||
643 | |||
#ifdef CONFIG_PERF_EVENTS
/*
 * uprobe profile handler: build the same record as uprobe_trace_func()
 * in a perf trace buffer and submit it to the event's per-CPU perf list.
 */
static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
{
	struct ftrace_event_call *call = &tu->call;
	struct uprobe_trace_entry_head *entry;
	struct probe_arg *parg, *end;
	struct hlist_head *head;
	u8 *payload;
	int size, __size;
	int rctx;

	/* Round up so that record plus a u32 is a multiple of sizeof(u64). */
	__size = sizeof(*entry) + tu->size;
	size = ALIGN(__size + sizeof(u32), sizeof(u64)) - sizeof(u32);
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
		return;

	preempt_disable();

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (entry) {
		entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));

		payload = (u8 *)&entry[1];
		end = tu->args + tu->nr_args;
		for (parg = tu->args; parg < end; parg++)
			call_fetch(&parg->fetch, regs, payload + parg->offset);

		head = this_cpu_ptr(call->perf_events);
		perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
	}

	preempt_enable();
}
#endif	/* CONFIG_PERF_EVENTS */
679 | |||
680 | static | ||
681 | int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data) | ||
682 | { | ||
683 | struct trace_uprobe *tu = (struct trace_uprobe *)event->data; | ||
684 | |||
685 | switch (type) { | ||
686 | case TRACE_REG_REGISTER: | ||
687 | return probe_event_enable(tu, TP_FLAG_TRACE); | ||
688 | |||
689 | case TRACE_REG_UNREGISTER: | ||
690 | probe_event_disable(tu, TP_FLAG_TRACE); | ||
691 | return 0; | ||
692 | |||
693 | #ifdef CONFIG_PERF_EVENTS | ||
694 | case TRACE_REG_PERF_REGISTER: | ||
695 | return probe_event_enable(tu, TP_FLAG_PROFILE); | ||
696 | |||
697 | case TRACE_REG_PERF_UNREGISTER: | ||
698 | probe_event_disable(tu, TP_FLAG_PROFILE); | ||
699 | return 0; | ||
700 | #endif | ||
701 | default: | ||
702 | return 0; | ||
703 | } | ||
704 | return 0; | ||
705 | } | ||
706 | |||
707 | static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) | ||
708 | { | ||
709 | struct uprobe_trace_consumer *utc; | ||
710 | struct trace_uprobe *tu; | ||
711 | |||
712 | utc = container_of(con, struct uprobe_trace_consumer, cons); | ||
713 | tu = utc->tu; | ||
714 | if (!tu || tu->consumer != utc) | ||
715 | return 0; | ||
716 | |||
717 | if (tu->flags & TP_FLAG_TRACE) | ||
718 | uprobe_trace_func(tu, regs); | ||
719 | |||
720 | #ifdef CONFIG_PERF_EVENTS | ||
721 | if (tu->flags & TP_FLAG_PROFILE) | ||
722 | uprobe_perf_func(tu, regs); | ||
723 | #endif | ||
724 | return 0; | ||
725 | } | ||
726 | |||
727 | static struct trace_event_functions uprobe_funcs = { | ||
728 | .trace = print_uprobe_event | ||
729 | }; | ||
730 | |||
731 | static int register_uprobe_event(struct trace_uprobe *tu) | ||
732 | { | ||
733 | struct ftrace_event_call *call = &tu->call; | ||
734 | int ret; | ||
735 | |||
736 | /* Initialize ftrace_event_call */ | ||
737 | INIT_LIST_HEAD(&call->class->fields); | ||
738 | call->event.funcs = &uprobe_funcs; | ||
739 | call->class->define_fields = uprobe_event_define_fields; | ||
740 | |||
741 | if (set_print_fmt(tu) < 0) | ||
742 | return -ENOMEM; | ||
743 | |||
744 | ret = register_ftrace_event(&call->event); | ||
745 | if (!ret) { | ||
746 | kfree(call->print_fmt); | ||
747 | return -ENODEV; | ||
748 | } | ||
749 | call->flags = 0; | ||
750 | call->class->reg = trace_uprobe_register; | ||
751 | call->data = tu; | ||
752 | ret = trace_add_event_call(call); | ||
753 | |||
754 | if (ret) { | ||
755 | pr_info("Failed to register uprobe event: %s\n", call->name); | ||
756 | kfree(call->print_fmt); | ||
757 | unregister_ftrace_event(&call->event); | ||
758 | } | ||
759 | |||
760 | return ret; | ||
761 | } | ||
762 | |||
763 | static void unregister_uprobe_event(struct trace_uprobe *tu) | ||
764 | { | ||
765 | /* tu->event is unregistered in trace_remove_event_call() */ | ||
766 | trace_remove_event_call(&tu->call); | ||
767 | kfree(tu->call.print_fmt); | ||
768 | tu->call.print_fmt = NULL; | ||
769 | } | ||
770 | |||
771 | /* Make a trace interface for controling probe points */ | ||
772 | static __init int init_uprobe_trace(void) | ||
773 | { | ||
774 | struct dentry *d_tracer; | ||
775 | |||
776 | d_tracer = tracing_init_dentry(); | ||
777 | if (!d_tracer) | ||
778 | return 0; | ||
779 | |||
780 | trace_create_file("uprobe_events", 0644, d_tracer, | ||
781 | NULL, &uprobe_events_ops); | ||
782 | /* Profile interface */ | ||
783 | trace_create_file("uprobe_profile", 0444, d_tracer, | ||
784 | NULL, &uprobe_profile_ops); | ||
785 | return 0; | ||
786 | } | ||
787 | |||
788 | fs_initcall(init_uprobe_trace); | ||