aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-05-24 14:39:34 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-24 14:39:34 -0400
commit654443e20dfc0617231f28a07c96a979ee1a0239 (patch)
treea0dc3f093eb13892539082e663607c34b4fc2d07 /kernel/trace
parent2c01e7bc46f10e9190818437e564f7e0db875ae9 (diff)
parent9cba26e66d09bf394ae5a739627a1dc8b7cae6f4 (diff)
Merge branch 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull user-space probe instrumentation from Ingo Molnar: "The uprobes code originates from SystemTap and has been used for years in Fedora and RHEL kernels. This version is much rewritten, reviews from PeterZ, Oleg and myself shaped the end result. This tree includes uprobes support in 'perf probe' - but SystemTap (and other tools) can take advantage of user probe points as well. Sample usage of uprobes via perf, for example to profile malloc() calls without modifying user-space binaries. First boot a new kernel with CONFIG_UPROBE_EVENT=y enabled. If you don't know which function you want to probe you can pick one from 'perf top' or can get a list of all functions that can be probed within libc (binaries can be specified as well): $ perf probe -F -x /lib/libc.so.6 To probe libc's malloc(): $ perf probe -x /lib64/libc.so.6 malloc Added new event: probe_libc:malloc (on 0x7eac0) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc -aR sleep 1 Make use of it to create a call graph (as the flat profile is going to look very boring): $ perf record -e probe_libc:malloc -gR make [ perf record: Woken up 173 times to write data ] [ perf record: Captured and wrote 44.190 MB perf.data (~1930712 samples) ] $ perf report | less 32.03% git libc-2.15.so [.] malloc | --- malloc 29.49% cc1 libc-2.15.so [.] malloc | --- malloc | |--0.95%-- 0x208eb1000000000 | |--0.63%-- htab_traverse_noresize 11.04% as libc-2.15.so [.] malloc | --- malloc | 7.15% ld libc-2.15.so [.] malloc | --- malloc | 5.07% sh libc-2.15.so [.] malloc | --- malloc | 4.99% python-config libc-2.15.so [.] malloc | --- malloc | 4.54% make libc-2.15.so [.] malloc | --- malloc | |--7.34%-- glob | | | |--93.18%-- 0x41588f | | | --6.82%-- glob | 0x41588f ... Or: $ perf report -g flat | less # Overhead Command Shared Object Symbol # ........ ............. ............. .......... # 32.03% git libc-2.15.so [.] malloc 27.19% malloc 29.49% cc1 libc-2.15.so [.] malloc 24.77% malloc 11.04% as libc-2.15.so [.] 
malloc 11.02% malloc 7.15% ld libc-2.15.so [.] malloc 6.57% malloc ... The core uprobes design is fairly straightforward: uprobes probe points register themselves at (inode:offset) addresses of libraries/binaries, after which all existing (or new) vmas that map that address will have a software breakpoint injected at that address. vmas are COW-ed to preserve original content. The probe points are kept in an rbtree. If user-space executes the probed inode:offset instruction address then an event is generated which can be recovered from the regular perf event channels and mmap-ed ring-buffer. Multiple probes at the same address are supported, they create a dynamic callback list of event consumers. The basic model is further complicated by the XOL speedup: the original instruction that is probed is copied (in an architecture specific fashion) and executed out of line when the probe triggers. The XOL area is a single vma per process, with a fixed number of entries (which limits probe execution parallelism). The API: uprobes are installed/removed via /sys/kernel/debug/tracing/uprobe_events, the API is integrated to align with the kprobes interface as much as possible, but is separate from it. Injecting a probe point is a privileged operation, which can be relaxed by setting perf_paranoid to -1. You can use multiple probes as well and mix them with kprobes and regular PMU events or tracepoints, when instrumenting a task." Fix up trivial conflicts in mm/memory.c due to previous cleanup of unmap_single_vma(). 
* 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits) perf probe: Detect probe target when m/x options are absent perf probe: Provide perf interface for uprobes tracing: Fix kconfig warning due to a typo tracing: Provide trace events interface for uprobes tracing: Extract out common code for kprobes/uprobes trace events tracing: Modify is_delete, is_return from int to bool uprobes/core: Decrement uprobe count before the pages are unmapped uprobes/core: Make background page replacement logic account for rss_stat counters uprobes/core: Optimize probe hits with the help of a counter uprobes/core: Allocate XOL slots for uprobes use uprobes/core: Handle breakpoint and singlestep exceptions uprobes/core: Rename bkpt to swbp uprobes/core: Make order of function parameters consistent across functions uprobes/core: Make macro names consistent uprobes: Update copyright notices uprobes/core: Move insn to arch specific structure uprobes/core: Remove uprobe_opcode_sz uprobes/core: Make instruction tables volatile uprobes: Move to kernel/events/ uprobes/core: Clean up, refactor and improve the code ...
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig20
-rw-r--r--kernel/trace/Makefile2
-rw-r--r--kernel/trace/trace.h5
-rw-r--r--kernel/trace/trace_kprobe.c899
-rw-r--r--kernel/trace/trace_probe.c839
-rw-r--r--kernel/trace/trace_probe.h161
-rw-r--r--kernel/trace/trace_uprobe.c788
7 files changed, 1838 insertions, 876 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index f347ac91292d..8c4c07071cc5 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -372,6 +372,7 @@ config KPROBE_EVENT
372 depends on HAVE_REGS_AND_STACK_ACCESS_API 372 depends on HAVE_REGS_AND_STACK_ACCESS_API
373 bool "Enable kprobes-based dynamic events" 373 bool "Enable kprobes-based dynamic events"
374 select TRACING 374 select TRACING
375 select PROBE_EVENTS
375 default y 376 default y
376 help 377 help
377 This allows the user to add tracing events (similar to tracepoints) 378 This allows the user to add tracing events (similar to tracepoints)
@@ -384,6 +385,25 @@ config KPROBE_EVENT
384 This option is also required by perf-probe subcommand of perf tools. 385 This option is also required by perf-probe subcommand of perf tools.
385 If you want to use perf tools, this option is strongly recommended. 386 If you want to use perf tools, this option is strongly recommended.
386 387
388config UPROBE_EVENT
389 bool "Enable uprobes-based dynamic events"
390 depends on ARCH_SUPPORTS_UPROBES
391 depends on MMU
392 select UPROBES
393 select PROBE_EVENTS
394 select TRACING
395 default n
396 help
397 This allows the user to add tracing events on top of userspace
398 dynamic events (similar to tracepoints) on the fly via the trace
399 events interface. Those events can be inserted wherever uprobes
400 can probe, and record various registers.
401 This option is required if you plan to use perf-probe subcommand
402 of perf tools on user space applications.
403
404config PROBE_EVENTS
405 def_bool n
406
387config DYNAMIC_FTRACE 407config DYNAMIC_FTRACE
388 bool "enable/disable ftrace tracepoints dynamically" 408 bool "enable/disable ftrace tracepoints dynamically"
389 depends on FUNCTION_TRACER 409 depends on FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index b3afe0e76f79..b831087c8200 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -60,5 +60,7 @@ endif
60ifeq ($(CONFIG_TRACING),y) 60ifeq ($(CONFIG_TRACING),y)
61obj-$(CONFIG_KGDB_KDB) += trace_kdb.o 61obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
62endif 62endif
63obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
64obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
63 65
64libftrace-y := ftrace.o 66libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6c6f7933eede..5aec220d2de0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -103,6 +103,11 @@ struct kretprobe_trace_entry_head {
103 unsigned long ret_ip; 103 unsigned long ret_ip;
104}; 104};
105 105
106struct uprobe_trace_entry_head {
107 struct trace_entry ent;
108 unsigned long ip;
109};
110
106/* 111/*
107 * trace_flag_type is an enumeration that holds different 112 * trace_flag_type is an enumeration that holds different
108 * states when a trace occurs. These are: 113 * states when a trace occurs. These are:
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 580a05ec926b..b31d3d5699fe 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -19,547 +19,15 @@
19 19
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32#include <linux/stringify.h>
33#include <linux/limits.h>
34#include <asm/bitsperlong.h>
35
36#include "trace.h"
37#include "trace_output.h"
38
39#define MAX_TRACE_ARGS 128
40#define MAX_ARGSTR_LEN 63
41#define MAX_EVENT_NAME_LEN 64
42#define MAX_STRING_SIZE PATH_MAX
43#define KPROBE_EVENT_SYSTEM "kprobes"
44
45/* Reserved field names */
46#define FIELD_STRING_IP "__probe_ip"
47#define FIELD_STRING_RETIP "__probe_ret_ip"
48#define FIELD_STRING_FUNC "__probe_func"
49
50const char *reserved_field_names[] = {
51 "common_type",
52 "common_flags",
53 "common_preempt_count",
54 "common_pid",
55 "common_tgid",
56 FIELD_STRING_IP,
57 FIELD_STRING_RETIP,
58 FIELD_STRING_FUNC,
59};
60
61/* Printing function type */
62typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *,
63 void *);
64#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
65#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
66
67/* Printing in basic type function template */
68#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
69static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
70 const char *name, \
71 void *data, void *ent)\
72{ \
73 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
74} \
75static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
76
77DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
78DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
79DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
80DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
81DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
82DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
83DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
84DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
85
86/* data_rloc: data relative location, compatible with u32 */
87#define make_data_rloc(len, roffs) \
88 (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
89#define get_rloc_len(dl) ((u32)(dl) >> 16)
90#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
91
92static inline void *get_rloc_data(u32 *dl)
93{
94 return (u8 *)dl + get_rloc_offs(*dl);
95}
96
97/* For data_loc conversion */
98static inline void *get_loc_data(u32 *dl, void *ent)
99{
100 return (u8 *)ent + get_rloc_offs(*dl);
101}
102
103/*
104 * Convert data_rloc to data_loc:
105 * data_rloc stores the offset from data_rloc itself, but data_loc
106 * stores the offset from event entry.
107 */
108#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
109
110/* For defining macros, define string/string_size types */
111typedef u32 string;
112typedef u32 string_size;
113
114/* Print type function for string type */
115static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
116 const char *name,
117 void *data, void *ent)
118{
119 int len = *(u32 *)data >> 16;
120
121 if (!len)
122 return trace_seq_printf(s, " %s=(fault)", name);
123 else
124 return trace_seq_printf(s, " %s=\"%s\"", name,
125 (const char *)get_loc_data(data, ent));
126}
127static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
128
129/* Data fetch function type */
130typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
131
132struct fetch_param {
133 fetch_func_t fn;
134 void *data;
135};
136
137static __kprobes void call_fetch(struct fetch_param *fprm,
138 struct pt_regs *regs, void *dest)
139{
140 return fprm->fn(regs, fprm->data, dest);
141}
142
143#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
144/*
145 * Define macro for basic types - we don't need to define s* types, because
146 * we have to care only about bitwidth at recording time.
147 */
148#define DEFINE_BASIC_FETCH_FUNCS(method) \
149DEFINE_FETCH_##method(u8) \
150DEFINE_FETCH_##method(u16) \
151DEFINE_FETCH_##method(u32) \
152DEFINE_FETCH_##method(u64)
153
154#define CHECK_FETCH_FUNCS(method, fn) \
155 (((FETCH_FUNC_NAME(method, u8) == fn) || \
156 (FETCH_FUNC_NAME(method, u16) == fn) || \
157 (FETCH_FUNC_NAME(method, u32) == fn) || \
158 (FETCH_FUNC_NAME(method, u64) == fn) || \
159 (FETCH_FUNC_NAME(method, string) == fn) || \
160 (FETCH_FUNC_NAME(method, string_size) == fn)) \
161 && (fn != NULL))
162
163/* Data fetch function templates */
164#define DEFINE_FETCH_reg(type) \
165static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
166 void *offset, void *dest) \
167{ \
168 *(type *)dest = (type)regs_get_register(regs, \
169 (unsigned int)((unsigned long)offset)); \
170}
171DEFINE_BASIC_FETCH_FUNCS(reg)
172/* No string on the register */
173#define fetch_reg_string NULL
174#define fetch_reg_string_size NULL
175
176#define DEFINE_FETCH_stack(type) \
177static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
178 void *offset, void *dest) \
179{ \
180 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
181 (unsigned int)((unsigned long)offset)); \
182}
183DEFINE_BASIC_FETCH_FUNCS(stack)
184/* No string on the stack entry */
185#define fetch_stack_string NULL
186#define fetch_stack_string_size NULL
187
188#define DEFINE_FETCH_retval(type) \
189static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
190 void *dummy, void *dest) \
191{ \
192 *(type *)dest = (type)regs_return_value(regs); \
193}
194DEFINE_BASIC_FETCH_FUNCS(retval)
195/* No string on the retval */
196#define fetch_retval_string NULL
197#define fetch_retval_string_size NULL
198
199#define DEFINE_FETCH_memory(type) \
200static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
201 void *addr, void *dest) \
202{ \
203 type retval; \
204 if (probe_kernel_address(addr, retval)) \
205 *(type *)dest = 0; \
206 else \
207 *(type *)dest = retval; \
208}
209DEFINE_BASIC_FETCH_FUNCS(memory)
210/*
211 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
212 * length and relative data location.
213 */
214static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
215 void *addr, void *dest)
216{
217 long ret;
218 int maxlen = get_rloc_len(*(u32 *)dest);
219 u8 *dst = get_rloc_data(dest);
220 u8 *src = addr;
221 mm_segment_t old_fs = get_fs();
222 if (!maxlen)
223 return;
224 /*
225 * Try to get string again, since the string can be changed while
226 * probing.
227 */
228 set_fs(KERNEL_DS);
229 pagefault_disable();
230 do
231 ret = __copy_from_user_inatomic(dst++, src++, 1);
232 while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
233 dst[-1] = '\0';
234 pagefault_enable();
235 set_fs(old_fs);
236
237 if (ret < 0) { /* Failed to fetch string */
238 ((u8 *)get_rloc_data(dest))[0] = '\0';
239 *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
240 } else
241 *(u32 *)dest = make_data_rloc(src - (u8 *)addr,
242 get_rloc_offs(*(u32 *)dest));
243}
244/* Return the length of string -- including null terminal byte */
245static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
246 void *addr, void *dest)
247{
248 int ret, len = 0;
249 u8 c;
250 mm_segment_t old_fs = get_fs();
251
252 set_fs(KERNEL_DS);
253 pagefault_disable();
254 do {
255 ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
256 len++;
257 } while (c && ret == 0 && len < MAX_STRING_SIZE);
258 pagefault_enable();
259 set_fs(old_fs);
260
261 if (ret < 0) /* Failed to check the length */
262 *(u32 *)dest = 0;
263 else
264 *(u32 *)dest = len;
265}
266
267/* Memory fetching by symbol */
268struct symbol_cache {
269 char *symbol;
270 long offset;
271 unsigned long addr;
272};
273
274static unsigned long update_symbol_cache(struct symbol_cache *sc)
275{
276 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
277 if (sc->addr)
278 sc->addr += sc->offset;
279 return sc->addr;
280}
281
282static void free_symbol_cache(struct symbol_cache *sc)
283{
284 kfree(sc->symbol);
285 kfree(sc);
286}
287
288static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
289{
290 struct symbol_cache *sc;
291
292 if (!sym || strlen(sym) == 0)
293 return NULL;
294 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
295 if (!sc)
296 return NULL;
297
298 sc->symbol = kstrdup(sym, GFP_KERNEL);
299 if (!sc->symbol) {
300 kfree(sc);
301 return NULL;
302 }
303 sc->offset = offset;
304 22
305 update_symbol_cache(sc); 23#include "trace_probe.h"
306 return sc;
307}
308
309#define DEFINE_FETCH_symbol(type) \
310static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
311 void *data, void *dest) \
312{ \
313 struct symbol_cache *sc = data; \
314 if (sc->addr) \
315 fetch_memory_##type(regs, (void *)sc->addr, dest); \
316 else \
317 *(type *)dest = 0; \
318}
319DEFINE_BASIC_FETCH_FUNCS(symbol)
320DEFINE_FETCH_symbol(string)
321DEFINE_FETCH_symbol(string_size)
322
323/* Dereference memory access function */
324struct deref_fetch_param {
325 struct fetch_param orig;
326 long offset;
327};
328
329#define DEFINE_FETCH_deref(type) \
330static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
331 void *data, void *dest) \
332{ \
333 struct deref_fetch_param *dprm = data; \
334 unsigned long addr; \
335 call_fetch(&dprm->orig, regs, &addr); \
336 if (addr) { \
337 addr += dprm->offset; \
338 fetch_memory_##type(regs, (void *)addr, dest); \
339 } else \
340 *(type *)dest = 0; \
341}
342DEFINE_BASIC_FETCH_FUNCS(deref)
343DEFINE_FETCH_deref(string)
344DEFINE_FETCH_deref(string_size)
345
346static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
347{
348 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
349 update_deref_fetch_param(data->orig.data);
350 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
351 update_symbol_cache(data->orig.data);
352}
353
354static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
355{
356 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
357 free_deref_fetch_param(data->orig.data);
358 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
359 free_symbol_cache(data->orig.data);
360 kfree(data);
361}
362
363/* Bitfield fetch function */
364struct bitfield_fetch_param {
365 struct fetch_param orig;
366 unsigned char hi_shift;
367 unsigned char low_shift;
368};
369 24
370#define DEFINE_FETCH_bitfield(type) \ 25#define KPROBE_EVENT_SYSTEM "kprobes"
371static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
372 void *data, void *dest) \
373{ \
374 struct bitfield_fetch_param *bprm = data; \
375 type buf = 0; \
376 call_fetch(&bprm->orig, regs, &buf); \
377 if (buf) { \
378 buf <<= bprm->hi_shift; \
379 buf >>= bprm->low_shift; \
380 } \
381 *(type *)dest = buf; \
382}
383DEFINE_BASIC_FETCH_FUNCS(bitfield)
384#define fetch_bitfield_string NULL
385#define fetch_bitfield_string_size NULL
386
387static __kprobes void
388update_bitfield_fetch_param(struct bitfield_fetch_param *data)
389{
390 /*
391 * Don't check the bitfield itself, because this must be the
392 * last fetch function.
393 */
394 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
395 update_deref_fetch_param(data->orig.data);
396 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
397 update_symbol_cache(data->orig.data);
398}
399
400static __kprobes void
401free_bitfield_fetch_param(struct bitfield_fetch_param *data)
402{
403 /*
404 * Don't check the bitfield itself, because this must be the
405 * last fetch function.
406 */
407 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
408 free_deref_fetch_param(data->orig.data);
409 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
410 free_symbol_cache(data->orig.data);
411 kfree(data);
412}
413
414/* Default (unsigned long) fetch type */
415#define __DEFAULT_FETCH_TYPE(t) u##t
416#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
417#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
418#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
419
420/* Fetch types */
421enum {
422 FETCH_MTD_reg = 0,
423 FETCH_MTD_stack,
424 FETCH_MTD_retval,
425 FETCH_MTD_memory,
426 FETCH_MTD_symbol,
427 FETCH_MTD_deref,
428 FETCH_MTD_bitfield,
429 FETCH_MTD_END,
430};
431
432#define ASSIGN_FETCH_FUNC(method, type) \
433 [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
434
435#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
436 {.name = _name, \
437 .size = _size, \
438 .is_signed = sign, \
439 .print = PRINT_TYPE_FUNC_NAME(ptype), \
440 .fmt = PRINT_TYPE_FMT_NAME(ptype), \
441 .fmttype = _fmttype, \
442 .fetch = { \
443ASSIGN_FETCH_FUNC(reg, ftype), \
444ASSIGN_FETCH_FUNC(stack, ftype), \
445ASSIGN_FETCH_FUNC(retval, ftype), \
446ASSIGN_FETCH_FUNC(memory, ftype), \
447ASSIGN_FETCH_FUNC(symbol, ftype), \
448ASSIGN_FETCH_FUNC(deref, ftype), \
449ASSIGN_FETCH_FUNC(bitfield, ftype), \
450 } \
451 }
452
453#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
454 __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
455
456#define FETCH_TYPE_STRING 0
457#define FETCH_TYPE_STRSIZE 1
458
459/* Fetch type information table */
460static const struct fetch_type {
461 const char *name; /* Name of type */
462 size_t size; /* Byte size of type */
463 int is_signed; /* Signed flag */
464 print_type_func_t print; /* Print functions */
465 const char *fmt; /* Fromat string */
466 const char *fmttype; /* Name in format file */
467 /* Fetch functions */
468 fetch_func_t fetch[FETCH_MTD_END];
469} fetch_type_table[] = {
470 /* Special types */
471 [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
472 sizeof(u32), 1, "__data_loc char[]"),
473 [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
474 string_size, sizeof(u32), 0, "u32"),
475 /* Basic types */
476 ASSIGN_FETCH_TYPE(u8, u8, 0),
477 ASSIGN_FETCH_TYPE(u16, u16, 0),
478 ASSIGN_FETCH_TYPE(u32, u32, 0),
479 ASSIGN_FETCH_TYPE(u64, u64, 0),
480 ASSIGN_FETCH_TYPE(s8, u8, 1),
481 ASSIGN_FETCH_TYPE(s16, u16, 1),
482 ASSIGN_FETCH_TYPE(s32, u32, 1),
483 ASSIGN_FETCH_TYPE(s64, u64, 1),
484};
485
486static const struct fetch_type *find_fetch_type(const char *type)
487{
488 int i;
489
490 if (!type)
491 type = DEFAULT_FETCH_TYPE_STR;
492
493 /* Special case: bitfield */
494 if (*type == 'b') {
495 unsigned long bs;
496 type = strchr(type, '/');
497 if (!type)
498 goto fail;
499 type++;
500 if (strict_strtoul(type, 0, &bs))
501 goto fail;
502 switch (bs) {
503 case 8:
504 return find_fetch_type("u8");
505 case 16:
506 return find_fetch_type("u16");
507 case 32:
508 return find_fetch_type("u32");
509 case 64:
510 return find_fetch_type("u64");
511 default:
512 goto fail;
513 }
514 }
515
516 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
517 if (strcmp(type, fetch_type_table[i].name) == 0)
518 return &fetch_type_table[i];
519fail:
520 return NULL;
521}
522
523/* Special function : only accept unsigned long */
524static __kprobes void fetch_stack_address(struct pt_regs *regs,
525 void *dummy, void *dest)
526{
527 *(unsigned long *)dest = kernel_stack_pointer(regs);
528}
529
530static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
531 fetch_func_t orig_fn)
532{
533 int i;
534
535 if (type != &fetch_type_table[FETCH_TYPE_STRING])
536 return NULL; /* Only string type needs size function */
537 for (i = 0; i < FETCH_MTD_END; i++)
538 if (type->fetch[i] == orig_fn)
539 return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
540
541 WARN_ON(1); /* This should not happen */
542 return NULL;
543}
544 26
545/** 27/**
546 * Kprobe event core functions 28 * Kprobe event core functions
547 */ 29 */
548 30
549struct probe_arg {
550 struct fetch_param fetch;
551 struct fetch_param fetch_size;
552 unsigned int offset; /* Offset from argument entry */
553 const char *name; /* Name of this argument */
554 const char *comm; /* Command of this argument */
555 const struct fetch_type *type; /* Type of this argument */
556};
557
558/* Flags for trace_probe */
559#define TP_FLAG_TRACE 1
560#define TP_FLAG_PROFILE 2
561#define TP_FLAG_REGISTERED 4
562
563struct trace_probe { 31struct trace_probe {
564 struct list_head list; 32 struct list_head list;
565 struct kretprobe rp; /* Use rp.kp for kprobe use */ 33 struct kretprobe rp; /* Use rp.kp for kprobe use */
@@ -631,18 +99,6 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
631static int kretprobe_dispatcher(struct kretprobe_instance *ri, 99static int kretprobe_dispatcher(struct kretprobe_instance *ri,
632 struct pt_regs *regs); 100 struct pt_regs *regs);
633 101
634/* Check the name is good for event/group/fields */
635static int is_good_name(const char *name)
636{
637 if (!isalpha(*name) && *name != '_')
638 return 0;
639 while (*++name != '\0') {
640 if (!isalpha(*name) && !isdigit(*name) && *name != '_')
641 return 0;
642 }
643 return 1;
644}
645
646/* 102/*
647 * Allocate new trace_probe and initialize it (including kprobes). 103 * Allocate new trace_probe and initialize it (including kprobes).
648 */ 104 */
@@ -651,7 +107,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
651 void *addr, 107 void *addr,
652 const char *symbol, 108 const char *symbol,
653 unsigned long offs, 109 unsigned long offs,
654 int nargs, int is_return) 110 int nargs, bool is_return)
655{ 111{
656 struct trace_probe *tp; 112 struct trace_probe *tp;
657 int ret = -ENOMEM; 113 int ret = -ENOMEM;
@@ -702,34 +158,12 @@ error:
702 return ERR_PTR(ret); 158 return ERR_PTR(ret);
703} 159}
704 160
705static void update_probe_arg(struct probe_arg *arg)
706{
707 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
708 update_bitfield_fetch_param(arg->fetch.data);
709 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
710 update_deref_fetch_param(arg->fetch.data);
711 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
712 update_symbol_cache(arg->fetch.data);
713}
714
715static void free_probe_arg(struct probe_arg *arg)
716{
717 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
718 free_bitfield_fetch_param(arg->fetch.data);
719 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
720 free_deref_fetch_param(arg->fetch.data);
721 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
722 free_symbol_cache(arg->fetch.data);
723 kfree(arg->name);
724 kfree(arg->comm);
725}
726
727static void free_trace_probe(struct trace_probe *tp) 161static void free_trace_probe(struct trace_probe *tp)
728{ 162{
729 int i; 163 int i;
730 164
731 for (i = 0; i < tp->nr_args; i++) 165 for (i = 0; i < tp->nr_args; i++)
732 free_probe_arg(&tp->args[i]); 166 traceprobe_free_probe_arg(&tp->args[i]);
733 167
734 kfree(tp->call.class->system); 168 kfree(tp->call.class->system);
735 kfree(tp->call.name); 169 kfree(tp->call.name);
@@ -787,7 +221,7 @@ static int __register_trace_probe(struct trace_probe *tp)
787 return -EINVAL; 221 return -EINVAL;
788 222
789 for (i = 0; i < tp->nr_args; i++) 223 for (i = 0; i < tp->nr_args; i++)
790 update_probe_arg(&tp->args[i]); 224 traceprobe_update_arg(&tp->args[i]);
791 225
792 /* Set/clear disabled flag according to tp->flag */ 226 /* Set/clear disabled flag according to tp->flag */
793 if (trace_probe_is_enabled(tp)) 227 if (trace_probe_is_enabled(tp))
@@ -919,227 +353,6 @@ static struct notifier_block trace_probe_module_nb = {
919 .priority = 1 /* Invoked after kprobe module callback */ 353 .priority = 1 /* Invoked after kprobe module callback */
920}; 354};
921 355
922/* Split symbol and offset. */
923static int split_symbol_offset(char *symbol, unsigned long *offset)
924{
925 char *tmp;
926 int ret;
927
928 if (!offset)
929 return -EINVAL;
930
931 tmp = strchr(symbol, '+');
932 if (tmp) {
933 /* skip sign because strict_strtol doesn't accept '+' */
934 ret = strict_strtoul(tmp + 1, 0, offset);
935 if (ret)
936 return ret;
937 *tmp = '\0';
938 } else
939 *offset = 0;
940 return 0;
941}
942
943#define PARAM_MAX_ARGS 16
944#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
945
946static int parse_probe_vars(char *arg, const struct fetch_type *t,
947 struct fetch_param *f, int is_return)
948{
949 int ret = 0;
950 unsigned long param;
951
952 if (strcmp(arg, "retval") == 0) {
953 if (is_return)
954 f->fn = t->fetch[FETCH_MTD_retval];
955 else
956 ret = -EINVAL;
957 } else if (strncmp(arg, "stack", 5) == 0) {
958 if (arg[5] == '\0') {
959 if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
960 f->fn = fetch_stack_address;
961 else
962 ret = -EINVAL;
963 } else if (isdigit(arg[5])) {
964 ret = strict_strtoul(arg + 5, 10, &param);
965 if (ret || param > PARAM_MAX_STACK)
966 ret = -EINVAL;
967 else {
968 f->fn = t->fetch[FETCH_MTD_stack];
969 f->data = (void *)param;
970 }
971 } else
972 ret = -EINVAL;
973 } else
974 ret = -EINVAL;
975 return ret;
976}
977
978/* Recursive argument parser */
979static int __parse_probe_arg(char *arg, const struct fetch_type *t,
980 struct fetch_param *f, int is_return)
981{
982 int ret = 0;
983 unsigned long param;
984 long offset;
985 char *tmp;
986
987 switch (arg[0]) {
988 case '$':
989 ret = parse_probe_vars(arg + 1, t, f, is_return);
990 break;
991 case '%': /* named register */
992 ret = regs_query_register_offset(arg + 1);
993 if (ret >= 0) {
994 f->fn = t->fetch[FETCH_MTD_reg];
995 f->data = (void *)(unsigned long)ret;
996 ret = 0;
997 }
998 break;
999 case '@': /* memory or symbol */
1000 if (isdigit(arg[1])) {
1001 ret = strict_strtoul(arg + 1, 0, &param);
1002 if (ret)
1003 break;
1004 f->fn = t->fetch[FETCH_MTD_memory];
1005 f->data = (void *)param;
1006 } else {
1007 ret = split_symbol_offset(arg + 1, &offset);
1008 if (ret)
1009 break;
1010 f->data = alloc_symbol_cache(arg + 1, offset);
1011 if (f->data)
1012 f->fn = t->fetch[FETCH_MTD_symbol];
1013 }
1014 break;
1015 case '+': /* deref memory */
1016 arg++; /* Skip '+', because strict_strtol() rejects it. */
1017 case '-':
1018 tmp = strchr(arg, '(');
1019 if (!tmp)
1020 break;
1021 *tmp = '\0';
1022 ret = strict_strtol(arg, 0, &offset);
1023 if (ret)
1024 break;
1025 arg = tmp + 1;
1026 tmp = strrchr(arg, ')');
1027 if (tmp) {
1028 struct deref_fetch_param *dprm;
1029 const struct fetch_type *t2 = find_fetch_type(NULL);
1030 *tmp = '\0';
1031 dprm = kzalloc(sizeof(struct deref_fetch_param),
1032 GFP_KERNEL);
1033 if (!dprm)
1034 return -ENOMEM;
1035 dprm->offset = offset;
1036 ret = __parse_probe_arg(arg, t2, &dprm->orig,
1037 is_return);
1038 if (ret)
1039 kfree(dprm);
1040 else {
1041 f->fn = t->fetch[FETCH_MTD_deref];
1042 f->data = (void *)dprm;
1043 }
1044 }
1045 break;
1046 }
1047 if (!ret && !f->fn) { /* Parsed, but do not find fetch method */
1048 pr_info("%s type has no corresponding fetch method.\n",
1049 t->name);
1050 ret = -EINVAL;
1051 }
1052 return ret;
1053}
1054
1055#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long))
1056
1057/* Bitfield type needs to be parsed into a fetch function */
1058static int __parse_bitfield_probe_arg(const char *bf,
1059 const struct fetch_type *t,
1060 struct fetch_param *f)
1061{
1062 struct bitfield_fetch_param *bprm;
1063 unsigned long bw, bo;
1064 char *tail;
1065
1066 if (*bf != 'b')
1067 return 0;
1068
1069 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1070 if (!bprm)
1071 return -ENOMEM;
1072 bprm->orig = *f;
1073 f->fn = t->fetch[FETCH_MTD_bitfield];
1074 f->data = (void *)bprm;
1075
1076 bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
1077 if (bw == 0 || *tail != '@')
1078 return -EINVAL;
1079
1080 bf = tail + 1;
1081 bo = simple_strtoul(bf, &tail, 0);
1082 if (tail == bf || *tail != '/')
1083 return -EINVAL;
1084
1085 bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
1086 bprm->low_shift = bprm->hi_shift + bo;
1087 return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
1088}
1089
1090/* String length checking wrapper */
1091static int parse_probe_arg(char *arg, struct trace_probe *tp,
1092 struct probe_arg *parg, int is_return)
1093{
1094 const char *t;
1095 int ret;
1096
1097 if (strlen(arg) > MAX_ARGSTR_LEN) {
1098 pr_info("Argument is too long.: %s\n", arg);
1099 return -ENOSPC;
1100 }
1101 parg->comm = kstrdup(arg, GFP_KERNEL);
1102 if (!parg->comm) {
1103 pr_info("Failed to allocate memory for command '%s'.\n", arg);
1104 return -ENOMEM;
1105 }
1106 t = strchr(parg->comm, ':');
1107 if (t) {
1108 arg[t - parg->comm] = '\0';
1109 t++;
1110 }
1111 parg->type = find_fetch_type(t);
1112 if (!parg->type) {
1113 pr_info("Unsupported type: %s\n", t);
1114 return -EINVAL;
1115 }
1116 parg->offset = tp->size;
1117 tp->size += parg->type->size;
1118 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
1119 if (ret >= 0 && t != NULL)
1120 ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
1121 if (ret >= 0) {
1122 parg->fetch_size.fn = get_fetch_size_function(parg->type,
1123 parg->fetch.fn);
1124 parg->fetch_size.data = parg->fetch.data;
1125 }
1126 return ret;
1127}
1128
1129/* Return 1 if name is reserved or already used by another argument */
1130static int conflict_field_name(const char *name,
1131 struct probe_arg *args, int narg)
1132{
1133 int i;
1134 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
1135 if (strcmp(reserved_field_names[i], name) == 0)
1136 return 1;
1137 for (i = 0; i < narg; i++)
1138 if (strcmp(args[i].name, name) == 0)
1139 return 1;
1140 return 0;
1141}
1142
1143static int create_trace_probe(int argc, char **argv) 356static int create_trace_probe(int argc, char **argv)
1144{ 357{
1145 /* 358 /*
@@ -1162,7 +375,7 @@ static int create_trace_probe(int argc, char **argv)
1162 */ 375 */
1163 struct trace_probe *tp; 376 struct trace_probe *tp;
1164 int i, ret = 0; 377 int i, ret = 0;
1165 int is_return = 0, is_delete = 0; 378 bool is_return = false, is_delete = false;
1166 char *symbol = NULL, *event = NULL, *group = NULL; 379 char *symbol = NULL, *event = NULL, *group = NULL;
1167 char *arg; 380 char *arg;
1168 unsigned long offset = 0; 381 unsigned long offset = 0;
@@ -1171,11 +384,11 @@ static int create_trace_probe(int argc, char **argv)
1171 384
1172 /* argc must be >= 1 */ 385 /* argc must be >= 1 */
1173 if (argv[0][0] == 'p') 386 if (argv[0][0] == 'p')
1174 is_return = 0; 387 is_return = false;
1175 else if (argv[0][0] == 'r') 388 else if (argv[0][0] == 'r')
1176 is_return = 1; 389 is_return = true;
1177 else if (argv[0][0] == '-') 390 else if (argv[0][0] == '-')
1178 is_delete = 1; 391 is_delete = true;
1179 else { 392 else {
1180 pr_info("Probe definition must be started with 'p', 'r' or" 393 pr_info("Probe definition must be started with 'p', 'r' or"
1181 " '-'.\n"); 394 " '-'.\n");
@@ -1240,7 +453,7 @@ static int create_trace_probe(int argc, char **argv)
1240 /* a symbol specified */ 453 /* a symbol specified */
1241 symbol = argv[1]; 454 symbol = argv[1];
1242 /* TODO: support .init module functions */ 455 /* TODO: support .init module functions */
1243 ret = split_symbol_offset(symbol, &offset); 456 ret = traceprobe_split_symbol_offset(symbol, &offset);
1244 if (ret) { 457 if (ret) {
1245 pr_info("Failed to parse symbol.\n"); 458 pr_info("Failed to parse symbol.\n");
1246 return ret; 459 return ret;
@@ -1302,7 +515,8 @@ static int create_trace_probe(int argc, char **argv)
1302 goto error; 515 goto error;
1303 } 516 }
1304 517
1305 if (conflict_field_name(tp->args[i].name, tp->args, i)) { 518 if (traceprobe_conflict_field_name(tp->args[i].name,
519 tp->args, i)) {
1306 pr_info("Argument[%d] name '%s' conflicts with " 520 pr_info("Argument[%d] name '%s' conflicts with "
1307 "another field.\n", i, argv[i]); 521 "another field.\n", i, argv[i]);
1308 ret = -EINVAL; 522 ret = -EINVAL;
@@ -1310,7 +524,8 @@ static int create_trace_probe(int argc, char **argv)
1310 } 524 }
1311 525
1312 /* Parse fetch argument */ 526 /* Parse fetch argument */
1313 ret = parse_probe_arg(arg, tp, &tp->args[i], is_return); 527 ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i],
528 is_return, true);
1314 if (ret) { 529 if (ret) {
1315 pr_info("Parse error at argument[%d]. (%d)\n", i, ret); 530 pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
1316 goto error; 531 goto error;
@@ -1412,70 +627,11 @@ static int probes_open(struct inode *inode, struct file *file)
1412 return seq_open(file, &probes_seq_op); 627 return seq_open(file, &probes_seq_op);
1413} 628}
1414 629
1415static int command_trace_probe(const char *buf)
1416{
1417 char **argv;
1418 int argc = 0, ret = 0;
1419
1420 argv = argv_split(GFP_KERNEL, buf, &argc);
1421 if (!argv)
1422 return -ENOMEM;
1423
1424 if (argc)
1425 ret = create_trace_probe(argc, argv);
1426
1427 argv_free(argv);
1428 return ret;
1429}
1430
1431#define WRITE_BUFSIZE 4096
1432
1433static ssize_t probes_write(struct file *file, const char __user *buffer, 630static ssize_t probes_write(struct file *file, const char __user *buffer,
1434 size_t count, loff_t *ppos) 631 size_t count, loff_t *ppos)
1435{ 632{
1436 char *kbuf, *tmp; 633 return traceprobe_probes_write(file, buffer, count, ppos,
1437 int ret; 634 create_trace_probe);
1438 size_t done;
1439 size_t size;
1440
1441 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
1442 if (!kbuf)
1443 return -ENOMEM;
1444
1445 ret = done = 0;
1446 while (done < count) {
1447 size = count - done;
1448 if (size >= WRITE_BUFSIZE)
1449 size = WRITE_BUFSIZE - 1;
1450 if (copy_from_user(kbuf, buffer + done, size)) {
1451 ret = -EFAULT;
1452 goto out;
1453 }
1454 kbuf[size] = '\0';
1455 tmp = strchr(kbuf, '\n');
1456 if (tmp) {
1457 *tmp = '\0';
1458 size = tmp - kbuf + 1;
1459 } else if (done + size < count) {
1460 pr_warning("Line length is too long: "
1461 "Should be less than %d.", WRITE_BUFSIZE);
1462 ret = -EINVAL;
1463 goto out;
1464 }
1465 done += size;
1466 /* Remove comments */
1467 tmp = strchr(kbuf, '#');
1468 if (tmp)
1469 *tmp = '\0';
1470
1471 ret = command_trace_probe(kbuf);
1472 if (ret)
1473 goto out;
1474 }
1475 ret = done;
1476out:
1477 kfree(kbuf);
1478 return ret;
1479} 635}
1480 636
1481static const struct file_operations kprobe_events_ops = { 637static const struct file_operations kprobe_events_ops = {
@@ -1711,16 +867,6 @@ partial:
1711 return TRACE_TYPE_PARTIAL_LINE; 867 return TRACE_TYPE_PARTIAL_LINE;
1712} 868}
1713 869
1714#undef DEFINE_FIELD
1715#define DEFINE_FIELD(type, item, name, is_signed) \
1716 do { \
1717 ret = trace_define_field(event_call, #type, name, \
1718 offsetof(typeof(field), item), \
1719 sizeof(field.item), is_signed, \
1720 FILTER_OTHER); \
1721 if (ret) \
1722 return ret; \
1723 } while (0)
1724 870
1725static int kprobe_event_define_fields(struct ftrace_event_call *event_call) 871static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1726{ 872{
@@ -2051,8 +1197,9 @@ static __init int kprobe_trace_self_tests_init(void)
2051 1197
2052 pr_info("Testing kprobe tracing: "); 1198 pr_info("Testing kprobe tracing: ");
2053 1199
2054 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " 1200 ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
2055 "$stack $stack0 +0($stack)"); 1201 "$stack $stack0 +0($stack)",
1202 create_trace_probe);
2056 if (WARN_ON_ONCE(ret)) { 1203 if (WARN_ON_ONCE(ret)) {
2057 pr_warning("error on probing function entry.\n"); 1204 pr_warning("error on probing function entry.\n");
2058 warn++; 1205 warn++;
@@ -2066,8 +1213,8 @@ static __init int kprobe_trace_self_tests_init(void)
2066 enable_trace_probe(tp, TP_FLAG_TRACE); 1213 enable_trace_probe(tp, TP_FLAG_TRACE);
2067 } 1214 }
2068 1215
2069 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " 1216 ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
2070 "$retval"); 1217 "$retval", create_trace_probe);
2071 if (WARN_ON_ONCE(ret)) { 1218 if (WARN_ON_ONCE(ret)) {
2072 pr_warning("error on probing function return.\n"); 1219 pr_warning("error on probing function return.\n");
2073 warn++; 1220 warn++;
@@ -2101,13 +1248,13 @@ static __init int kprobe_trace_self_tests_init(void)
2101 } else 1248 } else
2102 disable_trace_probe(tp, TP_FLAG_TRACE); 1249 disable_trace_probe(tp, TP_FLAG_TRACE);
2103 1250
2104 ret = command_trace_probe("-:testprobe"); 1251 ret = traceprobe_command("-:testprobe", create_trace_probe);
2105 if (WARN_ON_ONCE(ret)) { 1252 if (WARN_ON_ONCE(ret)) {
2106 pr_warning("error on deleting a probe.\n"); 1253 pr_warning("error on deleting a probe.\n");
2107 warn++; 1254 warn++;
2108 } 1255 }
2109 1256
2110 ret = command_trace_probe("-:testprobe2"); 1257 ret = traceprobe_command("-:testprobe2", create_trace_probe);
2111 if (WARN_ON_ONCE(ret)) { 1258 if (WARN_ON_ONCE(ret)) {
2112 pr_warning("error on deleting a probe.\n"); 1259 pr_warning("error on deleting a probe.\n");
2113 warn++; 1260 warn++;
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
new file mode 100644
index 000000000000..daa9980153af
--- /dev/null
+++ b/kernel/trace/trace_probe.c
@@ -0,0 +1,839 @@
1/*
2 * Common code for probe-based Dynamic events.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 *
17 * This code was copied from kernel/trace/trace_kprobe.c written by
18 * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
19 *
20 * Updates to make this generic:
21 * Copyright (C) IBM Corporation, 2010-2011
22 * Author: Srikar Dronamraju
23 */
24
25#include "trace_probe.h"
26
27const char *reserved_field_names[] = {
28 "common_type",
29 "common_flags",
30 "common_preempt_count",
31 "common_pid",
32 "common_tgid",
33 FIELD_STRING_IP,
34 FIELD_STRING_RETIP,
35 FIELD_STRING_FUNC,
36};
37
38/* Printing function type */
39#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
40#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
41
42/* Printing in basic type function template */
43#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
44static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
45 const char *name, \
46 void *data, void *ent)\
47{ \
48 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
49} \
50static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
51
52DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
53DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
54DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
55DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
56DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
57DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
58DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
59DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
60
61static inline void *get_rloc_data(u32 *dl)
62{
63 return (u8 *)dl + get_rloc_offs(*dl);
64}
65
66/* For data_loc conversion */
67static inline void *get_loc_data(u32 *dl, void *ent)
68{
69 return (u8 *)ent + get_rloc_offs(*dl);
70}
71
72/* For defining macros, define string/string_size types */
73typedef u32 string;
74typedef u32 string_size;
75
76/* Print type function for string type */
77static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
78 const char *name,
79 void *data, void *ent)
80{
81 int len = *(u32 *)data >> 16;
82
83 if (!len)
84 return trace_seq_printf(s, " %s=(fault)", name);
85 else
86 return trace_seq_printf(s, " %s=\"%s\"", name,
87 (const char *)get_loc_data(data, ent));
88}
89
90static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
91
/* Name of the fetch function for a (method, type) pair */
#define FETCH_FUNC_NAME(method, type)	fetch_##method##_##type

/*
 * Instantiate DEFINE_FETCH_<method> for every basic type.  The s* types
 * need no functions of their own: only the bit width matters when the
 * value is recorded.
 */
#define DEFINE_BASIC_FETCH_FUNCS(method)	\
DEFINE_FETCH_##method(u8)			\
DEFINE_FETCH_##method(u16)			\
DEFINE_FETCH_##method(u32)			\
DEFINE_FETCH_##method(u64)

/*
 * True when @fn is non-NULL and is one of the fetch functions of @method
 * (any basic type, string or string_size).
 */
#define CHECK_FETCH_FUNCS(method, fn)				\
	((fn) != NULL &&					\
	 ((fn) == FETCH_FUNC_NAME(method, u8) ||		\
	  (fn) == FETCH_FUNC_NAME(method, u16) ||		\
	  (fn) == FETCH_FUNC_NAME(method, u32) ||		\
	  (fn) == FETCH_FUNC_NAME(method, u64) ||		\
	  (fn) == FETCH_FUNC_NAME(method, string) ||		\
	  (fn) == FETCH_FUNC_NAME(method, string_size)))
111
112/* Data fetch function templates */
113#define DEFINE_FETCH_reg(type) \
114static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
115 void *offset, void *dest) \
116{ \
117 *(type *)dest = (type)regs_get_register(regs, \
118 (unsigned int)((unsigned long)offset)); \
119}
120DEFINE_BASIC_FETCH_FUNCS(reg)
121/* No string on the register */
122#define fetch_reg_string NULL
123#define fetch_reg_string_size NULL
124
125#define DEFINE_FETCH_stack(type) \
126static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
127 void *offset, void *dest) \
128{ \
129 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
130 (unsigned int)((unsigned long)offset)); \
131}
132DEFINE_BASIC_FETCH_FUNCS(stack)
133/* No string on the stack entry */
134#define fetch_stack_string NULL
135#define fetch_stack_string_size NULL
136
137#define DEFINE_FETCH_retval(type) \
138static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
139 void *dummy, void *dest) \
140{ \
141 *(type *)dest = (type)regs_return_value(regs); \
142}
143DEFINE_BASIC_FETCH_FUNCS(retval)
144/* No string on the retval */
145#define fetch_retval_string NULL
146#define fetch_retval_string_size NULL
147
148#define DEFINE_FETCH_memory(type) \
149static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
150 void *addr, void *dest) \
151{ \
152 type retval; \
153 if (probe_kernel_address(addr, retval)) \
154 *(type *)dest = 0; \
155 else \
156 *(type *)dest = retval; \
157}
158DEFINE_BASIC_FETCH_FUNCS(memory)
159/*
160 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
161 * length and relative data location.
162 */
163static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
164 void *addr, void *dest)
165{
166 long ret;
167 int maxlen = get_rloc_len(*(u32 *)dest);
168 u8 *dst = get_rloc_data(dest);
169 u8 *src = addr;
170 mm_segment_t old_fs = get_fs();
171
172 if (!maxlen)
173 return;
174
175 /*
176 * Try to get string again, since the string can be changed while
177 * probing.
178 */
179 set_fs(KERNEL_DS);
180 pagefault_disable();
181
182 do
183 ret = __copy_from_user_inatomic(dst++, src++, 1);
184 while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
185
186 dst[-1] = '\0';
187 pagefault_enable();
188 set_fs(old_fs);
189
190 if (ret < 0) { /* Failed to fetch string */
191 ((u8 *)get_rloc_data(dest))[0] = '\0';
192 *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
193 } else {
194 *(u32 *)dest = make_data_rloc(src - (u8 *)addr,
195 get_rloc_offs(*(u32 *)dest));
196 }
197}
198
199/* Return the length of string -- including null terminal byte */
200static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
201 void *addr, void *dest)
202{
203 mm_segment_t old_fs;
204 int ret, len = 0;
205 u8 c;
206
207 old_fs = get_fs();
208 set_fs(KERNEL_DS);
209 pagefault_disable();
210
211 do {
212 ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
213 len++;
214 } while (c && ret == 0 && len < MAX_STRING_SIZE);
215
216 pagefault_enable();
217 set_fs(old_fs);
218
219 if (ret < 0) /* Failed to check the length */
220 *(u32 *)dest = 0;
221 else
222 *(u32 *)dest = len;
223}
224
/* Memory fetching by symbol */
struct symbol_cache {
	char		*symbol;	/* symbol name (kstrdup'ed copy) */
	long		offset;		/* offset added to the symbol address */
	unsigned long	addr;		/* resolved address, 0 if not found */
};

/*
 * Look the symbol up again via kallsyms_lookup_name() and refresh
 * sc->addr: symbol address plus offset, or 0 when the lookup fails.
 * Returns the new sc->addr.
 */
static unsigned long update_symbol_cache(struct symbol_cache *sc)
{
	unsigned long resolved;

	resolved = (unsigned long)kallsyms_lookup_name(sc->symbol);
	sc->addr = resolved ? resolved + sc->offset : 0;

	return sc->addr;
}
241
242static void free_symbol_cache(struct symbol_cache *sc)
243{
244 kfree(sc->symbol);
245 kfree(sc);
246}
247
248static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
249{
250 struct symbol_cache *sc;
251
252 if (!sym || strlen(sym) == 0)
253 return NULL;
254
255 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
256 if (!sc)
257 return NULL;
258
259 sc->symbol = kstrdup(sym, GFP_KERNEL);
260 if (!sc->symbol) {
261 kfree(sc);
262 return NULL;
263 }
264 sc->offset = offset;
265 update_symbol_cache(sc);
266
267 return sc;
268}
269
270#define DEFINE_FETCH_symbol(type) \
271static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
272 void *data, void *dest) \
273{ \
274 struct symbol_cache *sc = data; \
275 if (sc->addr) \
276 fetch_memory_##type(regs, (void *)sc->addr, dest); \
277 else \
278 *(type *)dest = 0; \
279}
280DEFINE_BASIC_FETCH_FUNCS(symbol)
281DEFINE_FETCH_symbol(string)
282DEFINE_FETCH_symbol(string_size)
283
284/* Dereference memory access function */
285struct deref_fetch_param {
286 struct fetch_param orig;
287 long offset;
288};
289
290#define DEFINE_FETCH_deref(type) \
291static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
292 void *data, void *dest) \
293{ \
294 struct deref_fetch_param *dprm = data; \
295 unsigned long addr; \
296 call_fetch(&dprm->orig, regs, &addr); \
297 if (addr) { \
298 addr += dprm->offset; \
299 fetch_memory_##type(regs, (void *)addr, dest); \
300 } else \
301 *(type *)dest = 0; \
302}
303DEFINE_BASIC_FETCH_FUNCS(deref)
304DEFINE_FETCH_deref(string)
305DEFINE_FETCH_deref(string_size)
306
307static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
308{
309 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
310 update_deref_fetch_param(data->orig.data);
311 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
312 update_symbol_cache(data->orig.data);
313}
314
315static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
316{
317 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
318 free_deref_fetch_param(data->orig.data);
319 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
320 free_symbol_cache(data->orig.data);
321 kfree(data);
322}
323
324/* Bitfield fetch function */
325struct bitfield_fetch_param {
326 struct fetch_param orig;
327 unsigned char hi_shift;
328 unsigned char low_shift;
329};
330
331#define DEFINE_FETCH_bitfield(type) \
332static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
333 void *data, void *dest) \
334{ \
335 struct bitfield_fetch_param *bprm = data; \
336 type buf = 0; \
337 call_fetch(&bprm->orig, regs, &buf); \
338 if (buf) { \
339 buf <<= bprm->hi_shift; \
340 buf >>= bprm->low_shift; \
341 } \
342 *(type *)dest = buf; \
343}
344
345DEFINE_BASIC_FETCH_FUNCS(bitfield)
346#define fetch_bitfield_string NULL
347#define fetch_bitfield_string_size NULL
348
349static __kprobes void
350update_bitfield_fetch_param(struct bitfield_fetch_param *data)
351{
352 /*
353 * Don't check the bitfield itself, because this must be the
354 * last fetch function.
355 */
356 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
357 update_deref_fetch_param(data->orig.data);
358 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
359 update_symbol_cache(data->orig.data);
360}
361
362static __kprobes void
363free_bitfield_fetch_param(struct bitfield_fetch_param *data)
364{
365 /*
366 * Don't check the bitfield itself, because this must be the
367 * last fetch function.
368 */
369 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
370 free_deref_fetch_param(data->orig.data);
371 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
372 free_symbol_cache(data->orig.data);
373
374 kfree(data);
375}
376
377/* Default (unsigned long) fetch type */
378#define __DEFAULT_FETCH_TYPE(t) u##t
379#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
380#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
381#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
382
383#define ASSIGN_FETCH_FUNC(method, type) \
384 [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
385
386#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
387 {.name = _name, \
388 .size = _size, \
389 .is_signed = sign, \
390 .print = PRINT_TYPE_FUNC_NAME(ptype), \
391 .fmt = PRINT_TYPE_FMT_NAME(ptype), \
392 .fmttype = _fmttype, \
393 .fetch = { \
394ASSIGN_FETCH_FUNC(reg, ftype), \
395ASSIGN_FETCH_FUNC(stack, ftype), \
396ASSIGN_FETCH_FUNC(retval, ftype), \
397ASSIGN_FETCH_FUNC(memory, ftype), \
398ASSIGN_FETCH_FUNC(symbol, ftype), \
399ASSIGN_FETCH_FUNC(deref, ftype), \
400ASSIGN_FETCH_FUNC(bitfield, ftype), \
401 } \
402 }
403
404#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
405 __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
406
407#define FETCH_TYPE_STRING 0
408#define FETCH_TYPE_STRSIZE 1
409
410/* Fetch type information table */
411static const struct fetch_type fetch_type_table[] = {
412 /* Special types */
413 [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
414 sizeof(u32), 1, "__data_loc char[]"),
415 [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
416 string_size, sizeof(u32), 0, "u32"),
417 /* Basic types */
418 ASSIGN_FETCH_TYPE(u8, u8, 0),
419 ASSIGN_FETCH_TYPE(u16, u16, 0),
420 ASSIGN_FETCH_TYPE(u32, u32, 0),
421 ASSIGN_FETCH_TYPE(u64, u64, 0),
422 ASSIGN_FETCH_TYPE(s8, u8, 1),
423 ASSIGN_FETCH_TYPE(s16, u16, 1),
424 ASSIGN_FETCH_TYPE(s32, u32, 1),
425 ASSIGN_FETCH_TYPE(s64, u64, 1),
426};
427
428static const struct fetch_type *find_fetch_type(const char *type)
429{
430 int i;
431
432 if (!type)
433 type = DEFAULT_FETCH_TYPE_STR;
434
435 /* Special case: bitfield */
436 if (*type == 'b') {
437 unsigned long bs;
438
439 type = strchr(type, '/');
440 if (!type)
441 goto fail;
442
443 type++;
444 if (strict_strtoul(type, 0, &bs))
445 goto fail;
446
447 switch (bs) {
448 case 8:
449 return find_fetch_type("u8");
450 case 16:
451 return find_fetch_type("u16");
452 case 32:
453 return find_fetch_type("u32");
454 case 64:
455 return find_fetch_type("u64");
456 default:
457 goto fail;
458 }
459 }
460
461 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
462 if (strcmp(type, fetch_type_table[i].name) == 0)
463 return &fetch_type_table[i];
464
465fail:
466 return NULL;
467}
468
469/* Special function : only accept unsigned long */
470static __kprobes void fetch_stack_address(struct pt_regs *regs,
471 void *dummy, void *dest)
472{
473 *(unsigned long *)dest = kernel_stack_pointer(regs);
474}
475
476static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
477 fetch_func_t orig_fn)
478{
479 int i;
480
481 if (type != &fetch_type_table[FETCH_TYPE_STRING])
482 return NULL; /* Only string type needs size function */
483
484 for (i = 0; i < FETCH_MTD_END; i++)
485 if (type->fetch[i] == orig_fn)
486 return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
487
488 WARN_ON(1); /* This should not happen */
489
490 return NULL;
491}
492
493/* Split symbol and offset. */
494int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset)
495{
496 char *tmp;
497 int ret;
498
499 if (!offset)
500 return -EINVAL;
501
502 tmp = strchr(symbol, '+');
503 if (tmp) {
504 /* skip sign because strict_strtol doesn't accept '+' */
505 ret = strict_strtoul(tmp + 1, 0, offset);
506 if (ret)
507 return ret;
508
509 *tmp = '\0';
510 } else
511 *offset = 0;
512
513 return 0;
514}
515
516#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
517
518static int parse_probe_vars(char *arg, const struct fetch_type *t,
519 struct fetch_param *f, bool is_return)
520{
521 int ret = 0;
522 unsigned long param;
523
524 if (strcmp(arg, "retval") == 0) {
525 if (is_return)
526 f->fn = t->fetch[FETCH_MTD_retval];
527 else
528 ret = -EINVAL;
529 } else if (strncmp(arg, "stack", 5) == 0) {
530 if (arg[5] == '\0') {
531 if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
532 f->fn = fetch_stack_address;
533 else
534 ret = -EINVAL;
535 } else if (isdigit(arg[5])) {
536 ret = strict_strtoul(arg + 5, 10, &param);
537 if (ret || param > PARAM_MAX_STACK)
538 ret = -EINVAL;
539 else {
540 f->fn = t->fetch[FETCH_MTD_stack];
541 f->data = (void *)param;
542 }
543 } else
544 ret = -EINVAL;
545 } else
546 ret = -EINVAL;
547
548 return ret;
549}
550
551/* Recursive argument parser */
552static int parse_probe_arg(char *arg, const struct fetch_type *t,
553 struct fetch_param *f, bool is_return, bool is_kprobe)
554{
555 unsigned long param;
556 long offset;
557 char *tmp;
558 int ret;
559
560 ret = 0;
561
562 /* Until uprobe_events supports only reg arguments */
563 if (!is_kprobe && arg[0] != '%')
564 return -EINVAL;
565
566 switch (arg[0]) {
567 case '$':
568 ret = parse_probe_vars(arg + 1, t, f, is_return);
569 break;
570
571 case '%': /* named register */
572 ret = regs_query_register_offset(arg + 1);
573 if (ret >= 0) {
574 f->fn = t->fetch[FETCH_MTD_reg];
575 f->data = (void *)(unsigned long)ret;
576 ret = 0;
577 }
578 break;
579
580 case '@': /* memory or symbol */
581 if (isdigit(arg[1])) {
582 ret = strict_strtoul(arg + 1, 0, &param);
583 if (ret)
584 break;
585
586 f->fn = t->fetch[FETCH_MTD_memory];
587 f->data = (void *)param;
588 } else {
589 ret = traceprobe_split_symbol_offset(arg + 1, &offset);
590 if (ret)
591 break;
592
593 f->data = alloc_symbol_cache(arg + 1, offset);
594 if (f->data)
595 f->fn = t->fetch[FETCH_MTD_symbol];
596 }
597 break;
598
599 case '+': /* deref memory */
600 arg++; /* Skip '+', because strict_strtol() rejects it. */
601 case '-':
602 tmp = strchr(arg, '(');
603 if (!tmp)
604 break;
605
606 *tmp = '\0';
607 ret = strict_strtol(arg, 0, &offset);
608
609 if (ret)
610 break;
611
612 arg = tmp + 1;
613 tmp = strrchr(arg, ')');
614
615 if (tmp) {
616 struct deref_fetch_param *dprm;
617 const struct fetch_type *t2;
618
619 t2 = find_fetch_type(NULL);
620 *tmp = '\0';
621 dprm = kzalloc(sizeof(struct deref_fetch_param), GFP_KERNEL);
622
623 if (!dprm)
624 return -ENOMEM;
625
626 dprm->offset = offset;
627 ret = parse_probe_arg(arg, t2, &dprm->orig, is_return,
628 is_kprobe);
629 if (ret)
630 kfree(dprm);
631 else {
632 f->fn = t->fetch[FETCH_MTD_deref];
633 f->data = (void *)dprm;
634 }
635 }
636 break;
637 }
638 if (!ret && !f->fn) { /* Parsed, but do not find fetch method */
639 pr_info("%s type has no corresponding fetch method.\n", t->name);
640 ret = -EINVAL;
641 }
642
643 return ret;
644}
645
646#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long))
647
648/* Bitfield type needs to be parsed into a fetch function */
649static int __parse_bitfield_probe_arg(const char *bf,
650 const struct fetch_type *t,
651 struct fetch_param *f)
652{
653 struct bitfield_fetch_param *bprm;
654 unsigned long bw, bo;
655 char *tail;
656
657 if (*bf != 'b')
658 return 0;
659
660 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
661 if (!bprm)
662 return -ENOMEM;
663
664 bprm->orig = *f;
665 f->fn = t->fetch[FETCH_MTD_bitfield];
666 f->data = (void *)bprm;
667 bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
668
669 if (bw == 0 || *tail != '@')
670 return -EINVAL;
671
672 bf = tail + 1;
673 bo = simple_strtoul(bf, &tail, 0);
674
675 if (tail == bf || *tail != '/')
676 return -EINVAL;
677
678 bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
679 bprm->low_shift = bprm->hi_shift + bo;
680
681 return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
682}
683
684/* String length checking wrapper */
685int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
686 struct probe_arg *parg, bool is_return, bool is_kprobe)
687{
688 const char *t;
689 int ret;
690
691 if (strlen(arg) > MAX_ARGSTR_LEN) {
692 pr_info("Argument is too long.: %s\n", arg);
693 return -ENOSPC;
694 }
695 parg->comm = kstrdup(arg, GFP_KERNEL);
696 if (!parg->comm) {
697 pr_info("Failed to allocate memory for command '%s'.\n", arg);
698 return -ENOMEM;
699 }
700 t = strchr(parg->comm, ':');
701 if (t) {
702 arg[t - parg->comm] = '\0';
703 t++;
704 }
705 parg->type = find_fetch_type(t);
706 if (!parg->type) {
707 pr_info("Unsupported type: %s\n", t);
708 return -EINVAL;
709 }
710 parg->offset = *size;
711 *size += parg->type->size;
712 ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, is_kprobe);
713
714 if (ret >= 0 && t != NULL)
715 ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
716
717 if (ret >= 0) {
718 parg->fetch_size.fn = get_fetch_size_function(parg->type,
719 parg->fetch.fn);
720 parg->fetch_size.data = parg->fetch.data;
721 }
722
723 return ret;
724}
725
726/* Return 1 if name is reserved or already used by another argument */
727int traceprobe_conflict_field_name(const char *name,
728 struct probe_arg *args, int narg)
729{
730 int i;
731
732 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
733 if (strcmp(reserved_field_names[i], name) == 0)
734 return 1;
735
736 for (i = 0; i < narg; i++)
737 if (strcmp(args[i].name, name) == 0)
738 return 1;
739
740 return 0;
741}
742
743void traceprobe_update_arg(struct probe_arg *arg)
744{
745 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
746 update_bitfield_fetch_param(arg->fetch.data);
747 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
748 update_deref_fetch_param(arg->fetch.data);
749 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
750 update_symbol_cache(arg->fetch.data);
751}
752
753void traceprobe_free_probe_arg(struct probe_arg *arg)
754{
755 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
756 free_bitfield_fetch_param(arg->fetch.data);
757 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
758 free_deref_fetch_param(arg->fetch.data);
759 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
760 free_symbol_cache(arg->fetch.data);
761
762 kfree(arg->name);
763 kfree(arg->comm);
764}
765
766int traceprobe_command(const char *buf, int (*createfn)(int, char **))
767{
768 char **argv;
769 int argc, ret;
770
771 argc = 0;
772 ret = 0;
773 argv = argv_split(GFP_KERNEL, buf, &argc);
774 if (!argv)
775 return -ENOMEM;
776
777 if (argc)
778 ret = createfn(argc, argv);
779
780 argv_free(argv);
781
782 return ret;
783}
784
785#define WRITE_BUFSIZE 4096
786
787ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer,
788 size_t count, loff_t *ppos,
789 int (*createfn)(int, char **))
790{
791 char *kbuf, *tmp;
792 int ret = 0;
793 size_t done = 0;
794 size_t size;
795
796 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
797 if (!kbuf)
798 return -ENOMEM;
799
800 while (done < count) {
801 size = count - done;
802
803 if (size >= WRITE_BUFSIZE)
804 size = WRITE_BUFSIZE - 1;
805
806 if (copy_from_user(kbuf, buffer + done, size)) {
807 ret = -EFAULT;
808 goto out;
809 }
810 kbuf[size] = '\0';
811 tmp = strchr(kbuf, '\n');
812
813 if (tmp) {
814 *tmp = '\0';
815 size = tmp - kbuf + 1;
816 } else if (done + size < count) {
817 pr_warning("Line length is too long: "
818 "Should be less than %d.", WRITE_BUFSIZE);
819 ret = -EINVAL;
820 goto out;
821 }
822 done += size;
823 /* Remove comments */
824 tmp = strchr(kbuf, '#');
825
826 if (tmp)
827 *tmp = '\0';
828
829 ret = traceprobe_command(kbuf, createfn);
830 if (ret)
831 goto out;
832 }
833 ret = done;
834
835out:
836 kfree(kbuf);
837
838 return ret;
839}
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
new file mode 100644
index 000000000000..933708677814
--- /dev/null
+++ b/kernel/trace/trace_probe.h
@@ -0,0 +1,161 @@
1/*
2 * Common header file for probe-based Dynamic events.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 *
17 * This code was copied from kernel/trace/trace_kprobe.h written by
18 * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
19 *
20 * Updates to make this generic:
21 * Copyright (C) IBM Corporation, 2010-2011
22 * Author: Srikar Dronamraju
23 */
24
25#include <linux/seq_file.h>
26#include <linux/slab.h>
27#include <linux/smp.h>
28#include <linux/debugfs.h>
29#include <linux/types.h>
30#include <linux/string.h>
31#include <linux/ctype.h>
32#include <linux/ptrace.h>
33#include <linux/perf_event.h>
34#include <linux/kprobes.h>
35#include <linux/stringify.h>
36#include <linux/limits.h>
37#include <linux/uaccess.h>
38#include <asm/bitsperlong.h>
39
40#include "trace.h"
41#include "trace_output.h"
42
/*
 * Size limits.  In this file's users, MAX_TRACE_ARGS bounds the number of
 * fetch arguments parsed per probe and MAX_EVENT_NAME_LEN sizes the buffer
 * used for generated event/argument names.
 */
43#define MAX_TRACE_ARGS 128
44#define MAX_ARGSTR_LEN 63
45#define MAX_EVENT_NAME_LEN 64
46#define MAX_STRING_SIZE PATH_MAX
47
48/* Reserved field names */
49#define FIELD_STRING_IP "__probe_ip"
50#define FIELD_STRING_RETIP "__probe_ret_ip"
51#define FIELD_STRING_FUNC "__probe_func"
52
/*
 * Define one fixed member of the event entry as a trace field.
 *
 * The macro expands in the caller's scope and relies on three locals being
 * present there: `ret` (int), `event_call` and `field` (an instance of the
 * entry structure, used only for offsetof/sizeof).  If
 * trace_define_field() fails, the macro returns from the ENCLOSING
 * function with that error.
 */
53#undef DEFINE_FIELD
54#define DEFINE_FIELD(type, item, name, is_signed) \
55 do { \
56 ret = trace_define_field(event_call, #type, name, \
57 offsetof(typeof(field), item), \
58 sizeof(field.item), is_signed, \
59 FILTER_OTHER); \
60 if (ret) \
61 return ret; \
62 } while (0)
63
64
/* Single-bit values, so several can be OR-ed together in one flags word. */
65/* Flags for trace_probe */
66#define TP_FLAG_TRACE 1
67#define TP_FLAG_PROFILE 2
68#define TP_FLAG_REGISTERED 4
69#define TP_FLAG_UPROBE 8
70
71
/*
 * Layout of a data_rloc word: the length sits in the upper 16 bits and the
 * relative offset in the lower 16 bits.
 */
72/* data_rloc: data relative location, compatible with u32 */
73#define make_data_rloc(len, roffs) \
74 (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
75#define get_rloc_len(dl) ((u32)(dl) >> 16)
76#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
77
78/*
79 * Convert data_rloc to data_loc:
80 * data_rloc stores the offset from data_rloc itself, but data_loc
81 * stores the offset from event entry.
82 */
83#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
84
/*
 * call_fetch() invokes a fetch_func_t as fn(regs, fetch private data,
 * destination buffer).
 */
85/* Data fetch function type */
86typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
/*
 * The uprobe event printer calls this as print(seq, argument name,
 * pointer to the argument's data, pointer to the event entry) and treats a
 * zero return as "output did not fit".
 */
87/* Printing function type */
88typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, void *);
89
/*
 * One enumerator per fetch method.  FETCH_MTD_END is not a method: it is
 * the number of methods and sizes the fetch[] table in struct fetch_type.
 */
90/* Fetch types */
91enum {
92 FETCH_MTD_reg = 0,
93 FETCH_MTD_stack,
94 FETCH_MTD_retval,
95 FETCH_MTD_memory,
96 FETCH_MTD_symbol,
97 FETCH_MTD_deref,
98 FETCH_MTD_bitfield,
99 FETCH_MTD_END,
100};
101
102/* Fetch type information table */
103struct fetch_type {
104 const char *name; /* Name of type */
105 size_t size; /* Byte size of type */
106 int is_signed; /* Signed flag */
107 print_type_func_t print; /* Print functions */
108 const char *fmt; /* Format string */
109 const char *fmttype; /* Name in format file */
110 /* Fetch functions, indexed by the FETCH_MTD_* method enumerators */
111 fetch_func_t fetch[FETCH_MTD_END];
112};
113
/* A fetch function together with the private data handed to it on each call. */
114struct fetch_param {
115 fetch_func_t fn;
116 void *data;
117};
118
/*
 * One parsed probe argument.  `name` and `comm` are heap strings released
 * by traceprobe_free_probe_arg().
 */
119struct probe_arg {
120 struct fetch_param fetch;
121 struct fetch_param fetch_size;
122 unsigned int offset; /* Offset from argument entry */
123 const char *name; /* Name of this argument */
124 const char *comm; /* Command of this argument */
125 const struct fetch_type *type; /* Type of this argument */
126};
127
128static inline __kprobes void call_fetch(struct fetch_param *fprm,
129 struct pt_regs *regs, void *dest)
130{
131 return fprm->fn(regs, fprm->data, dest);
132}
133
/*
 * Check that @name is usable as an event, group or field name: the first
 * character must be a letter or '_', and every following character a
 * letter, a digit or '_'.
 *
 * Returns 1 when the name is acceptable and 0 otherwise (the empty string
 * is rejected).
 */
static inline int is_good_name(const char *name)
{
	const char *p = name;

	if (*p != '_' && !isalpha(*p))
		return 0;

	for (p++; *p != '\0'; p++) {
		if (*p == '_' || isalpha(*p) || isdigit(*p))
			continue;
		return 0;
	}

	return 1;
}
145
146extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
147 struct probe_arg *parg, bool is_return, bool is_kprobe);
148
149extern int traceprobe_conflict_field_name(const char *name,
150 struct probe_arg *args, int narg);
151
152extern void traceprobe_update_arg(struct probe_arg *arg);
153extern void traceprobe_free_probe_arg(struct probe_arg *arg);
154
155extern int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset);
156
157extern ssize_t traceprobe_probes_write(struct file *file,
158 const char __user *buffer, size_t count, loff_t *ppos,
159 int (*createfn)(int, char**));
160
161extern int traceprobe_command(const char *buf, int (*createfn)(int, char**));
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
new file mode 100644
index 000000000000..2b36ac68549e
--- /dev/null
+++ b/kernel/trace/trace_uprobe.c
@@ -0,0 +1,788 @@
1/*
2 * uprobes-based tracing events
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 *
17 * Copyright (C) IBM Corporation, 2010-2012
18 * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
19 */
20
21#include <linux/module.h>
22#include <linux/uaccess.h>
23#include <linux/uprobes.h>
24#include <linux/namei.h>
25
26#include "trace_probe.h"
27
/* Group (event system) name used when a command does not specify one. */
28#define UPROBE_EVENT_SYSTEM "uprobes"
29
30/*
31 * uprobe event core functions
32 */
33struct trace_uprobe;
/*
 * Wraps the uprobe_consumer handed to uprobe_register() so the handler can
 * get back to the owning trace_uprobe with container_of().
 */
34struct uprobe_trace_consumer {
35 struct uprobe_consumer cons;
36 struct trace_uprobe *tu;
37};
38
/*
 * One user-space probe event.  Allocated with room for its arguments by
 * alloc_trace_uprobe() and released by free_trace_uprobe().
 */
39struct trace_uprobe {
40 struct list_head list; /* link in uprobe_list */
41 struct ftrace_event_class class;
42 struct ftrace_event_call call;
43 struct uprobe_trace_consumer *consumer; /* set while the probe is enabled, else NULL */
44 struct inode *inode; /* reference taken with igrab(), dropped with iput() */
45 char *filename; /* heap copy of the probed path */
46 unsigned long offset; /* probe offset parsed from the command */
47 unsigned long nhit; /* incremented by the trace handler on each hit */
48 unsigned int flags; /* For TP_FLAG_* */
49 ssize_t size; /* trace entry size */
50 unsigned int nr_args; /* number of entries used in args[] */
51 struct probe_arg args[];
52};
53
/* Bytes needed for a trace_uprobe that carries n trailing probe_arg entries. */
54#define SIZEOF_TRACE_UPROBE(n) \
55 (offsetof(struct trace_uprobe, args) + \
56 (sizeof(struct probe_arg) * (n)))
57
58static int register_uprobe_event(struct trace_uprobe *tu);
59static void unregister_uprobe_event(struct trace_uprobe *tu);
60
/*
 * uprobe_list holds every registered trace_uprobe; the code in this file
 * takes uprobe_lock around additions, removals and walks of the list.
 */
61static DEFINE_MUTEX(uprobe_lock);
62static LIST_HEAD(uprobe_list);
63
64static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
65
66/*
67 * Allocate new trace_uprobe and initialize it (including uprobes).
68 */
69static struct trace_uprobe *
70alloc_trace_uprobe(const char *group, const char *event, int nargs)
71{
72 struct trace_uprobe *tu;
73
74 if (!event || !is_good_name(event))
75 return ERR_PTR(-EINVAL);
76
77 if (!group || !is_good_name(group))
78 return ERR_PTR(-EINVAL);
79
80 tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL);
81 if (!tu)
82 return ERR_PTR(-ENOMEM);
83
84 tu->call.class = &tu->class;
85 tu->call.name = kstrdup(event, GFP_KERNEL);
86 if (!tu->call.name)
87 goto error;
88
89 tu->class.system = kstrdup(group, GFP_KERNEL);
90 if (!tu->class.system)
91 goto error;
92
93 INIT_LIST_HEAD(&tu->list);
94 return tu;
95
96error:
97 kfree(tu->call.name);
98 kfree(tu);
99
100 return ERR_PTR(-ENOMEM);
101}
102
103static void free_trace_uprobe(struct trace_uprobe *tu)
104{
105 int i;
106
107 for (i = 0; i < tu->nr_args; i++)
108 traceprobe_free_probe_arg(&tu->args[i]);
109
110 iput(tu->inode);
111 kfree(tu->call.class->system);
112 kfree(tu->call.name);
113 kfree(tu->filename);
114 kfree(tu);
115}
116
117static struct trace_uprobe *find_probe_event(const char *event, const char *group)
118{
119 struct trace_uprobe *tu;
120
121 list_for_each_entry(tu, &uprobe_list, list)
122 if (strcmp(tu->call.name, event) == 0 &&
123 strcmp(tu->call.class->system, group) == 0)
124 return tu;
125
126 return NULL;
127}
128
129/* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */
130static void unregister_trace_uprobe(struct trace_uprobe *tu)
131{
132 list_del(&tu->list);
133 unregister_uprobe_event(tu);
134 free_trace_uprobe(tu);
135}
136
137/* Register a trace_uprobe and probe_event */
138static int register_trace_uprobe(struct trace_uprobe *tu)
139{
140 struct trace_uprobe *old_tp;
141 int ret;
142
143 mutex_lock(&uprobe_lock);
144
145 /* register as an event */
146 old_tp = find_probe_event(tu->call.name, tu->call.class->system);
147 if (old_tp)
148 /* delete old event */
149 unregister_trace_uprobe(old_tp);
150
151 ret = register_uprobe_event(tu);
152 if (ret) {
153 pr_warning("Failed to register probe event(%d)\n", ret);
154 goto end;
155 }
156
157 list_add_tail(&tu->list, &uprobe_list);
158
159end:
160 mutex_unlock(&uprobe_lock);
161
162 return ret;
163}
164
165/*
166 * Argument syntax:
167 * - Add uprobe: p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS]
168 *
169 * - Remove uprobe: -:[GRP/]EVENT
170 */
171static int create_trace_uprobe(int argc, char **argv)
172{
173 struct trace_uprobe *tu;
174 struct inode *inode;
175 char *arg, *event, *group, *filename;
176 char buf[MAX_EVENT_NAME_LEN];
177 struct path path;
178 unsigned long offset;
179 bool is_delete;
180 int i, ret;
181
182 inode = NULL;
183 ret = 0;
184 is_delete = false;
185 event = NULL;
186 group = NULL;
187
188 /* argc must be >= 1 */
189 if (argv[0][0] == '-')
190 is_delete = true;
191 else if (argv[0][0] != 'p') {
192 pr_info("Probe definition must be started with 'p', 'r' or" " '-'.\n");
193 return -EINVAL;
194 }
195
196 if (argv[0][1] == ':') {
197 event = &argv[0][2];
198 arg = strchr(event, '/');
199
200 if (arg) {
201 group = event;
202 event = arg + 1;
203 event[-1] = '\0';
204
205 if (strlen(group) == 0) {
206 pr_info("Group name is not specified\n");
207 return -EINVAL;
208 }
209 }
210 if (strlen(event) == 0) {
211 pr_info("Event name is not specified\n");
212 return -EINVAL;
213 }
214 }
215 if (!group)
216 group = UPROBE_EVENT_SYSTEM;
217
218 if (is_delete) {
219 if (!event) {
220 pr_info("Delete command needs an event name.\n");
221 return -EINVAL;
222 }
223 mutex_lock(&uprobe_lock);
224 tu = find_probe_event(event, group);
225
226 if (!tu) {
227 mutex_unlock(&uprobe_lock);
228 pr_info("Event %s/%s doesn't exist.\n", group, event);
229 return -ENOENT;
230 }
231 /* delete an event */
232 unregister_trace_uprobe(tu);
233 mutex_unlock(&uprobe_lock);
234 return 0;
235 }
236
237 if (argc < 2) {
238 pr_info("Probe point is not specified.\n");
239 return -EINVAL;
240 }
241 if (isdigit(argv[1][0])) {
242 pr_info("probe point must be have a filename.\n");
243 return -EINVAL;
244 }
245 arg = strchr(argv[1], ':');
246 if (!arg)
247 goto fail_address_parse;
248
249 *arg++ = '\0';
250 filename = argv[1];
251 ret = kern_path(filename, LOOKUP_FOLLOW, &path);
252 if (ret)
253 goto fail_address_parse;
254
255 ret = strict_strtoul(arg, 0, &offset);
256 if (ret)
257 goto fail_address_parse;
258
259 inode = igrab(path.dentry->d_inode);
260
261 argc -= 2;
262 argv += 2;
263
264 /* setup a probe */
265 if (!event) {
266 char *tail = strrchr(filename, '/');
267 char *ptr;
268
269 ptr = kstrdup((tail ? tail + 1 : filename), GFP_KERNEL);
270 if (!ptr) {
271 ret = -ENOMEM;
272 goto fail_address_parse;
273 }
274
275 tail = ptr;
276 ptr = strpbrk(tail, ".-_");
277 if (ptr)
278 *ptr = '\0';
279
280 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset);
281 event = buf;
282 kfree(tail);
283 }
284
285 tu = alloc_trace_uprobe(group, event, argc);
286 if (IS_ERR(tu)) {
287 pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
288 ret = PTR_ERR(tu);
289 goto fail_address_parse;
290 }
291 tu->offset = offset;
292 tu->inode = inode;
293 tu->filename = kstrdup(filename, GFP_KERNEL);
294
295 if (!tu->filename) {
296 pr_info("Failed to allocate filename.\n");
297 ret = -ENOMEM;
298 goto error;
299 }
300
301 /* parse arguments */
302 ret = 0;
303 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
304 /* Increment count for freeing args in error case */
305 tu->nr_args++;
306
307 /* Parse argument name */
308 arg = strchr(argv[i], '=');
309 if (arg) {
310 *arg++ = '\0';
311 tu->args[i].name = kstrdup(argv[i], GFP_KERNEL);
312 } else {
313 arg = argv[i];
314 /* If argument name is omitted, set "argN" */
315 snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
316 tu->args[i].name = kstrdup(buf, GFP_KERNEL);
317 }
318
319 if (!tu->args[i].name) {
320 pr_info("Failed to allocate argument[%d] name.\n", i);
321 ret = -ENOMEM;
322 goto error;
323 }
324
325 if (!is_good_name(tu->args[i].name)) {
326 pr_info("Invalid argument[%d] name: %s\n", i, tu->args[i].name);
327 ret = -EINVAL;
328 goto error;
329 }
330
331 if (traceprobe_conflict_field_name(tu->args[i].name, tu->args, i)) {
332 pr_info("Argument[%d] name '%s' conflicts with "
333 "another field.\n", i, argv[i]);
334 ret = -EINVAL;
335 goto error;
336 }
337
338 /* Parse fetch argument */
339 ret = traceprobe_parse_probe_arg(arg, &tu->size, &tu->args[i], false, false);
340 if (ret) {
341 pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
342 goto error;
343 }
344 }
345
346 ret = register_trace_uprobe(tu);
347 if (ret)
348 goto error;
349 return 0;
350
351error:
352 free_trace_uprobe(tu);
353 return ret;
354
355fail_address_parse:
356 if (inode)
357 iput(inode);
358
359 pr_info("Failed to parse address.\n");
360
361 return ret;
362}
363
364static void cleanup_all_probes(void)
365{
366 struct trace_uprobe *tu;
367
368 mutex_lock(&uprobe_lock);
369 while (!list_empty(&uprobe_list)) {
370 tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
371 unregister_trace_uprobe(tu);
372 }
373 mutex_unlock(&uprobe_lock);
374}
375
376/* Probes listing interfaces */
377static void *probes_seq_start(struct seq_file *m, loff_t *pos)
378{
379 mutex_lock(&uprobe_lock);
380 return seq_list_start(&uprobe_list, *pos);
381}
382
383static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
384{
385 return seq_list_next(v, &uprobe_list, pos);
386}
387
388static void probes_seq_stop(struct seq_file *m, void *v)
389{
390 mutex_unlock(&uprobe_lock);
391}
392
393static int probes_seq_show(struct seq_file *m, void *v)
394{
395 struct trace_uprobe *tu = v;
396 int i;
397
398 seq_printf(m, "p:%s/%s", tu->call.class->system, tu->call.name);
399 seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset);
400
401 for (i = 0; i < tu->nr_args; i++)
402 seq_printf(m, " %s=%s", tu->args[i].name, tu->args[i].comm);
403
404 seq_printf(m, "\n");
405 return 0;
406}
407
/* seq_file iterator installed by probes_open() to list the defined probes. */
408static const struct seq_operations probes_seq_op = {
409 .start = probes_seq_start,
410 .next = probes_seq_next,
411 .stop = probes_seq_stop,
412 .show = probes_seq_show
413};
414
415static int probes_open(struct inode *inode, struct file *file)
416{
417 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
418 cleanup_all_probes();
419
420 return seq_open(file, &probes_seq_op);
421}
422
423static ssize_t probes_write(struct file *file, const char __user *buffer,
424 size_t count, loff_t *ppos)
425{
426 return traceprobe_probes_write(file, buffer, count, ppos, create_trace_uprobe);
427}
428
/*
 * File operations of the "uprobe_events" file: reads list the probes
 * through seq_file, writes add or remove probes.
 */
429static const struct file_operations uprobe_events_ops = {
430 .owner = THIS_MODULE,
431 .open = probes_open,
432 .read = seq_read,
433 .llseek = seq_lseek,
434 .release = seq_release,
435 .write = probes_write,
436};
437
438/* Probes profiling interfaces */
439static int probes_profile_seq_show(struct seq_file *m, void *v)
440{
441 struct trace_uprobe *tu = v;
442
443 seq_printf(m, " %s %-44s %15lu\n", tu->filename, tu->call.name, tu->nhit);
444 return 0;
445}
446
/*
 * seq_file iterator for the hit-count listing: same list walk as
 * probes_seq_op, different .show.
 */
447static const struct seq_operations profile_seq_op = {
448 .start = probes_seq_start,
449 .next = probes_seq_next,
450 .stop = probes_seq_stop,
451 .show = probes_profile_seq_show
452};
453
454static int profile_open(struct inode *inode, struct file *file)
455{
456 return seq_open(file, &profile_seq_op);
457}
458
/* File operations of the "uprobe_profile" file; it has no write handler. */
459static const struct file_operations uprobe_profile_ops = {
460 .owner = THIS_MODULE,
461 .open = profile_open,
462 .read = seq_read,
463 .llseek = seq_lseek,
464 .release = seq_release,
465};
466
467/* uprobe handler */
468static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
469{
470 struct uprobe_trace_entry_head *entry;
471 struct ring_buffer_event *event;
472 struct ring_buffer *buffer;
473 u8 *data;
474 int size, i, pc;
475 unsigned long irq_flags;
476 struct ftrace_event_call *call = &tu->call;
477
478 tu->nhit++;
479
480 local_save_flags(irq_flags);
481 pc = preempt_count();
482
483 size = sizeof(*entry) + tu->size;
484
485 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
486 size, irq_flags, pc);
487 if (!event)
488 return;
489
490 entry = ring_buffer_event_data(event);
491 entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
492 data = (u8 *)&entry[1];
493 for (i = 0; i < tu->nr_args; i++)
494 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
495
496 if (!filter_current_check_discard(buffer, call, entry, event))
497 trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
498}
499
500/* Event entry printers */
501static enum print_line_t
502print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
503{
504 struct uprobe_trace_entry_head *field;
505 struct trace_seq *s = &iter->seq;
506 struct trace_uprobe *tu;
507 u8 *data;
508 int i;
509
510 field = (struct uprobe_trace_entry_head *)iter->ent;
511 tu = container_of(event, struct trace_uprobe, call.event);
512
513 if (!trace_seq_printf(s, "%s: (", tu->call.name))
514 goto partial;
515
516 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
517 goto partial;
518
519 if (!trace_seq_puts(s, ")"))
520 goto partial;
521
522 data = (u8 *)&field[1];
523 for (i = 0; i < tu->nr_args; i++) {
524 if (!tu->args[i].type->print(s, tu->args[i].name,
525 data + tu->args[i].offset, field))
526 goto partial;
527 }
528
529 if (trace_seq_puts(s, "\n"))
530 return TRACE_TYPE_HANDLED;
531
532partial:
533 return TRACE_TYPE_PARTIAL_LINE;
534}
535
536static int probe_event_enable(struct trace_uprobe *tu, int flag)
537{
538 struct uprobe_trace_consumer *utc;
539 int ret = 0;
540
541 if (!tu->inode || tu->consumer)
542 return -EINTR;
543
544 utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL);
545 if (!utc)
546 return -EINTR;
547
548 utc->cons.handler = uprobe_dispatcher;
549 utc->cons.filter = NULL;
550 ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
551 if (ret) {
552 kfree(utc);
553 return ret;
554 }
555
556 tu->flags |= flag;
557 utc->tu = tu;
558 tu->consumer = utc;
559
560 return 0;
561}
562
563static void probe_event_disable(struct trace_uprobe *tu, int flag)
564{
565 if (!tu->inode || !tu->consumer)
566 return;
567
568 uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons);
569 tu->flags &= ~flag;
570 kfree(tu->consumer);
571 tu->consumer = NULL;
572}
573
574static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
575{
576 int ret, i;
577 struct uprobe_trace_entry_head field;
578 struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data;
579
580 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
581 /* Set argument names as fields */
582 for (i = 0; i < tu->nr_args; i++) {
583 ret = trace_define_field(event_call, tu->args[i].type->fmttype,
584 tu->args[i].name,
585 sizeof(field) + tu->args[i].offset,
586 tu->args[i].type->size,
587 tu->args[i].type->is_signed,
588 FILTER_OTHER);
589
590 if (ret)
591 return ret;
592 }
593 return 0;
594}
595
596#define LEN_OR_ZERO (len ? len - pos : 0)
597static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len)
598{
599 const char *fmt, *arg;
600 int i;
601 int pos = 0;
602
603 fmt = "(%lx)";
604 arg = "REC->" FIELD_STRING_IP;
605
606 /* When len=0, we just calculate the needed length */
607
608 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
609
610 for (i = 0; i < tu->nr_args; i++) {
611 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
612 tu->args[i].name, tu->args[i].type->fmt);
613 }
614
615 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
616
617 for (i = 0; i < tu->nr_args; i++) {
618 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
619 tu->args[i].name);
620 }
621
622 return pos; /* return the length of print_fmt */
623}
624#undef LEN_OR_ZERO
625
626static int set_print_fmt(struct trace_uprobe *tu)
627{
628 char *print_fmt;
629 int len;
630
631 /* First: called with 0 length to calculate the needed length */
632 len = __set_print_fmt(tu, NULL, 0);
633 print_fmt = kmalloc(len + 1, GFP_KERNEL);
634 if (!print_fmt)
635 return -ENOMEM;
636
637 /* Second: actually write the @print_fmt */
638 __set_print_fmt(tu, print_fmt, len + 1);
639 tu->call.print_fmt = print_fmt;
640
641 return 0;
642}
643
#ifdef CONFIG_PERF_EVENTS
/*
 * uprobe handler for perf: build the same entry as the trace handler in a
 * perf trace buffer and submit it.  Runs with preemption disabled while
 * the buffer is in use.
 */
static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
{
	struct ftrace_event_call *call = &tu->call;
	struct uprobe_trace_entry_head *entry;
	struct hlist_head *head;
	u8 *payload;
	int raw_size, size, n;
	int rctx;

	/* Round the size up so that size + sizeof(u32) is u64 aligned. */
	raw_size = sizeof(*entry) + tu->size;
	size = ALIGN(raw_size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
		return;

	preempt_disable();

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (entry) {
		entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));

		/* Argument data is stored right behind the entry header. */
		payload = (u8 *)&entry[1];
		for (n = 0; n < tu->nr_args; n++) {
			struct probe_arg *parg = &tu->args[n];

			call_fetch(&parg->fetch, regs, payload + parg->offset);
		}

		head = this_cpu_ptr(call->perf_events);
		perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
	}

	preempt_enable();
}
#endif /* CONFIG_PERF_EVENTS */
679
680static
681int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
682{
683 struct trace_uprobe *tu = (struct trace_uprobe *)event->data;
684
685 switch (type) {
686 case TRACE_REG_REGISTER:
687 return probe_event_enable(tu, TP_FLAG_TRACE);
688
689 case TRACE_REG_UNREGISTER:
690 probe_event_disable(tu, TP_FLAG_TRACE);
691 return 0;
692
693#ifdef CONFIG_PERF_EVENTS
694 case TRACE_REG_PERF_REGISTER:
695 return probe_event_enable(tu, TP_FLAG_PROFILE);
696
697 case TRACE_REG_PERF_UNREGISTER:
698 probe_event_disable(tu, TP_FLAG_PROFILE);
699 return 0;
700#endif
701 default:
702 return 0;
703 }
704 return 0;
705}
706
707static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
708{
709 struct uprobe_trace_consumer *utc;
710 struct trace_uprobe *tu;
711
712 utc = container_of(con, struct uprobe_trace_consumer, cons);
713 tu = utc->tu;
714 if (!tu || tu->consumer != utc)
715 return 0;
716
717 if (tu->flags & TP_FLAG_TRACE)
718 uprobe_trace_func(tu, regs);
719
720#ifdef CONFIG_PERF_EVENTS
721 if (tu->flags & TP_FLAG_PROFILE)
722 uprobe_perf_func(tu, regs);
723#endif
724 return 0;
725}
726
/* Output callbacks installed on every uprobe event by register_uprobe_event(). */
727static struct trace_event_functions uprobe_funcs = {
728 .trace = print_uprobe_event
729};
730
731static int register_uprobe_event(struct trace_uprobe *tu)
732{
733 struct ftrace_event_call *call = &tu->call;
734 int ret;
735
736 /* Initialize ftrace_event_call */
737 INIT_LIST_HEAD(&call->class->fields);
738 call->event.funcs = &uprobe_funcs;
739 call->class->define_fields = uprobe_event_define_fields;
740
741 if (set_print_fmt(tu) < 0)
742 return -ENOMEM;
743
744 ret = register_ftrace_event(&call->event);
745 if (!ret) {
746 kfree(call->print_fmt);
747 return -ENODEV;
748 }
749 call->flags = 0;
750 call->class->reg = trace_uprobe_register;
751 call->data = tu;
752 ret = trace_add_event_call(call);
753
754 if (ret) {
755 pr_info("Failed to register uprobe event: %s\n", call->name);
756 kfree(call->print_fmt);
757 unregister_ftrace_event(&call->event);
758 }
759
760 return ret;
761}
762
763static void unregister_uprobe_event(struct trace_uprobe *tu)
764{
765 /* tu->event is unregistered in trace_remove_event_call() */
766 trace_remove_event_call(&tu->call);
767 kfree(tu->call.print_fmt);
768 tu->call.print_fmt = NULL;
769}
770
771/* Make a trace interface for controling probe points */
772static __init int init_uprobe_trace(void)
773{
774 struct dentry *d_tracer;
775
776 d_tracer = tracing_init_dentry();
777 if (!d_tracer)
778 return 0;
779
780 trace_create_file("uprobe_events", 0644, d_tracer,
781 NULL, &uprobe_events_ops);
782 /* Profile interface */
783 trace_create_file("uprobe_profile", 0444, d_tracer,
784 NULL, &uprobe_profile_ops);
785 return 0;
786}
787
788fs_initcall(init_uprobe_trace);