aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace/trace_kprobe.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-05-24 14:39:34 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-24 14:39:34 -0400
commit654443e20dfc0617231f28a07c96a979ee1a0239 (patch)
treea0dc3f093eb13892539082e663607c34b4fc2d07 /kernel/trace/trace_kprobe.c
parent2c01e7bc46f10e9190818437e564f7e0db875ae9 (diff)
parent9cba26e66d09bf394ae5a739627a1dc8b7cae6f4 (diff)
Merge branch 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull user-space probe instrumentation from Ingo Molnar: "The uprobes code originates from SystemTap and has been used for years in Fedora and RHEL kernels. This version is much rewritten, reviews from PeterZ, Oleg and myself shaped the end result. This tree includes uprobes support in 'perf probe' - but SystemTap (and other tools) can take advantage of user probe points as well. Sample usage of uprobes via perf, for example to profile malloc() calls without modifying user-space binaries. First boot a new kernel with CONFIG_UPROBE_EVENT=y enabled. If you don't know which function you want to probe you can pick one from 'perf top' or can get a list all functions that can be probed within libc (binaries can be specified as well): $ perf probe -F -x /lib/libc.so.6 To probe libc's malloc(): $ perf probe -x /lib64/libc.so.6 malloc Added new event: probe_libc:malloc (on 0x7eac0) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc -aR sleep 1 Make use of it to create a call graph (as the flat profile is going to look very boring): $ perf record -e probe_libc:malloc -gR make [ perf record: Woken up 173 times to write data ] [ perf record: Captured and wrote 44.190 MB perf.data (~1930712 $ perf report | less 32.03% git libc-2.15.so [.] malloc | --- malloc 29.49% cc1 libc-2.15.so [.] malloc | --- malloc | |--0.95%-- 0x208eb1000000000 | |--0.63%-- htab_traverse_noresize 11.04% as libc-2.15.so [.] malloc | --- malloc | 7.15% ld libc-2.15.so [.] malloc | --- malloc | 5.07% sh libc-2.15.so [.] malloc | --- malloc | 4.99% python-config libc-2.15.so [.] malloc | --- malloc | 4.54% make libc-2.15.so [.] malloc | --- malloc | |--7.34%-- glob | | | |--93.18%-- 0x41588f | | | --6.82%-- glob | 0x41588f ... Or: $ perf report -g flat | less # Overhead Command Shared Object Symbol # ........ ............. ............. .......... # 32.03% git libc-2.15.so [.] malloc 27.19% malloc 29.49% cc1 libc-2.15.so [.] malloc 24.77% malloc 11.04% as libc-2.15.so [.] malloc 11.02% malloc 7.15% ld libc-2.15.so [.] malloc 6.57% malloc ... The core uprobes design is fairly straightforward: uprobes probe points register themselves at (inode:offset) addresses of libraries/binaries, after which all existing (or new) vmas that map that address will have a software breakpoint injected at that address. vmas are COW-ed to preserve original content. The probe points are kept in an rbtree. If user-space executes the probed inode:offset instruction address then an event is generated which can be recovered from the regular perf event channels and mmap-ed ring-buffer. Multiple probes at the same address are supported, they create a dynamic callback list of event consumers. The basic model is further complicated by the XOL speedup: the original instruction that is probed is copied (in an architecture specific fashion) and executed out of line when the probe triggers. The XOL area is a single vma per process, with a fixed number of entries (which limits probe execution parallelism). The API: uprobes are installed/removed via /sys/kernel/debug/tracing/uprobe_events, the API is integrated to align with the kprobes interface as much as possible, but is separate to it. Injecting a probe point is privileged operation, which can be relaxed by setting perf_paranoid to -1. You can use multiple probes as well and mix them with kprobes and regular PMU events or tracepoints, when instrumenting a task." Fix up trivial conflicts in mm/memory.c due to previous cleanup of unmap_single_vma(). * 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits) perf probe: Detect probe target when m/x options are absent perf probe: Provide perf interface for uprobes tracing: Fix kconfig warning due to a typo tracing: Provide trace events interface for uprobes tracing: Extract out common code for kprobes/uprobes trace events tracing: Modify is_delete, is_return from int to bool uprobes/core: Decrement uprobe count before the pages are unmapped uprobes/core: Make background page replacement logic account for rss_stat counters uprobes/core: Optimize probe hits with the help of a counter uprobes/core: Allocate XOL slots for uprobes use uprobes/core: Handle breakpoint and singlestep exceptions uprobes/core: Rename bkpt to swbp uprobes/core: Make order of function parameters consistent across functions uprobes/core: Make macro names consistent uprobes: Update copyright notices uprobes/core: Move insn to arch specific structure uprobes/core: Remove uprobe_opcode_sz uprobes/core: Make instruction tables volatile uprobes: Move to kernel/events/ uprobes/core: Clean up, refactor and improve the code ...
Diffstat (limited to 'kernel/trace/trace_kprobe.c')
-rw-r--r--kernel/trace/trace_kprobe.c899
1 files changed, 23 insertions, 876 deletions
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 580a05ec926b..b31d3d5699fe 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -19,547 +19,15 @@
19 19
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32#include <linux/stringify.h>
33#include <linux/limits.h>
34#include <asm/bitsperlong.h>
35
36#include "trace.h"
37#include "trace_output.h"
38
39#define MAX_TRACE_ARGS 128
40#define MAX_ARGSTR_LEN 63
41#define MAX_EVENT_NAME_LEN 64
42#define MAX_STRING_SIZE PATH_MAX
43#define KPROBE_EVENT_SYSTEM "kprobes"
44
45/* Reserved field names */
46#define FIELD_STRING_IP "__probe_ip"
47#define FIELD_STRING_RETIP "__probe_ret_ip"
48#define FIELD_STRING_FUNC "__probe_func"
49
50const char *reserved_field_names[] = {
51 "common_type",
52 "common_flags",
53 "common_preempt_count",
54 "common_pid",
55 "common_tgid",
56 FIELD_STRING_IP,
57 FIELD_STRING_RETIP,
58 FIELD_STRING_FUNC,
59};
60
61/* Printing function type */
62typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *,
63 void *);
64#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
65#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
66
67/* Printing in basic type function template */
68#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
69static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
70 const char *name, \
71 void *data, void *ent)\
72{ \
73 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
74} \
75static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
76
77DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
78DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
79DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
80DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
81DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
82DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
83DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
84DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
85
86/* data_rloc: data relative location, compatible with u32 */
87#define make_data_rloc(len, roffs) \
88 (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
89#define get_rloc_len(dl) ((u32)(dl) >> 16)
90#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
91
92static inline void *get_rloc_data(u32 *dl)
93{
94 return (u8 *)dl + get_rloc_offs(*dl);
95}
96
97/* For data_loc conversion */
98static inline void *get_loc_data(u32 *dl, void *ent)
99{
100 return (u8 *)ent + get_rloc_offs(*dl);
101}
102
103/*
104 * Convert data_rloc to data_loc:
105 * data_rloc stores the offset from data_rloc itself, but data_loc
106 * stores the offset from event entry.
107 */
108#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
109
110/* For defining macros, define string/string_size types */
111typedef u32 string;
112typedef u32 string_size;
113
114/* Print type function for string type */
115static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
116 const char *name,
117 void *data, void *ent)
118{
119 int len = *(u32 *)data >> 16;
120
121 if (!len)
122 return trace_seq_printf(s, " %s=(fault)", name);
123 else
124 return trace_seq_printf(s, " %s=\"%s\"", name,
125 (const char *)get_loc_data(data, ent));
126}
127static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
128
129/* Data fetch function type */
130typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
131
132struct fetch_param {
133 fetch_func_t fn;
134 void *data;
135};
136
137static __kprobes void call_fetch(struct fetch_param *fprm,
138 struct pt_regs *regs, void *dest)
139{
140 return fprm->fn(regs, fprm->data, dest);
141}
142
143#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
144/*
145 * Define macro for basic types - we don't need to define s* types, because
146 * we have to care only about bitwidth at recording time.
147 */
148#define DEFINE_BASIC_FETCH_FUNCS(method) \
149DEFINE_FETCH_##method(u8) \
150DEFINE_FETCH_##method(u16) \
151DEFINE_FETCH_##method(u32) \
152DEFINE_FETCH_##method(u64)
153
154#define CHECK_FETCH_FUNCS(method, fn) \
155 (((FETCH_FUNC_NAME(method, u8) == fn) || \
156 (FETCH_FUNC_NAME(method, u16) == fn) || \
157 (FETCH_FUNC_NAME(method, u32) == fn) || \
158 (FETCH_FUNC_NAME(method, u64) == fn) || \
159 (FETCH_FUNC_NAME(method, string) == fn) || \
160 (FETCH_FUNC_NAME(method, string_size) == fn)) \
161 && (fn != NULL))
162
163/* Data fetch function templates */
164#define DEFINE_FETCH_reg(type) \
165static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
166 void *offset, void *dest) \
167{ \
168 *(type *)dest = (type)regs_get_register(regs, \
169 (unsigned int)((unsigned long)offset)); \
170}
171DEFINE_BASIC_FETCH_FUNCS(reg)
172/* No string on the register */
173#define fetch_reg_string NULL
174#define fetch_reg_string_size NULL
175
176#define DEFINE_FETCH_stack(type) \
177static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
178 void *offset, void *dest) \
179{ \
180 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
181 (unsigned int)((unsigned long)offset)); \
182}
183DEFINE_BASIC_FETCH_FUNCS(stack)
184/* No string on the stack entry */
185#define fetch_stack_string NULL
186#define fetch_stack_string_size NULL
187
188#define DEFINE_FETCH_retval(type) \
189static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
190 void *dummy, void *dest) \
191{ \
192 *(type *)dest = (type)regs_return_value(regs); \
193}
194DEFINE_BASIC_FETCH_FUNCS(retval)
195/* No string on the retval */
196#define fetch_retval_string NULL
197#define fetch_retval_string_size NULL
198
199#define DEFINE_FETCH_memory(type) \
200static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
201 void *addr, void *dest) \
202{ \
203 type retval; \
204 if (probe_kernel_address(addr, retval)) \
205 *(type *)dest = 0; \
206 else \
207 *(type *)dest = retval; \
208}
209DEFINE_BASIC_FETCH_FUNCS(memory)
210/*
211 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
212 * length and relative data location.
213 */
214static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
215 void *addr, void *dest)
216{
217 long ret;
218 int maxlen = get_rloc_len(*(u32 *)dest);
219 u8 *dst = get_rloc_data(dest);
220 u8 *src = addr;
221 mm_segment_t old_fs = get_fs();
222 if (!maxlen)
223 return;
224 /*
225 * Try to get string again, since the string can be changed while
226 * probing.
227 */
228 set_fs(KERNEL_DS);
229 pagefault_disable();
230 do
231 ret = __copy_from_user_inatomic(dst++, src++, 1);
232 while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
233 dst[-1] = '\0';
234 pagefault_enable();
235 set_fs(old_fs);
236
237 if (ret < 0) { /* Failed to fetch string */
238 ((u8 *)get_rloc_data(dest))[0] = '\0';
239 *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
240 } else
241 *(u32 *)dest = make_data_rloc(src - (u8 *)addr,
242 get_rloc_offs(*(u32 *)dest));
243}
244/* Return the length of string -- including null terminal byte */
245static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
246 void *addr, void *dest)
247{
248 int ret, len = 0;
249 u8 c;
250 mm_segment_t old_fs = get_fs();
251
252 set_fs(KERNEL_DS);
253 pagefault_disable();
254 do {
255 ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
256 len++;
257 } while (c && ret == 0 && len < MAX_STRING_SIZE);
258 pagefault_enable();
259 set_fs(old_fs);
260
261 if (ret < 0) /* Failed to check the length */
262 *(u32 *)dest = 0;
263 else
264 *(u32 *)dest = len;
265}
266
267/* Memory fetching by symbol */
268struct symbol_cache {
269 char *symbol;
270 long offset;
271 unsigned long addr;
272};
273
274static unsigned long update_symbol_cache(struct symbol_cache *sc)
275{
276 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
277 if (sc->addr)
278 sc->addr += sc->offset;
279 return sc->addr;
280}
281
282static void free_symbol_cache(struct symbol_cache *sc)
283{
284 kfree(sc->symbol);
285 kfree(sc);
286}
287
288static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
289{
290 struct symbol_cache *sc;
291
292 if (!sym || strlen(sym) == 0)
293 return NULL;
294 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
295 if (!sc)
296 return NULL;
297
298 sc->symbol = kstrdup(sym, GFP_KERNEL);
299 if (!sc->symbol) {
300 kfree(sc);
301 return NULL;
302 }
303 sc->offset = offset;
304 22
305 update_symbol_cache(sc); 23#include "trace_probe.h"
306 return sc;
307}
308
309#define DEFINE_FETCH_symbol(type) \
310static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
311 void *data, void *dest) \
312{ \
313 struct symbol_cache *sc = data; \
314 if (sc->addr) \
315 fetch_memory_##type(regs, (void *)sc->addr, dest); \
316 else \
317 *(type *)dest = 0; \
318}
319DEFINE_BASIC_FETCH_FUNCS(symbol)
320DEFINE_FETCH_symbol(string)
321DEFINE_FETCH_symbol(string_size)
322
323/* Dereference memory access function */
324struct deref_fetch_param {
325 struct fetch_param orig;
326 long offset;
327};
328
329#define DEFINE_FETCH_deref(type) \
330static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
331 void *data, void *dest) \
332{ \
333 struct deref_fetch_param *dprm = data; \
334 unsigned long addr; \
335 call_fetch(&dprm->orig, regs, &addr); \
336 if (addr) { \
337 addr += dprm->offset; \
338 fetch_memory_##type(regs, (void *)addr, dest); \
339 } else \
340 *(type *)dest = 0; \
341}
342DEFINE_BASIC_FETCH_FUNCS(deref)
343DEFINE_FETCH_deref(string)
344DEFINE_FETCH_deref(string_size)
345
346static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
347{
348 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
349 update_deref_fetch_param(data->orig.data);
350 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
351 update_symbol_cache(data->orig.data);
352}
353
354static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
355{
356 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
357 free_deref_fetch_param(data->orig.data);
358 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
359 free_symbol_cache(data->orig.data);
360 kfree(data);
361}
362
363/* Bitfield fetch function */
364struct bitfield_fetch_param {
365 struct fetch_param orig;
366 unsigned char hi_shift;
367 unsigned char low_shift;
368};
369 24
370#define DEFINE_FETCH_bitfield(type) \ 25#define KPROBE_EVENT_SYSTEM "kprobes"
371static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
372 void *data, void *dest) \
373{ \
374 struct bitfield_fetch_param *bprm = data; \
375 type buf = 0; \
376 call_fetch(&bprm->orig, regs, &buf); \
377 if (buf) { \
378 buf <<= bprm->hi_shift; \
379 buf >>= bprm->low_shift; \
380 } \
381 *(type *)dest = buf; \
382}
383DEFINE_BASIC_FETCH_FUNCS(bitfield)
384#define fetch_bitfield_string NULL
385#define fetch_bitfield_string_size NULL
386
387static __kprobes void
388update_bitfield_fetch_param(struct bitfield_fetch_param *data)
389{
390 /*
391 * Don't check the bitfield itself, because this must be the
392 * last fetch function.
393 */
394 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
395 update_deref_fetch_param(data->orig.data);
396 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
397 update_symbol_cache(data->orig.data);
398}
399
400static __kprobes void
401free_bitfield_fetch_param(struct bitfield_fetch_param *data)
402{
403 /*
404 * Don't check the bitfield itself, because this must be the
405 * last fetch function.
406 */
407 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
408 free_deref_fetch_param(data->orig.data);
409 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
410 free_symbol_cache(data->orig.data);
411 kfree(data);
412}
413
414/* Default (unsigned long) fetch type */
415#define __DEFAULT_FETCH_TYPE(t) u##t
416#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
417#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
418#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
419
420/* Fetch types */
421enum {
422 FETCH_MTD_reg = 0,
423 FETCH_MTD_stack,
424 FETCH_MTD_retval,
425 FETCH_MTD_memory,
426 FETCH_MTD_symbol,
427 FETCH_MTD_deref,
428 FETCH_MTD_bitfield,
429 FETCH_MTD_END,
430};
431
432#define ASSIGN_FETCH_FUNC(method, type) \
433 [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
434
435#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
436 {.name = _name, \
437 .size = _size, \
438 .is_signed = sign, \
439 .print = PRINT_TYPE_FUNC_NAME(ptype), \
440 .fmt = PRINT_TYPE_FMT_NAME(ptype), \
441 .fmttype = _fmttype, \
442 .fetch = { \
443ASSIGN_FETCH_FUNC(reg, ftype), \
444ASSIGN_FETCH_FUNC(stack, ftype), \
445ASSIGN_FETCH_FUNC(retval, ftype), \
446ASSIGN_FETCH_FUNC(memory, ftype), \
447ASSIGN_FETCH_FUNC(symbol, ftype), \
448ASSIGN_FETCH_FUNC(deref, ftype), \
449ASSIGN_FETCH_FUNC(bitfield, ftype), \
450 } \
451 }
452
453#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
454 __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
455
456#define FETCH_TYPE_STRING 0
457#define FETCH_TYPE_STRSIZE 1
458
459/* Fetch type information table */
460static const struct fetch_type {
461 const char *name; /* Name of type */
462 size_t size; /* Byte size of type */
463 int is_signed; /* Signed flag */
464 print_type_func_t print; /* Print functions */
465 const char *fmt; /* Fromat string */
466 const char *fmttype; /* Name in format file */
467 /* Fetch functions */
468 fetch_func_t fetch[FETCH_MTD_END];
469} fetch_type_table[] = {
470 /* Special types */
471 [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
472 sizeof(u32), 1, "__data_loc char[]"),
473 [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
474 string_size, sizeof(u32), 0, "u32"),
475 /* Basic types */
476 ASSIGN_FETCH_TYPE(u8, u8, 0),
477 ASSIGN_FETCH_TYPE(u16, u16, 0),
478 ASSIGN_FETCH_TYPE(u32, u32, 0),
479 ASSIGN_FETCH_TYPE(u64, u64, 0),
480 ASSIGN_FETCH_TYPE(s8, u8, 1),
481 ASSIGN_FETCH_TYPE(s16, u16, 1),
482 ASSIGN_FETCH_TYPE(s32, u32, 1),
483 ASSIGN_FETCH_TYPE(s64, u64, 1),
484};
485
486static const struct fetch_type *find_fetch_type(const char *type)
487{
488 int i;
489
490 if (!type)
491 type = DEFAULT_FETCH_TYPE_STR;
492
493 /* Special case: bitfield */
494 if (*type == 'b') {
495 unsigned long bs;
496 type = strchr(type, '/');
497 if (!type)
498 goto fail;
499 type++;
500 if (strict_strtoul(type, 0, &bs))
501 goto fail;
502 switch (bs) {
503 case 8:
504 return find_fetch_type("u8");
505 case 16:
506 return find_fetch_type("u16");
507 case 32:
508 return find_fetch_type("u32");
509 case 64:
510 return find_fetch_type("u64");
511 default:
512 goto fail;
513 }
514 }
515
516 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
517 if (strcmp(type, fetch_type_table[i].name) == 0)
518 return &fetch_type_table[i];
519fail:
520 return NULL;
521}
522
523/* Special function : only accept unsigned long */
524static __kprobes void fetch_stack_address(struct pt_regs *regs,
525 void *dummy, void *dest)
526{
527 *(unsigned long *)dest = kernel_stack_pointer(regs);
528}
529
530static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
531 fetch_func_t orig_fn)
532{
533 int i;
534
535 if (type != &fetch_type_table[FETCH_TYPE_STRING])
536 return NULL; /* Only string type needs size function */
537 for (i = 0; i < FETCH_MTD_END; i++)
538 if (type->fetch[i] == orig_fn)
539 return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
540
541 WARN_ON(1); /* This should not happen */
542 return NULL;
543}
544 26
545/** 27/**
546 * Kprobe event core functions 28 * Kprobe event core functions
547 */ 29 */
548 30
549struct probe_arg {
550 struct fetch_param fetch;
551 struct fetch_param fetch_size;
552 unsigned int offset; /* Offset from argument entry */
553 const char *name; /* Name of this argument */
554 const char *comm; /* Command of this argument */
555 const struct fetch_type *type; /* Type of this argument */
556};
557
558/* Flags for trace_probe */
559#define TP_FLAG_TRACE 1
560#define TP_FLAG_PROFILE 2
561#define TP_FLAG_REGISTERED 4
562
563struct trace_probe { 31struct trace_probe {
564 struct list_head list; 32 struct list_head list;
565 struct kretprobe rp; /* Use rp.kp for kprobe use */ 33 struct kretprobe rp; /* Use rp.kp for kprobe use */
@@ -631,18 +99,6 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
631static int kretprobe_dispatcher(struct kretprobe_instance *ri, 99static int kretprobe_dispatcher(struct kretprobe_instance *ri,
632 struct pt_regs *regs); 100 struct pt_regs *regs);
633 101
634/* Check the name is good for event/group/fields */
635static int is_good_name(const char *name)
636{
637 if (!isalpha(*name) && *name != '_')
638 return 0;
639 while (*++name != '\0') {
640 if (!isalpha(*name) && !isdigit(*name) && *name != '_')
641 return 0;
642 }
643 return 1;
644}
645
646/* 102/*
647 * Allocate new trace_probe and initialize it (including kprobes). 103 * Allocate new trace_probe and initialize it (including kprobes).
648 */ 104 */
@@ -651,7 +107,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
651 void *addr, 107 void *addr,
652 const char *symbol, 108 const char *symbol,
653 unsigned long offs, 109 unsigned long offs,
654 int nargs, int is_return) 110 int nargs, bool is_return)
655{ 111{
656 struct trace_probe *tp; 112 struct trace_probe *tp;
657 int ret = -ENOMEM; 113 int ret = -ENOMEM;
@@ -702,34 +158,12 @@ error:
702 return ERR_PTR(ret); 158 return ERR_PTR(ret);
703} 159}
704 160
705static void update_probe_arg(struct probe_arg *arg)
706{
707 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
708 update_bitfield_fetch_param(arg->fetch.data);
709 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
710 update_deref_fetch_param(arg->fetch.data);
711 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
712 update_symbol_cache(arg->fetch.data);
713}
714
715static void free_probe_arg(struct probe_arg *arg)
716{
717 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
718 free_bitfield_fetch_param(arg->fetch.data);
719 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
720 free_deref_fetch_param(arg->fetch.data);
721 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
722 free_symbol_cache(arg->fetch.data);
723 kfree(arg->name);
724 kfree(arg->comm);
725}
726
727static void free_trace_probe(struct trace_probe *tp) 161static void free_trace_probe(struct trace_probe *tp)
728{ 162{
729 int i; 163 int i;
730 164
731 for (i = 0; i < tp->nr_args; i++) 165 for (i = 0; i < tp->nr_args; i++)
732 free_probe_arg(&tp->args[i]); 166 traceprobe_free_probe_arg(&tp->args[i]);
733 167
734 kfree(tp->call.class->system); 168 kfree(tp->call.class->system);
735 kfree(tp->call.name); 169 kfree(tp->call.name);
@@ -787,7 +221,7 @@ static int __register_trace_probe(struct trace_probe *tp)
787 return -EINVAL; 221 return -EINVAL;
788 222
789 for (i = 0; i < tp->nr_args; i++) 223 for (i = 0; i < tp->nr_args; i++)
790 update_probe_arg(&tp->args[i]); 224 traceprobe_update_arg(&tp->args[i]);
791 225
792 /* Set/clear disabled flag according to tp->flag */ 226 /* Set/clear disabled flag according to tp->flag */
793 if (trace_probe_is_enabled(tp)) 227 if (trace_probe_is_enabled(tp))
@@ -919,227 +353,6 @@ static struct notifier_block trace_probe_module_nb = {
919 .priority = 1 /* Invoked after kprobe module callback */ 353 .priority = 1 /* Invoked after kprobe module callback */
920}; 354};
921 355
922/* Split symbol and offset. */
923static int split_symbol_offset(char *symbol, unsigned long *offset)
924{
925 char *tmp;
926 int ret;
927
928 if (!offset)
929 return -EINVAL;
930
931 tmp = strchr(symbol, '+');
932 if (tmp) {
933 /* skip sign because strict_strtol doesn't accept '+' */
934 ret = strict_strtoul(tmp + 1, 0, offset);
935 if (ret)
936 return ret;
937 *tmp = '\0';
938 } else
939 *offset = 0;
940 return 0;
941}
942
943#define PARAM_MAX_ARGS 16
944#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
945
946static int parse_probe_vars(char *arg, const struct fetch_type *t,
947 struct fetch_param *f, int is_return)
948{
949 int ret = 0;
950 unsigned long param;
951
952 if (strcmp(arg, "retval") == 0) {
953 if (is_return)
954 f->fn = t->fetch[FETCH_MTD_retval];
955 else
956 ret = -EINVAL;
957 } else if (strncmp(arg, "stack", 5) == 0) {
958 if (arg[5] == '\0') {
959 if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
960 f->fn = fetch_stack_address;
961 else
962 ret = -EINVAL;
963 } else if (isdigit(arg[5])) {
964 ret = strict_strtoul(arg + 5, 10, &param);
965 if (ret || param > PARAM_MAX_STACK)
966 ret = -EINVAL;
967 else {
968 f->fn = t->fetch[FETCH_MTD_stack];
969 f->data = (void *)param;
970 }
971 } else
972 ret = -EINVAL;
973 } else
974 ret = -EINVAL;
975 return ret;
976}
977
978/* Recursive argument parser */
979static int __parse_probe_arg(char *arg, const struct fetch_type *t,
980 struct fetch_param *f, int is_return)
981{
982 int ret = 0;
983 unsigned long param;
984 long offset;
985 char *tmp;
986
987 switch (arg[0]) {
988 case '$':
989 ret = parse_probe_vars(arg + 1, t, f, is_return);
990 break;
991 case '%': /* named register */
992 ret = regs_query_register_offset(arg + 1);
993 if (ret >= 0) {
994 f->fn = t->fetch[FETCH_MTD_reg];
995 f->data = (void *)(unsigned long)ret;
996 ret = 0;
997 }
998 break;
999 case '@': /* memory or symbol */
1000 if (isdigit(arg[1])) {
1001 ret = strict_strtoul(arg + 1, 0, &param);
1002 if (ret)
1003 break;
1004 f->fn = t->fetch[FETCH_MTD_memory];
1005 f->data = (void *)param;
1006 } else {
1007 ret = split_symbol_offset(arg + 1, &offset);
1008 if (ret)
1009 break;
1010 f->data = alloc_symbol_cache(arg + 1, offset);
1011 if (f->data)
1012 f->fn = t->fetch[FETCH_MTD_symbol];
1013 }
1014 break;
1015 case '+': /* deref memory */
1016 arg++; /* Skip '+', because strict_strtol() rejects it. */
1017 case '-':
1018 tmp = strchr(arg, '(');
1019 if (!tmp)
1020 break;
1021 *tmp = '\0';
1022 ret = strict_strtol(arg, 0, &offset);
1023 if (ret)
1024 break;
1025 arg = tmp + 1;
1026 tmp = strrchr(arg, ')');
1027 if (tmp) {
1028 struct deref_fetch_param *dprm;
1029 const struct fetch_type *t2 = find_fetch_type(NULL);
1030 *tmp = '\0';
1031 dprm = kzalloc(sizeof(struct deref_fetch_param),
1032 GFP_KERNEL);
1033 if (!dprm)
1034 return -ENOMEM;
1035 dprm->offset = offset;
1036 ret = __parse_probe_arg(arg, t2, &dprm->orig,
1037 is_return);
1038 if (ret)
1039 kfree(dprm);
1040 else {
1041 f->fn = t->fetch[FETCH_MTD_deref];
1042 f->data = (void *)dprm;
1043 }
1044 }
1045 break;
1046 }
1047 if (!ret && !f->fn) { /* Parsed, but do not find fetch method */
1048 pr_info("%s type has no corresponding fetch method.\n",
1049 t->name);
1050 ret = -EINVAL;
1051 }
1052 return ret;
1053}
1054
1055#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long))
1056
1057/* Bitfield type needs to be parsed into a fetch function */
1058static int __parse_bitfield_probe_arg(const char *bf,
1059 const struct fetch_type *t,
1060 struct fetch_param *f)
1061{
1062 struct bitfield_fetch_param *bprm;
1063 unsigned long bw, bo;
1064 char *tail;
1065
1066 if (*bf != 'b')
1067 return 0;
1068
1069 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1070 if (!bprm)
1071 return -ENOMEM;
1072 bprm->orig = *f;
1073 f->fn = t->fetch[FETCH_MTD_bitfield];
1074 f->data = (void *)bprm;
1075
1076 bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
1077 if (bw == 0 || *tail != '@')
1078 return -EINVAL;
1079
1080 bf = tail + 1;
1081 bo = simple_strtoul(bf, &tail, 0);
1082 if (tail == bf || *tail != '/')
1083 return -EINVAL;
1084
1085 bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
1086 bprm->low_shift = bprm->hi_shift + bo;
1087 return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
1088}
1089
1090/* String length checking wrapper */
1091static int parse_probe_arg(char *arg, struct trace_probe *tp,
1092 struct probe_arg *parg, int is_return)
1093{
1094 const char *t;
1095 int ret;
1096
1097 if (strlen(arg) > MAX_ARGSTR_LEN) {
1098 pr_info("Argument is too long.: %s\n", arg);
1099 return -ENOSPC;
1100 }
1101 parg->comm = kstrdup(arg, GFP_KERNEL);
1102 if (!parg->comm) {
1103 pr_info("Failed to allocate memory for command '%s'.\n", arg);
1104 return -ENOMEM;
1105 }
1106 t = strchr(parg->comm, ':');
1107 if (t) {
1108 arg[t - parg->comm] = '\0';
1109 t++;
1110 }
1111 parg->type = find_fetch_type(t);
1112 if (!parg->type) {
1113 pr_info("Unsupported type: %s\n", t);
1114 return -EINVAL;
1115 }
1116 parg->offset = tp->size;
1117 tp->size += parg->type->size;
1118 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
1119 if (ret >= 0 && t != NULL)
1120 ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
1121 if (ret >= 0) {
1122 parg->fetch_size.fn = get_fetch_size_function(parg->type,
1123 parg->fetch.fn);
1124 parg->fetch_size.data = parg->fetch.data;
1125 }
1126 return ret;
1127}
1128
1129/* Return 1 if name is reserved or already used by another argument */
1130static int conflict_field_name(const char *name,
1131 struct probe_arg *args, int narg)
1132{
1133 int i;
1134 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
1135 if (strcmp(reserved_field_names[i], name) == 0)
1136 return 1;
1137 for (i = 0; i < narg; i++)
1138 if (strcmp(args[i].name, name) == 0)
1139 return 1;
1140 return 0;
1141}
1142
1143static int create_trace_probe(int argc, char **argv) 356static int create_trace_probe(int argc, char **argv)
1144{ 357{
1145 /* 358 /*
@@ -1162,7 +375,7 @@ static int create_trace_probe(int argc, char **argv)
1162 */ 375 */
1163 struct trace_probe *tp; 376 struct trace_probe *tp;
1164 int i, ret = 0; 377 int i, ret = 0;
1165 int is_return = 0, is_delete = 0; 378 bool is_return = false, is_delete = false;
1166 char *symbol = NULL, *event = NULL, *group = NULL; 379 char *symbol = NULL, *event = NULL, *group = NULL;
1167 char *arg; 380 char *arg;
1168 unsigned long offset = 0; 381 unsigned long offset = 0;
@@ -1171,11 +384,11 @@ static int create_trace_probe(int argc, char **argv)
1171 384
1172 /* argc must be >= 1 */ 385 /* argc must be >= 1 */
1173 if (argv[0][0] == 'p') 386 if (argv[0][0] == 'p')
1174 is_return = 0; 387 is_return = false;
1175 else if (argv[0][0] == 'r') 388 else if (argv[0][0] == 'r')
1176 is_return = 1; 389 is_return = true;
1177 else if (argv[0][0] == '-') 390 else if (argv[0][0] == '-')
1178 is_delete = 1; 391 is_delete = true;
1179 else { 392 else {
1180 pr_info("Probe definition must be started with 'p', 'r' or" 393 pr_info("Probe definition must be started with 'p', 'r' or"
1181 " '-'.\n"); 394 " '-'.\n");
@@ -1240,7 +453,7 @@ static int create_trace_probe(int argc, char **argv)
1240 /* a symbol specified */ 453 /* a symbol specified */
1241 symbol = argv[1]; 454 symbol = argv[1];
1242 /* TODO: support .init module functions */ 455 /* TODO: support .init module functions */
1243 ret = split_symbol_offset(symbol, &offset); 456 ret = traceprobe_split_symbol_offset(symbol, &offset);
1244 if (ret) { 457 if (ret) {
1245 pr_info("Failed to parse symbol.\n"); 458 pr_info("Failed to parse symbol.\n");
1246 return ret; 459 return ret;
@@ -1302,7 +515,8 @@ static int create_trace_probe(int argc, char **argv)
1302 goto error; 515 goto error;
1303 } 516 }
1304 517
1305 if (conflict_field_name(tp->args[i].name, tp->args, i)) { 518 if (traceprobe_conflict_field_name(tp->args[i].name,
519 tp->args, i)) {
1306 pr_info("Argument[%d] name '%s' conflicts with " 520 pr_info("Argument[%d] name '%s' conflicts with "
1307 "another field.\n", i, argv[i]); 521 "another field.\n", i, argv[i]);
1308 ret = -EINVAL; 522 ret = -EINVAL;
@@ -1310,7 +524,8 @@ static int create_trace_probe(int argc, char **argv)
1310 } 524 }
1311 525
1312 /* Parse fetch argument */ 526 /* Parse fetch argument */
1313 ret = parse_probe_arg(arg, tp, &tp->args[i], is_return); 527 ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i],
528 is_return, true);
1314 if (ret) { 529 if (ret) {
1315 pr_info("Parse error at argument[%d]. (%d)\n", i, ret); 530 pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
1316 goto error; 531 goto error;
@@ -1412,70 +627,11 @@ static int probes_open(struct inode *inode, struct file *file)
1412 return seq_open(file, &probes_seq_op); 627 return seq_open(file, &probes_seq_op);
1413} 628}
1414 629
1415static int command_trace_probe(const char *buf)
1416{
1417 char **argv;
1418 int argc = 0, ret = 0;
1419
1420 argv = argv_split(GFP_KERNEL, buf, &argc);
1421 if (!argv)
1422 return -ENOMEM;
1423
1424 if (argc)
1425 ret = create_trace_probe(argc, argv);
1426
1427 argv_free(argv);
1428 return ret;
1429}
1430
1431#define WRITE_BUFSIZE 4096
1432
1433static ssize_t probes_write(struct file *file, const char __user *buffer, 630static ssize_t probes_write(struct file *file, const char __user *buffer,
1434 size_t count, loff_t *ppos) 631 size_t count, loff_t *ppos)
1435{ 632{
1436 char *kbuf, *tmp; 633 return traceprobe_probes_write(file, buffer, count, ppos,
1437 int ret; 634 create_trace_probe);
1438 size_t done;
1439 size_t size;
1440
1441 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
1442 if (!kbuf)
1443 return -ENOMEM;
1444
1445 ret = done = 0;
1446 while (done < count) {
1447 size = count - done;
1448 if (size >= WRITE_BUFSIZE)
1449 size = WRITE_BUFSIZE - 1;
1450 if (copy_from_user(kbuf, buffer + done, size)) {
1451 ret = -EFAULT;
1452 goto out;
1453 }
1454 kbuf[size] = '\0';
1455 tmp = strchr(kbuf, '\n');
1456 if (tmp) {
1457 *tmp = '\0';
1458 size = tmp - kbuf + 1;
1459 } else if (done + size < count) {
1460 pr_warning("Line length is too long: "
1461 "Should be less than %d.", WRITE_BUFSIZE);
1462 ret = -EINVAL;
1463 goto out;
1464 }
1465 done += size;
1466 /* Remove comments */
1467 tmp = strchr(kbuf, '#');
1468 if (tmp)
1469 *tmp = '\0';
1470
1471 ret = command_trace_probe(kbuf);
1472 if (ret)
1473 goto out;
1474 }
1475 ret = done;
1476out:
1477 kfree(kbuf);
1478 return ret;
1479} 635}
1480 636
1481static const struct file_operations kprobe_events_ops = { 637static const struct file_operations kprobe_events_ops = {
@@ -1711,16 +867,6 @@ partial:
1711 return TRACE_TYPE_PARTIAL_LINE; 867 return TRACE_TYPE_PARTIAL_LINE;
1712} 868}
1713 869
1714#undef DEFINE_FIELD
1715#define DEFINE_FIELD(type, item, name, is_signed) \
1716 do { \
1717 ret = trace_define_field(event_call, #type, name, \
1718 offsetof(typeof(field), item), \
1719 sizeof(field.item), is_signed, \
1720 FILTER_OTHER); \
1721 if (ret) \
1722 return ret; \
1723 } while (0)
1724 870
1725static int kprobe_event_define_fields(struct ftrace_event_call *event_call) 871static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1726{ 872{
@@ -2051,8 +1197,9 @@ static __init int kprobe_trace_self_tests_init(void)
2051 1197
2052 pr_info("Testing kprobe tracing: "); 1198 pr_info("Testing kprobe tracing: ");
2053 1199
2054 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " 1200 ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
2055 "$stack $stack0 +0($stack)"); 1201 "$stack $stack0 +0($stack)",
1202 create_trace_probe);
2056 if (WARN_ON_ONCE(ret)) { 1203 if (WARN_ON_ONCE(ret)) {
2057 pr_warning("error on probing function entry.\n"); 1204 pr_warning("error on probing function entry.\n");
2058 warn++; 1205 warn++;
@@ -2066,8 +1213,8 @@ static __init int kprobe_trace_self_tests_init(void)
2066 enable_trace_probe(tp, TP_FLAG_TRACE); 1213 enable_trace_probe(tp, TP_FLAG_TRACE);
2067 } 1214 }
2068 1215
2069 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " 1216 ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
2070 "$retval"); 1217 "$retval", create_trace_probe);
2071 if (WARN_ON_ONCE(ret)) { 1218 if (WARN_ON_ONCE(ret)) {
2072 pr_warning("error on probing function return.\n"); 1219 pr_warning("error on probing function return.\n");
2073 warn++; 1220 warn++;
@@ -2101,13 +1248,13 @@ static __init int kprobe_trace_self_tests_init(void)
2101 } else 1248 } else
2102 disable_trace_probe(tp, TP_FLAG_TRACE); 1249 disable_trace_probe(tp, TP_FLAG_TRACE);
2103 1250
2104 ret = command_trace_probe("-:testprobe"); 1251 ret = traceprobe_command("-:testprobe", create_trace_probe);
2105 if (WARN_ON_ONCE(ret)) { 1252 if (WARN_ON_ONCE(ret)) {
2106 pr_warning("error on deleting a probe.\n"); 1253 pr_warning("error on deleting a probe.\n");
2107 warn++; 1254 warn++;
2108 } 1255 }
2109 1256
2110 ret = command_trace_probe("-:testprobe2"); 1257 ret = traceprobe_command("-:testprobe2", create_trace_probe);
2111 if (WARN_ON_ONCE(ret)) { 1258 if (WARN_ON_ONCE(ret)) {
2112 pr_warning("error on deleting a probe.\n"); 1259 pr_warning("error on deleting a probe.\n");
2113 warn++; 1260 warn++;