Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                  |  15
-rw-r--r--  kernel/trace/Makefile                 |   4
-rw-r--r--  kernel/trace/blktrace.c               |   5
-rw-r--r--  kernel/trace/ftrace.c                 | 141
-rw-r--r--  kernel/trace/ring_buffer.c            |  45
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c  |   1
-rw-r--r--  kernel/trace/trace.c                  | 204
-rw-r--r--  kernel/trace/trace.h                  |  11
-rw-r--r--  kernel/trace/trace_branch.c           |  19
-rw-r--r--  kernel/trace/trace_clock.c            |   1
-rw-r--r--  kernel/trace/trace_event_profile.c    |  52
-rw-r--r--  kernel/trace/trace_events.c           |  81
-rw-r--r--  kernel/trace/trace_events_filter.c    |  33
-rw-r--r--  kernel/trace/trace_export.c           |  87
-rw-r--r--  kernel/trace/trace_functions_graph.c  | 107
-rw-r--r--  kernel/trace/trace_kprobe.c           | 306
-rw-r--r--  kernel/trace/trace_stack.c            |  24
-rw-r--r--  kernel/trace/trace_syscalls.c         | 189
18 files changed, 701 insertions, 624 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6c22d8a2f289..13e13d428cd3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -27,9 +27,7 @@ config HAVE_FUNCTION_GRAPH_TRACER
27config HAVE_FUNCTION_GRAPH_FP_TEST 27config HAVE_FUNCTION_GRAPH_FP_TEST
28 bool 28 bool
29 help 29 help
30 An arch may pass in a unique value (frame pointer) to both the 30 See Documentation/trace/ftrace-design.txt
31 entering and exiting of a function. On exit, the value is compared
32 and if it does not match, then it will panic the kernel.
33 31
34config HAVE_FUNCTION_TRACE_MCOUNT_TEST 32config HAVE_FUNCTION_TRACE_MCOUNT_TEST
35 bool 33 bool
@@ -330,15 +328,6 @@ config BRANCH_TRACER
330 328
331 Say N if unsure. 329 Say N if unsure.
332 330
333config POWER_TRACER
334 bool "Trace power consumption behavior"
335 depends on X86
336 select GENERIC_TRACER
337 help
338 This tracer helps developers to analyze and optimize the kernel's
339 power management decisions, specifically the C-state and P-state
340 behavior.
341
342config KSYM_TRACER 331config KSYM_TRACER
343 bool "Trace read and write access on kernel memory locations" 332 bool "Trace read and write access on kernel memory locations"
344 depends on HAVE_HW_BREAKPOINT 333 depends on HAVE_HW_BREAKPOINT
@@ -451,7 +440,7 @@ config BLK_DEV_IO_TRACE
451 440
452config KPROBE_EVENT 441config KPROBE_EVENT
453 depends on KPROBES 442 depends on KPROBES
454 depends on X86 443 depends on HAVE_REGS_AND_STACK_ACCESS_API
455 bool "Enable kprobes-based dynamic events" 444 bool "Enable kprobes-based dynamic events"
456 select TRACING 445 select TRACING
457 default y 446 default y
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index cd9ecd89ec77..d00c6fe23f54 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -51,7 +51,9 @@ endif
51obj-$(CONFIG_EVENT_TRACING) += trace_events.o 51obj-$(CONFIG_EVENT_TRACING) += trace_events.o
52obj-$(CONFIG_EVENT_TRACING) += trace_export.o 52obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54ifeq ($(CONFIG_PERF_EVENTS),y)
55obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o
56endif
55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 57obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 58obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
57obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o 59obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d9d6206e0b14..07f945a99430 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -540,9 +540,10 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
540 if (ret) 540 if (ret)
541 return ret; 541 return ret;
542 542
543 if (copy_to_user(arg, &buts, sizeof(buts))) 543 if (copy_to_user(arg, &buts, sizeof(buts))) {
544 blk_trace_remove(q);
544 return -EFAULT; 545 return -EFAULT;
545 546 }
546 return 0; 547 return 0;
547} 548}
548EXPORT_SYMBOL_GPL(blk_trace_setup); 549EXPORT_SYMBOL_GPL(blk_trace_setup);
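[Editor's note] The blktrace hunk above makes blk_trace_setup() tear down the trace it just created when copying the setup results back to user space fails, instead of leaking it. A minimal user-space sketch of that undo-on-copy-failure pattern (setup_resource/teardown_resource/copy_out are hypothetical stand-ins, not kernel APIs):

#include <errno.h>
#include <stdio.h>
#include <string.h>

struct result { int id; };

static int  setup_resource(struct result *r)    { r->id = 42; return 0; }
static void teardown_resource(struct result *r) { r->id = 0; }

/* Stand-in for copy_to_user(): returns nonzero on failure. */
static int copy_out(void *dst, const void *src, size_t n)
{
	if (!dst)
		return 1;	/* simulate a faulting user pointer */
	memcpy(dst, src, n);
	return 0;
}

static int do_setup(struct result *user_buf)
{
	struct result r;
	int ret = setup_resource(&r);

	if (ret)
		return ret;

	if (copy_out(user_buf, &r, sizeof(r))) {
		/* The caller never learns about the resource: undo it. */
		teardown_resource(&r);
		return -EFAULT;
	}
	return 0;
}

int main(void)
{
	struct result ok;

	printf("good copy: %d\n", do_setup(&ok));	/* 0 */
	printf("bad copy:  %d\n", do_setup(NULL));	/* -EFAULT */
	return 0;
}
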
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7968762c8167..d9062f5cc0c0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,12 +22,12 @@
22#include <linux/hardirq.h> 22#include <linux/hardirq.h>
23#include <linux/kthread.h> 23#include <linux/kthread.h>
24#include <linux/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/kprobes.h>
26#include <linux/ftrace.h> 25#include <linux/ftrace.h>
27#include <linux/sysctl.h> 26#include <linux/sysctl.h>
28#include <linux/ctype.h> 27#include <linux/ctype.h>
29#include <linux/list.h> 28#include <linux/list.h>
30#include <linux/hash.h> 29#include <linux/hash.h>
30#include <linux/rcupdate.h>
31 31
32#include <trace/events/sched.h> 32#include <trace/events/sched.h>
33 33
@@ -85,22 +85,22 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
85ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; 85ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
86ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 86ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
87 87
88#ifdef CONFIG_FUNCTION_GRAPH_TRACER 88/*
89static int ftrace_set_func(unsigned long *array, int *idx, char *buffer); 89 * Traverse the ftrace_list, invoking all entries. The reason that we
90#endif 90 * can use rcu_dereference_raw() is that elements removed from this list
91 91 * are simply leaked, so there is no need to interact with a grace-period
92 * mechanism. The rcu_dereference_raw() calls are needed to handle
93 * concurrent insertions into the ftrace_list.
94 *
95 * Silly Alpha and silly pointer-speculation compiler optimizations!
96 */
92static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 97static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
93{ 98{
94 struct ftrace_ops *op = ftrace_list; 99 struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
95
96 /* in case someone actually ports this to alpha! */
97 read_barrier_depends();
98 100
99 while (op != &ftrace_list_end) { 101 while (op != &ftrace_list_end) {
100 /* silly alpha */
101 read_barrier_depends();
102 op->func(ip, parent_ip); 102 op->func(ip, parent_ip);
103 op = op->next; 103 op = rcu_dereference_raw(op->next); /*see above*/
104 }; 104 };
105} 105}
106 106
@@ -155,8 +155,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
155 * the ops->next pointer is valid before another CPU sees 155 * the ops->next pointer is valid before another CPU sees
156 * the ops pointer included into the ftrace_list. 156 * the ops pointer included into the ftrace_list.
157 */ 157 */
158 smp_wmb(); 158 rcu_assign_pointer(ftrace_list, ops);
159 ftrace_list = ops;
160 159
161 if (ftrace_enabled) { 160 if (ftrace_enabled) {
162 ftrace_func_t func; 161 ftrace_func_t func;
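[Editor's note] The two hunks above replace open-coded barriers with rcu_assign_pointer()/rcu_dereference_raw() when publishing and walking the ftrace_ops list; as the new comment says, removed entries are simply leaked, so only insertion ordering matters. A rough user-space analog of that ordering using C11 atomics; this only illustrates publish-with-release / read-with-acquire for a single publisher, it is not RCU and none of these names are kernel APIs:

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct ops {
	void (*func)(void);
	struct ops *next;
};

static struct ops list_end = { NULL, NULL };
static _Atomic(struct ops *) ops_list = &list_end;

/* Publisher: fully initialise the node, then release-store the head,
 * analogous to rcu_assign_pointer(ftrace_list, ops). */
static void register_ops(struct ops *op)
{
	op->next = atomic_load_explicit(&ops_list, memory_order_relaxed);
	atomic_store_explicit(&ops_list, op, memory_order_release);
}

/* Reader: acquire-load the head and walk it, analogous to the
 * rcu_dereference_raw() loop in ftrace_list_func(). Plain loads of
 * ->next are fine here: nodes are never freed and there is a single
 * publisher, so everything reachable was published before the head. */
static void call_all(void)
{
	struct ops *op = atomic_load_explicit(&ops_list, memory_order_acquire);

	while (op != &list_end) {
		op->func();
		op = op->next;
	}
}

static void hello(void) { puts("hello"); }

int main(void)
{
	static struct ops my_ops = { hello, NULL };

	register_ops(&my_ops);
	call_all();
	return 0;
}
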
@@ -898,36 +897,6 @@ static struct dyn_ftrace *ftrace_free_records;
898 } \ 897 } \
899 } 898 }
900 899
901#ifdef CONFIG_KPROBES
902
903static int frozen_record_count;
904
905static inline void freeze_record(struct dyn_ftrace *rec)
906{
907 if (!(rec->flags & FTRACE_FL_FROZEN)) {
908 rec->flags |= FTRACE_FL_FROZEN;
909 frozen_record_count++;
910 }
911}
912
913static inline void unfreeze_record(struct dyn_ftrace *rec)
914{
915 if (rec->flags & FTRACE_FL_FROZEN) {
916 rec->flags &= ~FTRACE_FL_FROZEN;
917 frozen_record_count--;
918 }
919}
920
921static inline int record_frozen(struct dyn_ftrace *rec)
922{
923 return rec->flags & FTRACE_FL_FROZEN;
924}
925#else
926# define freeze_record(rec) ({ 0; })
927# define unfreeze_record(rec) ({ 0; })
928# define record_frozen(rec) ({ 0; })
929#endif /* CONFIG_KPROBES */
930
931static void ftrace_free_rec(struct dyn_ftrace *rec) 900static void ftrace_free_rec(struct dyn_ftrace *rec)
932{ 901{
933 rec->freelist = ftrace_free_records; 902 rec->freelist = ftrace_free_records;
@@ -1025,6 +994,21 @@ static void ftrace_bug(int failed, unsigned long ip)
1025} 994}
1026 995
1027 996
997/* Return 1 if the address range is reserved for ftrace */
998int ftrace_text_reserved(void *start, void *end)
999{
1000 struct dyn_ftrace *rec;
1001 struct ftrace_page *pg;
1002
1003 do_for_each_ftrace_rec(pg, rec) {
1004 if (rec->ip <= (unsigned long)end &&
1005 rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
1006 return 1;
1007 } while_for_each_ftrace_rec();
1008 return 0;
1009}
1010
1011
1028static int 1012static int
1029__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1013__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1030{ 1014{
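[Editor's note] ftrace_text_reserved(), added above, reports whether a [start, end] range overlaps any recorded mcount call site; the test it applies per record (rec->ip <= end && rec->ip + MCOUNT_INSN_SIZE > start) is a standard interval-intersection check. A stand-alone sketch of just that predicate (INSN_SIZE here is an assumed stand-in for MCOUNT_INSN_SIZE):

#include <stdio.h>

#define INSN_SIZE 5	/* stand-in for MCOUNT_INSN_SIZE */

/* Return 1 if [ip, ip + INSN_SIZE) intersects [start, end]. */
static int range_reserved(unsigned long ip, unsigned long start,
			  unsigned long end)
{
	return ip <= end && ip + INSN_SIZE > start;
}

int main(void)
{
	/* A call site occupying bytes 100..104. */
	printf("%d\n", range_reserved(100, 104, 200));	/* 1: overlaps the tail */
	printf("%d\n", range_reserved(100, 105, 200));	/* 0: starts after it   */
	printf("%d\n", range_reserved(100,  90,  99));	/* 0: ends before it    */
	printf("%d\n", range_reserved(100,  90, 100));	/* 1: touches byte 100  */
	return 0;
}
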
@@ -1076,14 +1060,6 @@ static void ftrace_replace_code(int enable)
1076 !(rec->flags & FTRACE_FL_CONVERTED)) 1060 !(rec->flags & FTRACE_FL_CONVERTED))
1077 continue; 1061 continue;
1078 1062
1079 /* ignore updates to this record's mcount site */
1080 if (get_kprobe((void *)rec->ip)) {
1081 freeze_record(rec);
1082 continue;
1083 } else {
1084 unfreeze_record(rec);
1085 }
1086
1087 failed = __ftrace_replace_code(rec, enable); 1063 failed = __ftrace_replace_code(rec, enable);
1088 if (failed) { 1064 if (failed) {
1089 rec->flags |= FTRACE_FL_FAILED; 1065 rec->flags |= FTRACE_FL_FAILED;
@@ -1690,7 +1666,7 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
1690static int ftrace_match(char *str, char *regex, int len, int type) 1666static int ftrace_match(char *str, char *regex, int len, int type)
1691{ 1667{
1692 int matched = 0; 1668 int matched = 0;
1693 char *ptr; 1669 int slen;
1694 1670
1695 switch (type) { 1671 switch (type) {
1696 case MATCH_FULL: 1672 case MATCH_FULL:
@@ -1706,8 +1682,8 @@ static int ftrace_match(char *str, char *regex, int len, int type)
1706 matched = 1; 1682 matched = 1;
1707 break; 1683 break;
1708 case MATCH_END_ONLY: 1684 case MATCH_END_ONLY:
1709 ptr = strstr(str, regex); 1685 slen = strlen(str);
1710 if (ptr && (ptr[len] == 0)) 1686 if (slen >= len && memcmp(str + slen - len, regex, len) == 0)
1711 matched = 1; 1687 matched = 1;
1712 break; 1688 break;
1713 } 1689 }
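[Editor's note] The MATCH_END_ONLY case above now checks the suffix directly with strlen() + memcmp() instead of relying on strstr(), which returns the first occurrence and therefore misses a match when the pattern also appears earlier in the string. A user-space version of the new test:

#include <stdio.h>
#include <string.h>

/* Return 1 if str ends with the len-byte pattern. */
static int match_end_only(const char *str, const char *pattern, int len)
{
	int slen = (int)strlen(str);

	return slen >= len && memcmp(str + slen - len, pattern, len) == 0;
}

int main(void)
{
	/* "lock" occurs twice; only the trailing occurrence should match,
	 * which the old strstr()-based check would have rejected. */
	printf("%d\n", match_end_only("lock_acquire_lock", "lock", 4));	/* 1 */
	printf("%d\n", match_end_only("lock_acquire", "lock", 4));	/* 0 */
	printf("%d\n", match_end_only("lk", "lock", 4));		/* 0 */
	return 0;
}
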
@@ -2300,6 +2276,8 @@ __setup("ftrace_filter=", set_ftrace_filter);
2300 2276
2301#ifdef CONFIG_FUNCTION_GRAPH_TRACER 2277#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2302static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; 2278static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
2279static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
2280
2303static int __init set_graph_function(char *str) 2281static int __init set_graph_function(char *str)
2304{ 2282{
2305 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); 2283 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
@@ -2426,6 +2404,7 @@ static const struct file_operations ftrace_notrace_fops = {
2426static DEFINE_MUTEX(graph_lock); 2404static DEFINE_MUTEX(graph_lock);
2427 2405
2428int ftrace_graph_count; 2406int ftrace_graph_count;
2407int ftrace_graph_filter_enabled;
2429unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; 2408unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
2430 2409
2431static void * 2410static void *
@@ -2448,7 +2427,7 @@ static void *g_start(struct seq_file *m, loff_t *pos)
2448 mutex_lock(&graph_lock); 2427 mutex_lock(&graph_lock);
2449 2428
2450 /* Nothing, tell g_show to print all functions are enabled */ 2429 /* Nothing, tell g_show to print all functions are enabled */
2451 if (!ftrace_graph_count && !*pos) 2430 if (!ftrace_graph_filter_enabled && !*pos)
2452 return (void *)1; 2431 return (void *)1;
2453 2432
2454 return __g_next(m, pos); 2433 return __g_next(m, pos);
@@ -2494,6 +2473,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
2494 mutex_lock(&graph_lock); 2473 mutex_lock(&graph_lock);
2495 if ((file->f_mode & FMODE_WRITE) && 2474 if ((file->f_mode & FMODE_WRITE) &&
2496 (file->f_flags & O_TRUNC)) { 2475 (file->f_flags & O_TRUNC)) {
2476 ftrace_graph_filter_enabled = 0;
2497 ftrace_graph_count = 0; 2477 ftrace_graph_count = 0;
2498 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); 2478 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
2499 } 2479 }
@@ -2519,7 +2499,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2519 struct dyn_ftrace *rec; 2499 struct dyn_ftrace *rec;
2520 struct ftrace_page *pg; 2500 struct ftrace_page *pg;
2521 int search_len; 2501 int search_len;
2522 int found = 0; 2502 int fail = 1;
2523 int type, not; 2503 int type, not;
2524 char *search; 2504 char *search;
2525 bool exists; 2505 bool exists;
@@ -2530,37 +2510,51 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2530 2510
2531 /* decode regex */ 2511 /* decode regex */
2532 type = filter_parse_regex(buffer, strlen(buffer), &search, &not); 2512 type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
2533 if (not) 2513 if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
2534 return -EINVAL; 2514 return -EBUSY;
2535 2515
2536 search_len = strlen(search); 2516 search_len = strlen(search);
2537 2517
2538 mutex_lock(&ftrace_lock); 2518 mutex_lock(&ftrace_lock);
2539 do_for_each_ftrace_rec(pg, rec) { 2519 do_for_each_ftrace_rec(pg, rec) {
2540 2520
2541 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2542 break;
2543
2544 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) 2521 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
2545 continue; 2522 continue;
2546 2523
2547 if (ftrace_match_record(rec, search, search_len, type)) { 2524 if (ftrace_match_record(rec, search, search_len, type)) {
2548 /* ensure it is not already in the array */ 2525 /* if it is in the array */
2549 exists = false; 2526 exists = false;
2550 for (i = 0; i < *idx; i++) 2527 for (i = 0; i < *idx; i++) {
2551 if (array[i] == rec->ip) { 2528 if (array[i] == rec->ip) {
2552 exists = true; 2529 exists = true;
2553 break; 2530 break;
2554 } 2531 }
2555 if (!exists) 2532 }
2556 array[(*idx)++] = rec->ip; 2533
2557 found = 1; 2534 if (!not) {
2535 fail = 0;
2536 if (!exists) {
2537 array[(*idx)++] = rec->ip;
2538 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2539 goto out;
2540 }
2541 } else {
2542 if (exists) {
2543 array[i] = array[--(*idx)];
2544 array[*idx] = 0;
2545 fail = 0;
2546 }
2547 }
2558 } 2548 }
2559 } while_for_each_ftrace_rec(); 2549 } while_for_each_ftrace_rec();
2560 2550out:
2561 mutex_unlock(&ftrace_lock); 2551 mutex_unlock(&ftrace_lock);
2562 2552
2563 return found ? 0 : -EINVAL; 2553 if (fail)
2554 return -EINVAL;
2555
2556 ftrace_graph_filter_enabled = 1;
2557 return 0;
2564} 2558}
2565 2559
2566static ssize_t 2560static ssize_t
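[Editor's note] ftrace_set_func() above now also handles negated patterns: when the parsed regex carries "not", a matching entry is removed from the graph-filter array by overwriting its slot with the last element and shrinking the count, leaving no hole (order is not preserved). A minimal sketch of that remove-by-swap step; the helper name is mine, not the kernel's:

#include <stdio.h>

/* Remove array[i] by moving the last element into its slot. */
static void remove_entry(unsigned long *array, int *idx, int i)
{
	array[i] = array[--(*idx)];
	array[*idx] = 0;
}

int main(void)
{
	unsigned long funcs[4] = { 0x100, 0x200, 0x300, 0x400 };
	int count = 4;

	remove_entry(funcs, &count, 1);	/* drop 0x200 */

	for (int i = 0; i < count; i++)
		printf("%#lx ", funcs[i]);
	printf("\n");	/* prints: 0x100 0x400 0x300 */
	return 0;
}
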
@@ -2570,16 +2564,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
2570 struct trace_parser parser; 2564 struct trace_parser parser;
2571 ssize_t read, ret; 2565 ssize_t read, ret;
2572 2566
2573 if (!cnt || cnt < 0) 2567 if (!cnt)
2574 return 0; 2568 return 0;
2575 2569
2576 mutex_lock(&graph_lock); 2570 mutex_lock(&graph_lock);
2577 2571
2578 if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
2579 ret = -EBUSY;
2580 goto out_unlock;
2581 }
2582
2583 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { 2572 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
2584 ret = -ENOMEM; 2573 ret = -ENOMEM;
2585 goto out_unlock; 2574 goto out_unlock;
@@ -3364,6 +3353,7 @@ void ftrace_graph_init_task(struct task_struct *t)
3364{ 3353{
3365 /* Make sure we do not use the parent ret_stack */ 3354 /* Make sure we do not use the parent ret_stack */
3366 t->ret_stack = NULL; 3355 t->ret_stack = NULL;
3356 t->curr_ret_stack = -1;
3367 3357
3368 if (ftrace_graph_active) { 3358 if (ftrace_graph_active) {
3369 struct ftrace_ret_stack *ret_stack; 3359 struct ftrace_ret_stack *ret_stack;
@@ -3373,7 +3363,6 @@ void ftrace_graph_init_task(struct task_struct *t)
3373 GFP_KERNEL); 3363 GFP_KERNEL);
3374 if (!ret_stack) 3364 if (!ret_stack)
3375 return; 3365 return;
3376 t->curr_ret_stack = -1;
3377 atomic_set(&t->tracing_graph_pause, 0); 3366 atomic_set(&t->tracing_graph_pause, 0);
3378 atomic_set(&t->trace_overrun, 0); 3367 atomic_set(&t->trace_overrun, 0);
3379 t->ftrace_timestamp = 0; 3368 t->ftrace_timestamp = 0;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2326b04c95c4..05a9f83b8819 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -20,6 +20,7 @@
20#include <linux/cpu.h> 20#include <linux/cpu.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22 22
23#include <asm/local.h>
23#include "trace.h" 24#include "trace.h"
24 25
25/* 26/*
@@ -464,6 +465,8 @@ struct ring_buffer_iter {
464 struct ring_buffer_per_cpu *cpu_buffer; 465 struct ring_buffer_per_cpu *cpu_buffer;
465 unsigned long head; 466 unsigned long head;
466 struct buffer_page *head_page; 467 struct buffer_page *head_page;
468 struct buffer_page *cache_reader_page;
469 unsigned long cache_read;
467 u64 read_stamp; 470 u64 read_stamp;
468}; 471};
469 472
@@ -2230,12 +2233,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2230 if (ring_buffer_flags != RB_BUFFERS_ON) 2233 if (ring_buffer_flags != RB_BUFFERS_ON)
2231 return NULL; 2234 return NULL;
2232 2235
2233 if (atomic_read(&buffer->record_disabled))
2234 return NULL;
2235
2236 /* If we are tracing schedule, we don't want to recurse */ 2236 /* If we are tracing schedule, we don't want to recurse */
2237 resched = ftrace_preempt_disable(); 2237 resched = ftrace_preempt_disable();
2238 2238
2239 if (atomic_read(&buffer->record_disabled))
2240 goto out_nocheck;
2241
2239 if (trace_recursive_lock()) 2242 if (trace_recursive_lock())
2240 goto out_nocheck; 2243 goto out_nocheck;
2241 2244
@@ -2467,11 +2470,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
2467 if (ring_buffer_flags != RB_BUFFERS_ON) 2470 if (ring_buffer_flags != RB_BUFFERS_ON)
2468 return -EBUSY; 2471 return -EBUSY;
2469 2472
2470 if (atomic_read(&buffer->record_disabled))
2471 return -EBUSY;
2472
2473 resched = ftrace_preempt_disable(); 2473 resched = ftrace_preempt_disable();
2474 2474
2475 if (atomic_read(&buffer->record_disabled))
2476 goto out;
2477
2475 cpu = raw_smp_processor_id(); 2478 cpu = raw_smp_processor_id();
2476 2479
2477 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2480 if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -2539,7 +2542,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
2539 * @buffer: The ring buffer to enable writes 2542 * @buffer: The ring buffer to enable writes
2540 * 2543 *
2541 * Note, multiple disables will need the same number of enables 2544 * Note, multiple disables will need the same number of enables
2542 * to truely enable the writing (much like preempt_disable). 2545 * to truly enable the writing (much like preempt_disable).
2543 */ 2546 */
2544void ring_buffer_record_enable(struct ring_buffer *buffer) 2547void ring_buffer_record_enable(struct ring_buffer *buffer)
2545{ 2548{
@@ -2575,7 +2578,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
2575 * @cpu: The CPU to enable. 2578 * @cpu: The CPU to enable.
2576 * 2579 *
2577 * Note, multiple disables will need the same number of enables 2580 * Note, multiple disables will need the same number of enables
2578 * to truely enable the writing (much like preempt_disable). 2581 * to truly enable the writing (much like preempt_disable).
2579 */ 2582 */
2580void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) 2583void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2581{ 2584{
@@ -2716,6 +2719,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2716 iter->read_stamp = cpu_buffer->read_stamp; 2719 iter->read_stamp = cpu_buffer->read_stamp;
2717 else 2720 else
2718 iter->read_stamp = iter->head_page->page->time_stamp; 2721 iter->read_stamp = iter->head_page->page->time_stamp;
2722 iter->cache_reader_page = cpu_buffer->reader_page;
2723 iter->cache_read = cpu_buffer->read;
2719} 2724}
2720 2725
2721/** 2726/**
@@ -2869,7 +2874,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2869 * Splice the empty reader page into the list around the head. 2874 * Splice the empty reader page into the list around the head.
2870 */ 2875 */
2871 reader = rb_set_head_page(cpu_buffer); 2876 reader = rb_set_head_page(cpu_buffer);
2872 cpu_buffer->reader_page->list.next = reader->list.next; 2877 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
2873 cpu_buffer->reader_page->list.prev = reader->list.prev; 2878 cpu_buffer->reader_page->list.prev = reader->list.prev;
2874 2879
2875 /* 2880 /*
@@ -2906,7 +2911,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2906 * 2911 *
2907 * Now make the new head point back to the reader page. 2912 * Now make the new head point back to the reader page.
2908 */ 2913 */
2909 reader->list.next->prev = &cpu_buffer->reader_page->list; 2914 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
2910 rb_inc_page(cpu_buffer, &cpu_buffer->head_page); 2915 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
2911 2916
2912 /* Finally update the reader page to the new head */ 2917 /* Finally update the reader page to the new head */
@@ -3060,13 +3065,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3060 struct ring_buffer_event *event; 3065 struct ring_buffer_event *event;
3061 int nr_loops = 0; 3066 int nr_loops = 0;
3062 3067
3063 if (ring_buffer_iter_empty(iter))
3064 return NULL;
3065
3066 cpu_buffer = iter->cpu_buffer; 3068 cpu_buffer = iter->cpu_buffer;
3067 buffer = cpu_buffer->buffer; 3069 buffer = cpu_buffer->buffer;
3068 3070
3071 /*
3072 * Check if someone performed a consuming read to
3073 * the buffer. A consuming read invalidates the iterator
3074 * and we need to reset the iterator in this case.
3075 */
3076 if (unlikely(iter->cache_read != cpu_buffer->read ||
3077 iter->cache_reader_page != cpu_buffer->reader_page))
3078 rb_iter_reset(iter);
3079
3069 again: 3080 again:
3081 if (ring_buffer_iter_empty(iter))
3082 return NULL;
3083
3070 /* 3084 /*
3071 * We repeat when a timestamp is encountered. 3085 * We repeat when a timestamp is encountered.
3072 * We can get multiple timestamps by nested interrupts or also 3086 * We can get multiple timestamps by nested interrupts or also
@@ -3081,6 +3095,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3081 if (rb_per_cpu_empty(cpu_buffer)) 3095 if (rb_per_cpu_empty(cpu_buffer))
3082 return NULL; 3096 return NULL;
3083 3097
3098 if (iter->head >= local_read(&iter->head_page->page->commit)) {
3099 rb_inc_iter(iter);
3100 goto again;
3101 }
3102
3084 event = rb_iter_head_event(iter); 3103 event = rb_iter_head_event(iter);
3085 3104
3086 switch (event->type_len) { 3105 switch (event->type_len) {
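[Editor's note] The ring buffer changes above cache the reader page and the read count when an iterator is reset; rb_iter_peek() compares the cached values against the live ones and rewinds the iterator if a consuming read has invalidated it in the meantime. A simplified user-space sketch of that detect-and-rewind idea, collapsing the two cached fields into one counter (all names here are illustrative, not the ring buffer's):

#include <stdio.h>

struct buf {
	int data[8];
	int len;
	unsigned long consumed;	/* bumped by every consuming read */
};

struct iter {
	struct buf *b;
	int pos;
	unsigned long cached_consumed;	/* snapshot taken when (re)set */
};

static void iter_reset(struct iter *it)
{
	it->pos = 0;
	it->cached_consumed = it->b->consumed;
}

static int iter_peek(struct iter *it, int *out)
{
	/* A consuming read happened since the snapshot: start over. */
	if (it->cached_consumed != it->b->consumed)
		iter_reset(it);

	if (it->pos >= it->b->len)
		return 0;
	*out = it->b->data[it->pos];
	return 1;
}

int main(void)
{
	struct buf b = { { 1, 2, 3, 4 }, 4, 0 };
	struct iter it = { &b, 0, 0 };
	int v;

	iter_reset(&it);
	it.pos = 2;		/* iterate part way through */
	b.consumed++;		/* someone performed a consuming read */

	if (iter_peek(&it, &v))
		printf("restarted at %d\n", v);	/* restarted at 1 */
	return 0;
}
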
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index b2477caf09c2..df74c7982255 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -8,6 +8,7 @@
8#include <linux/kthread.h> 8#include <linux/kthread.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <asm/local.h>
11 12
12struct rb_page { 13struct rb_page {
13 u64 ts; 14 u64 ts;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0df1b0f2cb9e..3ec2ee6f6560 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -32,6 +32,7 @@
32#include <linux/splice.h> 32#include <linux/splice.h>
33#include <linux/kdebug.h> 33#include <linux/kdebug.h>
34#include <linux/string.h> 34#include <linux/string.h>
35#include <linux/rwsem.h>
35#include <linux/ctype.h> 36#include <linux/ctype.h>
36#include <linux/init.h> 37#include <linux/init.h>
37#include <linux/poll.h> 38#include <linux/poll.h>
@@ -91,20 +92,17 @@ DEFINE_PER_CPU(int, ftrace_cpu_disabled);
91static inline void ftrace_disable_cpu(void) 92static inline void ftrace_disable_cpu(void)
92{ 93{
93 preempt_disable(); 94 preempt_disable();
94 __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled)); 95 __this_cpu_inc(ftrace_cpu_disabled);
95} 96}
96 97
97static inline void ftrace_enable_cpu(void) 98static inline void ftrace_enable_cpu(void)
98{ 99{
99 __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled)); 100 __this_cpu_dec(ftrace_cpu_disabled);
100 preempt_enable(); 101 preempt_enable();
101} 102}
102 103
103static cpumask_var_t __read_mostly tracing_buffer_mask; 104static cpumask_var_t __read_mostly tracing_buffer_mask;
104 105
105/* Define which cpu buffers are currently read in trace_pipe */
106static cpumask_var_t tracing_reader_cpumask;
107
108#define for_each_tracing_cpu(cpu) \ 106#define for_each_tracing_cpu(cpu) \
109 for_each_cpu(cpu, tracing_buffer_mask) 107 for_each_cpu(cpu, tracing_buffer_mask)
110 108
@@ -243,12 +241,91 @@ static struct tracer *current_trace __read_mostly;
243 241
244/* 242/*
245 * trace_types_lock is used to protect the trace_types list. 243 * trace_types_lock is used to protect the trace_types list.
246 * This lock is also used to keep user access serialized.
247 * Accesses from userspace will grab this lock while userspace
248 * activities happen inside the kernel.
249 */ 244 */
250static DEFINE_MUTEX(trace_types_lock); 245static DEFINE_MUTEX(trace_types_lock);
251 246
247/*
248 * serialize the access of the ring buffer
249 *
250 * ring buffer serializes readers, but it is low level protection.
251 * The validity of the events (which returns by ring_buffer_peek() ..etc)
252 * are not protected by ring buffer.
253 *
254 * The content of events may become garbage if we allow other process consumes
255 * these events concurrently:
256 * A) the page of the consumed events may become a normal page
257 * (not reader page) in ring buffer, and this page will be rewrited
258 * by events producer.
259 * B) The page of the consumed events may become a page for splice_read,
260 * and this page will be returned to system.
261 *
262 * These primitives allow multi process access to different cpu ring buffer
263 * concurrently.
264 *
265 * These primitives don't distinguish read-only and read-consume access.
266 * Multi read-only access are also serialized.
267 */
268
269#ifdef CONFIG_SMP
270static DECLARE_RWSEM(all_cpu_access_lock);
271static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
272
273static inline void trace_access_lock(int cpu)
274{
275 if (cpu == TRACE_PIPE_ALL_CPU) {
276 /* gain it for accessing the whole ring buffer. */
277 down_write(&all_cpu_access_lock);
278 } else {
279 /* gain it for accessing a cpu ring buffer. */
280
281 /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
282 down_read(&all_cpu_access_lock);
283
284 /* Secondly block other access to this @cpu ring buffer. */
285 mutex_lock(&per_cpu(cpu_access_lock, cpu));
286 }
287}
288
289static inline void trace_access_unlock(int cpu)
290{
291 if (cpu == TRACE_PIPE_ALL_CPU) {
292 up_write(&all_cpu_access_lock);
293 } else {
294 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
295 up_read(&all_cpu_access_lock);
296 }
297}
298
299static inline void trace_access_lock_init(void)
300{
301 int cpu;
302
303 for_each_possible_cpu(cpu)
304 mutex_init(&per_cpu(cpu_access_lock, cpu));
305}
306
307#else
308
309static DEFINE_MUTEX(access_lock);
310
311static inline void trace_access_lock(int cpu)
312{
313 (void)cpu;
314 mutex_lock(&access_lock);
315}
316
317static inline void trace_access_unlock(int cpu)
318{
319 (void)cpu;
320 mutex_unlock(&access_lock);
321}
322
323static inline void trace_access_lock_init(void)
324{
325}
326
327#endif
328
252/* trace_wait is a waitqueue for tasks blocked on trace_poll */ 329/* trace_wait is a waitqueue for tasks blocked on trace_poll */
253static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 330static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
254 331
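[Editor's note] The trace_access_lock() helpers above let readers of different per-cpu buffers run in parallel while a TRACE_PIPE_ALL_CPU reader excludes everyone: a per-cpu reader takes all_cpu_access_lock for read plus its cpu's mutex, the all-cpu reader takes the rwsem for write. A pthread sketch of the same two-level scheme, with -1 standing in for TRACE_PIPE_ALL_CPU (this mirrors the idea only; build with -pthread):

#include <pthread.h>
#include <stdio.h>

#define NCPUS    4
#define ALL_CPUS (-1)

static pthread_rwlock_t all_cpu_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t  cpu_lock[NCPUS];

static void access_lock_init(void)
{
	for (int cpu = 0; cpu < NCPUS; cpu++)
		pthread_mutex_init(&cpu_lock[cpu], NULL);
}

static void access_lock(int cpu)
{
	if (cpu == ALL_CPUS) {
		pthread_rwlock_wrlock(&all_cpu_lock);	/* exclude everyone */
	} else {
		pthread_rwlock_rdlock(&all_cpu_lock);	/* block all-cpu readers */
		pthread_mutex_lock(&cpu_lock[cpu]);	/* serialize this cpu */
	}
}

static void access_unlock(int cpu)
{
	if (cpu == ALL_CPUS) {
		pthread_rwlock_unlock(&all_cpu_lock);
	} else {
		pthread_mutex_unlock(&cpu_lock[cpu]);
		pthread_rwlock_unlock(&all_cpu_lock);
	}
}

int main(void)
{
	access_lock_init();

	access_lock(2);			/* reader on cpu 2's buffer */
	puts("reading cpu 2 buffer");
	access_unlock(2);

	access_lock(ALL_CPUS);		/* reader over the whole buffer */
	puts("reading all buffers");
	access_unlock(ALL_CPUS);
	return 0;
}
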
@@ -297,6 +374,21 @@ static int __init set_buf_size(char *str)
297} 374}
298__setup("trace_buf_size=", set_buf_size); 375__setup("trace_buf_size=", set_buf_size);
299 376
377static int __init set_tracing_thresh(char *str)
378{
379 unsigned long threshhold;
380 int ret;
381
382 if (!str)
383 return 0;
384 ret = strict_strtoul(str, 0, &threshhold);
385 if (ret < 0)
386 return 0;
387 tracing_thresh = threshhold * 1000;
388 return 1;
389}
390__setup("tracing_thresh=", set_tracing_thresh);
391
300unsigned long nsecs_to_usecs(unsigned long nsecs) 392unsigned long nsecs_to_usecs(unsigned long nsecs)
301{ 393{
302 return nsecs / 1000; 394 return nsecs / 1000;
@@ -502,9 +594,10 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
502static arch_spinlock_t ftrace_max_lock = 594static arch_spinlock_t ftrace_max_lock =
503 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 595 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
504 596
597unsigned long __read_mostly tracing_thresh;
598
505#ifdef CONFIG_TRACER_MAX_TRACE 599#ifdef CONFIG_TRACER_MAX_TRACE
506unsigned long __read_mostly tracing_max_latency; 600unsigned long __read_mostly tracing_max_latency;
507unsigned long __read_mostly tracing_thresh;
508 601
509/* 602/*
510 * Copy the new maximum trace into the separate maximum-trace 603 * Copy the new maximum trace into the separate maximum-trace
@@ -515,7 +608,7 @@ static void
515__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 608__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
516{ 609{
517 struct trace_array_cpu *data = tr->data[cpu]; 610 struct trace_array_cpu *data = tr->data[cpu];
518 struct trace_array_cpu *max_data = tr->data[cpu]; 611 struct trace_array_cpu *max_data;
519 612
520 max_tr.cpu = cpu; 613 max_tr.cpu = cpu;
521 max_tr.time_start = data->preempt_timestamp; 614 max_tr.time_start = data->preempt_timestamp;
@@ -525,7 +618,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
525 max_data->critical_start = data->critical_start; 618 max_data->critical_start = data->critical_start;
526 max_data->critical_end = data->critical_end; 619 max_data->critical_end = data->critical_end;
527 620
528 memcpy(data->comm, tsk->comm, TASK_COMM_LEN); 621 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
529 max_data->pid = tsk->pid; 622 max_data->pid = tsk->pid;
530 max_data->uid = task_uid(tsk); 623 max_data->uid = task_uid(tsk);
531 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; 624 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
@@ -747,10 +840,10 @@ out:
747 mutex_unlock(&trace_types_lock); 840 mutex_unlock(&trace_types_lock);
748} 841}
749 842
750static void __tracing_reset(struct trace_array *tr, int cpu) 843static void __tracing_reset(struct ring_buffer *buffer, int cpu)
751{ 844{
752 ftrace_disable_cpu(); 845 ftrace_disable_cpu();
753 ring_buffer_reset_cpu(tr->buffer, cpu); 846 ring_buffer_reset_cpu(buffer, cpu);
754 ftrace_enable_cpu(); 847 ftrace_enable_cpu();
755} 848}
756 849
@@ -762,7 +855,7 @@ void tracing_reset(struct trace_array *tr, int cpu)
762 855
763 /* Make sure all commits have finished */ 856 /* Make sure all commits have finished */
764 synchronize_sched(); 857 synchronize_sched();
765 __tracing_reset(tr, cpu); 858 __tracing_reset(buffer, cpu);
766 859
767 ring_buffer_record_enable(buffer); 860 ring_buffer_record_enable(buffer);
768} 861}
@@ -780,7 +873,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
780 tr->time_start = ftrace_now(tr->cpu); 873 tr->time_start = ftrace_now(tr->cpu);
781 874
782 for_each_online_cpu(cpu) 875 for_each_online_cpu(cpu)
783 __tracing_reset(tr, cpu); 876 __tracing_reset(buffer, cpu);
784 877
785 ring_buffer_record_enable(buffer); 878 ring_buffer_record_enable(buffer);
786} 879}
@@ -857,6 +950,8 @@ void tracing_start(void)
857 goto out; 950 goto out;
858 } 951 }
859 952
953 /* Prevent the buffers from switching */
954 arch_spin_lock(&ftrace_max_lock);
860 955
861 buffer = global_trace.buffer; 956 buffer = global_trace.buffer;
862 if (buffer) 957 if (buffer)
@@ -866,6 +961,8 @@ void tracing_start(void)
866 if (buffer) 961 if (buffer)
867 ring_buffer_record_enable(buffer); 962 ring_buffer_record_enable(buffer);
868 963
964 arch_spin_unlock(&ftrace_max_lock);
965
869 ftrace_start(); 966 ftrace_start();
870 out: 967 out:
871 spin_unlock_irqrestore(&tracing_start_lock, flags); 968 spin_unlock_irqrestore(&tracing_start_lock, flags);
@@ -887,6 +984,9 @@ void tracing_stop(void)
887 if (trace_stop_count++) 984 if (trace_stop_count++)
888 goto out; 985 goto out;
889 986
987 /* Prevent the buffers from switching */
988 arch_spin_lock(&ftrace_max_lock);
989
890 buffer = global_trace.buffer; 990 buffer = global_trace.buffer;
891 if (buffer) 991 if (buffer)
892 ring_buffer_record_disable(buffer); 992 ring_buffer_record_disable(buffer);
@@ -895,6 +995,8 @@ void tracing_stop(void)
895 if (buffer) 995 if (buffer)
896 ring_buffer_record_disable(buffer); 996 ring_buffer_record_disable(buffer);
897 997
998 arch_spin_unlock(&ftrace_max_lock);
999
898 out: 1000 out:
899 spin_unlock_irqrestore(&tracing_start_lock, flags); 1001 spin_unlock_irqrestore(&tracing_start_lock, flags);
900} 1002}
@@ -951,6 +1053,11 @@ void trace_find_cmdline(int pid, char comm[])
951 return; 1053 return;
952 } 1054 }
953 1055
1056 if (WARN_ON_ONCE(pid < 0)) {
1057 strcpy(comm, "<XXX>");
1058 return;
1059 }
1060
954 if (pid > PID_MAX_DEFAULT) { 1061 if (pid > PID_MAX_DEFAULT) {
955 strcpy(comm, "<...>"); 1062 strcpy(comm, "<...>");
956 return; 1063 return;
@@ -1084,7 +1191,7 @@ trace_function(struct trace_array *tr,
1084 struct ftrace_entry *entry; 1191 struct ftrace_entry *entry;
1085 1192
1086 /* If we are reading the ring buffer, don't trace */ 1193 /* If we are reading the ring buffer, don't trace */
1087 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) 1194 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1088 return; 1195 return;
1089 1196
1090 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), 1197 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@ -1177,6 +1284,13 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1177 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 1284 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1178 return; 1285 return;
1179 1286
1287 /*
1288 * NMIs can not handle page faults, even with fix ups.
1289 * The save user stack can (and often does) fault.
1290 */
1291 if (unlikely(in_nmi()))
1292 return;
1293
1180 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 1294 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1181 sizeof(*entry), flags, pc); 1295 sizeof(*entry), flags, pc);
1182 if (!event) 1296 if (!event)
@@ -1315,8 +1429,10 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1315 entry->fmt = fmt; 1429 entry->fmt = fmt;
1316 1430
1317 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1431 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1318 if (!filter_check_discard(call, entry, buffer, event)) 1432 if (!filter_check_discard(call, entry, buffer, event)) {
1319 ring_buffer_unlock_commit(buffer, event); 1433 ring_buffer_unlock_commit(buffer, event);
1434 ftrace_trace_stack(buffer, flags, 6, pc);
1435 }
1320 1436
1321out_unlock: 1437out_unlock:
1322 arch_spin_unlock(&trace_buf_lock); 1438 arch_spin_unlock(&trace_buf_lock);
@@ -1389,8 +1505,10 @@ int trace_array_vprintk(struct trace_array *tr,
1389 1505
1390 memcpy(&entry->buf, trace_buf, len); 1506 memcpy(&entry->buf, trace_buf, len);
1391 entry->buf[len] = '\0'; 1507 entry->buf[len] = '\0';
1392 if (!filter_check_discard(call, entry, buffer, event)) 1508 if (!filter_check_discard(call, entry, buffer, event)) {
1393 ring_buffer_unlock_commit(buffer, event); 1509 ring_buffer_unlock_commit(buffer, event);
1510 ftrace_trace_stack(buffer, irq_flags, 6, pc);
1511 }
1394 1512
1395 out_unlock: 1513 out_unlock:
1396 arch_spin_unlock(&trace_buf_lock); 1514 arch_spin_unlock(&trace_buf_lock);
@@ -1580,12 +1698,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1580} 1698}
1581 1699
1582/* 1700/*
1583 * No necessary locking here. The worst thing which can
1584 * happen is loosing events consumed at the same time
1585 * by a trace_pipe reader.
1586 * Other than that, we don't risk to crash the ring buffer
1587 * because it serializes the readers.
1588 *
1589 * The current tracer is copied to avoid a global locking 1701 * The current tracer is copied to avoid a global locking
1590 * all around. 1702 * all around.
1591 */ 1703 */
@@ -1623,6 +1735,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1623 1735
1624 ftrace_enable_cpu(); 1736 ftrace_enable_cpu();
1625 1737
1738 iter->leftover = 0;
1626 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1739 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1627 ; 1740 ;
1628 1741
@@ -1640,12 +1753,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1640 } 1753 }
1641 1754
1642 trace_event_read_lock(); 1755 trace_event_read_lock();
1756 trace_access_lock(cpu_file);
1643 return p; 1757 return p;
1644} 1758}
1645 1759
1646static void s_stop(struct seq_file *m, void *p) 1760static void s_stop(struct seq_file *m, void *p)
1647{ 1761{
1762 struct trace_iterator *iter = m->private;
1763
1648 atomic_dec(&trace_record_cmdline_disabled); 1764 atomic_dec(&trace_record_cmdline_disabled);
1765 trace_access_unlock(iter->cpu_file);
1649 trace_event_read_unlock(); 1766 trace_event_read_unlock();
1650} 1767}
1651 1768
@@ -2836,22 +2953,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2836 2953
2837 mutex_lock(&trace_types_lock); 2954 mutex_lock(&trace_types_lock);
2838 2955
2839 /* We only allow one reader per cpu */
2840 if (cpu_file == TRACE_PIPE_ALL_CPU) {
2841 if (!cpumask_empty(tracing_reader_cpumask)) {
2842 ret = -EBUSY;
2843 goto out;
2844 }
2845 cpumask_setall(tracing_reader_cpumask);
2846 } else {
2847 if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2848 cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2849 else {
2850 ret = -EBUSY;
2851 goto out;
2852 }
2853 }
2854
2855 /* create a buffer to store the information to pass to userspace */ 2956 /* create a buffer to store the information to pass to userspace */
2856 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 2957 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2857 if (!iter) { 2958 if (!iter) {
@@ -2907,12 +3008,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
2907 3008
2908 mutex_lock(&trace_types_lock); 3009 mutex_lock(&trace_types_lock);
2909 3010
2910 if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
2911 cpumask_clear(tracing_reader_cpumask);
2912 else
2913 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2914
2915
2916 if (iter->trace->pipe_close) 3011 if (iter->trace->pipe_close)
2917 iter->trace->pipe_close(iter); 3012 iter->trace->pipe_close(iter);
2918 3013
@@ -3074,6 +3169,7 @@ waitagain:
3074 iter->pos = -1; 3169 iter->pos = -1;
3075 3170
3076 trace_event_read_lock(); 3171 trace_event_read_lock();
3172 trace_access_lock(iter->cpu_file);
3077 while (find_next_entry_inc(iter) != NULL) { 3173 while (find_next_entry_inc(iter) != NULL) {
3078 enum print_line_t ret; 3174 enum print_line_t ret;
3079 int len = iter->seq.len; 3175 int len = iter->seq.len;
@@ -3090,6 +3186,7 @@ waitagain:
3090 if (iter->seq.len >= cnt) 3186 if (iter->seq.len >= cnt)
3091 break; 3187 break;
3092 } 3188 }
3189 trace_access_unlock(iter->cpu_file);
3093 trace_event_read_unlock(); 3190 trace_event_read_unlock();
3094 3191
3095 /* Now copy what we have to the user */ 3192 /* Now copy what we have to the user */
@@ -3215,6 +3312,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3215 } 3312 }
3216 3313
3217 trace_event_read_lock(); 3314 trace_event_read_lock();
3315 trace_access_lock(iter->cpu_file);
3218 3316
3219 /* Fill as many pages as possible. */ 3317 /* Fill as many pages as possible. */
3220 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { 3318 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
@@ -3238,6 +3336,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3238 trace_seq_init(&iter->seq); 3336 trace_seq_init(&iter->seq);
3239 } 3337 }
3240 3338
3339 trace_access_unlock(iter->cpu_file);
3241 trace_event_read_unlock(); 3340 trace_event_read_unlock();
3242 mutex_unlock(&iter->mutex); 3341 mutex_unlock(&iter->mutex);
3243 3342
@@ -3539,10 +3638,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3539 3638
3540 info->read = 0; 3639 info->read = 0;
3541 3640
3641 trace_access_lock(info->cpu);
3542 ret = ring_buffer_read_page(info->tr->buffer, 3642 ret = ring_buffer_read_page(info->tr->buffer,
3543 &info->spare, 3643 &info->spare,
3544 count, 3644 count,
3545 info->cpu, 0); 3645 info->cpu, 0);
3646 trace_access_unlock(info->cpu);
3546 if (ret < 0) 3647 if (ret < 0)
3547 return 0; 3648 return 0;
3548 3649
@@ -3670,6 +3771,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3670 len &= PAGE_MASK; 3771 len &= PAGE_MASK;
3671 } 3772 }
3672 3773
3774 trace_access_lock(info->cpu);
3673 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3775 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3674 3776
3675 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { 3777 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
@@ -3717,6 +3819,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3717 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3819 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3718 } 3820 }
3719 3821
3822 trace_access_unlock(info->cpu);
3720 spd.nr_pages = i; 3823 spd.nr_pages = i;
3721 3824
3722 /* did we read anything? */ 3825 /* did we read anything? */
@@ -4153,6 +4256,8 @@ static __init int tracer_init_debugfs(void)
4153 struct dentry *d_tracer; 4256 struct dentry *d_tracer;
4154 int cpu; 4257 int cpu;
4155 4258
4259 trace_access_lock_init();
4260
4156 d_tracer = tracing_init_dentry(); 4261 d_tracer = tracing_init_dentry();
4157 4262
4158 trace_create_file("tracing_enabled", 0644, d_tracer, 4263 trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4176,10 +4281,10 @@ static __init int tracer_init_debugfs(void)
4176#ifdef CONFIG_TRACER_MAX_TRACE 4281#ifdef CONFIG_TRACER_MAX_TRACE
4177 trace_create_file("tracing_max_latency", 0644, d_tracer, 4282 trace_create_file("tracing_max_latency", 0644, d_tracer,
4178 &tracing_max_latency, &tracing_max_lat_fops); 4283 &tracing_max_latency, &tracing_max_lat_fops);
4284#endif
4179 4285
4180 trace_create_file("tracing_thresh", 0644, d_tracer, 4286 trace_create_file("tracing_thresh", 0644, d_tracer,
4181 &tracing_thresh, &tracing_max_lat_fops); 4287 &tracing_thresh, &tracing_max_lat_fops);
4182#endif
4183 4288
4184 trace_create_file("README", 0444, d_tracer, 4289 trace_create_file("README", 0444, d_tracer,
4185 NULL, &tracing_readme_fops); 4290 NULL, &tracing_readme_fops);
@@ -4387,9 +4492,6 @@ __init static int tracer_alloc_buffers(void)
4387 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) 4492 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
4388 goto out_free_buffer_mask; 4493 goto out_free_buffer_mask;
4389 4494
4390 if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
4391 goto out_free_tracing_cpumask;
4392
4393 /* To save memory, keep the ring buffer size to its minimum */ 4495 /* To save memory, keep the ring buffer size to its minimum */
4394 if (ring_buffer_expanded) 4496 if (ring_buffer_expanded)
4395 ring_buf_size = trace_buf_size; 4497 ring_buf_size = trace_buf_size;
@@ -4447,8 +4549,6 @@ __init static int tracer_alloc_buffers(void)
4447 return 0; 4549 return 0;
4448 4550
4449out_free_cpumask: 4551out_free_cpumask:
4450 free_cpumask_var(tracing_reader_cpumask);
4451out_free_tracing_cpumask:
4452 free_cpumask_var(tracing_cpumask); 4552 free_cpumask_var(tracing_cpumask);
4453out_free_buffer_mask: 4553out_free_buffer_mask:
4454 free_cpumask_var(tracing_buffer_mask); 4554 free_cpumask_var(tracing_buffer_mask);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4df6a77eb196..2825ef2c0b15 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -396,9 +396,10 @@ extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
396 396
397extern unsigned long nsecs_to_usecs(unsigned long nsecs); 397extern unsigned long nsecs_to_usecs(unsigned long nsecs);
398 398
399extern unsigned long tracing_thresh;
400
399#ifdef CONFIG_TRACER_MAX_TRACE 401#ifdef CONFIG_TRACER_MAX_TRACE
400extern unsigned long tracing_max_latency; 402extern unsigned long tracing_max_latency;
401extern unsigned long tracing_thresh;
402 403
403void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); 404void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
404void update_max_tr_single(struct trace_array *tr, 405void update_max_tr_single(struct trace_array *tr,
@@ -497,6 +498,7 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
497#ifdef CONFIG_DYNAMIC_FTRACE 498#ifdef CONFIG_DYNAMIC_FTRACE
498/* TODO: make this variable */ 499/* TODO: make this variable */
499#define FTRACE_GRAPH_MAX_FUNCS 32 500#define FTRACE_GRAPH_MAX_FUNCS 32
501extern int ftrace_graph_filter_enabled;
500extern int ftrace_graph_count; 502extern int ftrace_graph_count;
501extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; 503extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
502 504
@@ -504,7 +506,7 @@ static inline int ftrace_graph_addr(unsigned long addr)
504{ 506{
505 int i; 507 int i;
506 508
507 if (!ftrace_graph_count || test_tsk_trace_graph(current)) 509 if (!ftrace_graph_filter_enabled)
508 return 1; 510 return 1;
509 511
510 for (i = 0; i < ftrace_graph_count; i++) { 512 for (i = 0; i < ftrace_graph_count; i++) {
@@ -549,7 +551,7 @@ static inline int ftrace_trace_task(struct task_struct *task)
549 * struct trace_parser - servers for reading the user input separated by spaces 551 * struct trace_parser - servers for reading the user input separated by spaces
550 * @cont: set if the input is not complete - no final space char was found 552 * @cont: set if the input is not complete - no final space char was found
551 * @buffer: holds the parsed user input 553 * @buffer: holds the parsed user input
552 * @idx: user input lenght 554 * @idx: user input length
553 * @size: buffer size 555 * @size: buffer size
554 */ 556 */
555struct trace_parser { 557struct trace_parser {
@@ -791,7 +793,8 @@ extern const char *__stop___trace_bprintk_fmt[];
791 793
792#undef FTRACE_ENTRY 794#undef FTRACE_ENTRY
793#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ 795#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
794 extern struct ftrace_event_call event_##call; 796 extern struct ftrace_event_call \
797 __attribute__((__aligned__(4))) event_##call;
795#undef FTRACE_ENTRY_DUP 798#undef FTRACE_ENTRY_DUP
796#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ 799#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
797 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 800 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 4a194f08f88c..b9bc4d470177 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -307,8 +307,23 @@ static int annotated_branch_stat_cmp(void *p1, void *p2)
307 return -1; 307 return -1;
308 if (percent_a > percent_b) 308 if (percent_a > percent_b)
309 return 1; 309 return 1;
310 else 310
311 return 0; 311 if (a->incorrect < b->incorrect)
312 return -1;
313 if (a->incorrect > b->incorrect)
314 return 1;
315
316 /*
317 * Since the above shows worse (incorrect) cases
318 * first, we continue that by showing best (correct)
319 * cases last.
320 */
321 if (a->correct > b->correct)
322 return -1;
323 if (a->correct < b->correct)
324 return 1;
325
326 return 0;
312} 327}
313 328
314static struct tracer_stat annotated_branch_stats = { 329static struct tracer_stat annotated_branch_stats = {
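[Editor's note] annotated_branch_stat_cmp() above becomes a multi-key comparator: miss percentage first, then the incorrect count, then the correct count, so entries with the same percentage are ordered deterministically instead of arbitrarily. A user-space sketch of the same tie-breaking idea with qsort(); the percent() helper is a simplified stand-in for the tracer's percentage calculation:

#include <stdio.h>
#include <stdlib.h>

struct branch_stat {
	unsigned long correct;
	unsigned long incorrect;
};

/* Simplified miss percentage; -1 when there is no data. */
static long percent(const struct branch_stat *s)
{
	unsigned long total = s->correct + s->incorrect;

	return total ? (long)(s->incorrect * 100 / total) : -1;
}

/* Percentage first, then incorrect count, then correct count (reversed),
 * so ties do not land in an arbitrary order. */
static int stat_cmp(const void *p1, const void *p2)
{
	const struct branch_stat *a = p1, *b = p2;
	long pa = percent(a), pb = percent(b);

	if (pa < pb) return -1;
	if (pa > pb) return 1;
	if (a->incorrect < b->incorrect) return -1;
	if (a->incorrect > b->incorrect) return 1;
	if (a->correct > b->correct) return -1;
	if (a->correct < b->correct) return 1;
	return 0;
}

int main(void)
{
	struct branch_stat stats[] = {
		{ .correct = 90, .incorrect = 10 },	/* 10% */
		{ .correct = 9,  .incorrect = 1 },	/* 10%, fewer misses */
		{ .correct = 50, .incorrect = 50 },	/* 50% */
	};

	qsort(stats, 3, sizeof(stats[0]), stat_cmp);
	for (int i = 0; i < 3; i++)
		printf("%lu/%lu\n", stats[i].incorrect,
		       stats[i].correct + stats[i].incorrect);
	return 0;
}
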
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 84a3a7ba072a..6fbfb8f417b9 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -13,6 +13,7 @@
13 * Tracer plugins will chose a default from these clocks. 13 * Tracer plugins will chose a default from these clocks.
14 */ 14 */
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/irqflags.h>
16#include <linux/hardirq.h> 17#include <linux/hardirq.h>
17#include <linux/module.h> 18#include <linux/module.h>
18#include <linux/percpu.h> 19#include <linux/percpu.h>
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 9e25573242cf..c1cc3ab633de 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -6,14 +6,12 @@
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/kprobes.h>
9#include "trace.h" 10#include "trace.h"
10 11
11 12
12char *perf_trace_buf; 13static char *perf_trace_buf;
13EXPORT_SYMBOL_GPL(perf_trace_buf); 14static char *perf_trace_buf_nmi;
14
15char *perf_trace_buf_nmi;
16EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
17 15
18typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; 16typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
19 17
@@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id)
120 } 118 }
121 mutex_unlock(&event_mutex); 119 mutex_unlock(&event_mutex);
122} 120}
121
122__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
123 int *rctxp, unsigned long *irq_flags)
124{
125 struct trace_entry *entry;
126 char *trace_buf, *raw_data;
127 int pc, cpu;
128
129 pc = preempt_count();
130
131 /* Protect the per cpu buffer, begin the rcu read side */
132 local_irq_save(*irq_flags);
133
134 *rctxp = perf_swevent_get_recursion_context();
135 if (*rctxp < 0)
136 goto err_recursion;
137
138 cpu = smp_processor_id();
139
140 if (in_nmi())
141 trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
142 else
143 trace_buf = rcu_dereference_sched(perf_trace_buf);
144
145 if (!trace_buf)
146 goto err;
147
148 raw_data = per_cpu_ptr(trace_buf, cpu);
149
150 /* zero the dead bytes from align to not leak stack to user */
151 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
152
153 entry = (struct trace_entry *)raw_data;
154 tracing_generic_entry_update(entry, *irq_flags, pc);
155 entry->type = type;
156
157 return raw_data;
158err:
159 perf_swevent_put_recursion_context(*rctxp);
160err_recursion:
161 local_irq_restore(*irq_flags);
162 return NULL;
163}
164EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 189b09baf4fb..3f972ad98d04 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -60,10 +60,8 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
60 return 0; 60 return 0;
61 61
62err: 62err:
63 if (field) { 63 if (field)
64 kfree(field->name); 64 kfree(field->name);
65 kfree(field->type);
66 }
67 kfree(field); 65 kfree(field);
68 66
69 return -ENOMEM; 67 return -ENOMEM;
@@ -520,41 +518,16 @@ out:
520 return ret; 518 return ret;
521} 519}
522 520
523extern char *__bad_type_size(void);
524
525#undef FIELD
526#define FIELD(type, name) \
527 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
528 #type, "common_" #name, offsetof(typeof(field), name), \
529 sizeof(field.name), is_signed_type(type)
530
531static int trace_write_header(struct trace_seq *s)
532{
533 struct trace_entry field;
534
535 /* struct trace_entry */
536 return trace_seq_printf(s,
537 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
538 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
539 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
540 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
541 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
542 "\n",
543 FIELD(unsigned short, type),
544 FIELD(unsigned char, flags),
545 FIELD(unsigned char, preempt_count),
546 FIELD(int, pid),
547 FIELD(int, lock_depth));
548}
549
550static ssize_t 521static ssize_t
551event_format_read(struct file *filp, char __user *ubuf, size_t cnt, 522event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
552 loff_t *ppos) 523 loff_t *ppos)
553{ 524{
554 struct ftrace_event_call *call = filp->private_data; 525 struct ftrace_event_call *call = filp->private_data;
526 struct ftrace_event_field *field;
555 struct trace_seq *s; 527 struct trace_seq *s;
528 int common_field_count = 5;
556 char *buf; 529 char *buf;
557 int r; 530 int r = 0;
558 531
559 if (*ppos) 532 if (*ppos)
560 return 0; 533 return 0;
@@ -565,14 +538,48 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
565 538
566 trace_seq_init(s); 539 trace_seq_init(s);
567 540
568 /* If any of the first writes fail, so will the show_format. */
569
570 trace_seq_printf(s, "name: %s\n", call->name); 541 trace_seq_printf(s, "name: %s\n", call->name);
571 trace_seq_printf(s, "ID: %d\n", call->id); 542 trace_seq_printf(s, "ID: %d\n", call->id);
572 trace_seq_printf(s, "format:\n"); 543 trace_seq_printf(s, "format:\n");
573 trace_write_header(s);
574 544
575 r = call->show_format(call, s); 545 list_for_each_entry_reverse(field, &call->fields, link) {
546 /*
547 * Smartly shows the array type(except dynamic array).
548 * Normal:
549 * field:TYPE VAR
550 * If TYPE := TYPE[LEN], it is shown:
551 * field:TYPE VAR[LEN]
552 */
553 const char *array_descriptor = strchr(field->type, '[');
554
555 if (!strncmp(field->type, "__data_loc", 10))
556 array_descriptor = NULL;
557
558 if (!array_descriptor) {
559 r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
560 "\tsize:%u;\tsigned:%d;\n",
561 field->type, field->name, field->offset,
562 field->size, !!field->is_signed);
563 } else {
564 r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
565 "\tsize:%u;\tsigned:%d;\n",
566 (int)(array_descriptor - field->type),
567 field->type, field->name,
568 array_descriptor, field->offset,
569 field->size, !!field->is_signed);
570 }
571
572 if (--common_field_count == 0)
573 r = trace_seq_printf(s, "\n");
574
575 if (!r)
576 break;
577 }
578
579 if (r)
580 r = trace_seq_printf(s, "\nprint fmt: %s\n",
581 call->print_fmt);
582
576 if (!r) { 583 if (!r) {
577 /* 584 /*
578 * ug! The format output is bigger than a PAGE!! 585 * ug! The format output is bigger than a PAGE!!
@@ -948,10 +955,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
948 filter); 955 filter);
949 } 956 }
950 957
951 /* A trace may not want to export its format */
952 if (!call->show_format)
953 return 0;
954
955 trace_create_file("format", 0444, call->dir, call, 958 trace_create_file("format", 0444, call->dir, call,
956 format); 959 format);
957 960
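[Editor's note] event_format_read() above now walks call->fields itself and, for array types, prints the bracketed length after the field name ("field:char comm[16]") rather than inside the type, leaving __data_loc types untouched. A stand-alone sketch of just that formatting decision; the field struct here is a simplified stand-in for struct ftrace_event_field:

#include <stdio.h>
#include <string.h>

struct field {
	const char *type;	/* e.g. "int" or "char[16]" */
	const char *name;
	unsigned int offset, size;
	int is_signed;
};

static void print_field(const struct field *f)
{
	const char *array = strchr(f->type, '[');

	/* __data_loc types carry their own bracket; print them verbatim. */
	if (!strncmp(f->type, "__data_loc", 10))
		array = NULL;

	if (!array)
		printf("\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
		       f->type, f->name, f->offset, f->size, !!f->is_signed);
	else
		printf("\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
		       (int)(array - f->type), f->type, f->name, array,
		       f->offset, f->size, !!f->is_signed);
}

int main(void)
{
	struct field pid  = { "int", "pid", 24, 4, 1 };
	struct field comm = { "char[16]", "comm", 8, 16, 0 };

	print_field(&pid);	/* field:int pid;...      */
	print_field(&comm);	/* field:char comm[16];... */
	return 0;
}
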
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 50504cb228de..4615f62a04f1 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -211,8 +211,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
211{ 211{
212 char **addr = (char **)(event + pred->offset); 212 char **addr = (char **)(event + pred->offset);
213 int cmp, match; 213 int cmp, match;
214 int len = strlen(*addr) + 1; /* including tailing '\0' */
214 215
215 cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len); 216 cmp = pred->regex.match(*addr, &pred->regex, len);
216 217
217 match = cmp ^ pred->not; 218 match = cmp ^ pred->not;
218 219
@@ -251,7 +252,18 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
251 return 0; 252 return 0;
252} 253}
253 254
254/* Basic regex callbacks */ 255/*
256 * regex_match_foo - Basic regex callbacks
257 *
258 * @str: the string to be searched
259 * @r: the regex structure containing the pattern string
260 * @len: the length of the string to be searched (including '\0')
261 *
262 * Note:
263 * - @str might not be NULL-terminated if it's of type DYN_STRING
264 * or STATIC_STRING
265 */
266
255static int regex_match_full(char *str, struct regex *r, int len) 267static int regex_match_full(char *str, struct regex *r, int len)
256{ 268{
257 if (strncmp(str, r->pattern, len) == 0) 269 if (strncmp(str, r->pattern, len) == 0)
@@ -261,23 +273,24 @@ static int regex_match_full(char *str, struct regex *r, int len)
261 273
262static int regex_match_front(char *str, struct regex *r, int len) 274static int regex_match_front(char *str, struct regex *r, int len)
263{ 275{
264 if (strncmp(str, r->pattern, len) == 0) 276 if (strncmp(str, r->pattern, r->len) == 0)
265 return 1; 277 return 1;
266 return 0; 278 return 0;
267} 279}
268 280
269static int regex_match_middle(char *str, struct regex *r, int len) 281static int regex_match_middle(char *str, struct regex *r, int len)
270{ 282{
271 if (strstr(str, r->pattern)) 283 if (strnstr(str, r->pattern, len))
272 return 1; 284 return 1;
273 return 0; 285 return 0;
274} 286}
275 287
276static int regex_match_end(char *str, struct regex *r, int len) 288static int regex_match_end(char *str, struct regex *r, int len)
277{ 289{
278 char *ptr = strstr(str, r->pattern); 290 int strlen = len - 1;
279 291
280 if (ptr && (ptr[r->len] == 0)) 292 if (strlen >= r->len &&
293 memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
281 return 1; 294 return 1;
282 return 0; 295 return 0;
283} 296}
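As a quick sanity check of the new "match at end" logic above, a self-contained user-space sketch with hypothetical inputs; as in the filter code, len counts the terminating '\0' of the searched string.

	#include <stdio.h>
	#include <string.h>

	/* does pattern (length plen) match the end of str? len includes '\0' */
	static int match_end(const char *str, const char *pattern,
			     int plen, int len)
	{
		int slen = len - 1;	/* drop the counted '\0' */

		if (slen >= plen &&
		    memcmp(str + slen - plen, pattern, plen) == 0)
			return 1;
		return 0;
	}

	int main(void)
	{
		const char *s = "kmalloc_node";	/* hypothetical event string */

		printf("%d\n", match_end(s, "node", 4, strlen(s) + 1));	/* 1 */
		printf("%d\n", match_end(s, "kmalloc", 7, strlen(s) + 1));	/* 0 */
		return 0;
	}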
@@ -781,10 +794,8 @@ static int filter_add_pred(struct filter_parse_state *ps,
781 pred->regex.field_len = field->size; 794 pred->regex.field_len = field->size;
782 } else if (field->filter_type == FILTER_DYN_STRING) 795 } else if (field->filter_type == FILTER_DYN_STRING)
783 fn = filter_pred_strloc; 796 fn = filter_pred_strloc;
784 else { 797 else
785 fn = filter_pred_pchar; 798 fn = filter_pred_pchar;
786 pred->regex.field_len = strlen(pred->regex.pattern);
787 }
788 } else { 799 } else {
789 if (field->is_signed) 800 if (field->is_signed)
790 ret = strict_strtoll(pred->regex.pattern, 0, &val); 801 ret = strict_strtoll(pred->regex.pattern, 0, &val);
@@ -1360,7 +1371,7 @@ out_unlock:
1360 return err; 1371 return err;
1361} 1372}
1362 1373
1363#ifdef CONFIG_EVENT_PROFILE 1374#ifdef CONFIG_PERF_EVENTS
1364 1375
1365void ftrace_profile_free_filter(struct perf_event *event) 1376void ftrace_profile_free_filter(struct perf_event *event)
1366{ 1377{
@@ -1428,5 +1439,5 @@ out_unlock:
1428 return err; 1439 return err;
1429} 1440}
1430 1441
1431#endif /* CONFIG_EVENT_PROFILE */ 1442#endif /* CONFIG_PERF_EVENTS */
1432 1443
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d4fa5dc1ee4e..e091f64ba6ce 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -62,78 +62,6 @@ static void __always_unused ____ftrace_check_##name(void) \
62 62
63#include "trace_entries.h" 63#include "trace_entries.h"
64 64
65
66#undef __field
67#define __field(type, item) \
68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
69 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
70 offsetof(typeof(field), item), \
71 sizeof(field.item), is_signed_type(type)); \
72 if (!ret) \
73 return 0;
74
75#undef __field_desc
76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item), \
81 is_signed_type(type)); \
82 if (!ret) \
83 return 0;
84
85#undef __array
86#define __array(type, item, len) \
87 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
88 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
89 offsetof(typeof(field), item), \
90 sizeof(field.item), is_signed_type(type)); \
91 if (!ret) \
92 return 0;
93
94#undef __array_desc
95#define __array_desc(type, container, item, len) \
96 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
97 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
98 offsetof(typeof(field), container.item), \
99 sizeof(field.container.item), \
100 is_signed_type(type)); \
101 if (!ret) \
102 return 0;
103
104#undef __dynamic_array
105#define __dynamic_array(type, item) \
106 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
107 "offset:%zu;\tsize:0;\tsigned:%u;\n", \
108 offsetof(typeof(field), item), \
109 is_signed_type(type)); \
110 if (!ret) \
111 return 0;
112
113#undef F_printk
114#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
115
116#undef __entry
117#define __entry REC
118
119#undef FTRACE_ENTRY
120#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
121static int \
122ftrace_format_##name(struct ftrace_event_call *unused, \
123 struct trace_seq *s) \
124{ \
125 struct struct_name field __attribute__((unused)); \
126 int ret = 0; \
127 \
128 tstruct; \
129 \
130 trace_seq_printf(s, "\nprint fmt: " print); \
131 \
132 return ret; \
133}
134
135#include "trace_entries.h"
136
137#undef __field 65#undef __field
138#define __field(type, item) \ 66#define __field(type, item) \
139 ret = trace_define_field(event_call, #type, #item, \ 67 ret = trace_define_field(event_call, #type, #item, \
@@ -175,7 +103,12 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
175 return ret; 103 return ret;
176 104
177#undef __dynamic_array 105#undef __dynamic_array
178#define __dynamic_array(type, item) 106#define __dynamic_array(type, item) \
107 ret = trace_define_field(event_call, #type, #item, \
108 offsetof(typeof(field), item), \
109 0, is_signed_type(type), FILTER_OTHER);\
110 if (ret) \
111 return ret;
179 112
180#undef FTRACE_ENTRY 113#undef FTRACE_ENTRY
181#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ 114#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
@@ -198,6 +131,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
198 return 0; 131 return 0;
199} 132}
200 133
134#undef __entry
135#define __entry REC
136
201#undef __field 137#undef __field
202#define __field(type, item) 138#define __field(type, item)
203 139
@@ -213,6 +149,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
213#undef __dynamic_array 149#undef __dynamic_array
214#define __dynamic_array(type, item) 150#define __dynamic_array(type, item)
215 151
152#undef F_printk
153#define F_printk(fmt, args...) #fmt ", " __stringify(args)
154
216#undef FTRACE_ENTRY 155#undef FTRACE_ENTRY
217#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 156#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
218 \ 157 \
@@ -223,7 +162,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
223 .id = type, \ 162 .id = type, \
224 .system = __stringify(TRACE_SYSTEM), \ 163 .system = __stringify(TRACE_SYSTEM), \
225 .raw_init = ftrace_raw_init_event, \ 164 .raw_init = ftrace_raw_init_event, \
226 .show_format = ftrace_format_##call, \ 165 .print_fmt = print, \
227 .define_fields = ftrace_define_fields_##call, \ 166 .define_fields = ftrace_define_fields_##call, \
228}; \ 167}; \
229 168
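With ftrace_format_##call gone, the F_printk() string itself becomes the event's print_fmt. A small user-space sketch of the stringification involved (the field name is hypothetical); the two-level __stringify() expands __entry to REC before stringifying, which is exactly what produces the "REC->..." text seen in format files.

	#include <stdio.h>

	#define __stringify_1(x...)	#x
	#define __stringify(x...)	__stringify_1(x)

	#define __entry			REC
	#define F_printk(fmt, args...)	#fmt ", " __stringify(args)

	int main(void)
	{
		/* prints: "%lx", REC->ip */
		puts(F_printk("%lx", __entry->ip));
		return 0;
	}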
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b1342c5d37cf..e6989d9b44da 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -18,6 +18,7 @@ struct fgraph_cpu_data {
18 pid_t last_pid; 18 pid_t last_pid;
19 int depth; 19 int depth;
20 int ignore; 20 int ignore;
21 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
21}; 22};
22 23
23struct fgraph_data { 24struct fgraph_data {
@@ -187,7 +188,7 @@ static int __trace_graph_entry(struct trace_array *tr,
187 struct ring_buffer *buffer = tr->buffer; 188 struct ring_buffer *buffer = tr->buffer;
188 struct ftrace_graph_ent_entry *entry; 189 struct ftrace_graph_ent_entry *entry;
189 190
190 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) 191 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
191 return 0; 192 return 0;
192 193
193 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, 194 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -212,13 +213,11 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
212 int cpu; 213 int cpu;
213 int pc; 214 int pc;
214 215
215 if (unlikely(!tr))
216 return 0;
217
218 if (!ftrace_trace_task(current)) 216 if (!ftrace_trace_task(current))
219 return 0; 217 return 0;
220 218
221 if (!ftrace_graph_addr(trace->func)) 219 /* trace it when it is nested in an enabled function, or is itself enabled. */
220 if (!(trace->depth || ftrace_graph_addr(trace->func)))
222 return 0; 221 return 0;
223 222
224 local_irq_save(flags); 223 local_irq_save(flags);
@@ -231,9 +230,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
231 } else { 230 } else {
232 ret = 0; 231 ret = 0;
233 } 232 }
234 /* Only do the atomic if it is not already set */
235 if (!test_tsk_trace_graph(current))
236 set_tsk_trace_graph(current);
237 233
238 atomic_dec(&data->disabled); 234 atomic_dec(&data->disabled);
239 local_irq_restore(flags); 235 local_irq_restore(flags);
@@ -241,6 +237,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
241 return ret; 237 return ret;
242} 238}
243 239
240int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
241{
242 if (tracing_thresh)
243 return 1;
244 else
245 return trace_graph_entry(trace);
246}
247
244static void __trace_graph_return(struct trace_array *tr, 248static void __trace_graph_return(struct trace_array *tr,
245 struct ftrace_graph_ret *trace, 249 struct ftrace_graph_ret *trace,
246 unsigned long flags, 250 unsigned long flags,
@@ -251,7 +255,7 @@ static void __trace_graph_return(struct trace_array *tr,
251 struct ring_buffer *buffer = tr->buffer; 255 struct ring_buffer *buffer = tr->buffer;
252 struct ftrace_graph_ret_entry *entry; 256 struct ftrace_graph_ret_entry *entry;
253 257
254 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) 258 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
255 return; 259 return;
256 260
257 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, 261 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@ -281,19 +285,39 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
281 pc = preempt_count(); 285 pc = preempt_count();
282 __trace_graph_return(tr, trace, flags, pc); 286 __trace_graph_return(tr, trace, flags, pc);
283 } 287 }
284 if (!trace->depth)
285 clear_tsk_trace_graph(current);
286 atomic_dec(&data->disabled); 288 atomic_dec(&data->disabled);
287 local_irq_restore(flags); 289 local_irq_restore(flags);
288} 290}
289 291
292void set_graph_array(struct trace_array *tr)
293{
294 graph_array = tr;
295
296 /* Make graph_array visible before we start tracing */
297
298 smp_mb();
299}
300
301void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
302{
303 if (tracing_thresh &&
304 (trace->rettime - trace->calltime < tracing_thresh))
305 return;
306 else
307 trace_graph_return(trace);
308}
309
290static int graph_trace_init(struct trace_array *tr) 310static int graph_trace_init(struct trace_array *tr)
291{ 311{
292 int ret; 312 int ret;
293 313
294 graph_array = tr; 314 set_graph_array(tr);
295 ret = register_ftrace_graph(&trace_graph_return, 315 if (tracing_thresh)
296 &trace_graph_entry); 316 ret = register_ftrace_graph(&trace_graph_thresh_return,
317 &trace_graph_thresh_entry);
318 else
319 ret = register_ftrace_graph(&trace_graph_return,
320 &trace_graph_entry);
297 if (ret) 321 if (ret)
298 return ret; 322 return ret;
299 tracing_start_cmdline_record(); 323 tracing_start_cmdline_record();
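A condensed, user-space sketch of the threshold policy registered above (timestamps and the 100 ns threshold are hypothetical): with tracing_thresh set, the entry handler accepts everything without recording, and the return handler, which knows the duration, decides whether the call is worth logging.

	#include <stdio.h>

	static unsigned long tracing_thresh = 100;	/* hypothetical, in ns */

	/* entry: with a threshold, accept but record nothing yet */
	static int thresh_entry(void)
	{
		if (tracing_thresh)
			return 1;
		return 0;	/* no threshold: full entry tracing would run here */
	}

	/* return: log only calls that ran at least tracing_thresh ns */
	static void thresh_return(unsigned long long calltime,
				  unsigned long long rettime)
	{
		if (tracing_thresh && (rettime - calltime < tracing_thresh))
			return;
		printf("record: duration %llu ns\n", rettime - calltime);
	}

	int main(void)
	{
		if (thresh_entry())
			thresh_return(1000, 1250);	/* 250 >= 100: recorded */
		if (thresh_entry())
			thresh_return(2000, 2040);	/* 40 < 100: dropped */
		return 0;
	}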
@@ -301,11 +325,6 @@ static int graph_trace_init(struct trace_array *tr)
301 return 0; 325 return 0;
302} 326}
303 327
304void set_graph_array(struct trace_array *tr)
305{
306 graph_array = tr;
307}
308
309static void graph_trace_reset(struct trace_array *tr) 328static void graph_trace_reset(struct trace_array *tr)
310{ 329{
311 tracing_stop_cmdline_record(); 330 tracing_stop_cmdline_record();
@@ -673,15 +692,21 @@ print_graph_entry_leaf(struct trace_iterator *iter,
673 duration = graph_ret->rettime - graph_ret->calltime; 692 duration = graph_ret->rettime - graph_ret->calltime;
674 693
675 if (data) { 694 if (data) {
695 struct fgraph_cpu_data *cpu_data;
676 int cpu = iter->cpu; 696 int cpu = iter->cpu;
677 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 697
698 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
678 699
679 /* 700 /*
680 * Comments display at + 1 to depth. Since 701 * Comments display at + 1 to depth. Since
681 * this is a leaf function, keep the comments 702 * this is a leaf function, keep the comments
682 * equal to this depth. 703 * equal to this depth.
683 */ 704 */
684 *depth = call->depth - 1; 705 cpu_data->depth = call->depth - 1;
706
707 /* No need to keep this function around for this depth */
708 if (call->depth < FTRACE_RETFUNC_DEPTH)
709 cpu_data->enter_funcs[call->depth] = 0;
685 } 710 }
686 711
687 /* Overhead */ 712 /* Overhead */
@@ -721,10 +746,15 @@ print_graph_entry_nested(struct trace_iterator *iter,
721 int i; 746 int i;
722 747
723 if (data) { 748 if (data) {
749 struct fgraph_cpu_data *cpu_data;
724 int cpu = iter->cpu; 750 int cpu = iter->cpu;
725 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
726 751
727 *depth = call->depth; 752 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
753 cpu_data->depth = call->depth;
754
755 /* Save this function pointer to see if the exit matches */
756 if (call->depth < FTRACE_RETFUNC_DEPTH)
757 cpu_data->enter_funcs[call->depth] = call->func;
728 } 758 }
729 759
730 /* No overhead */ 760 /* No overhead */
@@ -854,19 +884,28 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
854 struct fgraph_data *data = iter->private; 884 struct fgraph_data *data = iter->private;
855 pid_t pid = ent->pid; 885 pid_t pid = ent->pid;
856 int cpu = iter->cpu; 886 int cpu = iter->cpu;
887 int func_match = 1;
857 int ret; 888 int ret;
858 int i; 889 int i;
859 890
860 if (data) { 891 if (data) {
892 struct fgraph_cpu_data *cpu_data;
861 int cpu = iter->cpu; 893 int cpu = iter->cpu;
862 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 894
895 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
863 896
864 /* 897 /*
865 * Comments display at + 1 to depth. This is the 898 * Comments display at + 1 to depth. This is the
866 * return from a function, we now want the comments 899 * return from a function, we now want the comments
867 * to display at the same level of the bracket. 900 * to display at the same level of the bracket.
868 */ 901 */
869 *depth = trace->depth - 1; 902 cpu_data->depth = trace->depth - 1;
903
904 if (trace->depth < FTRACE_RETFUNC_DEPTH) {
905 if (cpu_data->enter_funcs[trace->depth] != trace->func)
906 func_match = 0;
907 cpu_data->enter_funcs[trace->depth] = 0;
908 }
870 } 909 }
871 910
872 if (print_graph_prologue(iter, s, 0, 0)) 911 if (print_graph_prologue(iter, s, 0, 0))
@@ -891,9 +930,21 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
891 return TRACE_TYPE_PARTIAL_LINE; 930 return TRACE_TYPE_PARTIAL_LINE;
892 } 931 }
893 932
894 ret = trace_seq_printf(s, "}\n"); 933 /*
895 if (!ret) 934 * If the return function does not have a matching entry,
896 return TRACE_TYPE_PARTIAL_LINE; 935 * then the entry was lost. Instead of just printing
936 * the '}' and letting the user guess what function this
937 * belongs to, write out the function name.
938 */
939 if (func_match) {
940 ret = trace_seq_printf(s, "}\n");
941 if (!ret)
942 return TRACE_TYPE_PARTIAL_LINE;
943 } else {
944 ret = trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
945 if (!ret)
946 return TRACE_TYPE_PARTIAL_LINE;
947 }
897 948
898 /* Overrun */ 949 /* Overrun */
899 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { 950 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
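The enter_funcs[] bookkeeping above pairs each closing brace with the function recorded at the same depth on entry; if the pointers disagree, the entry event was lost and the '}' is annotated with the function name. A user-space sketch of that pairing, with hypothetical depths and addresses:

	#include <stdio.h>

	#define RET_DEPTH 50	/* stand-in for FTRACE_RETFUNC_DEPTH */

	static unsigned long enter_funcs[RET_DEPTH];

	static void on_entry(int depth, unsigned long func)
	{
		if (depth < RET_DEPTH)
			enter_funcs[depth] = func;
	}

	static void on_return(int depth, unsigned long func)
	{
		int func_match = 1;

		if (depth < RET_DEPTH) {
			if (enter_funcs[depth] != func)
				func_match = 0;
			enter_funcs[depth] = 0;
		}

		if (func_match)
			printf("}\n");
		else
			printf("} /* %#lx */\n", func);	/* entry was lost */
	}

	int main(void)
	{
		on_entry(0, 0x1000);
		on_return(0, 0x1000);	/* matching pair: plain "}" */
		on_return(0, 0x2000);	/* no recorded entry: annotated "}" */
		return 0;
	}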
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 6ea90c0e2c96..505c92273b1a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
91 return retval; 91 return retval;
92} 92}
93 93
94static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
95{
96 return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
97}
98
99static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, 94static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
100 void *dummy) 95 void *dummy)
101{ 96{
@@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
231{ 226{
232 int ret = -EINVAL; 227 int ret = -EINVAL;
233 228
234 if (ff->func == fetch_argument) 229 if (ff->func == fetch_register) {
235 ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
236 else if (ff->func == fetch_register) {
237 const char *name; 230 const char *name;
238 name = regs_query_register_name((unsigned int)((long)ff->data)); 231 name = regs_query_register_name((unsigned int)((long)ff->data));
239 ret = snprintf(buf, n, "%%%s", name); 232 ret = snprintf(buf, n, "%%%s", name);
@@ -489,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
489 } 482 }
490 } else 483 } else
491 ret = -EINVAL; 484 ret = -EINVAL;
492 } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
493 ret = strict_strtoul(arg + 3, 10, &param);
494 if (ret || param > PARAM_MAX_ARGS)
495 ret = -EINVAL;
496 else {
497 ff->func = fetch_argument;
498 ff->data = (void *)param;
499 }
500 } else 485 } else
501 ret = -EINVAL; 486 ret = -EINVAL;
502 return ret; 487 return ret;
@@ -611,7 +596,6 @@ static int create_trace_probe(int argc, char **argv)
611 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] 596 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
612 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] 597 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
613 * Fetch args: 598 * Fetch args:
614 * $argN : fetch Nth of function argument. (N:0-)
615 * $retval : fetch return value 599 * $retval : fetch return value
616 * $stack : fetch stack address 600 * $stack : fetch stack address
617 * $stackN : fetch Nth of stack (N:0-) 601 * $stackN : fetch Nth of stack (N:0-)
@@ -651,12 +635,12 @@ static int create_trace_probe(int argc, char **argv)
651 event = strchr(group, '/') + 1; 635 event = strchr(group, '/') + 1;
652 event[-1] = '\0'; 636 event[-1] = '\0';
653 if (strlen(group) == 0) { 637 if (strlen(group) == 0) {
654 pr_info("Group name is not specifiled\n"); 638 pr_info("Group name is not specified\n");
655 return -EINVAL; 639 return -EINVAL;
656 } 640 }
657 } 641 }
658 if (strlen(event) == 0) { 642 if (strlen(event) == 0) {
659 pr_info("Event name is not specifiled\n"); 643 pr_info("Event name is not specified\n");
660 return -EINVAL; 644 return -EINVAL;
661 } 645 }
662 } 646 }
@@ -689,7 +673,7 @@ static int create_trace_probe(int argc, char **argv)
689 return -EINVAL; 673 return -EINVAL;
690 } 674 }
691 /* an address specified */ 675 /* an address specified */
692 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); 676 ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
693 if (ret) { 677 if (ret) {
694 pr_info("Failed to parse address.\n"); 678 pr_info("Failed to parse address.\n");
695 return ret; 679 return ret;
@@ -958,7 +942,7 @@ static const struct file_operations kprobe_profile_ops = {
958}; 942};
959 943
960/* Kprobe handler */ 944/* Kprobe handler */
961static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) 945static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
962{ 946{
963 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 947 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
964 struct kprobe_trace_entry *entry; 948 struct kprobe_trace_entry *entry;
@@ -978,7 +962,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
978 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 962 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
979 irq_flags, pc); 963 irq_flags, pc);
980 if (!event) 964 if (!event)
981 return 0; 965 return;
982 966
983 entry = ring_buffer_event_data(event); 967 entry = ring_buffer_event_data(event);
984 entry->nargs = tp->nr_args; 968 entry->nargs = tp->nr_args;
@@ -988,11 +972,10 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
988 972
989 if (!filter_current_check_discard(buffer, call, entry, event)) 973 if (!filter_current_check_discard(buffer, call, entry, event))
990 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 974 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
991 return 0;
992} 975}
993 976
994/* Kretprobe handler */ 977/* Kretprobe handler */
995static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, 978static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
996 struct pt_regs *regs) 979 struct pt_regs *regs)
997{ 980{
998 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 981 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@@ -1011,7 +994,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
1011 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 994 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
1012 irq_flags, pc); 995 irq_flags, pc);
1013 if (!event) 996 if (!event)
1014 return 0; 997 return;
1015 998
1016 entry = ring_buffer_event_data(event); 999 entry = ring_buffer_event_data(event);
1017 entry->nargs = tp->nr_args; 1000 entry->nargs = tp->nr_args;
@@ -1022,8 +1005,6 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
1022 1005
1023 if (!filter_current_check_discard(buffer, call, entry, event)) 1006 if (!filter_current_check_discard(buffer, call, entry, event))
1024 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1007 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1025
1026 return 0;
1027} 1008}
1028 1009
1029/* Event entry printers */ 1010/* Event entry printers */
@@ -1174,213 +1155,123 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1174 return 0; 1155 return 0;
1175} 1156}
1176 1157
1177static int __probe_event_show_format(struct trace_seq *s, 1158static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1178 struct trace_probe *tp, const char *fmt,
1179 const char *arg)
1180{ 1159{
1181 int i; 1160 int i;
1161 int pos = 0;
1182 1162
1183 /* Show format */ 1163 const char *fmt, *arg;
1184 if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1185 return 0;
1186 1164
1187 for (i = 0; i < tp->nr_args; i++) 1165 if (!probe_is_return(tp)) {
1188 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name)) 1166 fmt = "(%lx)";
1189 return 0; 1167 arg = "REC->" FIELD_STRING_IP;
1168 } else {
1169 fmt = "(%lx <- %lx)";
1170 arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
1171 }
1190 1172
1191 if (!trace_seq_printf(s, "\", %s", arg)) 1173 /* When len=0, we just calculate the needed length */
1192 return 0; 1174#define LEN_OR_ZERO (len ? len - pos : 0)
1193 1175
1194 for (i = 0; i < tp->nr_args; i++) 1176 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
1195 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1196 return 0;
1197
1198 return trace_seq_puts(s, "\n");
1199}
1200 1177
1201#undef SHOW_FIELD 1178 for (i = 0; i < tp->nr_args; i++) {
1202#define SHOW_FIELD(type, item, name) \ 1179 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx",
1203 do { \ 1180 tp->args[i].name);
1204 ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \ 1181 }
1205 "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\
1206 (unsigned int)offsetof(typeof(field), item),\
1207 (unsigned int)sizeof(type), \
1208 is_signed_type(type)); \
1209 if (!ret) \
1210 return 0; \
1211 } while (0)
1212 1182
1213static int kprobe_event_show_format(struct ftrace_event_call *call, 1183 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
1214 struct trace_seq *s)
1215{
1216 struct kprobe_trace_entry field __attribute__((unused));
1217 int ret, i;
1218 struct trace_probe *tp = (struct trace_probe *)call->data;
1219 1184
1220 SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP); 1185 for (i = 0; i < tp->nr_args; i++) {
1221 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); 1186 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
1187 tp->args[i].name);
1188 }
1222 1189
1223 /* Show fields */ 1190#undef LEN_OR_ZERO
1224 for (i = 0; i < tp->nr_args; i++)
1225 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1226 trace_seq_puts(s, "\n");
1227 1191
1228 return __probe_event_show_format(s, tp, "(%lx)", 1192 /* return the length of print_fmt */
1229 "REC->" FIELD_STRING_IP); 1193 return pos;
1230} 1194}
1231 1195
1232static int kretprobe_event_show_format(struct ftrace_event_call *call, 1196static int set_print_fmt(struct trace_probe *tp)
1233 struct trace_seq *s)
1234{ 1197{
1235 struct kretprobe_trace_entry field __attribute__((unused)); 1198 int len;
1236 int ret, i; 1199 char *print_fmt;
1237 struct trace_probe *tp = (struct trace_probe *)call->data;
1238 1200
1239 SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC); 1201 /* First: called with 0 length to calculate the needed length */
1240 SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP); 1202 len = __set_print_fmt(tp, NULL, 0);
1241 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); 1203 print_fmt = kmalloc(len + 1, GFP_KERNEL);
1204 if (!print_fmt)
1205 return -ENOMEM;
1242 1206
1243 /* Show fields */ 1207 /* Second: actually write the @print_fmt */
1244 for (i = 0; i < tp->nr_args; i++) 1208 __set_print_fmt(tp, print_fmt, len + 1);
1245 SHOW_FIELD(unsigned long, args[i], tp->args[i].name); 1209 tp->call.print_fmt = print_fmt;
1246 trace_seq_puts(s, "\n");
1247 1210
1248 return __probe_event_show_format(s, tp, "(%lx <- %lx)", 1211 return 0;
1249 "REC->" FIELD_STRING_FUNC
1250 ", REC->" FIELD_STRING_RETIP);
1251} 1212}
1252 1213
1253#ifdef CONFIG_EVENT_PROFILE 1214#ifdef CONFIG_PERF_EVENTS
1254 1215
1255/* Kprobe profile handler */ 1216/* Kprobe profile handler */
1256static __kprobes int kprobe_profile_func(struct kprobe *kp, 1217static __kprobes void kprobe_profile_func(struct kprobe *kp,
1257 struct pt_regs *regs) 1218 struct pt_regs *regs)
1258{ 1219{
1259 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1220 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1260 struct ftrace_event_call *call = &tp->call; 1221 struct ftrace_event_call *call = &tp->call;
1261 struct kprobe_trace_entry *entry; 1222 struct kprobe_trace_entry *entry;
1262 struct trace_entry *ent; 1223 int size, __size, i;
1263 int size, __size, i, pc, __cpu;
1264 unsigned long irq_flags; 1224 unsigned long irq_flags;
1265 char *trace_buf;
1266 char *raw_data;
1267 int rctx; 1225 int rctx;
1268 1226
1269 pc = preempt_count();
1270 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); 1227 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1271 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1228 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1272 size -= sizeof(u32); 1229 size -= sizeof(u32);
1273 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1230 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1274 "profile buffer not large enough")) 1231 "profile buffer not large enough"))
1275 return 0; 1232 return;
1276
1277 /*
1278 * Protect the non nmi buffer
1279 * This also protects the rcu read side
1280 */
1281 local_irq_save(irq_flags);
1282
1283 rctx = perf_swevent_get_recursion_context();
1284 if (rctx < 0)
1285 goto end_recursion;
1286
1287 __cpu = smp_processor_id();
1288
1289 if (in_nmi())
1290 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1291 else
1292 trace_buf = rcu_dereference(perf_trace_buf);
1293 1233
1294 if (!trace_buf) 1234 entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
1295 goto end; 1235 if (!entry)
1296 1236 return;
1297 raw_data = per_cpu_ptr(trace_buf, __cpu);
1298
1299 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1300 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1301 entry = (struct kprobe_trace_entry *)raw_data;
1302 ent = &entry->ent;
1303 1237
1304 tracing_generic_entry_update(ent, irq_flags, pc);
1305 ent->type = call->id;
1306 entry->nargs = tp->nr_args; 1238 entry->nargs = tp->nr_args;
1307 entry->ip = (unsigned long)kp->addr; 1239 entry->ip = (unsigned long)kp->addr;
1308 for (i = 0; i < tp->nr_args; i++) 1240 for (i = 0; i < tp->nr_args; i++)
1309 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1241 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1310 perf_tp_event(call->id, entry->ip, 1, entry, size);
1311
1312end:
1313 perf_swevent_put_recursion_context(rctx);
1314end_recursion:
1315 local_irq_restore(irq_flags);
1316 1242
1317 return 0; 1243 ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
1318} 1244}
1319 1245
1320/* Kretprobe profile handler */ 1246/* Kretprobe profile handler */
1321static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, 1247static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
1322 struct pt_regs *regs) 1248 struct pt_regs *regs)
1323{ 1249{
1324 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1250 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1325 struct ftrace_event_call *call = &tp->call; 1251 struct ftrace_event_call *call = &tp->call;
1326 struct kretprobe_trace_entry *entry; 1252 struct kretprobe_trace_entry *entry;
1327 struct trace_entry *ent; 1253 int size, __size, i;
1328 int size, __size, i, pc, __cpu;
1329 unsigned long irq_flags; 1254 unsigned long irq_flags;
1330 char *trace_buf;
1331 char *raw_data;
1332 int rctx; 1255 int rctx;
1333 1256
1334 pc = preempt_count();
1335 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); 1257 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1336 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1258 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1337 size -= sizeof(u32); 1259 size -= sizeof(u32);
1338 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1260 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1339 "profile buffer not large enough")) 1261 "profile buffer not large enough"))
1340 return 0; 1262 return;
1341
1342 /*
1343 * Protect the non nmi buffer
1344 * This also protects the rcu read side
1345 */
1346 local_irq_save(irq_flags);
1347
1348 rctx = perf_swevent_get_recursion_context();
1349 if (rctx < 0)
1350 goto end_recursion;
1351
1352 __cpu = smp_processor_id();
1353 1263
1354 if (in_nmi()) 1264 entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
1355 trace_buf = rcu_dereference(perf_trace_buf_nmi); 1265 if (!entry)
1356 else 1266 return;
1357 trace_buf = rcu_dereference(perf_trace_buf);
1358
1359 if (!trace_buf)
1360 goto end;
1361
1362 raw_data = per_cpu_ptr(trace_buf, __cpu);
1363
1364 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1365 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1366 entry = (struct kretprobe_trace_entry *)raw_data;
1367 ent = &entry->ent;
1368 1267
1369 tracing_generic_entry_update(ent, irq_flags, pc);
1370 ent->type = call->id;
1371 entry->nargs = tp->nr_args; 1268 entry->nargs = tp->nr_args;
1372 entry->func = (unsigned long)tp->rp.kp.addr; 1269 entry->func = (unsigned long)tp->rp.kp.addr;
1373 entry->ret_ip = (unsigned long)ri->ret_addr; 1270 entry->ret_ip = (unsigned long)ri->ret_addr;
1374 for (i = 0; i < tp->nr_args; i++) 1271 for (i = 0; i < tp->nr_args; i++)
1375 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1272 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1376 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1377
1378end:
1379 perf_swevent_put_recursion_context(rctx);
1380end_recursion:
1381 local_irq_restore(irq_flags);
1382 1273
1383 return 0; 1274 ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags);
1384} 1275}
1385 1276
1386static int probe_profile_enable(struct ftrace_event_call *call) 1277static int probe_profile_enable(struct ftrace_event_call *call)
@@ -1408,7 +1299,7 @@ static void probe_profile_disable(struct ftrace_event_call *call)
1408 disable_kprobe(&tp->rp.kp); 1299 disable_kprobe(&tp->rp.kp);
1409 } 1300 }
1410} 1301}
1411#endif /* CONFIG_EVENT_PROFILE */ 1302#endif /* CONFIG_PERF_EVENTS */
1412 1303
1413 1304
1414static __kprobes 1305static __kprobes
@@ -1418,10 +1309,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1418 1309
1419 if (tp->flags & TP_FLAG_TRACE) 1310 if (tp->flags & TP_FLAG_TRACE)
1420 kprobe_trace_func(kp, regs); 1311 kprobe_trace_func(kp, regs);
1421#ifdef CONFIG_EVENT_PROFILE 1312#ifdef CONFIG_PERF_EVENTS
1422 if (tp->flags & TP_FLAG_PROFILE) 1313 if (tp->flags & TP_FLAG_PROFILE)
1423 kprobe_profile_func(kp, regs); 1314 kprobe_profile_func(kp, regs);
1424#endif /* CONFIG_EVENT_PROFILE */ 1315#endif
1425 return 0; /* We don't tweek kernel, so just return 0 */ 1316 return 0; /* We don't tweek kernel, so just return 0 */
1426} 1317}
1427 1318
@@ -1432,10 +1323,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1432 1323
1433 if (tp->flags & TP_FLAG_TRACE) 1324 if (tp->flags & TP_FLAG_TRACE)
1434 kretprobe_trace_func(ri, regs); 1325 kretprobe_trace_func(ri, regs);
1435#ifdef CONFIG_EVENT_PROFILE 1326#ifdef CONFIG_PERF_EVENTS
1436 if (tp->flags & TP_FLAG_PROFILE) 1327 if (tp->flags & TP_FLAG_PROFILE)
1437 kretprobe_profile_func(ri, regs); 1328 kretprobe_profile_func(ri, regs);
1438#endif /* CONFIG_EVENT_PROFILE */ 1329#endif
1439 return 0; /* We don't tweek kernel, so just return 0 */ 1330 return 0; /* We don't tweek kernel, so just return 0 */
1440} 1331}
1441 1332
@@ -1448,23 +1339,25 @@ static int register_probe_event(struct trace_probe *tp)
1448 if (probe_is_return(tp)) { 1339 if (probe_is_return(tp)) {
1449 tp->event.trace = print_kretprobe_event; 1340 tp->event.trace = print_kretprobe_event;
1450 call->raw_init = probe_event_raw_init; 1341 call->raw_init = probe_event_raw_init;
1451 call->show_format = kretprobe_event_show_format;
1452 call->define_fields = kretprobe_event_define_fields; 1342 call->define_fields = kretprobe_event_define_fields;
1453 } else { 1343 } else {
1454 tp->event.trace = print_kprobe_event; 1344 tp->event.trace = print_kprobe_event;
1455 call->raw_init = probe_event_raw_init; 1345 call->raw_init = probe_event_raw_init;
1456 call->show_format = kprobe_event_show_format;
1457 call->define_fields = kprobe_event_define_fields; 1346 call->define_fields = kprobe_event_define_fields;
1458 } 1347 }
1348 if (set_print_fmt(tp) < 0)
1349 return -ENOMEM;
1459 call->event = &tp->event; 1350 call->event = &tp->event;
1460 call->id = register_ftrace_event(&tp->event); 1351 call->id = register_ftrace_event(&tp->event);
1461 if (!call->id) 1352 if (!call->id) {
1353 kfree(call->print_fmt);
1462 return -ENODEV; 1354 return -ENODEV;
1355 }
1463 call->enabled = 0; 1356 call->enabled = 0;
1464 call->regfunc = probe_event_enable; 1357 call->regfunc = probe_event_enable;
1465 call->unregfunc = probe_event_disable; 1358 call->unregfunc = probe_event_disable;
1466 1359
1467#ifdef CONFIG_EVENT_PROFILE 1360#ifdef CONFIG_PERF_EVENTS
1468 call->profile_enable = probe_profile_enable; 1361 call->profile_enable = probe_profile_enable;
1469 call->profile_disable = probe_profile_disable; 1362 call->profile_disable = probe_profile_disable;
1470#endif 1363#endif
@@ -1472,6 +1365,7 @@ static int register_probe_event(struct trace_probe *tp)
1472 ret = trace_add_event_call(call); 1365 ret = trace_add_event_call(call);
1473 if (ret) { 1366 if (ret) {
1474 pr_info("Failed to register kprobe event: %s\n", call->name); 1367 pr_info("Failed to register kprobe event: %s\n", call->name);
1368 kfree(call->print_fmt);
1475 unregister_ftrace_event(&tp->event); 1369 unregister_ftrace_event(&tp->event);
1476 } 1370 }
1477 return ret; 1371 return ret;
@@ -1481,6 +1375,7 @@ static void unregister_probe_event(struct trace_probe *tp)
1481{ 1375{
1482 /* tp->event is unregistered in trace_remove_event_call() */ 1376 /* tp->event is unregistered in trace_remove_event_call() */
1483 trace_remove_event_call(&tp->call); 1377 trace_remove_event_call(&tp->call);
1378 kfree(tp->call.print_fmt);
1484} 1379}
1485 1380
1486/* Make a debugfs interface for controling probe points */ 1381/* Make a debugfs interface for controling probe points */
@@ -1523,28 +1418,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1523 1418
1524static __init int kprobe_trace_self_tests_init(void) 1419static __init int kprobe_trace_self_tests_init(void)
1525{ 1420{
1526 int ret; 1421 int ret, warn = 0;
1527 int (*target)(int, int, int, int, int, int); 1422 int (*target)(int, int, int, int, int, int);
1423 struct trace_probe *tp;
1528 1424
1529 target = kprobe_trace_selftest_target; 1425 target = kprobe_trace_selftest_target;
1530 1426
1531 pr_info("Testing kprobe tracing: "); 1427 pr_info("Testing kprobe tracing: ");
1532 1428
1533 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " 1429 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1534 "$arg1 $arg2 $arg3 $arg4 $stack $stack0"); 1430 "$stack $stack0 +0($stack)");
1535 if (WARN_ON_ONCE(ret)) 1431 if (WARN_ON_ONCE(ret)) {
1536 pr_warning("error enabling function entry\n"); 1432 pr_warning("error on probing function entry.\n");
1433 warn++;
1434 } else {
1435 /* Enable trace point */
1436 tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
1437 if (WARN_ON_ONCE(tp == NULL)) {
1438 pr_warning("error on getting new probe.\n");
1439 warn++;
1440 } else
1441 probe_event_enable(&tp->call);
1442 }
1537 1443
1538 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " 1444 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1539 "$retval"); 1445 "$retval");
1540 if (WARN_ON_ONCE(ret)) 1446 if (WARN_ON_ONCE(ret)) {
1541 pr_warning("error enabling function return\n"); 1447 pr_warning("error on probing function return.\n");
1448 warn++;
1449 } else {
1450 /* Enable trace point */
1451 tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
1452 if (WARN_ON_ONCE(tp == NULL)) {
1453 pr_warning("error on getting new probe.\n");
1454 warn++;
1455 } else
1456 probe_event_enable(&tp->call);
1457 }
1458
1459 if (warn)
1460 goto end;
1542 1461
1543 ret = target(1, 2, 3, 4, 5, 6); 1462 ret = target(1, 2, 3, 4, 5, 6);
1544 1463
1545 cleanup_all_probes(); 1464 ret = command_trace_probe("-:testprobe");
1465 if (WARN_ON_ONCE(ret)) {
1466 pr_warning("error on deleting a probe.\n");
1467 warn++;
1468 }
1546 1469
1547 pr_cont("OK\n"); 1470 ret = command_trace_probe("-:testprobe2");
1471 if (WARN_ON_ONCE(ret)) {
1472 pr_warning("error on deleting a probe.\n");
1473 warn++;
1474 }
1475
1476end:
1477 cleanup_all_probes();
1478 if (warn)
1479 pr_cont("NG: Some tests failed. Please check them.\n");
1480 else
1481 pr_cont("OK\n");
1548 return 0; 1482 return 0;
1549} 1483}
1550 1484
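set_print_fmt() above uses the usual two-pass snprintf idiom: a first pass with a zero-size buffer only measures the string, then the allocation is filled in a second pass. A self-contained sketch of the same idiom for a non-return probe, with hypothetical argument names:

	#include <stdio.h>
	#include <stdlib.h>

	static int build_fmt(char *buf, int len, const char **args, int nr_args)
	{
		int i, pos = 0;

	/* when len == 0 we only measure; snprintf(NULL, 0, ...) returns the length */
	#define LEN_OR_ZERO (len ? len - pos : 0)

		pos += snprintf(buf ? buf + pos : NULL, LEN_OR_ZERO, "\"(%%lx)");
		for (i = 0; i < nr_args; i++)
			pos += snprintf(buf ? buf + pos : NULL, LEN_OR_ZERO,
					" %s=%%lx", args[i]);
		pos += snprintf(buf ? buf + pos : NULL, LEN_OR_ZERO, "\", REC->ip");
		for (i = 0; i < nr_args; i++)
			pos += snprintf(buf ? buf + pos : NULL, LEN_OR_ZERO,
					", REC->%s", args[i]);
	#undef LEN_OR_ZERO

		return pos;	/* length needed, not counting the trailing '\0' */
	}

	int main(void)
	{
		const char *args[] = { "dfd", "mode" };	/* hypothetical probe args */
		int len = build_fmt(NULL, 0, args, 2);
		char *fmt = malloc(len + 1);

		if (!fmt)
			return 1;
		build_fmt(fmt, len + 1, args, 2);
		/* "(%lx) dfd=%lx mode=%lx", REC->ip, REC->dfd, REC->mode */
		puts(fmt);
		free(fmt);
		return 0;
	}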
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 678a5120ee30..f4bc9b27de5f 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
157 unsigned long val, flags; 157 unsigned long val, flags;
158 char buf[64]; 158 char buf[64];
159 int ret; 159 int ret;
160 int cpu;
160 161
161 if (count >= sizeof(buf)) 162 if (count >= sizeof(buf))
162 return -EINVAL; 163 return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 172 return ret;
172 173
173 local_irq_save(flags); 174 local_irq_save(flags);
175
176 /*
177 * In case we trace inside arch_spin_lock() or right after it
178 * from an NMI, we would deadlock on this lock, so we also need
179 * to increase the per-cpu trace_active here.
180 */
181 cpu = smp_processor_id();
182 per_cpu(trace_active, cpu)++;
183
174 arch_spin_lock(&max_stack_lock); 184 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 185 *ptr = val;
176 arch_spin_unlock(&max_stack_lock); 186 arch_spin_unlock(&max_stack_lock);
187
188 per_cpu(trace_active, cpu)--;
177 local_irq_restore(flags); 189 local_irq_restore(flags);
178 190
179 return count; 191 return count;
@@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
206 218
207static void *t_start(struct seq_file *m, loff_t *pos) 219static void *t_start(struct seq_file *m, loff_t *pos)
208{ 220{
221 int cpu;
222
209 local_irq_disable(); 223 local_irq_disable();
224
225 cpu = smp_processor_id();
226 per_cpu(trace_active, cpu)++;
227
210 arch_spin_lock(&max_stack_lock); 228 arch_spin_lock(&max_stack_lock);
211 229
212 if (*pos == 0) 230 if (*pos == 0)
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
217 235
218static void t_stop(struct seq_file *m, void *p) 236static void t_stop(struct seq_file *m, void *p)
219{ 237{
238 int cpu;
239
220 arch_spin_unlock(&max_stack_lock); 240 arch_spin_unlock(&max_stack_lock);
241
242 cpu = smp_processor_id();
243 per_cpu(trace_active, cpu)--;
244
221 local_irq_enable(); 245 local_irq_enable();
222} 246}
223 247
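The trace_active bumps above reuse the stack tracer's own per-CPU recursion flag: while it is raised, the trace callback (its check is outside this hunk) skips recording, so taking max_stack_lock here cannot recurse into itself from a traced function or an NMI. A minimal sketch of that guard pattern, with hypothetical names and a single flag standing in for the per-CPU variable:

	#include <stdio.h>

	/* per-CPU in the kernel; one flag is enough for this sketch */
	static int trace_active;

	/* stand-in for the stack-tracer callback that may fire on any call */
	static void traced_callback(void)
	{
		if (trace_active)	/* writer below holds the lock: back off */
			return;
		trace_active++;
		printf("callback: safe to take max_stack_lock and record\n");
		trace_active--;
	}

	/* stand-in for stack_max_size_write()/t_start() above */
	static void writer_path(void)
	{
		trace_active++;		/* keep the callback from re-taking the lock */
		/* arch_spin_lock(&max_stack_lock); ... arch_spin_unlock(...); */
		traced_callback();	/* pretend tracing fired inside the locked region */
		trace_active--;
	}

	int main(void)
	{
		traced_callback();	/* outside the writer: records normally */
		writer_path();		/* inside: the callback bails out, no deadlock */
		return 0;
	}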
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 75289f372dd2..cba47d7935cc 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -143,70 +143,65 @@ extern char *__bad_type_size(void);
143 #type, #name, offsetof(typeof(trace), name), \ 143 #type, #name, offsetof(typeof(trace), name), \
144 sizeof(trace.name), is_signed_type(type) 144 sizeof(trace.name), is_signed_type(type)
145 145
146int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 146static
147int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
147{ 148{
148 int i; 149 int i;
149 int ret; 150 int pos = 0;
150 struct syscall_metadata *entry = call->data;
151 struct syscall_trace_enter trace;
152 int offset = offsetof(struct syscall_trace_enter, args);
153 151
154 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 152 /* When len=0, we just calculate the needed length */
155 "\tsigned:%u;\n", 153#define LEN_OR_ZERO (len ? len - pos : 0)
156 SYSCALL_FIELD(int, nr));
157 if (!ret)
158 return 0;
159 154
155 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
160 for (i = 0; i < entry->nb_args; i++) { 156 for (i = 0; i < entry->nb_args; i++) {
161 ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], 157 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
162 entry->args[i]); 158 entry->args[i], sizeof(unsigned long),
163 if (!ret) 159 i == entry->nb_args - 1 ? "" : ", ");
164 return 0;
165 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
166 "\tsigned:%u;\n", offset,
167 sizeof(unsigned long),
168 is_signed_type(unsigned long));
169 if (!ret)
170 return 0;
171 offset += sizeof(unsigned long);
172 } 160 }
161 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
173 162
174 trace_seq_puts(s, "\nprint fmt: \"");
175 for (i = 0; i < entry->nb_args; i++) { 163 for (i = 0; i < entry->nb_args; i++) {
176 ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i], 164 pos += snprintf(buf + pos, LEN_OR_ZERO,
177 sizeof(unsigned long), 165 ", ((unsigned long)(REC->%s))", entry->args[i]);
178 i == entry->nb_args - 1 ? "" : ", ");
179 if (!ret)
180 return 0;
181 } 166 }
182 trace_seq_putc(s, '"');
183 167
184 for (i = 0; i < entry->nb_args; i++) { 168#undef LEN_OR_ZERO
185 ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
186 entry->args[i]);
187 if (!ret)
188 return 0;
189 }
190 169
191 return trace_seq_putc(s, '\n'); 170 /* return the length of print_fmt */
171 return pos;
192} 172}
193 173
194int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) 174static int set_syscall_print_fmt(struct ftrace_event_call *call)
195{ 175{
196 int ret; 176 char *print_fmt;
197 struct syscall_trace_exit trace; 177 int len;
178 struct syscall_metadata *entry = call->data;
198 179
199 ret = trace_seq_printf(s, 180 if (entry->enter_event != call) {
200 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 181 call->print_fmt = "\"0x%lx\", REC->ret";
201 "\tsigned:%u;\n"
202 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
203 "\tsigned:%u;\n",
204 SYSCALL_FIELD(int, nr),
205 SYSCALL_FIELD(long, ret));
206 if (!ret)
207 return 0; 182 return 0;
183 }
184
185 /* First: called with 0 length to calculate the needed length */
186 len = __set_enter_print_fmt(entry, NULL, 0);
187
188 print_fmt = kmalloc(len + 1, GFP_KERNEL);
189 if (!print_fmt)
190 return -ENOMEM;
191
192 /* Second: actually write the @print_fmt */
193 __set_enter_print_fmt(entry, print_fmt, len + 1);
194 call->print_fmt = print_fmt;
208 195
209 return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); 196 return 0;
197}
198
199static void free_syscall_print_fmt(struct ftrace_event_call *call)
200{
201 struct syscall_metadata *entry = call->data;
202
203 if (entry->enter_event == call)
204 kfree(call->print_fmt);
210} 205}
211 206
212int syscall_enter_define_fields(struct ftrace_event_call *call) 207int syscall_enter_define_fields(struct ftrace_event_call *call)
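For reference, a sketch of the string __set_enter_print_fmt() above would emit for a hypothetical three-argument, read-like syscall (args fd, buf, count) on a 64-bit build where sizeof(unsigned long) == 8; only the resulting literal is shown, the two-pass construction is the same as elsewhere in this series.

	#include <stdio.h>

	/* hypothetical: the generated print_fmt for a read-like syscall */
	static const char read_enter_fmt[] =
		"\"fd: 0x%08lx, buf: 0x%08lx, count: 0x%08lx\""
		", ((unsigned long)(REC->fd))"
		", ((unsigned long)(REC->buf))"
		", ((unsigned long)(REC->count))";

	int main(void)
	{
		puts(read_enter_fmt);
		return 0;
	}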
@@ -386,12 +381,22 @@ int init_syscall_trace(struct ftrace_event_call *call)
386{ 381{
387 int id; 382 int id;
388 383
389 id = register_ftrace_event(call->event); 384 if (set_syscall_print_fmt(call) < 0)
390 if (!id) 385 return -ENOMEM;
391 return -ENODEV; 386
392 call->id = id; 387 id = trace_event_raw_init(call);
393 INIT_LIST_HEAD(&call->fields); 388
394 return 0; 389 if (id < 0) {
390 free_syscall_print_fmt(call);
391 return id;
392 }
393
394 return id;
395}
396
397unsigned long __init arch_syscall_addr(int nr)
398{
399 return (unsigned long)sys_call_table[nr];
395} 400}
396 401
397int __init init_ftrace_syscalls(void) 402int __init init_ftrace_syscalls(void)
@@ -421,7 +426,7 @@ int __init init_ftrace_syscalls(void)
421} 426}
422core_initcall(init_ftrace_syscalls); 427core_initcall(init_ftrace_syscalls);
423 428
424#ifdef CONFIG_EVENT_PROFILE 429#ifdef CONFIG_PERF_EVENTS
425 430
426static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 431static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
427static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); 432static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
@@ -433,12 +438,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
433 struct syscall_metadata *sys_data; 438 struct syscall_metadata *sys_data;
434 struct syscall_trace_enter *rec; 439 struct syscall_trace_enter *rec;
435 unsigned long flags; 440 unsigned long flags;
436 char *trace_buf;
437 char *raw_data;
438 int syscall_nr; 441 int syscall_nr;
439 int rctx; 442 int rctx;
440 int size; 443 int size;
441 int cpu;
442 444
443 syscall_nr = syscall_get_nr(current, regs); 445 syscall_nr = syscall_get_nr(current, regs);
444 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 446 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -457,37 +459,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
457 "profile buffer not large enough")) 459 "profile buffer not large enough"))
458 return; 460 return;
459 461
460 /* Protect the per cpu buffer, begin the rcu read side */ 462 rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
461 local_irq_save(flags); 463 sys_data->enter_event->id, &rctx, &flags);
462 464 if (!rec)
463 rctx = perf_swevent_get_recursion_context(); 465 return;
464 if (rctx < 0)
465 goto end_recursion;
466
467 cpu = smp_processor_id();
468
469 trace_buf = rcu_dereference(perf_trace_buf);
470
471 if (!trace_buf)
472 goto end;
473
474 raw_data = per_cpu_ptr(trace_buf, cpu);
475
476 /* zero the dead bytes from align to not leak stack to user */
477 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
478 466
479 rec = (struct syscall_trace_enter *) raw_data;
480 tracing_generic_entry_update(&rec->ent, 0, 0);
481 rec->ent.type = sys_data->enter_event->id;
482 rec->nr = syscall_nr; 467 rec->nr = syscall_nr;
483 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 468 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
484 (unsigned long *)&rec->args); 469 (unsigned long *)&rec->args);
485 perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); 470 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
486
487end:
488 perf_swevent_put_recursion_context(rctx);
489end_recursion:
490 local_irq_restore(flags);
491} 471}
492 472
493int prof_sysenter_enable(struct ftrace_event_call *call) 473int prof_sysenter_enable(struct ftrace_event_call *call)
@@ -531,11 +511,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
531 struct syscall_trace_exit *rec; 511 struct syscall_trace_exit *rec;
532 unsigned long flags; 512 unsigned long flags;
533 int syscall_nr; 513 int syscall_nr;
534 char *trace_buf;
535 char *raw_data;
536 int rctx; 514 int rctx;
537 int size; 515 int size;
538 int cpu;
539 516
540 syscall_nr = syscall_get_nr(current, regs); 517 syscall_nr = syscall_get_nr(current, regs);
541 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 518 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -557,38 +534,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
557 "exit event has grown above profile buffer size")) 534 "exit event has grown above profile buffer size"))
558 return; 535 return;
559 536
560 /* Protect the per cpu buffer, begin the rcu read side */ 537 rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size,
561 local_irq_save(flags); 538 sys_data->exit_event->id, &rctx, &flags);
562 539 if (!rec)
563 rctx = perf_swevent_get_recursion_context(); 540 return;
564 if (rctx < 0)
565 goto end_recursion;
566
567 cpu = smp_processor_id();
568
569 trace_buf = rcu_dereference(perf_trace_buf);
570
571 if (!trace_buf)
572 goto end;
573
574 raw_data = per_cpu_ptr(trace_buf, cpu);
575
576 /* zero the dead bytes from align to not leak stack to user */
577 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
578
579 rec = (struct syscall_trace_exit *)raw_data;
580 541
581 tracing_generic_entry_update(&rec->ent, 0, 0);
582 rec->ent.type = sys_data->exit_event->id;
583 rec->nr = syscall_nr; 542 rec->nr = syscall_nr;
584 rec->ret = syscall_get_return_value(current, regs); 543 rec->ret = syscall_get_return_value(current, regs);
585 544
586 perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); 545 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
587
588end:
589 perf_swevent_put_recursion_context(rctx);
590end_recursion:
591 local_irq_restore(flags);
592} 546}
593 547
594int prof_sysexit_enable(struct ftrace_event_call *call) 548int prof_sysexit_enable(struct ftrace_event_call *call)
@@ -603,7 +557,7 @@ int prof_sysexit_enable(struct ftrace_event_call *call)
603 ret = register_trace_sys_exit(prof_syscall_exit); 557 ret = register_trace_sys_exit(prof_syscall_exit);
604 if (ret) { 558 if (ret) {
605 pr_info("event trace: Could not activate" 559 pr_info("event trace: Could not activate"
606 "syscall entry trace point"); 560 "syscall exit trace point");
607 } else { 561 } else {
608 set_bit(num, enabled_prof_exit_syscalls); 562 set_bit(num, enabled_prof_exit_syscalls);
609 sys_prof_refcount_exit++; 563 sys_prof_refcount_exit++;
@@ -626,6 +580,5 @@ void prof_sysexit_disable(struct ftrace_event_call *call)
626 mutex_unlock(&syscall_trace_lock); 580 mutex_unlock(&syscall_trace_lock);
627} 581}
628 582
629#endif 583#endif /* CONFIG_PERF_EVENTS */
630
631 584