path: root/kernel/trace
author    Robert Richter <robert.richter@amd.com>	2010-04-23 08:30:22 -0400
committer Robert Richter <robert.richter@amd.com>	2010-04-23 08:30:22 -0400
commit    a36bf32e9e8a86f291f746b7f8292e042ee04a46 (patch)
tree      c5c999baa4c214218e3adea9b336cbd9f23950ad /kernel/trace
parent    bc078e4eab65f11bbaeed380593ab8151b30d703 (diff)
parent    01bf0b64579ead8a82e7cfc32ae44bc667e7ad0f (diff)

Merge commit 'v2.6.34-rc5' into oprofile/core
Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                    11
-rw-r--r--  kernel/trace/Makefile                    4
-rw-r--r--  kernel/trace/blktrace.c                  6
-rw-r--r--  kernel/trace/ftrace.c                  136
-rw-r--r--  kernel/trace/power-traces.c              1
-rw-r--r--  kernel/trace/ring_buffer.c              40
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c     1
-rw-r--r--  kernel/trace/trace.c                   201
-rw-r--r--  kernel/trace/trace.h                    11
-rw-r--r--  kernel/trace/trace_branch.c             19
-rw-r--r--  kernel/trace/trace_clock.c               5
-rw-r--r--  kernel/trace/trace_event_perf.c        175
-rw-r--r--  kernel/trace/trace_event_profile.c     122
-rw-r--r--  kernel/trace/trace_events.c             84
-rw-r--r--  kernel/trace/trace_events_filter.c       5
-rw-r--r--  kernel/trace/trace_export.c             87
-rw-r--r--  kernel/trace/trace_functions_graph.c   108
-rw-r--r--  kernel/trace/trace_kprobe.c            323
-rw-r--r--  kernel/trace/trace_ksym.c                1
-rw-r--r--  kernel/trace/trace_mmiotrace.c           1
-rw-r--r--  kernel/trace/trace_selftest.c            1
-rw-r--r--  kernel/trace/trace_stack.c              24
-rw-r--r--  kernel/trace/trace_stat.c                1
-rw-r--r--  kernel/trace/trace_syscalls.c          254
-rw-r--r--  kernel/trace/trace_workqueue.c           1
25 files changed, 850 insertions, 772 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 60e2ce0181ee..13e13d428cd3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -328,15 +328,6 @@ config BRANCH_TRACER
 
 	  Say N if unsure.
 
-config POWER_TRACER
-	bool "Trace power consumption behavior"
-	depends on X86
-	select GENERIC_TRACER
-	help
-	  This tracer helps developers to analyze and optimize the kernel's
-	  power management decisions, specifically the C-state and P-state
-	  behavior.
-
 config KSYM_TRACER
 	bool "Trace read and write access on kernel memory locations"
 	depends on HAVE_HW_BREAKPOINT
@@ -449,7 +440,7 @@ config BLK_DEV_IO_TRACE
 
 config KPROBE_EVENT
 	depends on KPROBES
-	depends on X86
+	depends on HAVE_REGS_AND_STACK_ACCESS_API
 	bool "Enable kprobes-based dynamic events"
 	select TRACING
 	default y
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index cd9ecd89ec77..78edc6490038 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -51,7 +51,9 @@ endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events.o
 obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
-obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
+ifeq ($(CONFIG_PERF_EVENTS),y)
+obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
+endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
 obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d9d6206e0b14..b3bc91a3f510 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -21,6 +21,7 @@
 #include <linux/percpu.h>
 #include <linux/init.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
 #include <linux/debugfs.h>
 #include <linux/smp_lock.h>
 #include <linux/time.h>
@@ -540,9 +541,10 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	if (ret)
 		return ret;
 
-	if (copy_to_user(arg, &buts, sizeof(buts)))
+	if (copy_to_user(arg, &buts, sizeof(buts))) {
+		blk_trace_remove(q);
 		return -EFAULT;
-
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(blk_trace_setup);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1e6640f80454..2404b59b3097 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,12 +22,13 @@
 #include <linux/hardirq.h>
 #include <linux/kthread.h>
 #include <linux/uaccess.h>
-#include <linux/kprobes.h>
 #include <linux/ftrace.h>
 #include <linux/sysctl.h>
+#include <linux/slab.h>
 #include <linux/ctype.h>
 #include <linux/list.h>
 #include <linux/hash.h>
+#include <linux/rcupdate.h>
 
 #include <trace/events/sched.h>
 
@@ -85,22 +86,22 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
-#endif
-
+/*
+ * Traverse the ftrace_list, invoking all entries.  The reason that we
+ * can use rcu_dereference_raw() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism.  The rcu_dereference_raw() calls are needed to handle
+ * concurrent insertions into the ftrace_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
 static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
 {
-	struct ftrace_ops *op = ftrace_list;
-
-	/* in case someone actually ports this to alpha! */
-	read_barrier_depends();
+	struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
 
 	while (op != &ftrace_list_end) {
-		/* silly alpha */
-		read_barrier_depends();
 		op->func(ip, parent_ip);
-		op = op->next;
+		op = rcu_dereference_raw(op->next); /*see above*/
 	};
 }
 
@@ -155,8 +156,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	 * the ops->next pointer is valid before another CPU sees
 	 * the ops pointer included into the ftrace_list.
 	 */
-	smp_wmb();
-	ftrace_list = ops;
+	rcu_assign_pointer(ftrace_list, ops);
 
 	if (ftrace_enabled) {
 		ftrace_func_t func;
@@ -898,36 +898,6 @@ static struct dyn_ftrace *ftrace_free_records;
 		}				\
 	}
 
-#ifdef CONFIG_KPROBES
-
-static int frozen_record_count;
-
-static inline void freeze_record(struct dyn_ftrace *rec)
-{
-	if (!(rec->flags & FTRACE_FL_FROZEN)) {
-		rec->flags |= FTRACE_FL_FROZEN;
-		frozen_record_count++;
-	}
-}
-
-static inline void unfreeze_record(struct dyn_ftrace *rec)
-{
-	if (rec->flags & FTRACE_FL_FROZEN) {
-		rec->flags &= ~FTRACE_FL_FROZEN;
-		frozen_record_count--;
-	}
-}
-
-static inline int record_frozen(struct dyn_ftrace *rec)
-{
-	return rec->flags & FTRACE_FL_FROZEN;
-}
-#else
-# define freeze_record(rec)			({ 0; })
-# define unfreeze_record(rec)			({ 0; })
-# define record_frozen(rec)			({ 0; })
-#endif /* CONFIG_KPROBES */
-
 static void ftrace_free_rec(struct dyn_ftrace *rec)
 {
 	rec->freelist = ftrace_free_records;
@@ -1025,6 +995,21 @@ static void ftrace_bug(int failed, unsigned long ip)
 }
 
 
+/* Return 1 if the address range is reserved for ftrace */
+int ftrace_text_reserved(void *start, void *end)
+{
+	struct dyn_ftrace *rec;
+	struct ftrace_page *pg;
+
+	do_for_each_ftrace_rec(pg, rec) {
+		if (rec->ip <= (unsigned long)end &&
+		    rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
+			return 1;
+	} while_for_each_ftrace_rec();
+	return 0;
+}
+
+
 static int
 __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 {
@@ -1076,14 +1061,6 @@ static void ftrace_replace_code(int enable)
 		    !(rec->flags & FTRACE_FL_CONVERTED))
 			continue;
 
-		/* ignore updates to this record's mcount site */
-		if (get_kprobe((void *)rec->ip)) {
-			freeze_record(rec);
-			continue;
-		} else {
-			unfreeze_record(rec);
-		}
-
 		failed = __ftrace_replace_code(rec, enable);
 		if (failed) {
 			rec->flags |= FTRACE_FL_FAILED;
@@ -2300,6 +2277,8 @@ __setup("ftrace_filter=", set_ftrace_filter);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
+static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
+
 static int __init set_graph_function(char *str)
 {
 	strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
@@ -2426,6 +2405,7 @@ static const struct file_operations ftrace_notrace_fops = {
 static DEFINE_MUTEX(graph_lock);
 
 int ftrace_graph_count;
+int ftrace_graph_filter_enabled;
 unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
 
 static void *
@@ -2448,7 +2428,7 @@ static void *g_start(struct seq_file *m, loff_t *pos)
 	mutex_lock(&graph_lock);
 
 	/* Nothing, tell g_show to print all functions are enabled */
-	if (!ftrace_graph_count && !*pos)
+	if (!ftrace_graph_filter_enabled && !*pos)
 		return (void *)1;
 
 	return __g_next(m, pos);
@@ -2494,6 +2474,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
 	mutex_lock(&graph_lock);
 	if ((file->f_mode & FMODE_WRITE) &&
 	    (file->f_flags & O_TRUNC)) {
+		ftrace_graph_filter_enabled = 0;
 		ftrace_graph_count = 0;
 		memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
 	}
@@ -2519,7 +2500,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 	struct dyn_ftrace *rec;
 	struct ftrace_page *pg;
 	int search_len;
-	int found = 0;
+	int fail = 1;
 	int type, not;
 	char *search;
 	bool exists;
@@ -2530,37 +2511,51 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 
 	/* decode regex */
 	type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
-	if (not)
-		return -EINVAL;
+	if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
+		return -EBUSY;
 
 	search_len = strlen(search);
 
 	mutex_lock(&ftrace_lock);
 	do_for_each_ftrace_rec(pg, rec) {
 
-		if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
-			break;
-
 		if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
 			continue;
 
 		if (ftrace_match_record(rec, search, search_len, type)) {
-			/* ensure it is not already in the array */
+			/* if it is in the array */
 			exists = false;
-			for (i = 0; i < *idx; i++)
+			for (i = 0; i < *idx; i++) {
 				if (array[i] == rec->ip) {
 					exists = true;
 					break;
 				}
-			if (!exists)
-				array[(*idx)++] = rec->ip;
-			found = 1;
+			}
+
+			if (!not) {
+				fail = 0;
+				if (!exists) {
+					array[(*idx)++] = rec->ip;
+					if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
+						goto out;
+				}
+			} else {
+				if (exists) {
+					array[i] = array[--(*idx)];
+					array[*idx] = 0;
+					fail = 0;
+				}
+			}
 		}
 	} while_for_each_ftrace_rec();
-
+out:
 	mutex_unlock(&ftrace_lock);
 
-	return found ? 0 : -EINVAL;
+	if (fail)
+		return -EINVAL;
+
+	ftrace_graph_filter_enabled = 1;
+	return 0;
 }
 
 static ssize_t
@@ -2570,16 +2565,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 	struct trace_parser parser;
 	ssize_t read, ret;
 
-	if (!cnt || cnt < 0)
+	if (!cnt)
 		return 0;
 
 	mutex_lock(&graph_lock);
 
-	if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
-		ret = -EBUSY;
-		goto out_unlock;
-	}
-
 	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
 		ret = -ENOMEM;
 		goto out_unlock;
@@ -3364,6 +3354,7 @@ void ftrace_graph_init_task(struct task_struct *t)
 {
 	/* Make sure we do not use the parent ret_stack */
 	t->ret_stack = NULL;
+	t->curr_ret_stack = -1;
 
 	if (ftrace_graph_active) {
 		struct ftrace_ret_stack *ret_stack;
@@ -3373,7 +3364,6 @@ void ftrace_graph_init_task(struct task_struct *t)
 					GFP_KERNEL);
 		if (!ret_stack)
 			return;
-		t->curr_ret_stack = -1;
 		atomic_set(&t->tracing_graph_pause, 0);
 		atomic_set(&t->trace_overrun, 0);
 		t->ftrace_timestamp = 0;
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index 9f4f565b01e6..a22582a06161 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -9,7 +9,6 @@
 #include <linux/workqueue.h>
 #include <linux/sched.h>
 #include <linux/module.h>
-#include <linux/slab.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/power.h>
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8c1b2d290718..41ca394feb22 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -14,12 +14,14 @@
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/list.h>
 #include <linux/cpu.h>
 #include <linux/fs.h>
 
+#include <asm/local.h>
 #include "trace.h"
 
 /*
@@ -206,6 +208,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 #define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 #define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */
 
+#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+# define RB_FORCE_8BYTE_ALIGNMENT	0
+# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
+#else
+# define RB_FORCE_8BYTE_ALIGNMENT	1
+# define RB_ARCH_ALIGNMENT		8U
+#endif
+
 /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
 #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
@@ -1200,18 +1210,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 
 	for (i = 0; i < nr_pages; i++) {
 		if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-			return;
+			goto out;
 		p = cpu_buffer->pages->next;
 		bpage = list_entry(p, struct buffer_page, list);
 		list_del_init(&bpage->list);
 		free_buffer_page(bpage);
 	}
 	if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-		return;
+		goto out;
 
 	rb_reset_cpu(cpu_buffer);
 	rb_check_pages(cpu_buffer);
 
+out:
 	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
@@ -1228,7 +1239,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
 	for (i = 0; i < nr_pages; i++) {
 		if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
-			return;
+			goto out;
 		p = pages->next;
 		bpage = list_entry(p, struct buffer_page, list);
 		list_del_init(&bpage->list);
@@ -1237,6 +1248,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	rb_reset_cpu(cpu_buffer);
 	rb_check_pages(cpu_buffer);
 
+out:
 	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
@@ -1546,7 +1558,7 @@ rb_update_event(struct ring_buffer_event *event,
 
 	case 0:
 		length -= RB_EVNT_HDR_SIZE;
-		if (length > RB_MAX_SMALL_DATA)
+		if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
 			event->array[0] = length;
 		else
 			event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
@@ -1721,11 +1733,11 @@ static unsigned rb_calculate_event_length(unsigned length)
 	if (!length)
 		length = 1;
 
-	if (length > RB_MAX_SMALL_DATA)
+	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
 		length += sizeof(event.array[0]);
 
 	length += RB_EVNT_HDR_SIZE;
-	length = ALIGN(length, RB_ALIGNMENT);
+	length = ALIGN(length, RB_ARCH_ALIGNMENT);
 
 	return length;
 }
@@ -2232,12 +2244,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return NULL;
 
-	if (atomic_read(&buffer->record_disabled))
-		return NULL;
-
 	/* If we are tracing schedule, we don't want to recurse */
 	resched = ftrace_preempt_disable();
 
+	if (atomic_read(&buffer->record_disabled))
+		goto out_nocheck;
+
 	if (trace_recursive_lock())
 		goto out_nocheck;
 
@@ -2469,11 +2481,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return -EBUSY;
 
-	if (atomic_read(&buffer->record_disabled))
-		return -EBUSY;
-
 	resched = ftrace_preempt_disable();
 
+	if (atomic_read(&buffer->record_disabled))
+		goto out;
+
 	cpu = raw_smp_processor_id();
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -2541,7 +2553,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
 * @buffer: The ring buffer to enable writes
 *
 * Note, multiple disables will need the same number of enables
- * to truely enable the writing (much like preempt_disable).
+ * to truly enable the writing (much like preempt_disable).
 */
 void ring_buffer_record_enable(struct ring_buffer *buffer)
 {
@@ -2577,7 +2589,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
 * @cpu: The CPU to enable.
 *
 * Note, multiple disables will need the same number of enables
- * to truely enable the writing (much like preempt_disable).
+ * to truly enable the writing (much like preempt_disable).
 */
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
 {
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index b2477caf09c2..df74c7982255 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -8,6 +8,7 @@
 #include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/time.h>
+#include <asm/local.h>
 
 struct rb_page {
 	u64		ts;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index eac6875cb990..44f916a04065 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -32,10 +32,11 @@
 #include <linux/splice.h>
 #include <linux/kdebug.h>
 #include <linux/string.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/poll.h>
-#include <linux/gfp.h>
 #include <linux/fs.h>
 
 #include "trace.h"
@@ -91,20 +92,17 @@ DEFINE_PER_CPU(int, ftrace_cpu_disabled);
 static inline void ftrace_disable_cpu(void)
 {
 	preempt_disable();
-	__this_cpu_inc(per_cpu_var(ftrace_cpu_disabled));
+	__this_cpu_inc(ftrace_cpu_disabled);
 }
 
 static inline void ftrace_enable_cpu(void)
 {
-	__this_cpu_dec(per_cpu_var(ftrace_cpu_disabled));
+	__this_cpu_dec(ftrace_cpu_disabled);
 	preempt_enable();
 }
 
 static cpumask_var_t __read_mostly	tracing_buffer_mask;
 
-/* Define which cpu buffers are currently read in trace_pipe */
-static cpumask_var_t			tracing_reader_cpumask;
-
 #define for_each_tracing_cpu(cpu)	\
 	for_each_cpu(cpu, tracing_buffer_mask)
 
@@ -243,12 +241,91 @@ static struct tracer *current_trace __read_mostly;
 
 /*
  * trace_types_lock is used to protect the trace_types list.
- * This lock is also used to keep user access serialized.
- * Accesses from userspace will grab this lock while userspace
- * activities happen inside the kernel.
 */
 static DEFINE_MUTEX(trace_types_lock);
 
+/*
+ * serialize the access of the ring buffer
+ *
+ * ring buffer serializes readers, but it is low level protection.
+ * The validity of the events (which returns by ring_buffer_peek() ..etc)
+ * are not protected by ring buffer.
+ *
+ * The content of events may become garbage if we allow other process consumes
+ * these events concurrently:
+ *   A) the page of the consumed events may become a normal page
+ *      (not reader page) in ring buffer, and this page will be rewrited
+ *      by events producer.
+ *   B) The page of the consumed events may become a page for splice_read,
+ *      and this page will be returned to system.
+ *
+ * These primitives allow multi process access to different cpu ring buffer
+ * concurrently.
+ *
+ * These primitives don't distinguish read-only and read-consume access.
+ * Multi read-only access are also serialized.
+ */
+
+#ifdef CONFIG_SMP
+static DECLARE_RWSEM(all_cpu_access_lock);
+static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
+
+static inline void trace_access_lock(int cpu)
+{
+	if (cpu == TRACE_PIPE_ALL_CPU) {
+		/* gain it for accessing the whole ring buffer. */
+		down_write(&all_cpu_access_lock);
+	} else {
+		/* gain it for accessing a cpu ring buffer. */
+
+		/* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
+		down_read(&all_cpu_access_lock);
+
+		/* Secondly block other access to this @cpu ring buffer. */
+		mutex_lock(&per_cpu(cpu_access_lock, cpu));
+	}
+}
+
+static inline void trace_access_unlock(int cpu)
+{
+	if (cpu == TRACE_PIPE_ALL_CPU) {
+		up_write(&all_cpu_access_lock);
+	} else {
+		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
+		up_read(&all_cpu_access_lock);
+	}
+}
+
+static inline void trace_access_lock_init(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		mutex_init(&per_cpu(cpu_access_lock, cpu));
+}
+
+#else
+
+static DEFINE_MUTEX(access_lock);
+
+static inline void trace_access_lock(int cpu)
+{
+	(void)cpu;
+	mutex_lock(&access_lock);
+}
+
+static inline void trace_access_unlock(int cpu)
+{
+	(void)cpu;
+	mutex_unlock(&access_lock);
+}
+
+static inline void trace_access_lock_init(void)
+{
+}
+
+#endif
+
 /* trace_wait is a waitqueue for tasks blocked on trace_poll */
 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 
@@ -297,6 +374,21 @@ static int __init set_buf_size(char *str)
 }
 __setup("trace_buf_size=", set_buf_size);
 
+static int __init set_tracing_thresh(char *str)
+{
+	unsigned long threshhold;
+	int ret;
+
+	if (!str)
+		return 0;
+	ret = strict_strtoul(str, 0, &threshhold);
+	if (ret < 0)
+		return 0;
+	tracing_thresh = threshhold * 1000;
+	return 1;
+}
+__setup("tracing_thresh=", set_tracing_thresh);
+
 unsigned long nsecs_to_usecs(unsigned long nsecs)
 {
 	return nsecs / 1000;
@@ -502,9 +594,10 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
 static arch_spinlock_t ftrace_max_lock =
 	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
+unsigned long __read_mostly	tracing_thresh;
+
 #ifdef CONFIG_TRACER_MAX_TRACE
 unsigned long __read_mostly	tracing_max_latency;
-unsigned long __read_mostly	tracing_thresh;
 
 /*
 * Copy the new maximum trace into the separate maximum-trace
@@ -515,7 +608,7 @@ static void
 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
 	struct trace_array_cpu *data = tr->data[cpu];
-	struct trace_array_cpu *max_data = tr->data[cpu];
+	struct trace_array_cpu *max_data;
 
 	max_tr.cpu = cpu;
 	max_tr.time_start = data->preempt_timestamp;
@@ -525,7 +618,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	max_data->critical_start = data->critical_start;
 	max_data->critical_end = data->critical_end;
 
-	memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
+	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
 	max_data->pid = tsk->pid;
 	max_data->uid = task_uid(tsk);
 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
@@ -747,10 +840,10 @@ out:
 	mutex_unlock(&trace_types_lock);
 }
 
-static void __tracing_reset(struct trace_array *tr, int cpu)
+static void __tracing_reset(struct ring_buffer *buffer, int cpu)
 {
 	ftrace_disable_cpu();
-	ring_buffer_reset_cpu(tr->buffer, cpu);
+	ring_buffer_reset_cpu(buffer, cpu);
 	ftrace_enable_cpu();
 }
 
@@ -762,7 +855,7 @@ void tracing_reset(struct trace_array *tr, int cpu)
 
 	/* Make sure all commits have finished */
 	synchronize_sched();
-	__tracing_reset(tr, cpu);
+	__tracing_reset(buffer, cpu);
 
 	ring_buffer_record_enable(buffer);
 }
@@ -780,7 +873,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		__tracing_reset(tr, cpu);
+		__tracing_reset(buffer, cpu);
 
 	ring_buffer_record_enable(buffer);
 }
@@ -857,6 +950,8 @@ void tracing_start(void)
 		goto out;
 	}
 
+	/* Prevent the buffers from switching */
+	arch_spin_lock(&ftrace_max_lock);
 
 	buffer = global_trace.buffer;
 	if (buffer)
@@ -866,6 +961,8 @@ void tracing_start(void)
 	if (buffer)
 		ring_buffer_record_enable(buffer);
 
+	arch_spin_unlock(&ftrace_max_lock);
+
 	ftrace_start();
  out:
 	spin_unlock_irqrestore(&tracing_start_lock, flags);
@@ -887,6 +984,9 @@ void tracing_stop(void)
 	if (trace_stop_count++)
 		goto out;
 
+	/* Prevent the buffers from switching */
+	arch_spin_lock(&ftrace_max_lock);
+
 	buffer = global_trace.buffer;
 	if (buffer)
 		ring_buffer_record_disable(buffer);
@@ -895,6 +995,8 @@ void tracing_stop(void)
 	if (buffer)
 		ring_buffer_record_disable(buffer);
 
+	arch_spin_unlock(&ftrace_max_lock);
+
  out:
 	spin_unlock_irqrestore(&tracing_start_lock, flags);
 }
@@ -1089,7 +1191,7 @@ trace_function(struct trace_array *tr,
 	struct ftrace_entry *entry;
 
 	/* If we are reading the ring buffer, don't trace */
-	if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
+	if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
 		return;
 
 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@ -1182,6 +1284,13 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 	if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
 		return;
 
+	/*
+	 * NMIs can not handle page faults, even with fix ups.
+	 * The save user stack can (and often does) fault.
+	 */
+	if (unlikely(in_nmi()))
+		return;
+
 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
 					  sizeof(*entry), flags, pc);
 	if (!event)
@@ -1320,8 +1429,10 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	entry->fmt			= fmt;
 
 	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
-	if (!filter_check_discard(call, entry, buffer, event))
+	if (!filter_check_discard(call, entry, buffer, event)) {
 		ring_buffer_unlock_commit(buffer, event);
+		ftrace_trace_stack(buffer, flags, 6, pc);
+	}
 
 out_unlock:
 	arch_spin_unlock(&trace_buf_lock);
@@ -1394,8 +1505,10 @@ int trace_array_vprintk(struct trace_array *tr,
 
 	memcpy(&entry->buf, trace_buf, len);
 	entry->buf[len] = '\0';
-	if (!filter_check_discard(call, entry, buffer, event))
+	if (!filter_check_discard(call, entry, buffer, event)) {
 		ring_buffer_unlock_commit(buffer, event);
+		ftrace_trace_stack(buffer, irq_flags, 6, pc);
+	}
 
  out_unlock:
 	arch_spin_unlock(&trace_buf_lock);
@@ -1585,12 +1698,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
 }
 
 /*
- * No necessary locking here. The worst thing which can
- * happen is loosing events consumed at the same time
- * by a trace_pipe reader.
- * Other than that, we don't risk to crash the ring buffer
- * because it serializes the readers.
- *
 * The current tracer is copied to avoid a global locking
 * all around.
 */
@@ -1628,6 +1735,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 
 		ftrace_enable_cpu();
 
+		iter->leftover = 0;
 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
 			;
 
@@ -1645,12 +1753,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 	}
 
 	trace_event_read_lock();
+	trace_access_lock(cpu_file);
 	return p;
 }
 
 static void s_stop(struct seq_file *m, void *p)
 {
+	struct trace_iterator *iter = m->private;
+
 	atomic_dec(&trace_record_cmdline_disabled);
+	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
 }
 
@@ -2841,22 +2953,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 
 	mutex_lock(&trace_types_lock);
 
-	/* We only allow one reader per cpu */
-	if (cpu_file == TRACE_PIPE_ALL_CPU) {
-		if (!cpumask_empty(tracing_reader_cpumask)) {
-			ret = -EBUSY;
-			goto out;
-		}
-		cpumask_setall(tracing_reader_cpumask);
-	} else {
-		if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
-			cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
-		else {
-			ret = -EBUSY;
-			goto out;
-		}
-	}
-
 	/* create a buffer to store the information to pass to userspace */
 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
 	if (!iter) {
@@ -2912,12 +3008,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 
 	mutex_lock(&trace_types_lock);
 
-	if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
-		cpumask_clear(tracing_reader_cpumask);
-	else
-		cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
-
-
 	if (iter->trace->pipe_close)
 		iter->trace->pipe_close(iter);
 
@@ -3079,6 +3169,7 @@ waitagain:
 	iter->pos = -1;
 
 	trace_event_read_lock();
+	trace_access_lock(iter->cpu_file);
 	while (find_next_entry_inc(iter) != NULL) {
 		enum print_line_t ret;
 		int len = iter->seq.len;
@@ -3095,6 +3186,7 @@ waitagain:
 		if (iter->seq.len >= cnt)
 			break;
 	}
+	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
 
 	/* Now copy what we have to the user */
@@ -3220,6 +3312,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 	}
 
 	trace_event_read_lock();
+	trace_access_lock(iter->cpu_file);
 
 	/* Fill as many pages as possible. */
 	for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
@@ -3243,6 +3336,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 		trace_seq_init(&iter->seq);
 	}
 
+	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
 	mutex_unlock(&iter->mutex);
 
@@ -3544,10 +3638,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 
 	info->read = 0;
 
+	trace_access_lock(info->cpu);
 	ret = ring_buffer_read_page(info->tr->buffer,
 				    &info->spare,
 				    count,
 				    info->cpu, 0);
+	trace_access_unlock(info->cpu);
 	if (ret < 0)
 		return 0;
 
@@ -3675,6 +3771,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		len &= PAGE_MASK;
 	}
 
+	trace_access_lock(info->cpu);
 	entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
 
 	for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
@@ -3722,6 +3819,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
 	}
 
+	trace_access_unlock(info->cpu);
 	spd.nr_pages = i;
 
 	/* did we read anything? */
@@ -4158,6 +4256,8 @@ static __init int tracer_init_debugfs(void)
 	struct dentry *d_tracer;
 	int cpu;
 
+	trace_access_lock_init();
+
 	d_tracer = tracing_init_dentry();
 
 	trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4181,10 +4281,10 @@ static __init int tracer_init_debugfs(void)
 #ifdef CONFIG_TRACER_MAX_TRACE
 	trace_create_file("tracing_max_latency", 0644, d_tracer,
 			&tracing_max_latency, &tracing_max_lat_fops);
+#endif
 
 	trace_create_file("tracing_thresh", 0644, d_tracer,
 			&tracing_thresh, &tracing_max_lat_fops);
-#endif
 
 	trace_create_file("README", 0444, d_tracer,
 			NULL, &tracing_readme_fops);
@@ -4392,9 +4492,6 @@ __init static int tracer_alloc_buffers(void)
 	if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
 		goto out_free_buffer_mask;
 
-	if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
-		goto out_free_tracing_cpumask;
-
 	/* To save memory, keep the ring buffer size to its minimum */
 	if (ring_buffer_expanded)
 		ring_buf_size = trace_buf_size;
@@ -4452,8 +4549,6 @@ __init static int tracer_alloc_buffers(void)
 	return 0;
 
 out_free_cpumask:
-	free_cpumask_var(tracing_reader_cpumask);
-out_free_tracing_cpumask:
 	free_cpumask_var(tracing_cpumask);
 out_free_buffer_mask:
 	free_cpumask_var(tracing_buffer_mask);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4df6a77eb196..2825ef2c0b15 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -396,9 +396,10 @@ extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
 
 extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 
+extern unsigned long tracing_thresh;
+
 #ifdef CONFIG_TRACER_MAX_TRACE
 extern unsigned long tracing_max_latency;
-extern unsigned long tracing_thresh;
 
 void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
 void update_max_tr_single(struct trace_array *tr,
@@ -497,6 +498,7 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* TODO: make this variable */
 #define FTRACE_GRAPH_MAX_FUNCS		32
+extern int ftrace_graph_filter_enabled;
 extern int ftrace_graph_count;
 extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
 
@@ -504,7 +506,7 @@ static inline int ftrace_graph_addr(unsigned long addr)
 {
 	int i;
 
-	if (!ftrace_graph_count || test_tsk_trace_graph(current))
+	if (!ftrace_graph_filter_enabled)
 		return 1;
 
 	for (i = 0; i < ftrace_graph_count; i++) {
@@ -549,7 +551,7 @@ static inline int ftrace_trace_task(struct task_struct *task)
 * struct trace_parser - servers for reading the user input separated by spaces
 * @cont: set if the input is not complete - no final space char was found
 * @buffer: holds the parsed user input
- * @idx: user input lenght
+ * @idx: user input length
 * @size: buffer size
 */
 struct trace_parser {
@@ -791,7 +793,8 @@ extern const char *__stop___trace_bprintk_fmt[];
 
 #undef FTRACE_ENTRY
 #define FTRACE_ENTRY(call, struct_name, id, tstruct, print)		\
-	extern struct ftrace_event_call event_##call;
+	extern struct ftrace_event_call					\
+	__attribute__((__aligned__(4))) event_##call;
 #undef FTRACE_ENTRY_DUP
 #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print)	\
 	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 4a194f08f88c..b9bc4d470177 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -307,8 +307,23 @@ static int annotated_branch_stat_cmp(void *p1, void *p2)
 		return -1;
 	if (percent_a > percent_b)
 		return 1;
-	else
-		return 0;
+
+	if (a->incorrect < b->incorrect)
+		return -1;
+	if (a->incorrect > b->incorrect)
+		return 1;
+
+	/*
+	 * Since the above shows worse (incorrect) cases
+	 * first, we continue that by showing best (correct)
+	 * cases last.
+	 */
+	if (a->correct > b->correct)
+		return -1;
+	if (a->correct < b->correct)
+		return 1;
+
+	return 0;
 }
 
 static struct tracer_stat annotated_branch_stats = {
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 84a3a7ba072a..9d589d8dcd1a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -13,6 +13,7 @@
 * Tracer plugins will chose a default from these clocks.
 */
 #include <linux/spinlock.h>
+#include <linux/irqflags.h>
 #include <linux/hardirq.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
@@ -83,7 +84,7 @@ u64 notrace trace_clock_global(void)
 	int this_cpu;
 	u64 now;
 
-	raw_local_irq_save(flags);
+	local_irq_save(flags);
 
 	this_cpu = raw_smp_processor_id();
 	now = cpu_clock(this_cpu);
@@ -109,7 +110,7 @@ u64 notrace trace_clock_global(void)
 	arch_spin_unlock(&trace_clock_struct.lock);
 
  out:
-	raw_local_irq_restore(flags);
+	local_irq_restore(flags);
 
 	return now;
 }
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
new file mode 100644
index 000000000000..0565bb42566f
--- /dev/null
+++ b/kernel/trace/trace_event_perf.c
@@ -0,0 +1,175 @@
+/*
+ * trace event based perf event profiling/tracing
+ *
+ * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kprobes.h>
+#include "trace.h"
+
+DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
+EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);
+
+EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
+
+static char *perf_trace_buf;
+static char *perf_trace_buf_nmi;
+
+/*
+ * Force it to be aligned to unsigned long to avoid misaligned accesses
+ * suprises
+ */
+typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
+	perf_trace_t;
+
+/* Count the events in use (per event id, not per instance) */
+static int	total_ref_count;
+
+static int perf_trace_event_enable(struct ftrace_event_call *event)
+{
+	char *buf;
+	int ret = -ENOMEM;
+
+	if (event->perf_refcount++ > 0)
+		return 0;
+
+	if (!total_ref_count) {
+		buf = (char *)alloc_percpu(perf_trace_t);
+		if (!buf)
+			goto fail_buf;
+
+		rcu_assign_pointer(perf_trace_buf, buf);
+
+		buf = (char *)alloc_percpu(perf_trace_t);
+		if (!buf)
+			goto fail_buf_nmi;
+
+		rcu_assign_pointer(perf_trace_buf_nmi, buf);
+	}
+
+	ret = event->perf_event_enable(event);
+	if (!ret) {
+		total_ref_count++;
+		return 0;
+	}
+
+fail_buf_nmi:
+	if (!total_ref_count) {
+		free_percpu(perf_trace_buf_nmi);
+		free_percpu(perf_trace_buf);
+		perf_trace_buf_nmi = NULL;
+		perf_trace_buf = NULL;
+	}
+fail_buf:
+	event->perf_refcount--;
+
+	return ret;
+}
+
+int perf_trace_enable(int event_id)
+{
+	struct ftrace_event_call *event;
+	int ret = -EINVAL;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(event, &ftrace_events, list) {
+		if (event->id == event_id && event->perf_event_enable &&
+		    try_module_get(event->mod)) {
+			ret = perf_trace_event_enable(event);
+			break;
+		}
+	}
+	mutex_unlock(&event_mutex);
+
+	return ret;
+}
+
+static void perf_trace_event_disable(struct ftrace_event_call *event)
+{
+	char *buf, *nmi_buf;
+
+	if (--event->perf_refcount > 0)
+		return;
+
+	event->perf_event_disable(event);
+
+	if (!--total_ref_count) {
+		buf = perf_trace_buf;
+		rcu_assign_pointer(perf_trace_buf, NULL);
+
+		nmi_buf = perf_trace_buf_nmi;
+		rcu_assign_pointer(perf_trace_buf_nmi, NULL);
+
+		/*
+		 * Ensure every events in profiling have finished before
+		 * releasing the buffers
+		 */
+		synchronize_sched();
+
+		free_percpu(buf);
+		free_percpu(nmi_buf);
+	}
+}
+
+void perf_trace_disable(int event_id)
+{
+	struct ftrace_event_call *event;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(event, &ftrace_events, list) {
+		if (event->id == event_id) {
+			perf_trace_event_disable(event);
+			module_put(event->mod);
+			break;
+		}
+	}
+	mutex_unlock(&event_mutex);
+}
+
+__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
+				       int *rctxp, unsigned long *irq_flags)
+{
+	struct trace_entry *entry;
+	char *trace_buf, *raw_data;
+	int pc, cpu;
+
+	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
+
+	pc = preempt_count();
+
+	/* Protect the per cpu buffer, begin the rcu read side */
+	local_irq_save(*irq_flags);
+
+	*rctxp = perf_swevent_get_recursion_context();
+	if (*rctxp < 0)
+		goto err_recursion;
+
+	cpu = smp_processor_id();
+
+	if (in_nmi())
+		trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
+	else
+		trace_buf = rcu_dereference_sched(perf_trace_buf);
+
+	if (!trace_buf)
+		goto err;
+
+	raw_data = per_cpu_ptr(trace_buf, cpu);
+
+	/* zero the dead bytes from align to not leak stack to user */
+	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
+
+	entry = (struct trace_entry *)raw_data;
+	tracing_generic_entry_update(entry, *irq_flags, pc);
+	entry->type = type;
+
+	return raw_data;
+err:
+	perf_swevent_put_recursion_context(*rctxp);
+err_recursion:
+	local_irq_restore(*irq_flags);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
deleted file mode 100644
index 9e25573242cf..000000000000
--- a/kernel/trace/trace_event_profile.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * trace event based perf counter profiling
- *
- * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
- *
- */
-
-#include <linux/module.h>
-#include "trace.h"
-
-
-char *perf_trace_buf;
-EXPORT_SYMBOL_GPL(perf_trace_buf);
-
-char *perf_trace_buf_nmi;
-EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
-
-typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
-
-/* Count the events in use (per event id, not per instance) */
-static int	total_profile_count;
-
-static int ftrace_profile_enable_event(struct ftrace_event_call *event)
-{
-	char *buf;
-	int ret = -ENOMEM;
-
-	if (event->profile_count++ > 0)
-		return 0;
-
-	if (!total_profile_count) {
-		buf = (char *)alloc_percpu(perf_trace_t);
-		if (!buf)
-			goto fail_buf;
-
-		rcu_assign_pointer(perf_trace_buf, buf);
-
-		buf = (char *)alloc_percpu(perf_trace_t);
-		if (!buf)
-			goto fail_buf_nmi;
-
-		rcu_assign_pointer(perf_trace_buf_nmi, buf);
-	}
-
-	ret = event->profile_enable(event);
-	if (!ret) {
-		total_profile_count++;
-		return 0;
-	}
-
-fail_buf_nmi:
-	if (!total_profile_count) {
-		free_percpu(perf_trace_buf_nmi);
-		free_percpu(perf_trace_buf);
-		perf_trace_buf_nmi = NULL;
-		perf_trace_buf = NULL;
-	}
-fail_buf:
-	event->profile_count--;
-
-	return ret;
-}
-
-int ftrace_profile_enable(int event_id)
-{
-	struct ftrace_event_call *event;
-	int ret = -EINVAL;
-
-	mutex_lock(&event_mutex);
-	list_for_each_entry(event, &ftrace_events, list) {
-		if (event->id == event_id && event->profile_enable &&
-		    try_module_get(event->mod)) {
-			ret = ftrace_profile_enable_event(event);
-			break;
-		}
-	}
-	mutex_unlock(&event_mutex);
-
-	return ret;
-}
-
-static void ftrace_profile_disable_event(struct ftrace_event_call *event)
-{
-	char *buf, *nmi_buf;
-
-	if (--event->profile_count > 0)
-		return;
-
-	event->profile_disable(event);
-
-	if (!--total_profile_count) {
-		buf = perf_trace_buf;
-		rcu_assign_pointer(perf_trace_buf, NULL);
-
-		nmi_buf = perf_trace_buf_nmi;
-		rcu_assign_pointer(perf_trace_buf_nmi, NULL);
-
-		/*
-		 * Ensure every events in profiling have finished before
-		 * releasing the buffers
-		 */
-		synchronize_sched();
-
-		free_percpu(buf);
-		free_percpu(nmi_buf);
-	}
-}
-
-void ftrace_profile_disable(int event_id)
-{
-	struct ftrace_event_call *event;
-
-	mutex_lock(&event_mutex);
-	list_for_each_entry(event, &ftrace_events, list) {
-		if (event->id == event_id) {
-			ftrace_profile_disable_event(event);
-			module_put(event->mod);
-			break;
-		}
-	}
-	mutex_unlock(&event_mutex);
-}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 189b09baf4fb..c697c7043349 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -15,6 +15,7 @@
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/slab.h>
18#include <linux/delay.h> 19#include <linux/delay.h>
19 20
20#include <asm/setup.h> 21#include <asm/setup.h>
@@ -60,10 +61,8 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
60 return 0; 61 return 0;
61 62
62err: 63err:
63 if (field) { 64 if (field)
64 kfree(field->name); 65 kfree(field->name);
65 kfree(field->type);
66 }
67 kfree(field); 66 kfree(field);
68 67
69 return -ENOMEM; 68 return -ENOMEM;
@@ -520,41 +519,16 @@ out:
520 return ret; 519 return ret;
521} 520}
522 521
523extern char *__bad_type_size(void);
524
525#undef FIELD
526#define FIELD(type, name) \
527 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
528 #type, "common_" #name, offsetof(typeof(field), name), \
529 sizeof(field.name), is_signed_type(type)
530
531static int trace_write_header(struct trace_seq *s)
532{
533 struct trace_entry field;
534
535 /* struct trace_entry */
536 return trace_seq_printf(s,
537 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
538 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
539 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
540 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
541 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
542 "\n",
543 FIELD(unsigned short, type),
544 FIELD(unsigned char, flags),
545 FIELD(unsigned char, preempt_count),
546 FIELD(int, pid),
547 FIELD(int, lock_depth));
548}
549
550static ssize_t 522static ssize_t
551event_format_read(struct file *filp, char __user *ubuf, size_t cnt, 523event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
552 loff_t *ppos) 524 loff_t *ppos)
553{ 525{
554 struct ftrace_event_call *call = filp->private_data; 526 struct ftrace_event_call *call = filp->private_data;
527 struct ftrace_event_field *field;
555 struct trace_seq *s; 528 struct trace_seq *s;
529 int common_field_count = 5;
556 char *buf; 530 char *buf;
557 int r; 531 int r = 0;
558 532
559 if (*ppos) 533 if (*ppos)
560 return 0; 534 return 0;
@@ -565,14 +539,48 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
565 539
566 trace_seq_init(s); 540 trace_seq_init(s);
567 541
568 /* If any of the first writes fail, so will the show_format. */
569
570 trace_seq_printf(s, "name: %s\n", call->name); 542 trace_seq_printf(s, "name: %s\n", call->name);
571 trace_seq_printf(s, "ID: %d\n", call->id); 543 trace_seq_printf(s, "ID: %d\n", call->id);
572 trace_seq_printf(s, "format:\n"); 544 trace_seq_printf(s, "format:\n");
573 trace_write_header(s);
574 545
575 r = call->show_format(call, s); 546 list_for_each_entry_reverse(field, &call->fields, link) {
547 /*
548 * Smartly shows the array type(except dynamic array).
549 * Normal:
550 * field:TYPE VAR
551 * If TYPE := TYPE[LEN], it is shown:
552 * field:TYPE VAR[LEN]
553 */
554 const char *array_descriptor = strchr(field->type, '[');
555
556 if (!strncmp(field->type, "__data_loc", 10))
557 array_descriptor = NULL;
558
559 if (!array_descriptor) {
560 r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
561 "\tsize:%u;\tsigned:%d;\n",
562 field->type, field->name, field->offset,
563 field->size, !!field->is_signed);
564 } else {
565 r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
566 "\tsize:%u;\tsigned:%d;\n",
567 (int)(array_descriptor - field->type),
568 field->type, field->name,
569 array_descriptor, field->offset,
570 field->size, !!field->is_signed);
571 }
572
573 if (--common_field_count == 0)
574 r = trace_seq_printf(s, "\n");
575
576 if (!r)
577 break;
578 }
579
580 if (r)
581 r = trace_seq_printf(s, "\nprint fmt: %s\n",
582 call->print_fmt);
583
576 if (!r) { 584 if (!r) {
577 /* 585 /*
578 * ug! The format output is bigger than a PAGE!! 586 * ug! The format output is bigger than a PAGE!!
@@ -931,7 +939,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
931 trace_create_file("enable", 0644, call->dir, call, 939 trace_create_file("enable", 0644, call->dir, call,
932 enable); 940 enable);
933 941
934 if (call->id && call->profile_enable) 942 if (call->id && call->perf_event_enable)
935 trace_create_file("id", 0444, call->dir, call, 943 trace_create_file("id", 0444, call->dir, call,
936 id); 944 id);
937 945
@@ -948,10 +956,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
948 filter); 956 filter);
949 } 957 }
950 958
951 /* A trace may not want to export its format */
952 if (!call->show_format)
953 return 0;
954
955 trace_create_file("format", 0444, call->dir, call, 959 trace_create_file("format", 0444, call->dir, call,
956 format); 960 format);
957 961
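To make the field-driven output above concrete, a debugfs events/.../format file now looks roughly like the sample below; the event name, ID, offsets and print fmt are invented for illustration. Note the five common fields, the blank line inserted after them, the array form produced by the array_descriptor branch, and the stored print_fmt at the end:

	name: sample_event
	ID: 42
	format:
		field:unsigned short common_type;	offset:0;	size:2;	signed:0;
		field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
		field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
		field:int common_pid;	offset:4;	size:4;	signed:1;
		field:int common_lock_depth;	offset:8;	size:4;	signed:1;

		field:unsigned long ip;	offset:16;	size:8;	signed:0;
		field:char comm[16];	offset:24;	size:16;	signed:1;

	print fmt: "ip=%lx comm=%s", REC->ip, REC->comm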
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e42af9aad69f..88c0b6dbd7fe 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -22,6 +22,7 @@
22#include <linux/ctype.h> 22#include <linux/ctype.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/perf_event.h> 24#include <linux/perf_event.h>
25#include <linux/slab.h>
25 26
26#include "trace.h" 27#include "trace.h"
27#include "trace_output.h" 28#include "trace_output.h"
@@ -1371,7 +1372,7 @@ out_unlock:
1371 return err; 1372 return err;
1372} 1373}
1373 1374
1374#ifdef CONFIG_EVENT_PROFILE 1375#ifdef CONFIG_PERF_EVENTS
1375 1376
1376void ftrace_profile_free_filter(struct perf_event *event) 1377void ftrace_profile_free_filter(struct perf_event *event)
1377{ 1378{
@@ -1439,5 +1440,5 @@ out_unlock:
1439 return err; 1440 return err;
1440} 1441}
1441 1442
1442#endif /* CONFIG_EVENT_PROFILE */ 1443#endif /* CONFIG_PERF_EVENTS */
1443 1444
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d4fa5dc1ee4e..e091f64ba6ce 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -62,78 +62,6 @@ static void __always_unused ____ftrace_check_##name(void) \
62 62
63#include "trace_entries.h" 63#include "trace_entries.h"
64 64
65
66#undef __field
67#define __field(type, item) \
68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
69 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
70 offsetof(typeof(field), item), \
71 sizeof(field.item), is_signed_type(type)); \
72 if (!ret) \
73 return 0;
74
75#undef __field_desc
76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item), \
81 is_signed_type(type)); \
82 if (!ret) \
83 return 0;
84
85#undef __array
86#define __array(type, item, len) \
87 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
88 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
89 offsetof(typeof(field), item), \
90 sizeof(field.item), is_signed_type(type)); \
91 if (!ret) \
92 return 0;
93
94#undef __array_desc
95#define __array_desc(type, container, item, len) \
96 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
97 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
98 offsetof(typeof(field), container.item), \
99 sizeof(field.container.item), \
100 is_signed_type(type)); \
101 if (!ret) \
102 return 0;
103
104#undef __dynamic_array
105#define __dynamic_array(type, item) \
106 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
107 "offset:%zu;\tsize:0;\tsigned:%u;\n", \
108 offsetof(typeof(field), item), \
109 is_signed_type(type)); \
110 if (!ret) \
111 return 0;
112
113#undef F_printk
114#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
115
116#undef __entry
117#define __entry REC
118
119#undef FTRACE_ENTRY
120#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
121static int \
122ftrace_format_##name(struct ftrace_event_call *unused, \
123 struct trace_seq *s) \
124{ \
125 struct struct_name field __attribute__((unused)); \
126 int ret = 0; \
127 \
128 tstruct; \
129 \
130 trace_seq_printf(s, "\nprint fmt: " print); \
131 \
132 return ret; \
133}
134
135#include "trace_entries.h"
136
137#undef __field 65#undef __field
138#define __field(type, item) \ 66#define __field(type, item) \
139 ret = trace_define_field(event_call, #type, #item, \ 67 ret = trace_define_field(event_call, #type, #item, \
@@ -175,7 +103,12 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
175 return ret; 103 return ret;
176 104
177#undef __dynamic_array 105#undef __dynamic_array
178#define __dynamic_array(type, item) 106#define __dynamic_array(type, item) \
107 ret = trace_define_field(event_call, #type, #item, \
108 offsetof(typeof(field), item), \
109 0, is_signed_type(type), FILTER_OTHER);\
110 if (ret) \
111 return ret;
179 112
180#undef FTRACE_ENTRY 113#undef FTRACE_ENTRY
181#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ 114#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
@@ -198,6 +131,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
198 return 0; 131 return 0;
199} 132}
200 133
134#undef __entry
135#define __entry REC
136
201#undef __field 137#undef __field
202#define __field(type, item) 138#define __field(type, item)
203 139
@@ -213,6 +149,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
213#undef __dynamic_array 149#undef __dynamic_array
214#define __dynamic_array(type, item) 150#define __dynamic_array(type, item)
215 151
152#undef F_printk
153#define F_printk(fmt, args...) #fmt ", " __stringify(args)
154
216#undef FTRACE_ENTRY 155#undef FTRACE_ENTRY
217#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 156#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
218 \ 157 \
@@ -223,7 +162,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
223 .id = type, \ 162 .id = type, \
224 .system = __stringify(TRACE_SYSTEM), \ 163 .system = __stringify(TRACE_SYSTEM), \
225 .raw_init = ftrace_raw_init_event, \ 164 .raw_init = ftrace_raw_init_event, \
226 .show_format = ftrace_format_##call, \ 165 .print_fmt = print, \
227 .define_fields = ftrace_define_fields_##call, \ 166 .define_fields = ftrace_define_fields_##call, \
228}; \ 167}; \
229 168
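As a worked example of the new F_printk definition above (a sketch of the macro expansion; the funcgraph-style field names are only an illustration), with __entry redefined to REC:

	/* An entry definition such as */
	F_printk("func=%lx depth=%d", __entry->func, __entry->depth)
	/* stringifies and concatenates into the single literal stored in .print_fmt: */
	"\"func=%lx depth=%d\", REC->func, REC->depth"
	/* which the format file later renders as:
	 *   print fmt: "func=%lx depth=%d", REC->func, REC->depth
	 */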
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b1342c5d37cf..9aed1a5cf553 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -9,6 +9,7 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <linux/slab.h>
12#include <linux/fs.h> 13#include <linux/fs.h>
13 14
14#include "trace.h" 15#include "trace.h"
@@ -18,6 +19,7 @@ struct fgraph_cpu_data {
18 pid_t last_pid; 19 pid_t last_pid;
19 int depth; 20 int depth;
20 int ignore; 21 int ignore;
22 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
21}; 23};
22 24
23struct fgraph_data { 25struct fgraph_data {
@@ -187,7 +189,7 @@ static int __trace_graph_entry(struct trace_array *tr,
187 struct ring_buffer *buffer = tr->buffer; 189 struct ring_buffer *buffer = tr->buffer;
188 struct ftrace_graph_ent_entry *entry; 190 struct ftrace_graph_ent_entry *entry;
189 191
190 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) 192 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
191 return 0; 193 return 0;
192 194
193 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, 195 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -212,13 +214,11 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
212 int cpu; 214 int cpu;
213 int pc; 215 int pc;
214 216
215 if (unlikely(!tr))
216 return 0;
217
218 if (!ftrace_trace_task(current)) 217 if (!ftrace_trace_task(current))
219 return 0; 218 return 0;
220 219
221 if (!ftrace_graph_addr(trace->func)) 220 /* trace it when it is-nested-in or is a function enabled. */
221 if (!(trace->depth || ftrace_graph_addr(trace->func)))
222 return 0; 222 return 0;
223 223
224 local_irq_save(flags); 224 local_irq_save(flags);
@@ -231,9 +231,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
231 } else { 231 } else {
232 ret = 0; 232 ret = 0;
233 } 233 }
234 /* Only do the atomic if it is not already set */
235 if (!test_tsk_trace_graph(current))
236 set_tsk_trace_graph(current);
237 234
238 atomic_dec(&data->disabled); 235 atomic_dec(&data->disabled);
239 local_irq_restore(flags); 236 local_irq_restore(flags);
@@ -241,6 +238,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
241 return ret; 238 return ret;
242} 239}
243 240
241int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
242{
243 if (tracing_thresh)
244 return 1;
245 else
246 return trace_graph_entry(trace);
247}
248
244static void __trace_graph_return(struct trace_array *tr, 249static void __trace_graph_return(struct trace_array *tr,
245 struct ftrace_graph_ret *trace, 250 struct ftrace_graph_ret *trace,
246 unsigned long flags, 251 unsigned long flags,
@@ -251,7 +256,7 @@ static void __trace_graph_return(struct trace_array *tr,
251 struct ring_buffer *buffer = tr->buffer; 256 struct ring_buffer *buffer = tr->buffer;
252 struct ftrace_graph_ret_entry *entry; 257 struct ftrace_graph_ret_entry *entry;
253 258
254 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) 259 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
255 return; 260 return;
256 261
257 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, 262 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@ -281,19 +286,39 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
281 pc = preempt_count(); 286 pc = preempt_count();
282 __trace_graph_return(tr, trace, flags, pc); 287 __trace_graph_return(tr, trace, flags, pc);
283 } 288 }
284 if (!trace->depth)
285 clear_tsk_trace_graph(current);
286 atomic_dec(&data->disabled); 289 atomic_dec(&data->disabled);
287 local_irq_restore(flags); 290 local_irq_restore(flags);
288} 291}
289 292
293void set_graph_array(struct trace_array *tr)
294{
295 graph_array = tr;
296
297 /* Make graph_array visible before we start tracing */
298
299 smp_mb();
300}
301
302void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
303{
304 if (tracing_thresh &&
305 (trace->rettime - trace->calltime < tracing_thresh))
306 return;
307 else
308 trace_graph_return(trace);
309}
310
290static int graph_trace_init(struct trace_array *tr) 311static int graph_trace_init(struct trace_array *tr)
291{ 312{
292 int ret; 313 int ret;
293 314
294 graph_array = tr; 315 set_graph_array(tr);
295 ret = register_ftrace_graph(&trace_graph_return, 316 if (tracing_thresh)
296 &trace_graph_entry); 317 ret = register_ftrace_graph(&trace_graph_thresh_return,
318 &trace_graph_thresh_entry);
319 else
320 ret = register_ftrace_graph(&trace_graph_return,
321 &trace_graph_entry);
297 if (ret) 322 if (ret)
298 return ret; 323 return ret;
299 tracing_start_cmdline_record(); 324 tracing_start_cmdline_record();
@@ -301,11 +326,6 @@ static int graph_trace_init(struct trace_array *tr)
301 return 0; 326 return 0;
302} 327}
303 328
304void set_graph_array(struct trace_array *tr)
305{
306 graph_array = tr;
307}
308
309static void graph_trace_reset(struct trace_array *tr) 329static void graph_trace_reset(struct trace_array *tr)
310{ 330{
311 tracing_stop_cmdline_record(); 331 tracing_stop_cmdline_record();
@@ -673,15 +693,21 @@ print_graph_entry_leaf(struct trace_iterator *iter,
673 duration = graph_ret->rettime - graph_ret->calltime; 693 duration = graph_ret->rettime - graph_ret->calltime;
674 694
675 if (data) { 695 if (data) {
696 struct fgraph_cpu_data *cpu_data;
676 int cpu = iter->cpu; 697 int cpu = iter->cpu;
677 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 698
699 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
678 700
679 /* 701 /*
680 * Comments display at + 1 to depth. Since 702 * Comments display at + 1 to depth. Since
681 * this is a leaf function, keep the comments 703 * this is a leaf function, keep the comments
682 * equal to this depth. 704 * equal to this depth.
683 */ 705 */
684 *depth = call->depth - 1; 706 cpu_data->depth = call->depth - 1;
707
708 /* No need to keep this function around for this depth */
709 if (call->depth < FTRACE_RETFUNC_DEPTH)
710 cpu_data->enter_funcs[call->depth] = 0;
685 } 711 }
686 712
687 /* Overhead */ 713 /* Overhead */
@@ -721,10 +747,15 @@ print_graph_entry_nested(struct trace_iterator *iter,
721 int i; 747 int i;
722 748
723 if (data) { 749 if (data) {
750 struct fgraph_cpu_data *cpu_data;
724 int cpu = iter->cpu; 751 int cpu = iter->cpu;
725 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
726 752
727 *depth = call->depth; 753 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
754 cpu_data->depth = call->depth;
755
756 /* Save this function pointer to see if the exit matches */
757 if (call->depth < FTRACE_RETFUNC_DEPTH)
758 cpu_data->enter_funcs[call->depth] = call->func;
728 } 759 }
729 760
730 /* No overhead */ 761 /* No overhead */
@@ -854,19 +885,28 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
854 struct fgraph_data *data = iter->private; 885 struct fgraph_data *data = iter->private;
855 pid_t pid = ent->pid; 886 pid_t pid = ent->pid;
856 int cpu = iter->cpu; 887 int cpu = iter->cpu;
888 int func_match = 1;
857 int ret; 889 int ret;
858 int i; 890 int i;
859 891
860 if (data) { 892 if (data) {
893 struct fgraph_cpu_data *cpu_data;
861 int cpu = iter->cpu; 894 int cpu = iter->cpu;
862 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 895
896 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
863 897
864 /* 898 /*
865 * Comments display at + 1 to depth. This is the 899 * Comments display at + 1 to depth. This is the
866 * return from a function, we now want the comments 900 * return from a function, we now want the comments
867 * to display at the same level of the bracket. 901 * to display at the same level of the bracket.
868 */ 902 */
869 *depth = trace->depth - 1; 903 cpu_data->depth = trace->depth - 1;
904
905 if (trace->depth < FTRACE_RETFUNC_DEPTH) {
906 if (cpu_data->enter_funcs[trace->depth] != trace->func)
907 func_match = 0;
908 cpu_data->enter_funcs[trace->depth] = 0;
909 }
870 } 910 }
871 911
872 if (print_graph_prologue(iter, s, 0, 0)) 912 if (print_graph_prologue(iter, s, 0, 0))
@@ -891,9 +931,21 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
891 return TRACE_TYPE_PARTIAL_LINE; 931 return TRACE_TYPE_PARTIAL_LINE;
892 } 932 }
893 933
894 ret = trace_seq_printf(s, "}\n"); 934 /*
895 if (!ret) 935 * If the return function does not have a matching entry,
896 return TRACE_TYPE_PARTIAL_LINE; 936 * then the entry was lost. Instead of just printing
937 * the '}' and letting the user guess what function this
938 * belongs to, write out the function name.
939 */
940 if (func_match) {
941 ret = trace_seq_printf(s, "}\n");
942 if (!ret)
943 return TRACE_TYPE_PARTIAL_LINE;
944 } else {
945 ret = trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
946 if (!ret)
947 return TRACE_TYPE_PARTIAL_LINE;
948 }
897 949
898 /* Overrun */ 950 /* Overrun */
899 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { 951 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
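The effect of the func_match check above, sketched on made-up funcgraph output (functions, timings and column widths are illustrative only): when the recorded entry matches, the closing brace is printed bare as before; when the entry record was lost, the brace is annotated via %ps with the function being returned from, as in the last line:

	 1)               |  do_one_initcall() {
	 1)   0.655 us    |    kmem_cache_alloc();
	 1)   2.117 us    |  }
	 1)   3.408 us    |  } /* kfree_skb */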
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 6ea90c0e2c96..1251e367bae9 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
91 return retval; 91 return retval;
92} 92}
93 93
94static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
95{
96 return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
97}
98
99static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, 94static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
100 void *dummy) 95 void *dummy)
101{ 96{
@@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
231{ 226{
232 int ret = -EINVAL; 227 int ret = -EINVAL;
233 228
234 if (ff->func == fetch_argument) 229 if (ff->func == fetch_register) {
235 ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
236 else if (ff->func == fetch_register) {
237 const char *name; 230 const char *name;
238 name = regs_query_register_name((unsigned int)((long)ff->data)); 231 name = regs_query_register_name((unsigned int)((long)ff->data));
239 ret = snprintf(buf, n, "%%%s", name); 232 ret = snprintf(buf, n, "%%%s", name);
@@ -489,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
489 } 482 }
490 } else 483 } else
491 ret = -EINVAL; 484 ret = -EINVAL;
492 } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
493 ret = strict_strtoul(arg + 3, 10, &param);
494 if (ret || param > PARAM_MAX_ARGS)
495 ret = -EINVAL;
496 else {
497 ff->func = fetch_argument;
498 ff->data = (void *)param;
499 }
500 } else 485 } else
501 ret = -EINVAL; 486 ret = -EINVAL;
502 return ret; 487 return ret;
@@ -611,7 +596,6 @@ static int create_trace_probe(int argc, char **argv)
611 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] 596 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
612 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] 597 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
613 * Fetch args: 598 * Fetch args:
614 * $argN : fetch Nth of function argument. (N:0-)
615 * $retval : fetch return value 599 * $retval : fetch return value
616 * $stack : fetch stack address 600 * $stack : fetch stack address
617 * $stackN : fetch Nth of stack (N:0-) 601 * $stackN : fetch Nth of stack (N:0-)
@@ -651,12 +635,12 @@ static int create_trace_probe(int argc, char **argv)
651 event = strchr(group, '/') + 1; 635 event = strchr(group, '/') + 1;
652 event[-1] = '\0'; 636 event[-1] = '\0';
653 if (strlen(group) == 0) { 637 if (strlen(group) == 0) {
654 pr_info("Group name is not specifiled\n"); 638 pr_info("Group name is not specified\n");
655 return -EINVAL; 639 return -EINVAL;
656 } 640 }
657 } 641 }
658 if (strlen(event) == 0) { 642 if (strlen(event) == 0) {
659 pr_info("Event name is not specifiled\n"); 643 pr_info("Event name is not specified\n");
660 return -EINVAL; 644 return -EINVAL;
661 } 645 }
662 } 646 }
@@ -689,7 +673,7 @@ static int create_trace_probe(int argc, char **argv)
689 return -EINVAL; 673 return -EINVAL;
690 } 674 }
691 /* an address specified */ 675 /* an address specified */
692 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); 676 ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
693 if (ret) { 677 if (ret) {
694 pr_info("Failed to parse address.\n"); 678 pr_info("Failed to parse address.\n");
695 return ret; 679 return ret;
@@ -958,7 +942,7 @@ static const struct file_operations kprobe_profile_ops = {
958}; 942};
959 943
960/* Kprobe handler */ 944/* Kprobe handler */
961static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) 945static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
962{ 946{
963 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 947 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
964 struct kprobe_trace_entry *entry; 948 struct kprobe_trace_entry *entry;
@@ -978,7 +962,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
978 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 962 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
979 irq_flags, pc); 963 irq_flags, pc);
980 if (!event) 964 if (!event)
981 return 0; 965 return;
982 966
983 entry = ring_buffer_event_data(event); 967 entry = ring_buffer_event_data(event);
984 entry->nargs = tp->nr_args; 968 entry->nargs = tp->nr_args;
@@ -988,11 +972,10 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
988 972
989 if (!filter_current_check_discard(buffer, call, entry, event)) 973 if (!filter_current_check_discard(buffer, call, entry, event))
990 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 974 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
991 return 0;
992} 975}
993 976
994/* Kretprobe handler */ 977/* Kretprobe handler */
995static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, 978static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
996 struct pt_regs *regs) 979 struct pt_regs *regs)
997{ 980{
998 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 981 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@@ -1011,7 +994,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
1011 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 994 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
1012 irq_flags, pc); 995 irq_flags, pc);
1013 if (!event) 996 if (!event)
1014 return 0; 997 return;
1015 998
1016 entry = ring_buffer_event_data(event); 999 entry = ring_buffer_event_data(event);
1017 entry->nargs = tp->nr_args; 1000 entry->nargs = tp->nr_args;
@@ -1022,8 +1005,6 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
1022 1005
1023 if (!filter_current_check_discard(buffer, call, entry, event)) 1006 if (!filter_current_check_discard(buffer, call, entry, event))
1024 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1007 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1025
1026 return 0;
1027} 1008}
1028 1009
1029/* Event entry printers */ 1010/* Event entry printers */
@@ -1174,216 +1155,127 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1174 return 0; 1155 return 0;
1175} 1156}
1176 1157
1177static int __probe_event_show_format(struct trace_seq *s, 1158static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1178 struct trace_probe *tp, const char *fmt,
1179 const char *arg)
1180{ 1159{
1181 int i; 1160 int i;
1161 int pos = 0;
1182 1162
1183 /* Show format */ 1163 const char *fmt, *arg;
1184 if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1185 return 0;
1186 1164
1187 for (i = 0; i < tp->nr_args; i++) 1165 if (!probe_is_return(tp)) {
1188 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name)) 1166 fmt = "(%lx)";
1189 return 0; 1167 arg = "REC->" FIELD_STRING_IP;
1168 } else {
1169 fmt = "(%lx <- %lx)";
1170 arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
1171 }
1190 1172
1191 if (!trace_seq_printf(s, "\", %s", arg)) 1173 /* When len=0, we just calculate the needed length */
1192 return 0; 1174#define LEN_OR_ZERO (len ? len - pos : 0)
1193 1175
1194 for (i = 0; i < tp->nr_args; i++) 1176 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
1195 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1196 return 0;
1197
1198 return trace_seq_puts(s, "\n");
1199}
1200 1177
1201#undef SHOW_FIELD 1178 for (i = 0; i < tp->nr_args; i++) {
1202#define SHOW_FIELD(type, item, name) \ 1179 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx",
1203 do { \ 1180 tp->args[i].name);
1204 ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \ 1181 }
1205 "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\
1206 (unsigned int)offsetof(typeof(field), item),\
1207 (unsigned int)sizeof(type), \
1208 is_signed_type(type)); \
1209 if (!ret) \
1210 return 0; \
1211 } while (0)
1212 1182
1213static int kprobe_event_show_format(struct ftrace_event_call *call, 1183 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
1214 struct trace_seq *s)
1215{
1216 struct kprobe_trace_entry field __attribute__((unused));
1217 int ret, i;
1218 struct trace_probe *tp = (struct trace_probe *)call->data;
1219 1184
1220 SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP); 1185 for (i = 0; i < tp->nr_args; i++) {
1221 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); 1186 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
1187 tp->args[i].name);
1188 }
1222 1189
1223 /* Show fields */ 1190#undef LEN_OR_ZERO
1224 for (i = 0; i < tp->nr_args; i++)
1225 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1226 trace_seq_puts(s, "\n");
1227 1191
1228 return __probe_event_show_format(s, tp, "(%lx)", 1192 /* return the length of print_fmt */
1229 "REC->" FIELD_STRING_IP); 1193 return pos;
1230} 1194}
1231 1195
1232static int kretprobe_event_show_format(struct ftrace_event_call *call, 1196static int set_print_fmt(struct trace_probe *tp)
1233 struct trace_seq *s)
1234{ 1197{
1235 struct kretprobe_trace_entry field __attribute__((unused)); 1198 int len;
1236 int ret, i; 1199 char *print_fmt;
1237 struct trace_probe *tp = (struct trace_probe *)call->data;
1238 1200
1239 SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC); 1201 /* First: called with 0 length to calculate the needed length */
1240 SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP); 1202 len = __set_print_fmt(tp, NULL, 0);
1241 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); 1203 print_fmt = kmalloc(len + 1, GFP_KERNEL);
1204 if (!print_fmt)
1205 return -ENOMEM;
1242 1206
1243 /* Show fields */ 1207 /* Second: actually write the @print_fmt */
1244 for (i = 0; i < tp->nr_args; i++) 1208 __set_print_fmt(tp, print_fmt, len + 1);
1245 SHOW_FIELD(unsigned long, args[i], tp->args[i].name); 1209 tp->call.print_fmt = print_fmt;
1246 trace_seq_puts(s, "\n");
1247 1210
1248 return __probe_event_show_format(s, tp, "(%lx <- %lx)", 1211 return 0;
1249 "REC->" FIELD_STRING_FUNC
1250 ", REC->" FIELD_STRING_RETIP);
1251} 1212}
1252 1213
1253#ifdef CONFIG_EVENT_PROFILE 1214#ifdef CONFIG_PERF_EVENTS
1254 1215
1255/* Kprobe profile handler */ 1216/* Kprobe profile handler */
1256static __kprobes int kprobe_profile_func(struct kprobe *kp, 1217static __kprobes void kprobe_perf_func(struct kprobe *kp,
1257 struct pt_regs *regs) 1218 struct pt_regs *regs)
1258{ 1219{
1259 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1220 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1260 struct ftrace_event_call *call = &tp->call; 1221 struct ftrace_event_call *call = &tp->call;
1261 struct kprobe_trace_entry *entry; 1222 struct kprobe_trace_entry *entry;
1262 struct trace_entry *ent; 1223 int size, __size, i;
1263 int size, __size, i, pc, __cpu;
1264 unsigned long irq_flags; 1224 unsigned long irq_flags;
1265 char *trace_buf;
1266 char *raw_data;
1267 int rctx; 1225 int rctx;
1268 1226
1269 pc = preempt_count();
1270 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); 1227 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1271 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1228 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1272 size -= sizeof(u32); 1229 size -= sizeof(u32);
1273 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1230 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1274 "profile buffer not large enough")) 1231 "profile buffer not large enough"))
1275 return 0; 1232 return;
1276
1277 /*
1278 * Protect the non nmi buffer
1279 * This also protects the rcu read side
1280 */
1281 local_irq_save(irq_flags);
1282
1283 rctx = perf_swevent_get_recursion_context();
1284 if (rctx < 0)
1285 goto end_recursion;
1286
1287 __cpu = smp_processor_id();
1288
1289 if (in_nmi())
1290 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1291 else
1292 trace_buf = rcu_dereference(perf_trace_buf);
1293 1233
1294 if (!trace_buf) 1234 entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
1295 goto end; 1235 if (!entry)
1296 1236 return;
1297 raw_data = per_cpu_ptr(trace_buf, __cpu);
1298
1299 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1300 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1301 entry = (struct kprobe_trace_entry *)raw_data;
1302 ent = &entry->ent;
1303 1237
1304 tracing_generic_entry_update(ent, irq_flags, pc);
1305 ent->type = call->id;
1306 entry->nargs = tp->nr_args; 1238 entry->nargs = tp->nr_args;
1307 entry->ip = (unsigned long)kp->addr; 1239 entry->ip = (unsigned long)kp->addr;
1308 for (i = 0; i < tp->nr_args; i++) 1240 for (i = 0; i < tp->nr_args; i++)
1309 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1241 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1310 perf_tp_event(call->id, entry->ip, 1, entry, size);
1311
1312end:
1313 perf_swevent_put_recursion_context(rctx);
1314end_recursion:
1315 local_irq_restore(irq_flags);
1316 1242
1317 return 0; 1243 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);
1318} 1244}
1319 1245
1320/* Kretprobe profile handler */ 1246/* Kretprobe profile handler */
1321static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, 1247static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1322 struct pt_regs *regs) 1248 struct pt_regs *regs)
1323{ 1249{
1324 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1250 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1325 struct ftrace_event_call *call = &tp->call; 1251 struct ftrace_event_call *call = &tp->call;
1326 struct kretprobe_trace_entry *entry; 1252 struct kretprobe_trace_entry *entry;
1327 struct trace_entry *ent; 1253 int size, __size, i;
1328 int size, __size, i, pc, __cpu;
1329 unsigned long irq_flags; 1254 unsigned long irq_flags;
1330 char *trace_buf;
1331 char *raw_data;
1332 int rctx; 1255 int rctx;
1333 1256
1334 pc = preempt_count();
1335 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); 1257 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1336 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1258 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1337 size -= sizeof(u32); 1259 size -= sizeof(u32);
1338 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1260 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1339 "profile buffer not large enough")) 1261 "profile buffer not large enough"))
1340 return 0; 1262 return;
1341
1342 /*
1343 * Protect the non nmi buffer
1344 * This also protects the rcu read side
1345 */
1346 local_irq_save(irq_flags);
1347
1348 rctx = perf_swevent_get_recursion_context();
1349 if (rctx < 0)
1350 goto end_recursion;
1351
1352 __cpu = smp_processor_id();
1353 1263
1354 if (in_nmi()) 1264 entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
1355 trace_buf = rcu_dereference(perf_trace_buf_nmi); 1265 if (!entry)
1356 else 1266 return;
1357 trace_buf = rcu_dereference(perf_trace_buf);
1358
1359 if (!trace_buf)
1360 goto end;
1361
1362 raw_data = per_cpu_ptr(trace_buf, __cpu);
1363
1364 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1365 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1366 entry = (struct kretprobe_trace_entry *)raw_data;
1367 ent = &entry->ent;
1368 1267
1369 tracing_generic_entry_update(ent, irq_flags, pc);
1370 ent->type = call->id;
1371 entry->nargs = tp->nr_args; 1268 entry->nargs = tp->nr_args;
1372 entry->func = (unsigned long)tp->rp.kp.addr; 1269 entry->func = (unsigned long)tp->rp.kp.addr;
1373 entry->ret_ip = (unsigned long)ri->ret_addr; 1270 entry->ret_ip = (unsigned long)ri->ret_addr;
1374 for (i = 0; i < tp->nr_args; i++) 1271 for (i = 0; i < tp->nr_args; i++)
1375 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1272 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1376 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1377
1378end:
1379 perf_swevent_put_recursion_context(rctx);
1380end_recursion:
1381 local_irq_restore(irq_flags);
1382 1273
1383 return 0; 1274 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
1275 irq_flags, regs);
1384} 1276}
1385 1277
1386static int probe_profile_enable(struct ftrace_event_call *call) 1278static int probe_perf_enable(struct ftrace_event_call *call)
1387{ 1279{
1388 struct trace_probe *tp = (struct trace_probe *)call->data; 1280 struct trace_probe *tp = (struct trace_probe *)call->data;
1389 1281
@@ -1395,7 +1287,7 @@ static int probe_profile_enable(struct ftrace_event_call *call)
1395 return enable_kprobe(&tp->rp.kp); 1287 return enable_kprobe(&tp->rp.kp);
1396} 1288}
1397 1289
1398static void probe_profile_disable(struct ftrace_event_call *call) 1290static void probe_perf_disable(struct ftrace_event_call *call)
1399{ 1291{
1400 struct trace_probe *tp = (struct trace_probe *)call->data; 1292 struct trace_probe *tp = (struct trace_probe *)call->data;
1401 1293
@@ -1408,7 +1300,7 @@ static void probe_profile_disable(struct ftrace_event_call *call)
1408 disable_kprobe(&tp->rp.kp); 1300 disable_kprobe(&tp->rp.kp);
1409 } 1301 }
1410} 1302}
1411#endif /* CONFIG_EVENT_PROFILE */ 1303#endif /* CONFIG_PERF_EVENTS */
1412 1304
1413 1305
1414static __kprobes 1306static __kprobes
@@ -1418,10 +1310,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1418 1310
1419 if (tp->flags & TP_FLAG_TRACE) 1311 if (tp->flags & TP_FLAG_TRACE)
1420 kprobe_trace_func(kp, regs); 1312 kprobe_trace_func(kp, regs);
1421#ifdef CONFIG_EVENT_PROFILE 1313#ifdef CONFIG_PERF_EVENTS
1422 if (tp->flags & TP_FLAG_PROFILE) 1314 if (tp->flags & TP_FLAG_PROFILE)
1423 kprobe_profile_func(kp, regs); 1315 kprobe_perf_func(kp, regs);
1424#endif /* CONFIG_EVENT_PROFILE */ 1316#endif
1425 return 0; /* We don't tweek kernel, so just return 0 */ 1317 return 0; /* We don't tweek kernel, so just return 0 */
1426} 1318}
1427 1319
@@ -1432,10 +1324,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1432 1324
1433 if (tp->flags & TP_FLAG_TRACE) 1325 if (tp->flags & TP_FLAG_TRACE)
1434 kretprobe_trace_func(ri, regs); 1326 kretprobe_trace_func(ri, regs);
1435#ifdef CONFIG_EVENT_PROFILE 1327#ifdef CONFIG_PERF_EVENTS
1436 if (tp->flags & TP_FLAG_PROFILE) 1328 if (tp->flags & TP_FLAG_PROFILE)
1437 kretprobe_profile_func(ri, regs); 1329 kretprobe_perf_func(ri, regs);
1438#endif /* CONFIG_EVENT_PROFILE */ 1330#endif
1439 return 0; /* We don't tweek kernel, so just return 0 */ 1331 return 0; /* We don't tweek kernel, so just return 0 */
1440} 1332}
1441 1333
@@ -1448,30 +1340,33 @@ static int register_probe_event(struct trace_probe *tp)
1448 if (probe_is_return(tp)) { 1340 if (probe_is_return(tp)) {
1449 tp->event.trace = print_kretprobe_event; 1341 tp->event.trace = print_kretprobe_event;
1450 call->raw_init = probe_event_raw_init; 1342 call->raw_init = probe_event_raw_init;
1451 call->show_format = kretprobe_event_show_format;
1452 call->define_fields = kretprobe_event_define_fields; 1343 call->define_fields = kretprobe_event_define_fields;
1453 } else { 1344 } else {
1454 tp->event.trace = print_kprobe_event; 1345 tp->event.trace = print_kprobe_event;
1455 call->raw_init = probe_event_raw_init; 1346 call->raw_init = probe_event_raw_init;
1456 call->show_format = kprobe_event_show_format;
1457 call->define_fields = kprobe_event_define_fields; 1347 call->define_fields = kprobe_event_define_fields;
1458 } 1348 }
1349 if (set_print_fmt(tp) < 0)
1350 return -ENOMEM;
1459 call->event = &tp->event; 1351 call->event = &tp->event;
1460 call->id = register_ftrace_event(&tp->event); 1352 call->id = register_ftrace_event(&tp->event);
1461 if (!call->id) 1353 if (!call->id) {
1354 kfree(call->print_fmt);
1462 return -ENODEV; 1355 return -ENODEV;
1356 }
1463 call->enabled = 0; 1357 call->enabled = 0;
1464 call->regfunc = probe_event_enable; 1358 call->regfunc = probe_event_enable;
1465 call->unregfunc = probe_event_disable; 1359 call->unregfunc = probe_event_disable;
1466 1360
1467#ifdef CONFIG_EVENT_PROFILE 1361#ifdef CONFIG_PERF_EVENTS
1468 call->profile_enable = probe_profile_enable; 1362 call->perf_event_enable = probe_perf_enable;
1469 call->profile_disable = probe_profile_disable; 1363 call->perf_event_disable = probe_perf_disable;
1470#endif 1364#endif
1471 call->data = tp; 1365 call->data = tp;
1472 ret = trace_add_event_call(call); 1366 ret = trace_add_event_call(call);
1473 if (ret) { 1367 if (ret) {
1474 pr_info("Failed to register kprobe event: %s\n", call->name); 1368 pr_info("Failed to register kprobe event: %s\n", call->name);
1369 kfree(call->print_fmt);
1475 unregister_ftrace_event(&tp->event); 1370 unregister_ftrace_event(&tp->event);
1476 } 1371 }
1477 return ret; 1372 return ret;
@@ -1481,6 +1376,7 @@ static void unregister_probe_event(struct trace_probe *tp)
1481{ 1376{
1482 /* tp->event is unregistered in trace_remove_event_call() */ 1377 /* tp->event is unregistered in trace_remove_event_call() */
1483 trace_remove_event_call(&tp->call); 1378 trace_remove_event_call(&tp->call);
1379 kfree(tp->call.print_fmt);
1484} 1380}
1485 1381
1486/* Make a debugfs interface for controling probe points */ 1382/* Make a debugfs interface for controling probe points */
@@ -1523,28 +1419,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1523 1419
1524static __init int kprobe_trace_self_tests_init(void) 1420static __init int kprobe_trace_self_tests_init(void)
1525{ 1421{
1526 int ret; 1422 int ret, warn = 0;
1527 int (*target)(int, int, int, int, int, int); 1423 int (*target)(int, int, int, int, int, int);
1424 struct trace_probe *tp;
1528 1425
1529 target = kprobe_trace_selftest_target; 1426 target = kprobe_trace_selftest_target;
1530 1427
1531 pr_info("Testing kprobe tracing: "); 1428 pr_info("Testing kprobe tracing: ");
1532 1429
1533 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " 1430 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1534 "$arg1 $arg2 $arg3 $arg4 $stack $stack0"); 1431 "$stack $stack0 +0($stack)");
1535 if (WARN_ON_ONCE(ret)) 1432 if (WARN_ON_ONCE(ret)) {
1536 pr_warning("error enabling function entry\n"); 1433 pr_warning("error on probing function entry.\n");
1434 warn++;
1435 } else {
1436 /* Enable trace point */
1437 tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
1438 if (WARN_ON_ONCE(tp == NULL)) {
1439 pr_warning("error on getting new probe.\n");
1440 warn++;
1441 } else
1442 probe_event_enable(&tp->call);
1443 }
1537 1444
1538 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " 1445 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1539 "$retval"); 1446 "$retval");
1540 if (WARN_ON_ONCE(ret)) 1447 if (WARN_ON_ONCE(ret)) {
1541 pr_warning("error enabling function return\n"); 1448 pr_warning("error on probing function return.\n");
1449 warn++;
1450 } else {
1451 /* Enable trace point */
1452 tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
1453 if (WARN_ON_ONCE(tp == NULL)) {
1454 pr_warning("error on getting new probe.\n");
1455 warn++;
1456 } else
1457 probe_event_enable(&tp->call);
1458 }
1459
1460 if (warn)
1461 goto end;
1542 1462
1543 ret = target(1, 2, 3, 4, 5, 6); 1463 ret = target(1, 2, 3, 4, 5, 6);
1544 1464
1545 cleanup_all_probes(); 1465 ret = command_trace_probe("-:testprobe");
1466 if (WARN_ON_ONCE(ret)) {
1467 pr_warning("error on deleting a probe.\n");
1468 warn++;
1469 }
1546 1470
1547 pr_cont("OK\n"); 1471 ret = command_trace_probe("-:testprobe2");
1472 if (WARN_ON_ONCE(ret)) {
1473 pr_warning("error on deleting a probe.\n");
1474 warn++;
1475 }
1476
1477end:
1478 cleanup_all_probes();
1479 if (warn)
1480 pr_cont("NG: Some tests are failed. Please check them.\n");
1481 else
1482 pr_cont("OK\n");
1548 return 0; 1483 return 0;
1549} 1484}
1550 1485
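The sizing idiom used by __set_print_fmt()/set_print_fmt() above (and again for syscalls below), reduced to a standalone sketch with invented names and assuming the trace_probe layout used in this file: run the formatter once with length 0 to measure, allocate exactly that much, then run it again to fill the buffer.

	/* Pass 1 with len == 0 only measures; pass 2 writes into buf. */
	static int __build_probe_fmt(struct trace_probe *tp, char *buf, int len)
	{
		int i, pos = 0;

	/* snprintf() with size 0 writes nothing but still returns the
	 * number of characters it would have written. */
	#define LEN_OR_ZERO (len ? len - pos : 0)
		pos += snprintf(buf + pos, LEN_OR_ZERO, "\"(%%lx)");
		for (i = 0; i < tp->nr_args; i++)
			pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx",
					tp->args[i].name);
		pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
	#undef LEN_OR_ZERO

		return pos;	/* characters needed, not counting the trailing NUL */
	}

	static char *build_probe_fmt(struct trace_probe *tp)
	{
		int len = __build_probe_fmt(tp, NULL, 0);	/* first pass: measure */
		char *buf = kmalloc(len + 1, GFP_KERNEL);

		if (buf)
			__build_probe_fmt(tp, buf, len + 1);	/* second pass: fill */
		return buf;
	}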
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index 94103cdcf9d8..d59cd6879477 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -23,6 +23,7 @@
23#include <linux/debugfs.h> 23#include <linux/debugfs.h>
24#include <linux/ftrace.h> 24#include <linux/ftrace.h>
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/slab.h>
26#include <linux/fs.h> 27#include <linux/fs.h>
27 28
28#include "trace_output.h" 29#include "trace_output.h"
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 0acd834659ed..017fa376505d 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,7 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/mmiotrace.h> 10#include <linux/mmiotrace.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <linux/slab.h>
12#include <linux/time.h> 13#include <linux/time.h>
13 14
14#include <asm/atomic.h> 15#include <asm/atomic.h>
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 280fea470d67..81003b4d617f 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -3,6 +3,7 @@
3#include <linux/stringify.h> 3#include <linux/stringify.h>
4#include <linux/kthread.h> 4#include <linux/kthread.h>
5#include <linux/delay.h> 5#include <linux/delay.h>
6#include <linux/slab.h>
6 7
7static inline int trace_valid_entry(struct trace_entry *entry) 8static inline int trace_valid_entry(struct trace_entry *entry)
8{ 9{
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 678a5120ee30..f4bc9b27de5f 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
157 unsigned long val, flags; 157 unsigned long val, flags;
158 char buf[64]; 158 char buf[64];
159 int ret; 159 int ret;
160 int cpu;
160 161
161 if (count >= sizeof(buf)) 162 if (count >= sizeof(buf))
162 return -EINVAL; 163 return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 172 return ret;
172 173
173 local_irq_save(flags); 174 local_irq_save(flags);
175
176 /*
177 * In case we trace inside arch_spin_lock() or after (NMI),
178 * we will cause circular lock, so we also need to increase
179 * the percpu trace_active here.
180 */
181 cpu = smp_processor_id();
182 per_cpu(trace_active, cpu)++;
183
174 arch_spin_lock(&max_stack_lock); 184 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 185 *ptr = val;
176 arch_spin_unlock(&max_stack_lock); 186 arch_spin_unlock(&max_stack_lock);
187
188 per_cpu(trace_active, cpu)--;
177 local_irq_restore(flags); 189 local_irq_restore(flags);
178 190
179 return count; 191 return count;
@@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
206 218
207static void *t_start(struct seq_file *m, loff_t *pos) 219static void *t_start(struct seq_file *m, loff_t *pos)
208{ 220{
221 int cpu;
222
209 local_irq_disable(); 223 local_irq_disable();
224
225 cpu = smp_processor_id();
226 per_cpu(trace_active, cpu)++;
227
210 arch_spin_lock(&max_stack_lock); 228 arch_spin_lock(&max_stack_lock);
211 229
212 if (*pos == 0) 230 if (*pos == 0)
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
217 235
218static void t_stop(struct seq_file *m, void *p) 236static void t_stop(struct seq_file *m, void *p)
219{ 237{
238 int cpu;
239
220 arch_spin_unlock(&max_stack_lock); 240 arch_spin_unlock(&max_stack_lock);
241
242 cpu = smp_processor_id();
243 per_cpu(trace_active, cpu)--;
244
221 local_irq_enable(); 245 local_irq_enable();
222} 246}
223 247
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index a4bb239eb987..96cffb269e73 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -10,6 +10,7 @@
10 10
11 11
12#include <linux/list.h> 12#include <linux/list.h>
13#include <linux/slab.h>
13#include <linux/rbtree.h> 14#include <linux/rbtree.h>
14#include <linux/debugfs.h> 15#include <linux/debugfs.h>
15#include "trace_stat.h" 16#include "trace_stat.h"
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 75289f372dd2..4d6d711717f2 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,5 +1,6 @@
1#include <trace/syscall.h> 1#include <trace/syscall.h>
2#include <trace/events/syscalls.h> 2#include <trace/events/syscalls.h>
3#include <linux/slab.h>
3#include <linux/kernel.h> 4#include <linux/kernel.h>
4#include <linux/ftrace.h> 5#include <linux/ftrace.h>
5#include <linux/perf_event.h> 6#include <linux/perf_event.h>
@@ -143,70 +144,65 @@ extern char *__bad_type_size(void);
143 #type, #name, offsetof(typeof(trace), name), \ 144 #type, #name, offsetof(typeof(trace), name), \
144 sizeof(trace.name), is_signed_type(type) 145 sizeof(trace.name), is_signed_type(type)
145 146
146int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 147static
148int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
147{ 149{
148 int i; 150 int i;
149 int ret; 151 int pos = 0;
150 struct syscall_metadata *entry = call->data;
151 struct syscall_trace_enter trace;
152 int offset = offsetof(struct syscall_trace_enter, args);
153 152
154 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 153 /* When len=0, we just calculate the needed length */
155 "\tsigned:%u;\n", 154#define LEN_OR_ZERO (len ? len - pos : 0)
156 SYSCALL_FIELD(int, nr));
157 if (!ret)
158 return 0;
159 155
156 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
160 for (i = 0; i < entry->nb_args; i++) { 157 for (i = 0; i < entry->nb_args; i++) {
161 ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], 158 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
162 entry->args[i]); 159 entry->args[i], sizeof(unsigned long),
163 if (!ret) 160 i == entry->nb_args - 1 ? "" : ", ");
164 return 0;
165 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
166 "\tsigned:%u;\n", offset,
167 sizeof(unsigned long),
168 is_signed_type(unsigned long));
169 if (!ret)
170 return 0;
171 offset += sizeof(unsigned long);
172 } 161 }
162 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
173 163
174 trace_seq_puts(s, "\nprint fmt: \"");
175 for (i = 0; i < entry->nb_args; i++) { 164 for (i = 0; i < entry->nb_args; i++) {
176 ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i], 165 pos += snprintf(buf + pos, LEN_OR_ZERO,
177 sizeof(unsigned long), 166 ", ((unsigned long)(REC->%s))", entry->args[i]);
178 i == entry->nb_args - 1 ? "" : ", ");
179 if (!ret)
180 return 0;
181 } 167 }
182 trace_seq_putc(s, '"');
183 168
184 for (i = 0; i < entry->nb_args; i++) { 169#undef LEN_OR_ZERO
185 ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
186 entry->args[i]);
187 if (!ret)
188 return 0;
189 }
190 170
191 return trace_seq_putc(s, '\n'); 171 /* return the length of print_fmt */
172 return pos;
192} 173}
193 174
194int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) 175static int set_syscall_print_fmt(struct ftrace_event_call *call)
195{ 176{
196 int ret; 177 char *print_fmt;
197 struct syscall_trace_exit trace; 178 int len;
179 struct syscall_metadata *entry = call->data;
198 180
199 ret = trace_seq_printf(s, 181 if (entry->enter_event != call) {
200 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 182 call->print_fmt = "\"0x%lx\", REC->ret";
201 "\tsigned:%u;\n"
202 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
203 "\tsigned:%u;\n",
204 SYSCALL_FIELD(int, nr),
205 SYSCALL_FIELD(long, ret));
206 if (!ret)
207 return 0; 183 return 0;
184 }
185
186 /* First: called with 0 length to calculate the needed length */
187 len = __set_enter_print_fmt(entry, NULL, 0);
188
189 print_fmt = kmalloc(len + 1, GFP_KERNEL);
190 if (!print_fmt)
191 return -ENOMEM;
192
193 /* Second: actually write the @print_fmt */
194 __set_enter_print_fmt(entry, print_fmt, len + 1);
195 call->print_fmt = print_fmt;
208 196
209 return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); 197 return 0;
198}
199
200static void free_syscall_print_fmt(struct ftrace_event_call *call)
201{
202 struct syscall_metadata *entry = call->data;
203
204 if (entry->enter_event == call)
205 kfree(call->print_fmt);
210} 206}
211 207
212int syscall_enter_define_fields(struct ftrace_event_call *call) 208int syscall_enter_define_fields(struct ftrace_event_call *call)
@@ -386,12 +382,22 @@ int init_syscall_trace(struct ftrace_event_call *call)
386{ 382{
387 int id; 383 int id;
388 384
389 id = register_ftrace_event(call->event); 385 if (set_syscall_print_fmt(call) < 0)
390 if (!id) 386 return -ENOMEM;
391 return -ENODEV; 387
392 call->id = id; 388 id = trace_event_raw_init(call);
393 INIT_LIST_HEAD(&call->fields); 389
394 return 0; 390 if (id < 0) {
391 free_syscall_print_fmt(call);
392 return id;
393 }
394
395 return id;
396}
397
398unsigned long __init arch_syscall_addr(int nr)
399{
400 return (unsigned long)sys_call_table[nr];
395} 401}
396 402
397int __init init_ftrace_syscalls(void) 403int __init init_ftrace_syscalls(void)
@@ -421,27 +427,24 @@ int __init init_ftrace_syscalls(void)
421} 427}
422core_initcall(init_ftrace_syscalls); 428core_initcall(init_ftrace_syscalls);
423 429
424#ifdef CONFIG_EVENT_PROFILE 430#ifdef CONFIG_PERF_EVENTS
425 431
426static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 432static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
427static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); 433static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
428static int sys_prof_refcount_enter; 434static int sys_perf_refcount_enter;
429static int sys_prof_refcount_exit; 435static int sys_perf_refcount_exit;
430 436
431static void prof_syscall_enter(struct pt_regs *regs, long id) 437static void perf_syscall_enter(struct pt_regs *regs, long id)
432{ 438{
433 struct syscall_metadata *sys_data; 439 struct syscall_metadata *sys_data;
434 struct syscall_trace_enter *rec; 440 struct syscall_trace_enter *rec;
435 unsigned long flags; 441 unsigned long flags;
436 char *trace_buf;
437 char *raw_data;
438 int syscall_nr; 442 int syscall_nr;
439 int rctx; 443 int rctx;
440 int size; 444 int size;
441 int cpu;
442 445
443 syscall_nr = syscall_get_nr(current, regs); 446 syscall_nr = syscall_get_nr(current, regs);
444 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 447 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
445 return; 448 return;
446 449
447 sys_data = syscall_nr_to_meta(syscall_nr); 450 sys_data = syscall_nr_to_meta(syscall_nr);
@@ -453,44 +456,22 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
453 size = ALIGN(size + sizeof(u32), sizeof(u64)); 456 size = ALIGN(size + sizeof(u32), sizeof(u64));
454 size -= sizeof(u32); 457 size -= sizeof(u32);
455 458
456 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 459 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
457 "profile buffer not large enough")) 460 "perf buffer not large enough"))
458 return; 461 return;
459 462
460 /* Protect the per cpu buffer, begin the rcu read side */ 463 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
461 local_irq_save(flags); 464 sys_data->enter_event->id, &rctx, &flags);
462 465 if (!rec)
463 rctx = perf_swevent_get_recursion_context(); 466 return;
464 if (rctx < 0)
465 goto end_recursion;
466
467 cpu = smp_processor_id();
468
469 trace_buf = rcu_dereference(perf_trace_buf);
470
471 if (!trace_buf)
472 goto end;
473
474 raw_data = per_cpu_ptr(trace_buf, cpu);
475
476 /* zero the dead bytes from align to not leak stack to user */
477 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
478 467
479 rec = (struct syscall_trace_enter *) raw_data;
480 tracing_generic_entry_update(&rec->ent, 0, 0);
481 rec->ent.type = sys_data->enter_event->id;
482 rec->nr = syscall_nr; 468 rec->nr = syscall_nr;
483 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 469 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
484 (unsigned long *)&rec->args); 470 (unsigned long *)&rec->args);
485 perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); 471 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
486
487end:
488 perf_swevent_put_recursion_context(rctx);
489end_recursion:
490 local_irq_restore(flags);
491} 472}
492 473
493int prof_sysenter_enable(struct ftrace_event_call *call) 474int perf_sysenter_enable(struct ftrace_event_call *call)
494{ 475{
495 int ret = 0; 476 int ret = 0;
496 int num; 477 int num;
@@ -498,47 +479,44 @@ int prof_sysenter_enable(struct ftrace_event_call *call)
498 num = ((struct syscall_metadata *)call->data)->syscall_nr; 479 num = ((struct syscall_metadata *)call->data)->syscall_nr;
499 480
500 mutex_lock(&syscall_trace_lock); 481 mutex_lock(&syscall_trace_lock);
501 if (!sys_prof_refcount_enter) 482 if (!sys_perf_refcount_enter)
502 ret = register_trace_sys_enter(prof_syscall_enter); 483 ret = register_trace_sys_enter(perf_syscall_enter);
503 if (ret) { 484 if (ret) {
504 pr_info("event trace: Could not activate" 485 pr_info("event trace: Could not activate"
505 "syscall entry trace point"); 486 "syscall entry trace point");
506 } else { 487 } else {
507 set_bit(num, enabled_prof_enter_syscalls); 488 set_bit(num, enabled_perf_enter_syscalls);
508 sys_prof_refcount_enter++; 489 sys_perf_refcount_enter++;
509 } 490 }
510 mutex_unlock(&syscall_trace_lock); 491 mutex_unlock(&syscall_trace_lock);
511 return ret; 492 return ret;
512} 493}
513 494
514void prof_sysenter_disable(struct ftrace_event_call *call) 495void perf_sysenter_disable(struct ftrace_event_call *call)
515{ 496{
516 int num; 497 int num;
517 498
518 num = ((struct syscall_metadata *)call->data)->syscall_nr; 499 num = ((struct syscall_metadata *)call->data)->syscall_nr;
519 500
520 mutex_lock(&syscall_trace_lock); 501 mutex_lock(&syscall_trace_lock);
521 sys_prof_refcount_enter--; 502 sys_perf_refcount_enter--;
522 clear_bit(num, enabled_prof_enter_syscalls); 503 clear_bit(num, enabled_perf_enter_syscalls);
523 if (!sys_prof_refcount_enter) 504 if (!sys_perf_refcount_enter)
524 unregister_trace_sys_enter(prof_syscall_enter); 505 unregister_trace_sys_enter(perf_syscall_enter);
525 mutex_unlock(&syscall_trace_lock); 506 mutex_unlock(&syscall_trace_lock);
526} 507}
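
The enable/disable pair above follows a refcount-plus-bitmap scheme: the sys_enter tracepoint is registered only when the first perf user appears and unregistered when the last one goes away (sys_perf_refcount_enter, under syscall_trace_lock), while the per-syscall bit in enabled_perf_enter_syscalls lets the shared handler bail out cheaply for syscalls nobody is profiling. (Incidentally, the two pr_info() string literals are concatenated without a separating space, so the message prints as "...activatesyscall entry trace point"; the patch keeps that wording and only corrects "entry" to "exit" in the exit-path copy.) A generic sketch of the same pattern follows; the names my_hook_*, register_my_hook(), MAX_IDS are hypothetical stand-ins, not kernel APIs.

	#include <linux/bitmap.h>
	#include <linux/mutex.h>

	#define MAX_IDS 512					/* hypothetical bound */

	extern int register_my_hook(void (*fn)(int id));	/* stand-ins for the real */
	extern void unregister_my_hook(void (*fn)(int id));	/* tracepoint (un)register */
	static void my_handler(int id);

	static DEFINE_MUTEX(my_hook_lock);
	static int my_hook_refcount;
	static DECLARE_BITMAP(enabled_ids, MAX_IDS);

	static int my_hook_enable(int id)
	{
		int ret = 0;

		mutex_lock(&my_hook_lock);
		if (!my_hook_refcount)			/* first user: hook the tracepoint */
			ret = register_my_hook(my_handler);
		if (!ret) {
			set_bit(id, enabled_ids);	/* shared handler tests this bitmap */
			my_hook_refcount++;
		}
		mutex_unlock(&my_hook_lock);
		return ret;
	}

	static void my_hook_disable(int id)
	{
		mutex_lock(&my_hook_lock);
		my_hook_refcount--;
		clear_bit(id, enabled_ids);
		if (!my_hook_refcount)			/* last user: detach the handler */
			unregister_my_hook(my_handler);
		mutex_unlock(&my_hook_lock);
	}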
527 508
528static void prof_syscall_exit(struct pt_regs *regs, long ret) 509static void perf_syscall_exit(struct pt_regs *regs, long ret)
529{ 510{
530 struct syscall_metadata *sys_data; 511 struct syscall_metadata *sys_data;
531 struct syscall_trace_exit *rec; 512 struct syscall_trace_exit *rec;
532 unsigned long flags; 513 unsigned long flags;
533 int syscall_nr; 514 int syscall_nr;
534 char *trace_buf;
535 char *raw_data;
536 int rctx; 515 int rctx;
537 int size; 516 int size;
538 int cpu;
539 517
540 syscall_nr = syscall_get_nr(current, regs); 518 syscall_nr = syscall_get_nr(current, regs);
541 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 519 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
542 return; 520 return;
543 521
544 sys_data = syscall_nr_to_meta(syscall_nr); 522 sys_data = syscall_nr_to_meta(syscall_nr);
@@ -553,45 +531,22 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
553 * Impossible, but be paranoid with the future 531 * Impossible, but be paranoid with the future
554 * How to put this check outside runtime? 532 * How to put this check outside runtime?
555 */ 533 */
556 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 534 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
557 "exit event has grown above profile buffer size")) 535 "exit event has grown above perf buffer size"))
558 return; 536 return;
559 537
560 /* Protect the per cpu buffer, begin the rcu read side */ 538 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
561 local_irq_save(flags); 539 sys_data->exit_event->id, &rctx, &flags);
562 540 if (!rec)
563 rctx = perf_swevent_get_recursion_context(); 541 return;
564 if (rctx < 0)
565 goto end_recursion;
566
567 cpu = smp_processor_id();
568
569 trace_buf = rcu_dereference(perf_trace_buf);
570
571 if (!trace_buf)
572 goto end;
573
574 raw_data = per_cpu_ptr(trace_buf, cpu);
575
576 /* zero the dead bytes from align to not leak stack to user */
577 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
578
579 rec = (struct syscall_trace_exit *)raw_data;
580 542
581 tracing_generic_entry_update(&rec->ent, 0, 0);
582 rec->ent.type = sys_data->exit_event->id;
583 rec->nr = syscall_nr; 543 rec->nr = syscall_nr;
584 rec->ret = syscall_get_return_value(current, regs); 544 rec->ret = syscall_get_return_value(current, regs);
585 545
586 perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); 546 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
587
588end:
589 perf_swevent_put_recursion_context(rctx);
590end_recursion:
591 local_irq_restore(flags);
592} 547}
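
Both handlers size the record with the same two-step rounding shown at the top of the enter hunk, size = ALIGN(size + sizeof(u32), sizeof(u64)); size -= sizeof(u32);, before checking it against PERF_MAX_TRACE_SIZE. My reading of the arithmetic is that perf prepends a u32 length field to each raw sample, so the payload is padded until header plus payload lands on a u64 boundary; the diff itself only shows the computation. A small stand-alone program that reproduces the rounding for a few payload sizes:

	/* Worked example of the size rounding done before perf_trace_buf_prepare().
	 * The u32 here stands for the size header perf prepends to raw samples
	 * (that interpretation is mine; the hunk only shows the arithmetic). */
	#include <stdio.h>

	#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

	int main(void)
	{
		unsigned int payloads[] = { 12, 20, 22, 36 };
		unsigned int i;

		for (i = 0; i < sizeof(payloads) / sizeof(payloads[0]); i++) {
			unsigned long size = payloads[i];

			/* same two steps as in perf_syscall_enter() above */
			size = ALIGN(size + sizeof(unsigned int), sizeof(unsigned long long));
			size -= sizeof(unsigned int);

			/* e.g. payload 22 -> padded 28: header(4) + 28 = 32, a multiple of 8 */
			printf("payload %2u -> padded %2lu\n", payloads[i], size);
		}
		return 0;
	}

The bytes added by this padding are the "dead bytes" that the preparation step zeroes (per the removed comment) so that no stale stack contents reach user space.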
593 548
594int prof_sysexit_enable(struct ftrace_event_call *call) 549int perf_sysexit_enable(struct ftrace_event_call *call)
595{ 550{
596 int ret = 0; 551 int ret = 0;
597 int num; 552 int num;
@@ -599,33 +554,32 @@ int prof_sysexit_enable(struct ftrace_event_call *call)
599 num = ((struct syscall_metadata *)call->data)->syscall_nr; 554 num = ((struct syscall_metadata *)call->data)->syscall_nr;
600 555
601 mutex_lock(&syscall_trace_lock); 556 mutex_lock(&syscall_trace_lock);
602 if (!sys_prof_refcount_exit) 557 if (!sys_perf_refcount_exit)
603 ret = register_trace_sys_exit(prof_syscall_exit); 558 ret = register_trace_sys_exit(perf_syscall_exit);
604 if (ret) { 559 if (ret) {
605 pr_info("event trace: Could not activate" 560 pr_info("event trace: Could not activate"
606 "syscall entry trace point"); 561 "syscall exit trace point");
607 } else { 562 } else {
608 set_bit(num, enabled_prof_exit_syscalls); 563 set_bit(num, enabled_perf_exit_syscalls);
609 sys_prof_refcount_exit++; 564 sys_perf_refcount_exit++;
610 } 565 }
611 mutex_unlock(&syscall_trace_lock); 566 mutex_unlock(&syscall_trace_lock);
612 return ret; 567 return ret;
613} 568}
614 569
615void prof_sysexit_disable(struct ftrace_event_call *call) 570void perf_sysexit_disable(struct ftrace_event_call *call)
616{ 571{
617 int num; 572 int num;
618 573
619 num = ((struct syscall_metadata *)call->data)->syscall_nr; 574 num = ((struct syscall_metadata *)call->data)->syscall_nr;
620 575
621 mutex_lock(&syscall_trace_lock); 576 mutex_lock(&syscall_trace_lock);
622 sys_prof_refcount_exit--; 577 sys_perf_refcount_exit--;
623 clear_bit(num, enabled_prof_exit_syscalls); 578 clear_bit(num, enabled_perf_exit_syscalls);
624 if (!sys_prof_refcount_exit) 579 if (!sys_perf_refcount_exit)
625 unregister_trace_sys_exit(prof_syscall_exit); 580 unregister_trace_sys_exit(perf_syscall_exit);
626 mutex_unlock(&syscall_trace_lock); 581 mutex_unlock(&syscall_trace_lock);
627} 582}
628 583
629#endif 584#endif /* CONFIG_PERF_EVENTS */
630
631 585
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 40cafb07dffd..cc2d2faa7d9e 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -9,6 +9,7 @@
9#include <trace/events/workqueue.h> 9#include <trace/events/workqueue.h>
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include <linux/slab.h>
12#include <linux/kref.h> 13#include <linux/kref.h>
13#include "trace_stat.h" 14#include "trace_stat.h"
14#include "trace.h" 15#include "trace.h"