Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                       |  15
-rw-r--r--  kernel/trace/Makefile                      |   2
-rw-r--r--  kernel/trace/blktrace.c                    |   6
-rw-r--r--  kernel/trace/ftrace.c                      |  82
-rw-r--r--  kernel/trace/power-traces.c                |   1
-rw-r--r--  kernel/trace/ring_buffer.c                 |  64
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c       |   1
-rw-r--r--  kernel/trace/trace.c                       | 206
-rw-r--r--  kernel/trace/trace.h                       |  11
-rw-r--r--  kernel/trace/trace_branch.c                |  19
-rw-r--r--  kernel/trace/trace_clock.c                 |   5
-rw-r--r--  kernel/trace/trace_event_perf.c (renamed from kernel/trace/trace_event_profile.c) | 63
-rw-r--r--  kernel/trace/trace_events.c                |  84
-rw-r--r--  kernel/trace/trace_events_filter.c         |   1
-rw-r--r--  kernel/trace/trace_export.c                |  87
-rw-r--r--  kernel/trace/trace_functions_graph.c       | 108
-rw-r--r--  kernel/trace/trace_kprobe.c                | 139
-rw-r--r--  kernel/trace/trace_ksym.c                  |   1
-rw-r--r--  kernel/trace/trace_mmiotrace.c             |   1
-rw-r--r--  kernel/trace/trace_selftest.c              |   1
-rw-r--r--  kernel/trace/trace_stack.c                 |  24
-rw-r--r--  kernel/trace/trace_stat.c                  |   1
-rw-r--r--  kernel/trace/trace_syscalls.c              | 186
-rw-r--r--  kernel/trace/trace_workqueue.c             |   1
24 files changed, 645 insertions, 464 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6c22d8a2f289..13e13d428cd3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -27,9 +27,7 @@ config HAVE_FUNCTION_GRAPH_TRACER
27config HAVE_FUNCTION_GRAPH_FP_TEST 27config HAVE_FUNCTION_GRAPH_FP_TEST
28 bool 28 bool
29 help 29 help
30 An arch may pass in a unique value (frame pointer) to both the 30 See Documentation/trace/ftrace-design.txt
31 entering and exiting of a function. On exit, the value is compared
32 and if it does not match, then it will panic the kernel.
33 31
34config HAVE_FUNCTION_TRACE_MCOUNT_TEST 32config HAVE_FUNCTION_TRACE_MCOUNT_TEST
35 bool 33 bool
@@ -330,15 +328,6 @@ config BRANCH_TRACER
330 328
331 Say N if unsure. 329 Say N if unsure.
332 330
333config POWER_TRACER
334 bool "Trace power consumption behavior"
335 depends on X86
336 select GENERIC_TRACER
337 help
338 This tracer helps developers to analyze and optimize the kernel's
339 power management decisions, specifically the C-state and P-state
340 behavior.
341
342config KSYM_TRACER 331config KSYM_TRACER
343 bool "Trace read and write access on kernel memory locations" 332 bool "Trace read and write access on kernel memory locations"
344 depends on HAVE_HW_BREAKPOINT 333 depends on HAVE_HW_BREAKPOINT
@@ -451,7 +440,7 @@ config BLK_DEV_IO_TRACE
451 440
452config KPROBE_EVENT 441config KPROBE_EVENT
453 depends on KPROBES 442 depends on KPROBES
454 depends on X86 443 depends on HAVE_REGS_AND_STACK_ACCESS_API
455 bool "Enable kprobes-based dynamic events" 444 bool "Enable kprobes-based dynamic events"
456 select TRACING 445 select TRACING
457 default y 446 default y
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index d00c6fe23f54..78edc6490038 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events.o
52obj-$(CONFIG_EVENT_TRACING) += trace_export.o 52obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54ifeq ($(CONFIG_PERF_EVENTS),y) 54ifeq ($(CONFIG_PERF_EVENTS),y)
55obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o 55obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
56endif 56endif
57obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 57obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
58obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 58obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d9d6206e0b14..b3bc91a3f510 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -21,6 +21,7 @@
21#include <linux/percpu.h> 21#include <linux/percpu.h>
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/slab.h>
24#include <linux/debugfs.h> 25#include <linux/debugfs.h>
25#include <linux/smp_lock.h> 26#include <linux/smp_lock.h>
26#include <linux/time.h> 27#include <linux/time.h>
@@ -540,9 +541,10 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
540 if (ret) 541 if (ret)
541 return ret; 542 return ret;
542 543
543 if (copy_to_user(arg, &buts, sizeof(buts))) 544 if (copy_to_user(arg, &buts, sizeof(buts))) {
545 blk_trace_remove(q);
544 return -EFAULT; 546 return -EFAULT;
545 547 }
546 return 0; 548 return 0;
547} 549}
548EXPORT_SYMBOL_GPL(blk_trace_setup); 550EXPORT_SYMBOL_GPL(blk_trace_setup);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1904797f4a8a..2404b59b3097 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -24,9 +24,11 @@
24#include <linux/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/ftrace.h> 25#include <linux/ftrace.h>
26#include <linux/sysctl.h> 26#include <linux/sysctl.h>
27#include <linux/slab.h>
27#include <linux/ctype.h> 28#include <linux/ctype.h>
28#include <linux/list.h> 29#include <linux/list.h>
29#include <linux/hash.h> 30#include <linux/hash.h>
31#include <linux/rcupdate.h>
30 32
31#include <trace/events/sched.h> 33#include <trace/events/sched.h>
32 34
@@ -84,22 +86,22 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
84ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; 86ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
85ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 87ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
86 88
87#ifdef CONFIG_FUNCTION_GRAPH_TRACER 89/*
88static int ftrace_set_func(unsigned long *array, int *idx, char *buffer); 90 * Traverse the ftrace_list, invoking all entries. The reason that we
89#endif 91 * can use rcu_dereference_raw() is that elements removed from this list
90 92 * are simply leaked, so there is no need to interact with a grace-period
93 * mechanism. The rcu_dereference_raw() calls are needed to handle
94 * concurrent insertions into the ftrace_list.
95 *
96 * Silly Alpha and silly pointer-speculation compiler optimizations!
97 */
91static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 98static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
92{ 99{
93 struct ftrace_ops *op = ftrace_list; 100 struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
94
95 /* in case someone actually ports this to alpha! */
96 read_barrier_depends();
97 101
98 while (op != &ftrace_list_end) { 102 while (op != &ftrace_list_end) {
99 /* silly alpha */
100 read_barrier_depends();
101 op->func(ip, parent_ip); 103 op->func(ip, parent_ip);
102 op = op->next; 104 op = rcu_dereference_raw(op->next); /*see above*/
103 }; 105 };
104} 106}
105 107
@@ -154,8 +156,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
154 * the ops->next pointer is valid before another CPU sees 156 * the ops->next pointer is valid before another CPU sees
155 * the ops pointer included into the ftrace_list. 157 * the ops pointer included into the ftrace_list.
156 */ 158 */
157 smp_wmb(); 159 rcu_assign_pointer(ftrace_list, ops);
158 ftrace_list = ops;
159 160
160 if (ftrace_enabled) { 161 if (ftrace_enabled) {
161 ftrace_func_t func; 162 ftrace_func_t func;
@@ -2276,6 +2277,8 @@ __setup("ftrace_filter=", set_ftrace_filter);
2276 2277
2277#ifdef CONFIG_FUNCTION_GRAPH_TRACER 2278#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2278static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; 2279static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
2280static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
2281
2279static int __init set_graph_function(char *str) 2282static int __init set_graph_function(char *str)
2280{ 2283{
2281 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); 2284 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
@@ -2402,6 +2405,7 @@ static const struct file_operations ftrace_notrace_fops = {
2402static DEFINE_MUTEX(graph_lock); 2405static DEFINE_MUTEX(graph_lock);
2403 2406
2404int ftrace_graph_count; 2407int ftrace_graph_count;
2408int ftrace_graph_filter_enabled;
2405unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; 2409unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
2406 2410
2407static void * 2411static void *
@@ -2424,7 +2428,7 @@ static void *g_start(struct seq_file *m, loff_t *pos)
2424 mutex_lock(&graph_lock); 2428 mutex_lock(&graph_lock);
2425 2429
2426 /* Nothing, tell g_show to print all functions are enabled */ 2430 /* Nothing, tell g_show to print all functions are enabled */
2427 if (!ftrace_graph_count && !*pos) 2431 if (!ftrace_graph_filter_enabled && !*pos)
2428 return (void *)1; 2432 return (void *)1;
2429 2433
2430 return __g_next(m, pos); 2434 return __g_next(m, pos);
@@ -2470,6 +2474,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
2470 mutex_lock(&graph_lock); 2474 mutex_lock(&graph_lock);
2471 if ((file->f_mode & FMODE_WRITE) && 2475 if ((file->f_mode & FMODE_WRITE) &&
2472 (file->f_flags & O_TRUNC)) { 2476 (file->f_flags & O_TRUNC)) {
2477 ftrace_graph_filter_enabled = 0;
2473 ftrace_graph_count = 0; 2478 ftrace_graph_count = 0;
2474 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); 2479 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
2475 } 2480 }
@@ -2495,7 +2500,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2495 struct dyn_ftrace *rec; 2500 struct dyn_ftrace *rec;
2496 struct ftrace_page *pg; 2501 struct ftrace_page *pg;
2497 int search_len; 2502 int search_len;
2498 int found = 0; 2503 int fail = 1;
2499 int type, not; 2504 int type, not;
2500 char *search; 2505 char *search;
2501 bool exists; 2506 bool exists;
@@ -2506,37 +2511,51 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2506 2511
2507 /* decode regex */ 2512 /* decode regex */
2508 type = filter_parse_regex(buffer, strlen(buffer), &search, &not); 2513 type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
2509 if (not) 2514 if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
2510 return -EINVAL; 2515 return -EBUSY;
2511 2516
2512 search_len = strlen(search); 2517 search_len = strlen(search);
2513 2518
2514 mutex_lock(&ftrace_lock); 2519 mutex_lock(&ftrace_lock);
2515 do_for_each_ftrace_rec(pg, rec) { 2520 do_for_each_ftrace_rec(pg, rec) {
2516 2521
2517 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2518 break;
2519
2520 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) 2522 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
2521 continue; 2523 continue;
2522 2524
2523 if (ftrace_match_record(rec, search, search_len, type)) { 2525 if (ftrace_match_record(rec, search, search_len, type)) {
2524 /* ensure it is not already in the array */ 2526 /* if it is in the array */
2525 exists = false; 2527 exists = false;
2526 for (i = 0; i < *idx; i++) 2528 for (i = 0; i < *idx; i++) {
2527 if (array[i] == rec->ip) { 2529 if (array[i] == rec->ip) {
2528 exists = true; 2530 exists = true;
2529 break; 2531 break;
2530 } 2532 }
2531 if (!exists) 2533 }
2532 array[(*idx)++] = rec->ip; 2534
2533 found = 1; 2535 if (!not) {
2536 fail = 0;
2537 if (!exists) {
2538 array[(*idx)++] = rec->ip;
2539 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2540 goto out;
2541 }
2542 } else {
2543 if (exists) {
2544 array[i] = array[--(*idx)];
2545 array[*idx] = 0;
2546 fail = 0;
2547 }
2548 }
2534 } 2549 }
2535 } while_for_each_ftrace_rec(); 2550 } while_for_each_ftrace_rec();
2536 2551out:
2537 mutex_unlock(&ftrace_lock); 2552 mutex_unlock(&ftrace_lock);
2538 2553
2539 return found ? 0 : -EINVAL; 2554 if (fail)
2555 return -EINVAL;
2556
2557 ftrace_graph_filter_enabled = 1;
2558 return 0;
2540} 2559}
2541 2560
2542static ssize_t 2561static ssize_t
@@ -2546,16 +2565,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
2546 struct trace_parser parser; 2565 struct trace_parser parser;
2547 ssize_t read, ret; 2566 ssize_t read, ret;
2548 2567
2549 if (!cnt || cnt < 0) 2568 if (!cnt)
2550 return 0; 2569 return 0;
2551 2570
2552 mutex_lock(&graph_lock); 2571 mutex_lock(&graph_lock);
2553 2572
2554 if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
2555 ret = -EBUSY;
2556 goto out_unlock;
2557 }
2558
2559 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { 2573 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
2560 ret = -ENOMEM; 2574 ret = -ENOMEM;
2561 goto out_unlock; 2575 goto out_unlock;
@@ -3340,6 +3354,7 @@ void ftrace_graph_init_task(struct task_struct *t)
3340{ 3354{
3341 /* Make sure we do not use the parent ret_stack */ 3355 /* Make sure we do not use the parent ret_stack */
3342 t->ret_stack = NULL; 3356 t->ret_stack = NULL;
3357 t->curr_ret_stack = -1;
3343 3358
3344 if (ftrace_graph_active) { 3359 if (ftrace_graph_active) {
3345 struct ftrace_ret_stack *ret_stack; 3360 struct ftrace_ret_stack *ret_stack;
@@ -3349,7 +3364,6 @@ void ftrace_graph_init_task(struct task_struct *t)
3349 GFP_KERNEL); 3364 GFP_KERNEL);
3350 if (!ret_stack) 3365 if (!ret_stack)
3351 return; 3366 return;
3352 t->curr_ret_stack = -1;
3353 atomic_set(&t->tracing_graph_pause, 0); 3367 atomic_set(&t->tracing_graph_pause, 0);
3354 atomic_set(&t->trace_overrun, 0); 3368 atomic_set(&t->trace_overrun, 0);
3355 t->ftrace_timestamp = 0; 3369 t->ftrace_timestamp = 0;
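The ftrace.c hunk above replaces the open-coded read_barrier_depends()/smp_wmb() pair with rcu_dereference_raw()/rcu_assign_pointer() when walking and publishing the ftrace_ops list. The following is a minimal, hypothetical sketch of that publish/subscribe pattern (my_ops and friends are placeholder names, not the kernel's); it relies on the same assumption the in-tree comment states, namely that removed entries are simply leaked, so the read side needs no grace period.

#include <linux/rcupdate.h>

struct my_ops {
	void (*func)(unsigned long ip, unsigned long parent_ip);
	struct my_ops *next;
};

static struct my_ops my_ops_end;                  /* list terminator */
static struct my_ops *my_ops_list = &my_ops_end;  /* head, written by registrars */

/* Read side: no rcu_read_lock() needed because entries are never freed;
 * rcu_dereference_raw() still orders the pointer loads (Alpha, compiler
 * pointer speculation). */
static void my_list_func(unsigned long ip, unsigned long parent_ip)
{
	struct my_ops *op = rcu_dereference_raw(my_ops_list);

	while (op != &my_ops_end) {
		op->func(ip, parent_ip);
		op = rcu_dereference_raw(op->next);
	}
}

/* Write side: initialize ->next first, then publish with
 * rcu_assign_pointer(), which supplies the barrier the old smp_wmb() did. */
static void my_register(struct my_ops *ops)
{
	ops->next = my_ops_list;
	rcu_assign_pointer(my_ops_list, ops);
}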
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index 9f4f565b01e6..a22582a06161 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -9,7 +9,6 @@
9#include <linux/workqueue.h> 9#include <linux/workqueue.h>
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/slab.h>
13 12
14#define CREATE_TRACE_POINTS 13#define CREATE_TRACE_POINTS
15#include <trace/events/power.h> 14#include <trace/events/power.h>
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index edefe3b2801b..41ca394feb22 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -14,12 +14,14 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/percpu.h> 15#include <linux/percpu.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/slab.h>
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/hash.h> 19#include <linux/hash.h>
19#include <linux/list.h> 20#include <linux/list.h>
20#include <linux/cpu.h> 21#include <linux/cpu.h>
21#include <linux/fs.h> 22#include <linux/fs.h>
22 23
24#include <asm/local.h>
23#include "trace.h" 25#include "trace.h"
24 26
25/* 27/*
@@ -206,6 +208,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
206#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 208#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
207#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ 209#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
208 210
211#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
212# define RB_FORCE_8BYTE_ALIGNMENT 0
213# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
214#else
215# define RB_FORCE_8BYTE_ALIGNMENT 1
216# define RB_ARCH_ALIGNMENT 8U
217#endif
218
209/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ 219/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
210#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX 220#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
211 221
@@ -464,6 +474,8 @@ struct ring_buffer_iter {
464 struct ring_buffer_per_cpu *cpu_buffer; 474 struct ring_buffer_per_cpu *cpu_buffer;
465 unsigned long head; 475 unsigned long head;
466 struct buffer_page *head_page; 476 struct buffer_page *head_page;
477 struct buffer_page *cache_reader_page;
478 unsigned long cache_read;
467 u64 read_stamp; 479 u64 read_stamp;
468}; 480};
469 481
@@ -1198,18 +1210,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1198 1210
1199 for (i = 0; i < nr_pages; i++) { 1211 for (i = 0; i < nr_pages; i++) {
1200 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) 1212 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1201 return; 1213 goto out;
1202 p = cpu_buffer->pages->next; 1214 p = cpu_buffer->pages->next;
1203 bpage = list_entry(p, struct buffer_page, list); 1215 bpage = list_entry(p, struct buffer_page, list);
1204 list_del_init(&bpage->list); 1216 list_del_init(&bpage->list);
1205 free_buffer_page(bpage); 1217 free_buffer_page(bpage);
1206 } 1218 }
1207 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) 1219 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1208 return; 1220 goto out;
1209 1221
1210 rb_reset_cpu(cpu_buffer); 1222 rb_reset_cpu(cpu_buffer);
1211 rb_check_pages(cpu_buffer); 1223 rb_check_pages(cpu_buffer);
1212 1224
1225out:
1213 spin_unlock_irq(&cpu_buffer->reader_lock); 1226 spin_unlock_irq(&cpu_buffer->reader_lock);
1214} 1227}
1215 1228
@@ -1226,7 +1239,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1226 1239
1227 for (i = 0; i < nr_pages; i++) { 1240 for (i = 0; i < nr_pages; i++) {
1228 if (RB_WARN_ON(cpu_buffer, list_empty(pages))) 1241 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
1229 return; 1242 goto out;
1230 p = pages->next; 1243 p = pages->next;
1231 bpage = list_entry(p, struct buffer_page, list); 1244 bpage = list_entry(p, struct buffer_page, list);
1232 list_del_init(&bpage->list); 1245 list_del_init(&bpage->list);
@@ -1235,6 +1248,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1235 rb_reset_cpu(cpu_buffer); 1248 rb_reset_cpu(cpu_buffer);
1236 rb_check_pages(cpu_buffer); 1249 rb_check_pages(cpu_buffer);
1237 1250
1251out:
1238 spin_unlock_irq(&cpu_buffer->reader_lock); 1252 spin_unlock_irq(&cpu_buffer->reader_lock);
1239} 1253}
1240 1254
@@ -1544,7 +1558,7 @@ rb_update_event(struct ring_buffer_event *event,
1544 1558
1545 case 0: 1559 case 0:
1546 length -= RB_EVNT_HDR_SIZE; 1560 length -= RB_EVNT_HDR_SIZE;
1547 if (length > RB_MAX_SMALL_DATA) 1561 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
1548 event->array[0] = length; 1562 event->array[0] = length;
1549 else 1563 else
1550 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); 1564 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
@@ -1719,11 +1733,11 @@ static unsigned rb_calculate_event_length(unsigned length)
1719 if (!length) 1733 if (!length)
1720 length = 1; 1734 length = 1;
1721 1735
1722 if (length > RB_MAX_SMALL_DATA) 1736 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
1723 length += sizeof(event.array[0]); 1737 length += sizeof(event.array[0]);
1724 1738
1725 length += RB_EVNT_HDR_SIZE; 1739 length += RB_EVNT_HDR_SIZE;
1726 length = ALIGN(length, RB_ALIGNMENT); 1740 length = ALIGN(length, RB_ARCH_ALIGNMENT);
1727 1741
1728 return length; 1742 return length;
1729} 1743}
@@ -2230,12 +2244,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2230 if (ring_buffer_flags != RB_BUFFERS_ON) 2244 if (ring_buffer_flags != RB_BUFFERS_ON)
2231 return NULL; 2245 return NULL;
2232 2246
2233 if (atomic_read(&buffer->record_disabled))
2234 return NULL;
2235
2236 /* If we are tracing schedule, we don't want to recurse */ 2247 /* If we are tracing schedule, we don't want to recurse */
2237 resched = ftrace_preempt_disable(); 2248 resched = ftrace_preempt_disable();
2238 2249
2250 if (atomic_read(&buffer->record_disabled))
2251 goto out_nocheck;
2252
2239 if (trace_recursive_lock()) 2253 if (trace_recursive_lock())
2240 goto out_nocheck; 2254 goto out_nocheck;
2241 2255
@@ -2467,11 +2481,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
2467 if (ring_buffer_flags != RB_BUFFERS_ON) 2481 if (ring_buffer_flags != RB_BUFFERS_ON)
2468 return -EBUSY; 2482 return -EBUSY;
2469 2483
2470 if (atomic_read(&buffer->record_disabled))
2471 return -EBUSY;
2472
2473 resched = ftrace_preempt_disable(); 2484 resched = ftrace_preempt_disable();
2474 2485
2486 if (atomic_read(&buffer->record_disabled))
2487 goto out;
2488
2475 cpu = raw_smp_processor_id(); 2489 cpu = raw_smp_processor_id();
2476 2490
2477 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2491 if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -2539,7 +2553,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
2539 * @buffer: The ring buffer to enable writes 2553 * @buffer: The ring buffer to enable writes
2540 * 2554 *
2541 * Note, multiple disables will need the same number of enables 2555 * Note, multiple disables will need the same number of enables
2542 * to truely enable the writing (much like preempt_disable). 2556 * to truly enable the writing (much like preempt_disable).
2543 */ 2557 */
2544void ring_buffer_record_enable(struct ring_buffer *buffer) 2558void ring_buffer_record_enable(struct ring_buffer *buffer)
2545{ 2559{
@@ -2575,7 +2589,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
2575 * @cpu: The CPU to enable. 2589 * @cpu: The CPU to enable.
2576 * 2590 *
2577 * Note, multiple disables will need the same number of enables 2591 * Note, multiple disables will need the same number of enables
2578 * to truely enable the writing (much like preempt_disable). 2592 * to truly enable the writing (much like preempt_disable).
2579 */ 2593 */
2580void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) 2594void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2581{ 2595{
@@ -2716,6 +2730,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2716 iter->read_stamp = cpu_buffer->read_stamp; 2730 iter->read_stamp = cpu_buffer->read_stamp;
2717 else 2731 else
2718 iter->read_stamp = iter->head_page->page->time_stamp; 2732 iter->read_stamp = iter->head_page->page->time_stamp;
2733 iter->cache_reader_page = cpu_buffer->reader_page;
2734 iter->cache_read = cpu_buffer->read;
2719} 2735}
2720 2736
2721/** 2737/**
@@ -3060,13 +3076,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3060 struct ring_buffer_event *event; 3076 struct ring_buffer_event *event;
3061 int nr_loops = 0; 3077 int nr_loops = 0;
3062 3078
3063 if (ring_buffer_iter_empty(iter))
3064 return NULL;
3065
3066 cpu_buffer = iter->cpu_buffer; 3079 cpu_buffer = iter->cpu_buffer;
3067 buffer = cpu_buffer->buffer; 3080 buffer = cpu_buffer->buffer;
3068 3081
3082 /*
3083 * Check if someone performed a consuming read to
3084 * the buffer. A consuming read invalidates the iterator
3085 * and we need to reset the iterator in this case.
3086 */
3087 if (unlikely(iter->cache_read != cpu_buffer->read ||
3088 iter->cache_reader_page != cpu_buffer->reader_page))
3089 rb_iter_reset(iter);
3090
3069 again: 3091 again:
3092 if (ring_buffer_iter_empty(iter))
3093 return NULL;
3094
3070 /* 3095 /*
3071 * We repeat when a timestamp is encountered. 3096 * We repeat when a timestamp is encountered.
3072 * We can get multiple timestamps by nested interrupts or also 3097 * We can get multiple timestamps by nested interrupts or also
@@ -3081,6 +3106,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3081 if (rb_per_cpu_empty(cpu_buffer)) 3106 if (rb_per_cpu_empty(cpu_buffer))
3082 return NULL; 3107 return NULL;
3083 3108
3109 if (iter->head >= local_read(&iter->head_page->page->commit)) {
3110 rb_inc_iter(iter);
3111 goto again;
3112 }
3113
3084 event = rb_iter_head_event(iter); 3114 event = rb_iter_head_event(iter);
3085 3115
3086 switch (event->type_len) { 3116 switch (event->type_len) {
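The RB_FORCE_8BYTE_ALIGNMENT/RB_ARCH_ALIGNMENT hunks above pad ring-buffer events to 8-byte boundaries on 64-bit machines without efficient unaligned access, and force the length into array[0] in that case. Below is a small userspace sketch of that rounding; the header size and small-data limit are assumptions taken from the surrounding context, not the authoritative ring_buffer.c values.

#include <stdio.h>

#define RB_ALIGNMENT       4U
#define RB_EVNT_HDR_SIZE   4U                   /* assumed 32-bit event header */
#define RB_MAX_SMALL_DATA  (RB_ALIGNMENT * 28)  /* assumed type_len payload limit */
#define ALIGN(x, a)        (((x) + ((a) - 1)) & ~((a) - 1))

static unsigned int event_length(unsigned int length,
				 unsigned int arch_align, int force_8byte)
{
	if (!length)
		length = 1;
	if (length > RB_MAX_SMALL_DATA || force_8byte)
		length += sizeof(unsigned int);   /* length stored in array[0] */
	length += RB_EVNT_HDR_SIZE;
	return ALIGN(length, arch_align);
}

int main(void)
{
	/* 12-byte payload: 16 bytes with 4-byte alignment,
	 * 24 bytes once 8-byte alignment is forced. */
	printf("4-byte align: %u\n", event_length(12, RB_ALIGNMENT, 0));
	printf("8-byte align: %u\n", event_length(12, 8U, 1));
	return 0;
}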
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index b2477caf09c2..df74c7982255 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -8,6 +8,7 @@
8#include <linux/kthread.h> 8#include <linux/kthread.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <asm/local.h>
11 12
12struct rb_page { 13struct rb_page {
13 u64 ts; 14 u64 ts;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0df1b0f2cb9e..44f916a04065 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -32,10 +32,11 @@
32#include <linux/splice.h> 32#include <linux/splice.h>
33#include <linux/kdebug.h> 33#include <linux/kdebug.h>
34#include <linux/string.h> 34#include <linux/string.h>
35#include <linux/rwsem.h>
36#include <linux/slab.h>
35#include <linux/ctype.h> 37#include <linux/ctype.h>
36#include <linux/init.h> 38#include <linux/init.h>
37#include <linux/poll.h> 39#include <linux/poll.h>
38#include <linux/gfp.h>
39#include <linux/fs.h> 40#include <linux/fs.h>
40 41
41#include "trace.h" 42#include "trace.h"
@@ -91,20 +92,17 @@ DEFINE_PER_CPU(int, ftrace_cpu_disabled);
91static inline void ftrace_disable_cpu(void) 92static inline void ftrace_disable_cpu(void)
92{ 93{
93 preempt_disable(); 94 preempt_disable();
94 __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled)); 95 __this_cpu_inc(ftrace_cpu_disabled);
95} 96}
96 97
97static inline void ftrace_enable_cpu(void) 98static inline void ftrace_enable_cpu(void)
98{ 99{
99 __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled)); 100 __this_cpu_dec(ftrace_cpu_disabled);
100 preempt_enable(); 101 preempt_enable();
101} 102}
102 103
103static cpumask_var_t __read_mostly tracing_buffer_mask; 104static cpumask_var_t __read_mostly tracing_buffer_mask;
104 105
105/* Define which cpu buffers are currently read in trace_pipe */
106static cpumask_var_t tracing_reader_cpumask;
107
108#define for_each_tracing_cpu(cpu) \ 106#define for_each_tracing_cpu(cpu) \
109 for_each_cpu(cpu, tracing_buffer_mask) 107 for_each_cpu(cpu, tracing_buffer_mask)
110 108
@@ -243,12 +241,91 @@ static struct tracer *current_trace __read_mostly;
243 241
244/* 242/*
245 * trace_types_lock is used to protect the trace_types list. 243 * trace_types_lock is used to protect the trace_types list.
246 * This lock is also used to keep user access serialized.
247 * Accesses from userspace will grab this lock while userspace
248 * activities happen inside the kernel.
249 */ 244 */
250static DEFINE_MUTEX(trace_types_lock); 245static DEFINE_MUTEX(trace_types_lock);
251 246
247/*
248 * serialize the access of the ring buffer
249 *
250 * ring buffer serializes readers, but it is low level protection.
251 * The validity of the events (which returns by ring_buffer_peek() ..etc)
252 * are not protected by ring buffer.
253 *
254 * The content of events may become garbage if we allow other process consumes
255 * these events concurrently:
256 * A) the page of the consumed events may become a normal page
257 * (not reader page) in ring buffer, and this page will be rewrited
258 * by events producer.
259 * B) The page of the consumed events may become a page for splice_read,
260 * and this page will be returned to system.
261 *
262 * These primitives allow multi process access to different cpu ring buffer
263 * concurrently.
264 *
265 * These primitives don't distinguish read-only and read-consume access.
266 * Multi read-only access are also serialized.
267 */
268
269#ifdef CONFIG_SMP
270static DECLARE_RWSEM(all_cpu_access_lock);
271static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
272
273static inline void trace_access_lock(int cpu)
274{
275 if (cpu == TRACE_PIPE_ALL_CPU) {
276 /* gain it for accessing the whole ring buffer. */
277 down_write(&all_cpu_access_lock);
278 } else {
279 /* gain it for accessing a cpu ring buffer. */
280
281 /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
282 down_read(&all_cpu_access_lock);
283
284 /* Secondly block other access to this @cpu ring buffer. */
285 mutex_lock(&per_cpu(cpu_access_lock, cpu));
286 }
287}
288
289static inline void trace_access_unlock(int cpu)
290{
291 if (cpu == TRACE_PIPE_ALL_CPU) {
292 up_write(&all_cpu_access_lock);
293 } else {
294 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
295 up_read(&all_cpu_access_lock);
296 }
297}
298
299static inline void trace_access_lock_init(void)
300{
301 int cpu;
302
303 for_each_possible_cpu(cpu)
304 mutex_init(&per_cpu(cpu_access_lock, cpu));
305}
306
307#else
308
309static DEFINE_MUTEX(access_lock);
310
311static inline void trace_access_lock(int cpu)
312{
313 (void)cpu;
314 mutex_lock(&access_lock);
315}
316
317static inline void trace_access_unlock(int cpu)
318{
319 (void)cpu;
320 mutex_unlock(&access_lock);
321}
322
323static inline void trace_access_lock_init(void)
324{
325}
326
327#endif
328
252/* trace_wait is a waitqueue for tasks blocked on trace_poll */ 329/* trace_wait is a waitqueue for tasks blocked on trace_poll */
253static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 330static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
254 331
@@ -297,6 +374,21 @@ static int __init set_buf_size(char *str)
297} 374}
298__setup("trace_buf_size=", set_buf_size); 375__setup("trace_buf_size=", set_buf_size);
299 376
377static int __init set_tracing_thresh(char *str)
378{
379 unsigned long threshhold;
380 int ret;
381
382 if (!str)
383 return 0;
384 ret = strict_strtoul(str, 0, &threshhold);
385 if (ret < 0)
386 return 0;
387 tracing_thresh = threshhold * 1000;
388 return 1;
389}
390__setup("tracing_thresh=", set_tracing_thresh);
391
300unsigned long nsecs_to_usecs(unsigned long nsecs) 392unsigned long nsecs_to_usecs(unsigned long nsecs)
301{ 393{
302 return nsecs / 1000; 394 return nsecs / 1000;
@@ -502,9 +594,10 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
502static arch_spinlock_t ftrace_max_lock = 594static arch_spinlock_t ftrace_max_lock =
503 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 595 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
504 596
597unsigned long __read_mostly tracing_thresh;
598
505#ifdef CONFIG_TRACER_MAX_TRACE 599#ifdef CONFIG_TRACER_MAX_TRACE
506unsigned long __read_mostly tracing_max_latency; 600unsigned long __read_mostly tracing_max_latency;
507unsigned long __read_mostly tracing_thresh;
508 601
509/* 602/*
510 * Copy the new maximum trace into the separate maximum-trace 603 * Copy the new maximum trace into the separate maximum-trace
@@ -515,7 +608,7 @@ static void
515__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 608__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
516{ 609{
517 struct trace_array_cpu *data = tr->data[cpu]; 610 struct trace_array_cpu *data = tr->data[cpu];
518 struct trace_array_cpu *max_data = tr->data[cpu]; 611 struct trace_array_cpu *max_data;
519 612
520 max_tr.cpu = cpu; 613 max_tr.cpu = cpu;
521 max_tr.time_start = data->preempt_timestamp; 614 max_tr.time_start = data->preempt_timestamp;
@@ -525,7 +618,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
525 max_data->critical_start = data->critical_start; 618 max_data->critical_start = data->critical_start;
526 max_data->critical_end = data->critical_end; 619 max_data->critical_end = data->critical_end;
527 620
528 memcpy(data->comm, tsk->comm, TASK_COMM_LEN); 621 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
529 max_data->pid = tsk->pid; 622 max_data->pid = tsk->pid;
530 max_data->uid = task_uid(tsk); 623 max_data->uid = task_uid(tsk);
531 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; 624 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
@@ -747,10 +840,10 @@ out:
747 mutex_unlock(&trace_types_lock); 840 mutex_unlock(&trace_types_lock);
748} 841}
749 842
750static void __tracing_reset(struct trace_array *tr, int cpu) 843static void __tracing_reset(struct ring_buffer *buffer, int cpu)
751{ 844{
752 ftrace_disable_cpu(); 845 ftrace_disable_cpu();
753 ring_buffer_reset_cpu(tr->buffer, cpu); 846 ring_buffer_reset_cpu(buffer, cpu);
754 ftrace_enable_cpu(); 847 ftrace_enable_cpu();
755} 848}
756 849
@@ -762,7 +855,7 @@ void tracing_reset(struct trace_array *tr, int cpu)
762 855
763 /* Make sure all commits have finished */ 856 /* Make sure all commits have finished */
764 synchronize_sched(); 857 synchronize_sched();
765 __tracing_reset(tr, cpu); 858 __tracing_reset(buffer, cpu);
766 859
767 ring_buffer_record_enable(buffer); 860 ring_buffer_record_enable(buffer);
768} 861}
@@ -780,7 +873,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
780 tr->time_start = ftrace_now(tr->cpu); 873 tr->time_start = ftrace_now(tr->cpu);
781 874
782 for_each_online_cpu(cpu) 875 for_each_online_cpu(cpu)
783 __tracing_reset(tr, cpu); 876 __tracing_reset(buffer, cpu);
784 877
785 ring_buffer_record_enable(buffer); 878 ring_buffer_record_enable(buffer);
786} 879}
@@ -857,6 +950,8 @@ void tracing_start(void)
857 goto out; 950 goto out;
858 } 951 }
859 952
953 /* Prevent the buffers from switching */
954 arch_spin_lock(&ftrace_max_lock);
860 955
861 buffer = global_trace.buffer; 956 buffer = global_trace.buffer;
862 if (buffer) 957 if (buffer)
@@ -866,6 +961,8 @@ void tracing_start(void)
866 if (buffer) 961 if (buffer)
867 ring_buffer_record_enable(buffer); 962 ring_buffer_record_enable(buffer);
868 963
964 arch_spin_unlock(&ftrace_max_lock);
965
869 ftrace_start(); 966 ftrace_start();
870 out: 967 out:
871 spin_unlock_irqrestore(&tracing_start_lock, flags); 968 spin_unlock_irqrestore(&tracing_start_lock, flags);
@@ -887,6 +984,9 @@ void tracing_stop(void)
887 if (trace_stop_count++) 984 if (trace_stop_count++)
888 goto out; 985 goto out;
889 986
987 /* Prevent the buffers from switching */
988 arch_spin_lock(&ftrace_max_lock);
989
890 buffer = global_trace.buffer; 990 buffer = global_trace.buffer;
891 if (buffer) 991 if (buffer)
892 ring_buffer_record_disable(buffer); 992 ring_buffer_record_disable(buffer);
@@ -895,6 +995,8 @@ void tracing_stop(void)
895 if (buffer) 995 if (buffer)
896 ring_buffer_record_disable(buffer); 996 ring_buffer_record_disable(buffer);
897 997
998 arch_spin_unlock(&ftrace_max_lock);
999
898 out: 1000 out:
899 spin_unlock_irqrestore(&tracing_start_lock, flags); 1001 spin_unlock_irqrestore(&tracing_start_lock, flags);
900} 1002}
@@ -951,6 +1053,11 @@ void trace_find_cmdline(int pid, char comm[])
951 return; 1053 return;
952 } 1054 }
953 1055
1056 if (WARN_ON_ONCE(pid < 0)) {
1057 strcpy(comm, "<XXX>");
1058 return;
1059 }
1060
954 if (pid > PID_MAX_DEFAULT) { 1061 if (pid > PID_MAX_DEFAULT) {
955 strcpy(comm, "<...>"); 1062 strcpy(comm, "<...>");
956 return; 1063 return;
@@ -1084,7 +1191,7 @@ trace_function(struct trace_array *tr,
1084 struct ftrace_entry *entry; 1191 struct ftrace_entry *entry;
1085 1192
1086 /* If we are reading the ring buffer, don't trace */ 1193 /* If we are reading the ring buffer, don't trace */
1087 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) 1194 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1088 return; 1195 return;
1089 1196
1090 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), 1197 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@ -1177,6 +1284,13 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1177 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 1284 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1178 return; 1285 return;
1179 1286
1287 /*
1288 * NMIs can not handle page faults, even with fix ups.
1289 * The save user stack can (and often does) fault.
1290 */
1291 if (unlikely(in_nmi()))
1292 return;
1293
1180 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 1294 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1181 sizeof(*entry), flags, pc); 1295 sizeof(*entry), flags, pc);
1182 if (!event) 1296 if (!event)
@@ -1315,8 +1429,10 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1315 entry->fmt = fmt; 1429 entry->fmt = fmt;
1316 1430
1317 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1431 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1318 if (!filter_check_discard(call, entry, buffer, event)) 1432 if (!filter_check_discard(call, entry, buffer, event)) {
1319 ring_buffer_unlock_commit(buffer, event); 1433 ring_buffer_unlock_commit(buffer, event);
1434 ftrace_trace_stack(buffer, flags, 6, pc);
1435 }
1320 1436
1321out_unlock: 1437out_unlock:
1322 arch_spin_unlock(&trace_buf_lock); 1438 arch_spin_unlock(&trace_buf_lock);
@@ -1389,8 +1505,10 @@ int trace_array_vprintk(struct trace_array *tr,
1389 1505
1390 memcpy(&entry->buf, trace_buf, len); 1506 memcpy(&entry->buf, trace_buf, len);
1391 entry->buf[len] = '\0'; 1507 entry->buf[len] = '\0';
1392 if (!filter_check_discard(call, entry, buffer, event)) 1508 if (!filter_check_discard(call, entry, buffer, event)) {
1393 ring_buffer_unlock_commit(buffer, event); 1509 ring_buffer_unlock_commit(buffer, event);
1510 ftrace_trace_stack(buffer, irq_flags, 6, pc);
1511 }
1394 1512
1395 out_unlock: 1513 out_unlock:
1396 arch_spin_unlock(&trace_buf_lock); 1514 arch_spin_unlock(&trace_buf_lock);
@@ -1580,12 +1698,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1580} 1698}
1581 1699
1582/* 1700/*
1583 * No necessary locking here. The worst thing which can
1584 * happen is loosing events consumed at the same time
1585 * by a trace_pipe reader.
1586 * Other than that, we don't risk to crash the ring buffer
1587 * because it serializes the readers.
1588 *
1589 * The current tracer is copied to avoid a global locking 1701 * The current tracer is copied to avoid a global locking
1590 * all around. 1702 * all around.
1591 */ 1703 */
@@ -1623,6 +1735,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1623 1735
1624 ftrace_enable_cpu(); 1736 ftrace_enable_cpu();
1625 1737
1738 iter->leftover = 0;
1626 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1739 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1627 ; 1740 ;
1628 1741
@@ -1640,12 +1753,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1640 } 1753 }
1641 1754
1642 trace_event_read_lock(); 1755 trace_event_read_lock();
1756 trace_access_lock(cpu_file);
1643 return p; 1757 return p;
1644} 1758}
1645 1759
1646static void s_stop(struct seq_file *m, void *p) 1760static void s_stop(struct seq_file *m, void *p)
1647{ 1761{
1762 struct trace_iterator *iter = m->private;
1763
1648 atomic_dec(&trace_record_cmdline_disabled); 1764 atomic_dec(&trace_record_cmdline_disabled);
1765 trace_access_unlock(iter->cpu_file);
1649 trace_event_read_unlock(); 1766 trace_event_read_unlock();
1650} 1767}
1651 1768
@@ -2836,22 +2953,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2836 2953
2837 mutex_lock(&trace_types_lock); 2954 mutex_lock(&trace_types_lock);
2838 2955
2839 /* We only allow one reader per cpu */
2840 if (cpu_file == TRACE_PIPE_ALL_CPU) {
2841 if (!cpumask_empty(tracing_reader_cpumask)) {
2842 ret = -EBUSY;
2843 goto out;
2844 }
2845 cpumask_setall(tracing_reader_cpumask);
2846 } else {
2847 if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2848 cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2849 else {
2850 ret = -EBUSY;
2851 goto out;
2852 }
2853 }
2854
2855 /* create a buffer to store the information to pass to userspace */ 2956 /* create a buffer to store the information to pass to userspace */
2856 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 2957 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2857 if (!iter) { 2958 if (!iter) {
@@ -2907,12 +3008,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
2907 3008
2908 mutex_lock(&trace_types_lock); 3009 mutex_lock(&trace_types_lock);
2909 3010
2910 if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
2911 cpumask_clear(tracing_reader_cpumask);
2912 else
2913 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2914
2915
2916 if (iter->trace->pipe_close) 3011 if (iter->trace->pipe_close)
2917 iter->trace->pipe_close(iter); 3012 iter->trace->pipe_close(iter);
2918 3013
@@ -3074,6 +3169,7 @@ waitagain:
3074 iter->pos = -1; 3169 iter->pos = -1;
3075 3170
3076 trace_event_read_lock(); 3171 trace_event_read_lock();
3172 trace_access_lock(iter->cpu_file);
3077 while (find_next_entry_inc(iter) != NULL) { 3173 while (find_next_entry_inc(iter) != NULL) {
3078 enum print_line_t ret; 3174 enum print_line_t ret;
3079 int len = iter->seq.len; 3175 int len = iter->seq.len;
@@ -3090,6 +3186,7 @@ waitagain:
3090 if (iter->seq.len >= cnt) 3186 if (iter->seq.len >= cnt)
3091 break; 3187 break;
3092 } 3188 }
3189 trace_access_unlock(iter->cpu_file);
3093 trace_event_read_unlock(); 3190 trace_event_read_unlock();
3094 3191
3095 /* Now copy what we have to the user */ 3192 /* Now copy what we have to the user */
@@ -3215,6 +3312,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3215 } 3312 }
3216 3313
3217 trace_event_read_lock(); 3314 trace_event_read_lock();
3315 trace_access_lock(iter->cpu_file);
3218 3316
3219 /* Fill as many pages as possible. */ 3317 /* Fill as many pages as possible. */
3220 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { 3318 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
@@ -3238,6 +3336,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3238 trace_seq_init(&iter->seq); 3336 trace_seq_init(&iter->seq);
3239 } 3337 }
3240 3338
3339 trace_access_unlock(iter->cpu_file);
3241 trace_event_read_unlock(); 3340 trace_event_read_unlock();
3242 mutex_unlock(&iter->mutex); 3341 mutex_unlock(&iter->mutex);
3243 3342
@@ -3539,10 +3638,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3539 3638
3540 info->read = 0; 3639 info->read = 0;
3541 3640
3641 trace_access_lock(info->cpu);
3542 ret = ring_buffer_read_page(info->tr->buffer, 3642 ret = ring_buffer_read_page(info->tr->buffer,
3543 &info->spare, 3643 &info->spare,
3544 count, 3644 count,
3545 info->cpu, 0); 3645 info->cpu, 0);
3646 trace_access_unlock(info->cpu);
3546 if (ret < 0) 3647 if (ret < 0)
3547 return 0; 3648 return 0;
3548 3649
@@ -3670,6 +3771,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3670 len &= PAGE_MASK; 3771 len &= PAGE_MASK;
3671 } 3772 }
3672 3773
3774 trace_access_lock(info->cpu);
3673 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3775 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3674 3776
3675 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { 3777 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
@@ -3717,6 +3819,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3717 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3819 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3718 } 3820 }
3719 3821
3822 trace_access_unlock(info->cpu);
3720 spd.nr_pages = i; 3823 spd.nr_pages = i;
3721 3824
3722 /* did we read anything? */ 3825 /* did we read anything? */
@@ -4153,6 +4256,8 @@ static __init int tracer_init_debugfs(void)
4153 struct dentry *d_tracer; 4256 struct dentry *d_tracer;
4154 int cpu; 4257 int cpu;
4155 4258
4259 trace_access_lock_init();
4260
4156 d_tracer = tracing_init_dentry(); 4261 d_tracer = tracing_init_dentry();
4157 4262
4158 trace_create_file("tracing_enabled", 0644, d_tracer, 4263 trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4176,10 +4281,10 @@ static __init int tracer_init_debugfs(void)
4176#ifdef CONFIG_TRACER_MAX_TRACE 4281#ifdef CONFIG_TRACER_MAX_TRACE
4177 trace_create_file("tracing_max_latency", 0644, d_tracer, 4282 trace_create_file("tracing_max_latency", 0644, d_tracer,
4178 &tracing_max_latency, &tracing_max_lat_fops); 4283 &tracing_max_latency, &tracing_max_lat_fops);
4284#endif
4179 4285
4180 trace_create_file("tracing_thresh", 0644, d_tracer, 4286 trace_create_file("tracing_thresh", 0644, d_tracer,
4181 &tracing_thresh, &tracing_max_lat_fops); 4287 &tracing_thresh, &tracing_max_lat_fops);
4182#endif
4183 4288
4184 trace_create_file("README", 0444, d_tracer, 4289 trace_create_file("README", 0444, d_tracer,
4185 NULL, &tracing_readme_fops); 4290 NULL, &tracing_readme_fops);
@@ -4387,9 +4492,6 @@ __init static int tracer_alloc_buffers(void)
4387 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) 4492 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
4388 goto out_free_buffer_mask; 4493 goto out_free_buffer_mask;
4389 4494
4390 if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
4391 goto out_free_tracing_cpumask;
4392
4393 /* To save memory, keep the ring buffer size to its minimum */ 4495 /* To save memory, keep the ring buffer size to its minimum */
4394 if (ring_buffer_expanded) 4496 if (ring_buffer_expanded)
4395 ring_buf_size = trace_buf_size; 4497 ring_buf_size = trace_buf_size;
@@ -4447,8 +4549,6 @@ __init static int tracer_alloc_buffers(void)
4447 return 0; 4549 return 0;
4448 4550
4449out_free_cpumask: 4551out_free_cpumask:
4450 free_cpumask_var(tracing_reader_cpumask);
4451out_free_tracing_cpumask:
4452 free_cpumask_var(tracing_cpumask); 4552 free_cpumask_var(tracing_cpumask);
4453out_free_buffer_mask: 4553out_free_buffer_mask:
4454 free_cpumask_var(tracing_buffer_mask); 4554 free_cpumask_var(tracing_buffer_mask);
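The comment block added to trace.c above describes the new reader serialization: a reader of one cpu buffer takes a shared rwsem plus that cpu's mutex, while a reader of all cpu buffers takes the rwsem exclusively, blocking every per-cpu reader at once. A stripped-down sketch of that scheme (SMP case only, placeholder names rather than the kernel's), roughly as the patch wires it up:

#include <linux/rwsem.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>

#define ALL_CPUS  -1   /* stands in for TRACE_PIPE_ALL_CPU */

static DECLARE_RWSEM(all_access_lock);
static DEFINE_PER_CPU(struct mutex, cpu_access_mutex);

static void access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_mutex, cpu));
}

static void access_lock(int cpu)
{
	if (cpu == ALL_CPUS) {
		/* whole-buffer reader: exclude every per-cpu reader */
		down_write(&all_access_lock);
	} else {
		/* per-cpu reader: first block whole-buffer readers, then
		 * serialize against other readers of the same cpu */
		down_read(&all_access_lock);
		mutex_lock(&per_cpu(cpu_access_mutex, cpu));
	}
}

static void access_unlock(int cpu)
{
	if (cpu == ALL_CPUS) {
		up_write(&all_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_mutex, cpu));
		up_read(&all_access_lock);
	}
}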
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4df6a77eb196..2825ef2c0b15 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -396,9 +396,10 @@ extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
396 396
397extern unsigned long nsecs_to_usecs(unsigned long nsecs); 397extern unsigned long nsecs_to_usecs(unsigned long nsecs);
398 398
399extern unsigned long tracing_thresh;
400
399#ifdef CONFIG_TRACER_MAX_TRACE 401#ifdef CONFIG_TRACER_MAX_TRACE
400extern unsigned long tracing_max_latency; 402extern unsigned long tracing_max_latency;
401extern unsigned long tracing_thresh;
402 403
403void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); 404void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
404void update_max_tr_single(struct trace_array *tr, 405void update_max_tr_single(struct trace_array *tr,
@@ -497,6 +498,7 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
497#ifdef CONFIG_DYNAMIC_FTRACE 498#ifdef CONFIG_DYNAMIC_FTRACE
498/* TODO: make this variable */ 499/* TODO: make this variable */
499#define FTRACE_GRAPH_MAX_FUNCS 32 500#define FTRACE_GRAPH_MAX_FUNCS 32
501extern int ftrace_graph_filter_enabled;
500extern int ftrace_graph_count; 502extern int ftrace_graph_count;
501extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; 503extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
502 504
@@ -504,7 +506,7 @@ static inline int ftrace_graph_addr(unsigned long addr)
504{ 506{
505 int i; 507 int i;
506 508
507 if (!ftrace_graph_count || test_tsk_trace_graph(current)) 509 if (!ftrace_graph_filter_enabled)
508 return 1; 510 return 1;
509 511
510 for (i = 0; i < ftrace_graph_count; i++) { 512 for (i = 0; i < ftrace_graph_count; i++) {
@@ -549,7 +551,7 @@ static inline int ftrace_trace_task(struct task_struct *task)
549 * struct trace_parser - servers for reading the user input separated by spaces 551 * struct trace_parser - servers for reading the user input separated by spaces
550 * @cont: set if the input is not complete - no final space char was found 552 * @cont: set if the input is not complete - no final space char was found
551 * @buffer: holds the parsed user input 553 * @buffer: holds the parsed user input
552 * @idx: user input lenght 554 * @idx: user input length
553 * @size: buffer size 555 * @size: buffer size
554 */ 556 */
555struct trace_parser { 557struct trace_parser {
@@ -791,7 +793,8 @@ extern const char *__stop___trace_bprintk_fmt[];
791 793
792#undef FTRACE_ENTRY 794#undef FTRACE_ENTRY
793#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ 795#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
794 extern struct ftrace_event_call event_##call; 796 extern struct ftrace_event_call \
797 __attribute__((__aligned__(4))) event_##call;
795#undef FTRACE_ENTRY_DUP 798#undef FTRACE_ENTRY_DUP
796#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ 799#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
797 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 800 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 4a194f08f88c..b9bc4d470177 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -307,8 +307,23 @@ static int annotated_branch_stat_cmp(void *p1, void *p2)
307 return -1; 307 return -1;
308 if (percent_a > percent_b) 308 if (percent_a > percent_b)
309 return 1; 309 return 1;
310 else 310
311 return 0; 311 if (a->incorrect < b->incorrect)
312 return -1;
313 if (a->incorrect > b->incorrect)
314 return 1;
315
316 /*
317 * Since the above shows worse (incorrect) cases
318 * first, we continue that by showing best (correct)
319 * cases last.
320 */
321 if (a->correct > b->correct)
322 return -1;
323 if (a->correct < b->correct)
324 return 1;
325
326 return 0;
312} 327}
313 328
314static struct tracer_stat annotated_branch_stats = { 329static struct tracer_stat annotated_branch_stats = {
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 84a3a7ba072a..9d589d8dcd1a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -13,6 +13,7 @@
13 * Tracer plugins will chose a default from these clocks. 13 * Tracer plugins will chose a default from these clocks.
14 */ 14 */
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/irqflags.h>
16#include <linux/hardirq.h> 17#include <linux/hardirq.h>
17#include <linux/module.h> 18#include <linux/module.h>
18#include <linux/percpu.h> 19#include <linux/percpu.h>
@@ -83,7 +84,7 @@ u64 notrace trace_clock_global(void)
83 int this_cpu; 84 int this_cpu;
84 u64 now; 85 u64 now;
85 86
86 raw_local_irq_save(flags); 87 local_irq_save(flags);
87 88
88 this_cpu = raw_smp_processor_id(); 89 this_cpu = raw_smp_processor_id();
89 now = cpu_clock(this_cpu); 90 now = cpu_clock(this_cpu);
@@ -109,7 +110,7 @@ u64 notrace trace_clock_global(void)
109 arch_spin_unlock(&trace_clock_struct.lock); 110 arch_spin_unlock(&trace_clock_struct.lock);
110 111
111 out: 112 out:
112 raw_local_irq_restore(flags); 113 local_irq_restore(flags);
113 114
114 return now; 115 return now;
115} 116}
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_perf.c
index f0d693005075..0565bb42566f 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_perf.c
@@ -1,32 +1,41 @@
1/* 1/*
2 * trace event based perf counter profiling 2 * trace event based perf event profiling/tracing
3 * 3 *
4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com> 4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
5 * 5 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include "trace.h" 10#include "trace.h"
11 11
12DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
13EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);
14
15EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
12 16
13static char *perf_trace_buf; 17static char *perf_trace_buf;
14static char *perf_trace_buf_nmi; 18static char *perf_trace_buf_nmi;
15 19
16typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; 20/*
21 * Force it to be aligned to unsigned long to avoid misaligned accesses
22 * suprises
23 */
24typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
25 perf_trace_t;
17 26
18/* Count the events in use (per event id, not per instance) */ 27/* Count the events in use (per event id, not per instance) */
19static int total_profile_count; 28static int total_ref_count;
20 29
21static int ftrace_profile_enable_event(struct ftrace_event_call *event) 30static int perf_trace_event_enable(struct ftrace_event_call *event)
22{ 31{
23 char *buf; 32 char *buf;
24 int ret = -ENOMEM; 33 int ret = -ENOMEM;
25 34
26 if (event->profile_count++ > 0) 35 if (event->perf_refcount++ > 0)
27 return 0; 36 return 0;
28 37
29 if (!total_profile_count) { 38 if (!total_ref_count) {
30 buf = (char *)alloc_percpu(perf_trace_t); 39 buf = (char *)alloc_percpu(perf_trace_t);
31 if (!buf) 40 if (!buf)
32 goto fail_buf; 41 goto fail_buf;
@@ -40,35 +49,35 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
40 rcu_assign_pointer(perf_trace_buf_nmi, buf); 49 rcu_assign_pointer(perf_trace_buf_nmi, buf);
41 } 50 }
42 51
43 ret = event->profile_enable(event); 52 ret = event->perf_event_enable(event);
44 if (!ret) { 53 if (!ret) {
45 total_profile_count++; 54 total_ref_count++;
46 return 0; 55 return 0;
47 } 56 }
48 57
49fail_buf_nmi: 58fail_buf_nmi:
50 if (!total_profile_count) { 59 if (!total_ref_count) {
51 free_percpu(perf_trace_buf_nmi); 60 free_percpu(perf_trace_buf_nmi);
52 free_percpu(perf_trace_buf); 61 free_percpu(perf_trace_buf);
53 perf_trace_buf_nmi = NULL; 62 perf_trace_buf_nmi = NULL;
54 perf_trace_buf = NULL; 63 perf_trace_buf = NULL;
55 } 64 }
56fail_buf: 65fail_buf:
57 event->profile_count--; 66 event->perf_refcount--;
58 67
59 return ret; 68 return ret;
60} 69}
61 70
62int ftrace_profile_enable(int event_id) 71int perf_trace_enable(int event_id)
63{ 72{
64 struct ftrace_event_call *event; 73 struct ftrace_event_call *event;
65 int ret = -EINVAL; 74 int ret = -EINVAL;
66 75
67 mutex_lock(&event_mutex); 76 mutex_lock(&event_mutex);
68 list_for_each_entry(event, &ftrace_events, list) { 77 list_for_each_entry(event, &ftrace_events, list) {
69 if (event->id == event_id && event->profile_enable && 78 if (event->id == event_id && event->perf_event_enable &&
70 try_module_get(event->mod)) { 79 try_module_get(event->mod)) {
71 ret = ftrace_profile_enable_event(event); 80 ret = perf_trace_event_enable(event);
72 break; 81 break;
73 } 82 }
74 } 83 }
@@ -77,16 +86,16 @@ int ftrace_profile_enable(int event_id)
77 return ret; 86 return ret;
78} 87}
79 88
80static void ftrace_profile_disable_event(struct ftrace_event_call *event) 89static void perf_trace_event_disable(struct ftrace_event_call *event)
81{ 90{
82 char *buf, *nmi_buf; 91 char *buf, *nmi_buf;
83 92
84 if (--event->profile_count > 0) 93 if (--event->perf_refcount > 0)
85 return; 94 return;
86 95
87 event->profile_disable(event); 96 event->perf_event_disable(event);
88 97
89 if (!--total_profile_count) { 98 if (!--total_ref_count) {
90 buf = perf_trace_buf; 99 buf = perf_trace_buf;
91 rcu_assign_pointer(perf_trace_buf, NULL); 100 rcu_assign_pointer(perf_trace_buf, NULL);
92 101
@@ -104,14 +113,14 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
104 } 113 }
105} 114}
106 115
107void ftrace_profile_disable(int event_id) 116void perf_trace_disable(int event_id)
108{ 117{
109 struct ftrace_event_call *event; 118 struct ftrace_event_call *event;
110 119
111 mutex_lock(&event_mutex); 120 mutex_lock(&event_mutex);
112 list_for_each_entry(event, &ftrace_events, list) { 121 list_for_each_entry(event, &ftrace_events, list) {
113 if (event->id == event_id) { 122 if (event->id == event_id) {
114 ftrace_profile_disable_event(event); 123 perf_trace_event_disable(event);
115 module_put(event->mod); 124 module_put(event->mod);
116 break; 125 break;
117 } 126 }
@@ -119,13 +128,15 @@ void ftrace_profile_disable(int event_id)
119 mutex_unlock(&event_mutex); 128 mutex_unlock(&event_mutex);
120} 129}
121 130
122__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, 131__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
123 int *rctxp, unsigned long *irq_flags) 132 int *rctxp, unsigned long *irq_flags)
124{ 133{
125 struct trace_entry *entry; 134 struct trace_entry *entry;
126 char *trace_buf, *raw_data; 135 char *trace_buf, *raw_data;
127 int pc, cpu; 136 int pc, cpu;
128 137
138 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
139
129 pc = preempt_count(); 140 pc = preempt_count();
130 141
131 /* Protect the per cpu buffer, begin the rcu read side */ 142 /* Protect the per cpu buffer, begin the rcu read side */
@@ -138,9 +149,9 @@ __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
138 cpu = smp_processor_id(); 149 cpu = smp_processor_id();
139 150
140 if (in_nmi()) 151 if (in_nmi())
141 trace_buf = rcu_dereference(perf_trace_buf_nmi); 152 trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
142 else 153 else
143 trace_buf = rcu_dereference(perf_trace_buf); 154 trace_buf = rcu_dereference_sched(perf_trace_buf);
144 155
145 if (!trace_buf) 156 if (!trace_buf)
146 goto err; 157 goto err;
@@ -148,7 +159,7 @@ __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
148 raw_data = per_cpu_ptr(trace_buf, cpu); 159 raw_data = per_cpu_ptr(trace_buf, cpu);
149 160
150 /* zero the dead bytes from align to not leak stack to user */ 161 /* zero the dead bytes from align to not leak stack to user */
151 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 162 memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
152 163
153 entry = (struct trace_entry *)raw_data; 164 entry = (struct trace_entry *)raw_data;
154 tracing_generic_entry_update(entry, *irq_flags, pc); 165 tracing_generic_entry_update(entry, *irq_flags, pc);
@@ -161,4 +172,4 @@ err_recursion:
161 local_irq_restore(*irq_flags); 172 local_irq_restore(*irq_flags);
162 return NULL; 173 return NULL;
163} 174}
164EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare); 175EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
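
The renamed helpers keep the old contract: a handler reserves a per-cpu record with perf_trace_buf_prepare(), fills it in, and hands it off with perf_trace_buf_submit(), which now also takes the pt_regs pointer. A condensed sketch of that calling pattern, modeled on the kprobe and syscall handlers further down (the event name and entry layout are placeholders, not part of this change):

static void my_event_perf_func(struct ftrace_event_call *call,
                               struct pt_regs *regs)
{
    struct my_event_entry *entry;   /* hypothetical record layout */
    unsigned long irq_flags;
    int size, rctx;

    /* round up so the u32 size header keeps the payload u64-aligned */
    size = ALIGN(sizeof(*entry) + sizeof(u32), sizeof(u64)) - sizeof(u32);
    if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "perf buffer not large enough"))
        return;

    entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
    if (!entry)
        return;     /* buffers are gone or recursion was detected */

    /* ... fill in the event-specific fields of *entry ... */

    perf_trace_buf_submit(entry, size, rctx, 0, 1, irq_flags, regs);
}
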
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 189b09baf4fb..c697c7043349 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -15,6 +15,7 @@
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/slab.h>
18#include <linux/delay.h> 19#include <linux/delay.h>
19 20
20#include <asm/setup.h> 21#include <asm/setup.h>
@@ -60,10 +61,8 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
60 return 0; 61 return 0;
61 62
62err: 63err:
63 if (field) { 64 if (field)
64 kfree(field->name); 65 kfree(field->name);
65 kfree(field->type);
66 }
67 kfree(field); 66 kfree(field);
68 67
69 return -ENOMEM; 68 return -ENOMEM;
@@ -520,41 +519,16 @@ out:
520 return ret; 519 return ret;
521} 520}
522 521
523extern char *__bad_type_size(void);
524
525#undef FIELD
526#define FIELD(type, name) \
527 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
528 #type, "common_" #name, offsetof(typeof(field), name), \
529 sizeof(field.name), is_signed_type(type)
530
531static int trace_write_header(struct trace_seq *s)
532{
533 struct trace_entry field;
534
535 /* struct trace_entry */
536 return trace_seq_printf(s,
537 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
538 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
539 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
540 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
541 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
542 "\n",
543 FIELD(unsigned short, type),
544 FIELD(unsigned char, flags),
545 FIELD(unsigned char, preempt_count),
546 FIELD(int, pid),
547 FIELD(int, lock_depth));
548}
549
550static ssize_t 522static ssize_t
551event_format_read(struct file *filp, char __user *ubuf, size_t cnt, 523event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
552 loff_t *ppos) 524 loff_t *ppos)
553{ 525{
554 struct ftrace_event_call *call = filp->private_data; 526 struct ftrace_event_call *call = filp->private_data;
527 struct ftrace_event_field *field;
555 struct trace_seq *s; 528 struct trace_seq *s;
529 int common_field_count = 5;
556 char *buf; 530 char *buf;
557 int r; 531 int r = 0;
558 532
559 if (*ppos) 533 if (*ppos)
560 return 0; 534 return 0;
@@ -565,14 +539,48 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
565 539
566 trace_seq_init(s); 540 trace_seq_init(s);
567 541
568 /* If any of the first writes fail, so will the show_format. */
569
570 trace_seq_printf(s, "name: %s\n", call->name); 542 trace_seq_printf(s, "name: %s\n", call->name);
571 trace_seq_printf(s, "ID: %d\n", call->id); 543 trace_seq_printf(s, "ID: %d\n", call->id);
572 trace_seq_printf(s, "format:\n"); 544 trace_seq_printf(s, "format:\n");
573 trace_write_header(s);
574 545
575 r = call->show_format(call, s); 546 list_for_each_entry_reverse(field, &call->fields, link) {
547 /*
548 * Smartly shows the array type(except dynamic array).
549 * Normal:
550 * field:TYPE VAR
551 * If TYPE := TYPE[LEN], it is shown:
552 * field:TYPE VAR[LEN]
553 */
554 const char *array_descriptor = strchr(field->type, '[');
555
556 if (!strncmp(field->type, "__data_loc", 10))
557 array_descriptor = NULL;
558
559 if (!array_descriptor) {
560 r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
561 "\tsize:%u;\tsigned:%d;\n",
562 field->type, field->name, field->offset,
563 field->size, !!field->is_signed);
564 } else {
565 r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
566 "\tsize:%u;\tsigned:%d;\n",
567 (int)(array_descriptor - field->type),
568 field->type, field->name,
569 array_descriptor, field->offset,
570 field->size, !!field->is_signed);
571 }
572
573 if (--common_field_count == 0)
574 r = trace_seq_printf(s, "\n");
575
576 if (!r)
577 break;
578 }
579
580 if (r)
581 r = trace_seq_printf(s, "\nprint fmt: %s\n",
582 call->print_fmt);
583
576 if (!r) { 584 if (!r) {
577 /* 585 /*
578 * ug! The format output is bigger than a PAGE!! 586 * ug! The format output is bigger than a PAGE!!
@@ -931,7 +939,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
931 trace_create_file("enable", 0644, call->dir, call, 939 trace_create_file("enable", 0644, call->dir, call,
932 enable); 940 enable);
933 941
934 if (call->id && call->profile_enable) 942 if (call->id && call->perf_event_enable)
935 trace_create_file("id", 0444, call->dir, call, 943 trace_create_file("id", 0444, call->dir, call,
936 id); 944 id);
937 945
@@ -948,10 +956,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
948 filter); 956 filter);
949 } 957 }
950 958
951 /* A trace may not want to export its format */
952 if (!call->show_format)
953 return 0;
954
955 trace_create_file("format", 0444, call->dir, call, 959 trace_create_file("format", 0444, call->dir, call,
956 format); 960 format);
957 961
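
With show_format() gone, the format file is now produced entirely from the event's field list plus the stored print_fmt string. For a hypothetical event with a single int field, the loop above emits output of roughly this shape (offsets and sizes are illustrative; the blank line after the fifth entry comes from the common_field_count countdown):

name: my_event
ID: 42
format:
	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
	field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
	field:int common_pid;	offset:4;	size:4;	signed:1;
	field:int common_lock_depth;	offset:8;	size:4;	signed:1;

	field:int my_field;	offset:12;	size:4;	signed:1;

print fmt: "my_field=%d", REC->my_field
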
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 4615f62a04f1..88c0b6dbd7fe 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -22,6 +22,7 @@
22#include <linux/ctype.h> 22#include <linux/ctype.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/perf_event.h> 24#include <linux/perf_event.h>
25#include <linux/slab.h>
25 26
26#include "trace.h" 27#include "trace.h"
27#include "trace_output.h" 28#include "trace_output.h"
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d4fa5dc1ee4e..e091f64ba6ce 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -62,78 +62,6 @@ static void __always_unused ____ftrace_check_##name(void) \
62 62
63#include "trace_entries.h" 63#include "trace_entries.h"
64 64
65
66#undef __field
67#define __field(type, item) \
68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
69 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
70 offsetof(typeof(field), item), \
71 sizeof(field.item), is_signed_type(type)); \
72 if (!ret) \
73 return 0;
74
75#undef __field_desc
76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item), \
81 is_signed_type(type)); \
82 if (!ret) \
83 return 0;
84
85#undef __array
86#define __array(type, item, len) \
87 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
88 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
89 offsetof(typeof(field), item), \
90 sizeof(field.item), is_signed_type(type)); \
91 if (!ret) \
92 return 0;
93
94#undef __array_desc
95#define __array_desc(type, container, item, len) \
96 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
97 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
98 offsetof(typeof(field), container.item), \
99 sizeof(field.container.item), \
100 is_signed_type(type)); \
101 if (!ret) \
102 return 0;
103
104#undef __dynamic_array
105#define __dynamic_array(type, item) \
106 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
107 "offset:%zu;\tsize:0;\tsigned:%u;\n", \
108 offsetof(typeof(field), item), \
109 is_signed_type(type)); \
110 if (!ret) \
111 return 0;
112
113#undef F_printk
114#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
115
116#undef __entry
117#define __entry REC
118
119#undef FTRACE_ENTRY
120#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
121static int \
122ftrace_format_##name(struct ftrace_event_call *unused, \
123 struct trace_seq *s) \
124{ \
125 struct struct_name field __attribute__((unused)); \
126 int ret = 0; \
127 \
128 tstruct; \
129 \
130 trace_seq_printf(s, "\nprint fmt: " print); \
131 \
132 return ret; \
133}
134
135#include "trace_entries.h"
136
137#undef __field 65#undef __field
138#define __field(type, item) \ 66#define __field(type, item) \
139 ret = trace_define_field(event_call, #type, #item, \ 67 ret = trace_define_field(event_call, #type, #item, \
@@ -175,7 +103,12 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
175 return ret; 103 return ret;
176 104
177#undef __dynamic_array 105#undef __dynamic_array
178#define __dynamic_array(type, item) 106#define __dynamic_array(type, item) \
107 ret = trace_define_field(event_call, #type, #item, \
108 offsetof(typeof(field), item), \
109 0, is_signed_type(type), FILTER_OTHER);\
110 if (ret) \
111 return ret;
179 112
180#undef FTRACE_ENTRY 113#undef FTRACE_ENTRY
181#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ 114#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
@@ -198,6 +131,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
198 return 0; 131 return 0;
199} 132}
200 133
134#undef __entry
135#define __entry REC
136
201#undef __field 137#undef __field
202#define __field(type, item) 138#define __field(type, item)
203 139
@@ -213,6 +149,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
213#undef __dynamic_array 149#undef __dynamic_array
214#define __dynamic_array(type, item) 150#define __dynamic_array(type, item)
215 151
152#undef F_printk
153#define F_printk(fmt, args...) #fmt ", " __stringify(args)
154
216#undef FTRACE_ENTRY 155#undef FTRACE_ENTRY
217#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 156#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
218 \ 157 \
@@ -223,7 +162,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
223 .id = type, \ 162 .id = type, \
224 .system = __stringify(TRACE_SYSTEM), \ 163 .system = __stringify(TRACE_SYSTEM), \
225 .raw_init = ftrace_raw_init_event, \ 164 .raw_init = ftrace_raw_init_event, \
226 .show_format = ftrace_format_##call, \ 165 .print_fmt = print, \
227 .define_fields = ftrace_define_fields_##call, \ 166 .define_fields = ftrace_define_fields_##call, \
228}; \ 167}; \
229 168
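
The per-event ftrace_format_##name() generators are gone; FTRACE_ENTRY() now simply stores the stringified print statement in .print_fmt. The reworked F_printk() leans on preprocessor stringification, which is easy to misread, so here is a small stand-alone C illustration of what actually ends up in the string (modeled on the function-entry event; nothing here is kernel code):

#include <stdio.h>

#define __stringify_1(x...)  #x
#define __stringify(x...)    __stringify_1(x)

/* same shape as the new definition: quote the format, then the args */
#define F_printk(fmt, args...)  #fmt ", " __stringify(args)

#define __entry REC  /* REC is never evaluated, only stringified */

int main(void)
{
    const char *print_fmt =
        F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip);

    /* prints: " %lx <-- %lx", REC->ip, REC->parent_ip */
    puts(print_fmt);
    return 0;
}
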
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b1342c5d37cf..9aed1a5cf553 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -9,6 +9,7 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <linux/slab.h>
12#include <linux/fs.h> 13#include <linux/fs.h>
13 14
14#include "trace.h" 15#include "trace.h"
@@ -18,6 +19,7 @@ struct fgraph_cpu_data {
18 pid_t last_pid; 19 pid_t last_pid;
19 int depth; 20 int depth;
20 int ignore; 21 int ignore;
22 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
21}; 23};
22 24
23struct fgraph_data { 25struct fgraph_data {
@@ -187,7 +189,7 @@ static int __trace_graph_entry(struct trace_array *tr,
187 struct ring_buffer *buffer = tr->buffer; 189 struct ring_buffer *buffer = tr->buffer;
188 struct ftrace_graph_ent_entry *entry; 190 struct ftrace_graph_ent_entry *entry;
189 191
190 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) 192 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
191 return 0; 193 return 0;
192 194
193 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, 195 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -212,13 +214,11 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
212 int cpu; 214 int cpu;
213 int pc; 215 int pc;
214 216
215 if (unlikely(!tr))
216 return 0;
217
218 if (!ftrace_trace_task(current)) 217 if (!ftrace_trace_task(current))
219 return 0; 218 return 0;
220 219
221 if (!ftrace_graph_addr(trace->func)) 220 /* trace it when it is-nested-in or is a function enabled. */
221 if (!(trace->depth || ftrace_graph_addr(trace->func)))
222 return 0; 222 return 0;
223 223
224 local_irq_save(flags); 224 local_irq_save(flags);
@@ -231,9 +231,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
231 } else { 231 } else {
232 ret = 0; 232 ret = 0;
233 } 233 }
234 /* Only do the atomic if it is not already set */
235 if (!test_tsk_trace_graph(current))
236 set_tsk_trace_graph(current);
237 234
238 atomic_dec(&data->disabled); 235 atomic_dec(&data->disabled);
239 local_irq_restore(flags); 236 local_irq_restore(flags);
@@ -241,6 +238,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
241 return ret; 238 return ret;
242} 239}
243 240
241int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
242{
243 if (tracing_thresh)
244 return 1;
245 else
246 return trace_graph_entry(trace);
247}
248
244static void __trace_graph_return(struct trace_array *tr, 249static void __trace_graph_return(struct trace_array *tr,
245 struct ftrace_graph_ret *trace, 250 struct ftrace_graph_ret *trace,
246 unsigned long flags, 251 unsigned long flags,
@@ -251,7 +256,7 @@ static void __trace_graph_return(struct trace_array *tr,
251 struct ring_buffer *buffer = tr->buffer; 256 struct ring_buffer *buffer = tr->buffer;
252 struct ftrace_graph_ret_entry *entry; 257 struct ftrace_graph_ret_entry *entry;
253 258
254 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) 259 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
255 return; 260 return;
256 261
257 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, 262 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@ -281,19 +286,39 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
281 pc = preempt_count(); 286 pc = preempt_count();
282 __trace_graph_return(tr, trace, flags, pc); 287 __trace_graph_return(tr, trace, flags, pc);
283 } 288 }
284 if (!trace->depth)
285 clear_tsk_trace_graph(current);
286 atomic_dec(&data->disabled); 289 atomic_dec(&data->disabled);
287 local_irq_restore(flags); 290 local_irq_restore(flags);
288} 291}
289 292
293void set_graph_array(struct trace_array *tr)
294{
295 graph_array = tr;
296
297 /* Make graph_array visible before we start tracing */
298
299 smp_mb();
300}
301
302void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
303{
304 if (tracing_thresh &&
305 (trace->rettime - trace->calltime < tracing_thresh))
306 return;
307 else
308 trace_graph_return(trace);
309}
310
290static int graph_trace_init(struct trace_array *tr) 311static int graph_trace_init(struct trace_array *tr)
291{ 312{
292 int ret; 313 int ret;
293 314
294 graph_array = tr; 315 set_graph_array(tr);
295 ret = register_ftrace_graph(&trace_graph_return, 316 if (tracing_thresh)
296 &trace_graph_entry); 317 ret = register_ftrace_graph(&trace_graph_thresh_return,
318 &trace_graph_thresh_entry);
319 else
320 ret = register_ftrace_graph(&trace_graph_return,
321 &trace_graph_entry);
297 if (ret) 322 if (ret)
298 return ret; 323 return ret;
299 tracing_start_cmdline_record(); 324 tracing_start_cmdline_record();
@@ -301,11 +326,6 @@ static int graph_trace_init(struct trace_array *tr)
301 return 0; 326 return 0;
302} 327}
303 328
304void set_graph_array(struct trace_array *tr)
305{
306 graph_array = tr;
307}
308
309static void graph_trace_reset(struct trace_array *tr) 329static void graph_trace_reset(struct trace_array *tr)
310{ 330{
311 tracing_stop_cmdline_record(); 331 tracing_stop_cmdline_record();
@@ -673,15 +693,21 @@ print_graph_entry_leaf(struct trace_iterator *iter,
673 duration = graph_ret->rettime - graph_ret->calltime; 693 duration = graph_ret->rettime - graph_ret->calltime;
674 694
675 if (data) { 695 if (data) {
696 struct fgraph_cpu_data *cpu_data;
676 int cpu = iter->cpu; 697 int cpu = iter->cpu;
677 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 698
699 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
678 700
679 /* 701 /*
680 * Comments display at + 1 to depth. Since 702 * Comments display at + 1 to depth. Since
681 * this is a leaf function, keep the comments 703 * this is a leaf function, keep the comments
682 * equal to this depth. 704 * equal to this depth.
683 */ 705 */
684 *depth = call->depth - 1; 706 cpu_data->depth = call->depth - 1;
707
708 /* No need to keep this function around for this depth */
709 if (call->depth < FTRACE_RETFUNC_DEPTH)
710 cpu_data->enter_funcs[call->depth] = 0;
685 } 711 }
686 712
687 /* Overhead */ 713 /* Overhead */
@@ -721,10 +747,15 @@ print_graph_entry_nested(struct trace_iterator *iter,
721 int i; 747 int i;
722 748
723 if (data) { 749 if (data) {
750 struct fgraph_cpu_data *cpu_data;
724 int cpu = iter->cpu; 751 int cpu = iter->cpu;
725 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
726 752
727 *depth = call->depth; 753 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
754 cpu_data->depth = call->depth;
755
756 /* Save this function pointer to see if the exit matches */
757 if (call->depth < FTRACE_RETFUNC_DEPTH)
758 cpu_data->enter_funcs[call->depth] = call->func;
728 } 759 }
729 760
730 /* No overhead */ 761 /* No overhead */
@@ -854,19 +885,28 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
854 struct fgraph_data *data = iter->private; 885 struct fgraph_data *data = iter->private;
855 pid_t pid = ent->pid; 886 pid_t pid = ent->pid;
856 int cpu = iter->cpu; 887 int cpu = iter->cpu;
888 int func_match = 1;
857 int ret; 889 int ret;
858 int i; 890 int i;
859 891
860 if (data) { 892 if (data) {
893 struct fgraph_cpu_data *cpu_data;
861 int cpu = iter->cpu; 894 int cpu = iter->cpu;
862 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 895
896 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
863 897
864 /* 898 /*
865 * Comments display at + 1 to depth. This is the 899 * Comments display at + 1 to depth. This is the
866 * return from a function, we now want the comments 900 * return from a function, we now want the comments
867 * to display at the same level of the bracket. 901 * to display at the same level of the bracket.
868 */ 902 */
869 *depth = trace->depth - 1; 903 cpu_data->depth = trace->depth - 1;
904
905 if (trace->depth < FTRACE_RETFUNC_DEPTH) {
906 if (cpu_data->enter_funcs[trace->depth] != trace->func)
907 func_match = 0;
908 cpu_data->enter_funcs[trace->depth] = 0;
909 }
870 } 910 }
871 911
872 if (print_graph_prologue(iter, s, 0, 0)) 912 if (print_graph_prologue(iter, s, 0, 0))
@@ -891,9 +931,21 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
891 return TRACE_TYPE_PARTIAL_LINE; 931 return TRACE_TYPE_PARTIAL_LINE;
892 } 932 }
893 933
894 ret = trace_seq_printf(s, "}\n"); 934 /*
895 if (!ret) 935 * If the return function does not have a matching entry,
896 return TRACE_TYPE_PARTIAL_LINE; 936 * then the entry was lost. Instead of just printing
937 * the '}' and letting the user guess what function this
938 * belongs to, write out the function name.
939 */
940 if (func_match) {
941 ret = trace_seq_printf(s, "}\n");
942 if (!ret)
943 return TRACE_TYPE_PARTIAL_LINE;
944 } else {
945 ret = trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
946 if (!ret)
947 return TRACE_TYPE_PARTIAL_LINE;
948 }
897 949
898 /* Overrun */ 950 /* Overrun */
899 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { 951 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
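
The new enter_funcs[] bookkeeping is what lets print_graph_return() tell a genuine closing brace from a return whose entry event was lost: the function address is remembered at its call depth on entry, then compared and cleared on return. A stand-alone sketch of that depth-indexed matching outside the tracer (the depth bound and addresses are only illustrative):

#include <stdio.h>

#define MAX_DEPTH 50  /* stands in for FTRACE_RETFUNC_DEPTH */

static unsigned long enter_funcs[MAX_DEPTH];

/* entry side: remember which function opened this depth */
static void on_entry(int depth, unsigned long func)
{
    if (depth < MAX_DEPTH)
        enter_funcs[depth] = func;
}

/* return side: 1 means print a bare "}", 0 means annotate it with the name */
static int on_return(int depth, unsigned long func)
{
    int func_match = 1;

    if (depth < MAX_DEPTH) {
        if (enter_funcs[depth] != func)
            func_match = 0;
        enter_funcs[depth] = 0;
    }
    return func_match;
}

int main(void)
{
    on_entry(0, 0x1000);
    printf("%d\n", on_return(0, 0x1000));  /* 1: entry was seen */
    printf("%d\n", on_return(0, 0x2000));  /* 0: entry lost, print the name */
    return 0;
}
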
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 6178abf3637e..1251e367bae9 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -635,12 +635,12 @@ static int create_trace_probe(int argc, char **argv)
635 event = strchr(group, '/') + 1; 635 event = strchr(group, '/') + 1;
636 event[-1] = '\0'; 636 event[-1] = '\0';
637 if (strlen(group) == 0) { 637 if (strlen(group) == 0) {
638 pr_info("Group name is not specifiled\n"); 638 pr_info("Group name is not specified\n");
639 return -EINVAL; 639 return -EINVAL;
640 } 640 }
641 } 641 }
642 if (strlen(event) == 0) { 642 if (strlen(event) == 0) {
643 pr_info("Event name is not specifiled\n"); 643 pr_info("Event name is not specified\n");
644 return -EINVAL; 644 return -EINVAL;
645 } 645 }
646 } 646 }
@@ -673,7 +673,7 @@ static int create_trace_probe(int argc, char **argv)
673 return -EINVAL; 673 return -EINVAL;
674 } 674 }
675 /* an address specified */ 675 /* an address specified */
676 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); 676 ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
677 if (ret) { 677 if (ret) {
678 pr_info("Failed to parse address.\n"); 678 pr_info("Failed to parse address.\n");
679 return ret; 679 return ret;
@@ -1155,86 +1155,66 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1155 return 0; 1155 return 0;
1156} 1156}
1157 1157
1158static int __probe_event_show_format(struct trace_seq *s, 1158static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1159 struct trace_probe *tp, const char *fmt,
1160 const char *arg)
1161{ 1159{
1162 int i; 1160 int i;
1161 int pos = 0;
1163 1162
1164 /* Show format */ 1163 const char *fmt, *arg;
1165 if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1166 return 0;
1167 1164
1168 for (i = 0; i < tp->nr_args; i++) 1165 if (!probe_is_return(tp)) {
1169 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name)) 1166 fmt = "(%lx)";
1170 return 0; 1167 arg = "REC->" FIELD_STRING_IP;
1168 } else {
1169 fmt = "(%lx <- %lx)";
1170 arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
1171 }
1171 1172
1172 if (!trace_seq_printf(s, "\", %s", arg)) 1173 /* When len=0, we just calculate the needed length */
1173 return 0; 1174#define LEN_OR_ZERO (len ? len - pos : 0)
1174 1175
1175 for (i = 0; i < tp->nr_args; i++) 1176 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
1176 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1177 return 0;
1178 1177
1179 return trace_seq_puts(s, "\n"); 1178 for (i = 0; i < tp->nr_args; i++) {
1180} 1179 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx",
1180 tp->args[i].name);
1181 }
1181 1182
1182#undef SHOW_FIELD 1183 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
1183#define SHOW_FIELD(type, item, name) \
1184 do { \
1185 ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \
1186 "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\
1187 (unsigned int)offsetof(typeof(field), item),\
1188 (unsigned int)sizeof(type), \
1189 is_signed_type(type)); \
1190 if (!ret) \
1191 return 0; \
1192 } while (0)
1193 1184
1194static int kprobe_event_show_format(struct ftrace_event_call *call, 1185 for (i = 0; i < tp->nr_args; i++) {
1195 struct trace_seq *s) 1186 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
1196{ 1187 tp->args[i].name);
1197 struct kprobe_trace_entry field __attribute__((unused)); 1188 }
1198 int ret, i;
1199 struct trace_probe *tp = (struct trace_probe *)call->data;
1200
1201 SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP);
1202 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1203 1189
1204 /* Show fields */ 1190#undef LEN_OR_ZERO
1205 for (i = 0; i < tp->nr_args; i++)
1206 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1207 trace_seq_puts(s, "\n");
1208 1191
1209 return __probe_event_show_format(s, tp, "(%lx)", 1192 /* return the length of print_fmt */
1210 "REC->" FIELD_STRING_IP); 1193 return pos;
1211} 1194}
1212 1195
1213static int kretprobe_event_show_format(struct ftrace_event_call *call, 1196static int set_print_fmt(struct trace_probe *tp)
1214 struct trace_seq *s)
1215{ 1197{
1216 struct kretprobe_trace_entry field __attribute__((unused)); 1198 int len;
1217 int ret, i; 1199 char *print_fmt;
1218 struct trace_probe *tp = (struct trace_probe *)call->data;
1219 1200
1220 SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC); 1201 /* First: called with 0 length to calculate the needed length */
1221 SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP); 1202 len = __set_print_fmt(tp, NULL, 0);
1222 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); 1203 print_fmt = kmalloc(len + 1, GFP_KERNEL);
1204 if (!print_fmt)
1205 return -ENOMEM;
1223 1206
1224 /* Show fields */ 1207 /* Second: actually write the @print_fmt */
1225 for (i = 0; i < tp->nr_args; i++) 1208 __set_print_fmt(tp, print_fmt, len + 1);
1226 SHOW_FIELD(unsigned long, args[i], tp->args[i].name); 1209 tp->call.print_fmt = print_fmt;
1227 trace_seq_puts(s, "\n");
1228 1210
1229 return __probe_event_show_format(s, tp, "(%lx <- %lx)", 1211 return 0;
1230 "REC->" FIELD_STRING_FUNC
1231 ", REC->" FIELD_STRING_RETIP);
1232} 1212}
1233 1213
1234#ifdef CONFIG_PERF_EVENTS 1214#ifdef CONFIG_PERF_EVENTS
1235 1215
1236/* Kprobe profile handler */ 1216/* Kprobe profile handler */
1237static __kprobes void kprobe_profile_func(struct kprobe *kp, 1217static __kprobes void kprobe_perf_func(struct kprobe *kp,
1238 struct pt_regs *regs) 1218 struct pt_regs *regs)
1239{ 1219{
1240 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1220 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
@@ -1247,11 +1227,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp,
1247 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); 1227 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1248 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1228 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1249 size -= sizeof(u32); 1229 size -= sizeof(u32);
1250 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1230 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1251 "profile buffer not large enough")) 1231 "profile buffer not large enough"))
1252 return; 1232 return;
1253 1233
1254 entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); 1234 entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
1255 if (!entry) 1235 if (!entry)
1256 return; 1236 return;
1257 1237
@@ -1260,11 +1240,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp,
1260 for (i = 0; i < tp->nr_args; i++) 1240 for (i = 0; i < tp->nr_args; i++)
1261 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1241 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1262 1242
1263 ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); 1243 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);
1264} 1244}
1265 1245
1266/* Kretprobe profile handler */ 1246/* Kretprobe profile handler */
1267static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, 1247static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1268 struct pt_regs *regs) 1248 struct pt_regs *regs)
1269{ 1249{
1270 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1250 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@@ -1277,11 +1257,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
1277 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); 1257 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1278 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1258 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1279 size -= sizeof(u32); 1259 size -= sizeof(u32);
1280 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1260 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1281 "profile buffer not large enough")) 1261 "profile buffer not large enough"))
1282 return; 1262 return;
1283 1263
1284 entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); 1264 entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
1285 if (!entry) 1265 if (!entry)
1286 return; 1266 return;
1287 1267
@@ -1291,10 +1271,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
1291 for (i = 0; i < tp->nr_args; i++) 1271 for (i = 0; i < tp->nr_args; i++)
1292 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1272 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1293 1273
1294 ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); 1274 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
1275 irq_flags, regs);
1295} 1276}
1296 1277
1297static int probe_profile_enable(struct ftrace_event_call *call) 1278static int probe_perf_enable(struct ftrace_event_call *call)
1298{ 1279{
1299 struct trace_probe *tp = (struct trace_probe *)call->data; 1280 struct trace_probe *tp = (struct trace_probe *)call->data;
1300 1281
@@ -1306,7 +1287,7 @@ static int probe_profile_enable(struct ftrace_event_call *call)
1306 return enable_kprobe(&tp->rp.kp); 1287 return enable_kprobe(&tp->rp.kp);
1307} 1288}
1308 1289
1309static void probe_profile_disable(struct ftrace_event_call *call) 1290static void probe_perf_disable(struct ftrace_event_call *call)
1310{ 1291{
1311 struct trace_probe *tp = (struct trace_probe *)call->data; 1292 struct trace_probe *tp = (struct trace_probe *)call->data;
1312 1293
@@ -1331,7 +1312,7 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1331 kprobe_trace_func(kp, regs); 1312 kprobe_trace_func(kp, regs);
1332#ifdef CONFIG_PERF_EVENTS 1313#ifdef CONFIG_PERF_EVENTS
1333 if (tp->flags & TP_FLAG_PROFILE) 1314 if (tp->flags & TP_FLAG_PROFILE)
1334 kprobe_profile_func(kp, regs); 1315 kprobe_perf_func(kp, regs);
1335#endif 1316#endif
1336 return 0; /* We don't tweek kernel, so just return 0 */ 1317 return 0; /* We don't tweek kernel, so just return 0 */
1337} 1318}
@@ -1345,7 +1326,7 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1345 kretprobe_trace_func(ri, regs); 1326 kretprobe_trace_func(ri, regs);
1346#ifdef CONFIG_PERF_EVENTS 1327#ifdef CONFIG_PERF_EVENTS
1347 if (tp->flags & TP_FLAG_PROFILE) 1328 if (tp->flags & TP_FLAG_PROFILE)
1348 kretprobe_profile_func(ri, regs); 1329 kretprobe_perf_func(ri, regs);
1349#endif 1330#endif
1350 return 0; /* We don't tweek kernel, so just return 0 */ 1331 return 0; /* We don't tweek kernel, so just return 0 */
1351} 1332}
@@ -1359,30 +1340,33 @@ static int register_probe_event(struct trace_probe *tp)
1359 if (probe_is_return(tp)) { 1340 if (probe_is_return(tp)) {
1360 tp->event.trace = print_kretprobe_event; 1341 tp->event.trace = print_kretprobe_event;
1361 call->raw_init = probe_event_raw_init; 1342 call->raw_init = probe_event_raw_init;
1362 call->show_format = kretprobe_event_show_format;
1363 call->define_fields = kretprobe_event_define_fields; 1343 call->define_fields = kretprobe_event_define_fields;
1364 } else { 1344 } else {
1365 tp->event.trace = print_kprobe_event; 1345 tp->event.trace = print_kprobe_event;
1366 call->raw_init = probe_event_raw_init; 1346 call->raw_init = probe_event_raw_init;
1367 call->show_format = kprobe_event_show_format;
1368 call->define_fields = kprobe_event_define_fields; 1347 call->define_fields = kprobe_event_define_fields;
1369 } 1348 }
1349 if (set_print_fmt(tp) < 0)
1350 return -ENOMEM;
1370 call->event = &tp->event; 1351 call->event = &tp->event;
1371 call->id = register_ftrace_event(&tp->event); 1352 call->id = register_ftrace_event(&tp->event);
1372 if (!call->id) 1353 if (!call->id) {
1354 kfree(call->print_fmt);
1373 return -ENODEV; 1355 return -ENODEV;
1356 }
1374 call->enabled = 0; 1357 call->enabled = 0;
1375 call->regfunc = probe_event_enable; 1358 call->regfunc = probe_event_enable;
1376 call->unregfunc = probe_event_disable; 1359 call->unregfunc = probe_event_disable;
1377 1360
1378#ifdef CONFIG_PERF_EVENTS 1361#ifdef CONFIG_PERF_EVENTS
1379 call->profile_enable = probe_profile_enable; 1362 call->perf_event_enable = probe_perf_enable;
1380 call->profile_disable = probe_profile_disable; 1363 call->perf_event_disable = probe_perf_disable;
1381#endif 1364#endif
1382 call->data = tp; 1365 call->data = tp;
1383 ret = trace_add_event_call(call); 1366 ret = trace_add_event_call(call);
1384 if (ret) { 1367 if (ret) {
1385 pr_info("Failed to register kprobe event: %s\n", call->name); 1368 pr_info("Failed to register kprobe event: %s\n", call->name);
1369 kfree(call->print_fmt);
1386 unregister_ftrace_event(&tp->event); 1370 unregister_ftrace_event(&tp->event);
1387 } 1371 }
1388 return ret; 1372 return ret;
@@ -1392,6 +1376,7 @@ static void unregister_probe_event(struct trace_probe *tp)
1392{ 1376{
1393 /* tp->event is unregistered in trace_remove_event_call() */ 1377 /* tp->event is unregistered in trace_remove_event_call() */
1394 trace_remove_event_call(&tp->call); 1378 trace_remove_event_call(&tp->call);
1379 kfree(tp->call.print_fmt);
1395} 1380}
1396 1381
1397/* Make a debugfs interface for controling probe points */ 1382/* Make a debugfs interface for controling probe points */
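
__set_print_fmt() uses the familiar two-pass snprintf() idiom: a first call with a zero length only measures, then the buffer is allocated and the same code fills it. A minimal user-space version of the pattern, with made-up argument names (the kernel variant emits REC->ip, or REC->func and REC->ret_ip for kretprobes, plus the probe arguments):

#include <stdio.h>
#include <stdlib.h>

/* when len == 0 nothing is written; snprintf() only reports the length */
#define LEN_OR_ZERO (len ? len - pos : 0)

static int build_print_fmt(const char **args, int nr_args, char *buf, int len)
{
    int i, pos = 0;

    pos += snprintf(buf + pos, LEN_OR_ZERO, "\"(%%lx)");
    for (i = 0; i < nr_args; i++)
        pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx", args[i]);
    pos += snprintf(buf + pos, LEN_OR_ZERO, "\", REC->ip");
    for (i = 0; i < nr_args; i++)
        pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", args[i]);

    return pos;  /* length needed, not counting the trailing NUL */
}

int main(void)
{
    const char *args[] = { "arg1", "arg2" };
    int len = build_print_fmt(args, 2, NULL, 0);     /* pass 1: size it */
    char *print_fmt = malloc(len + 1);

    if (!print_fmt)
        return 1;
    build_print_fmt(args, 2, print_fmt, len + 1);    /* pass 2: fill it */
    puts(print_fmt); /* "(%lx) arg1=%lx arg2=%lx", REC->ip, REC->arg1, REC->arg2 */
    free(print_fmt);
    return 0;
}
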
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index 94103cdcf9d8..d59cd6879477 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -23,6 +23,7 @@
23#include <linux/debugfs.h> 23#include <linux/debugfs.h>
24#include <linux/ftrace.h> 24#include <linux/ftrace.h>
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/slab.h>
26#include <linux/fs.h> 27#include <linux/fs.h>
27 28
28#include "trace_output.h" 29#include "trace_output.h"
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 0acd834659ed..017fa376505d 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,7 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/mmiotrace.h> 10#include <linux/mmiotrace.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <linux/slab.h>
12#include <linux/time.h> 13#include <linux/time.h>
13 14
14#include <asm/atomic.h> 15#include <asm/atomic.h>
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 280fea470d67..81003b4d617f 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -3,6 +3,7 @@
3#include <linux/stringify.h> 3#include <linux/stringify.h>
4#include <linux/kthread.h> 4#include <linux/kthread.h>
5#include <linux/delay.h> 5#include <linux/delay.h>
6#include <linux/slab.h>
6 7
7static inline int trace_valid_entry(struct trace_entry *entry) 8static inline int trace_valid_entry(struct trace_entry *entry)
8{ 9{
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 678a5120ee30..f4bc9b27de5f 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
157 unsigned long val, flags; 157 unsigned long val, flags;
158 char buf[64]; 158 char buf[64];
159 int ret; 159 int ret;
160 int cpu;
160 161
161 if (count >= sizeof(buf)) 162 if (count >= sizeof(buf))
162 return -EINVAL; 163 return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 172 return ret;
172 173
173 local_irq_save(flags); 174 local_irq_save(flags);
175
176 /*
177 * In case we trace inside arch_spin_lock() or after (NMI),
178 * we will cause circular lock, so we also need to increase
179 * the percpu trace_active here.
180 */
181 cpu = smp_processor_id();
182 per_cpu(trace_active, cpu)++;
183
174 arch_spin_lock(&max_stack_lock); 184 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 185 *ptr = val;
176 arch_spin_unlock(&max_stack_lock); 186 arch_spin_unlock(&max_stack_lock);
187
188 per_cpu(trace_active, cpu)--;
177 local_irq_restore(flags); 189 local_irq_restore(flags);
178 190
179 return count; 191 return count;
@@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
206 218
207static void *t_start(struct seq_file *m, loff_t *pos) 219static void *t_start(struct seq_file *m, loff_t *pos)
208{ 220{
221 int cpu;
222
209 local_irq_disable(); 223 local_irq_disable();
224
225 cpu = smp_processor_id();
226 per_cpu(trace_active, cpu)++;
227
210 arch_spin_lock(&max_stack_lock); 228 arch_spin_lock(&max_stack_lock);
211 229
212 if (*pos == 0) 230 if (*pos == 0)
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
217 235
218static void t_stop(struct seq_file *m, void *p) 236static void t_stop(struct seq_file *m, void *p)
219{ 237{
238 int cpu;
239
220 arch_spin_unlock(&max_stack_lock); 240 arch_spin_unlock(&max_stack_lock);
241
242 cpu = smp_processor_id();
243 per_cpu(trace_active, cpu)--;
244
221 local_irq_enable(); 245 local_irq_enable();
222} 246}
223 247
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index a4bb239eb987..96cffb269e73 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -10,6 +10,7 @@
10 10
11 11
12#include <linux/list.h> 12#include <linux/list.h>
13#include <linux/slab.h>
13#include <linux/rbtree.h> 14#include <linux/rbtree.h>
14#include <linux/debugfs.h> 15#include <linux/debugfs.h>
15#include "trace_stat.h" 16#include "trace_stat.h"
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 4e332b9e449c..4d6d711717f2 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,5 +1,6 @@
1#include <trace/syscall.h> 1#include <trace/syscall.h>
2#include <trace/events/syscalls.h> 2#include <trace/events/syscalls.h>
3#include <linux/slab.h>
3#include <linux/kernel.h> 4#include <linux/kernel.h>
4#include <linux/ftrace.h> 5#include <linux/ftrace.h>
5#include <linux/perf_event.h> 6#include <linux/perf_event.h>
@@ -143,70 +144,65 @@ extern char *__bad_type_size(void);
143 #type, #name, offsetof(typeof(trace), name), \ 144 #type, #name, offsetof(typeof(trace), name), \
144 sizeof(trace.name), is_signed_type(type) 145 sizeof(trace.name), is_signed_type(type)
145 146
146int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 147static
148int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
147{ 149{
148 int i; 150 int i;
149 int ret; 151 int pos = 0;
150 struct syscall_metadata *entry = call->data;
151 struct syscall_trace_enter trace;
152 int offset = offsetof(struct syscall_trace_enter, args);
153 152
154 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 153 /* When len=0, we just calculate the needed length */
155 "\tsigned:%u;\n", 154#define LEN_OR_ZERO (len ? len - pos : 0)
156 SYSCALL_FIELD(int, nr));
157 if (!ret)
158 return 0;
159 155
156 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
160 for (i = 0; i < entry->nb_args; i++) { 157 for (i = 0; i < entry->nb_args; i++) {
161 ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], 158 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
162 entry->args[i]); 159 entry->args[i], sizeof(unsigned long),
163 if (!ret) 160 i == entry->nb_args - 1 ? "" : ", ");
164 return 0;
165 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
166 "\tsigned:%u;\n", offset,
167 sizeof(unsigned long),
168 is_signed_type(unsigned long));
169 if (!ret)
170 return 0;
171 offset += sizeof(unsigned long);
172 } 161 }
162 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
173 163
174 trace_seq_puts(s, "\nprint fmt: \"");
175 for (i = 0; i < entry->nb_args; i++) { 164 for (i = 0; i < entry->nb_args; i++) {
176 ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i], 165 pos += snprintf(buf + pos, LEN_OR_ZERO,
177 sizeof(unsigned long), 166 ", ((unsigned long)(REC->%s))", entry->args[i]);
178 i == entry->nb_args - 1 ? "" : ", ");
179 if (!ret)
180 return 0;
181 } 167 }
182 trace_seq_putc(s, '"');
183 168
184 for (i = 0; i < entry->nb_args; i++) { 169#undef LEN_OR_ZERO
185 ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
186 entry->args[i]);
187 if (!ret)
188 return 0;
189 }
190 170
191 return trace_seq_putc(s, '\n'); 171 /* return the length of print_fmt */
172 return pos;
192} 173}
193 174
194int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) 175static int set_syscall_print_fmt(struct ftrace_event_call *call)
195{ 176{
196 int ret; 177 char *print_fmt;
197 struct syscall_trace_exit trace; 178 int len;
179 struct syscall_metadata *entry = call->data;
198 180
199 ret = trace_seq_printf(s, 181 if (entry->enter_event != call) {
200 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 182 call->print_fmt = "\"0x%lx\", REC->ret";
201 "\tsigned:%u;\n"
202 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
203 "\tsigned:%u;\n",
204 SYSCALL_FIELD(int, nr),
205 SYSCALL_FIELD(long, ret));
206 if (!ret)
207 return 0; 183 return 0;
184 }
208 185
209 return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); 186 /* First: called with 0 length to calculate the needed length */
187 len = __set_enter_print_fmt(entry, NULL, 0);
188
189 print_fmt = kmalloc(len + 1, GFP_KERNEL);
190 if (!print_fmt)
191 return -ENOMEM;
192
193 /* Second: actually write the @print_fmt */
194 __set_enter_print_fmt(entry, print_fmt, len + 1);
195 call->print_fmt = print_fmt;
196
197 return 0;
198}
199
200static void free_syscall_print_fmt(struct ftrace_event_call *call)
201{
202 struct syscall_metadata *entry = call->data;
203
204 if (entry->enter_event == call)
205 kfree(call->print_fmt);
210} 206}
211 207
212int syscall_enter_define_fields(struct ftrace_event_call *call) 208int syscall_enter_define_fields(struct ftrace_event_call *call)
@@ -386,12 +382,22 @@ int init_syscall_trace(struct ftrace_event_call *call)
386{ 382{
387 int id; 383 int id;
388 384
389 id = register_ftrace_event(call->event); 385 if (set_syscall_print_fmt(call) < 0)
390 if (!id) 386 return -ENOMEM;
391 return -ENODEV; 387
392 call->id = id; 388 id = trace_event_raw_init(call);
393 INIT_LIST_HEAD(&call->fields); 389
394 return 0; 390 if (id < 0) {
391 free_syscall_print_fmt(call);
392 return id;
393 }
394
395 return id;
396}
397
398unsigned long __init arch_syscall_addr(int nr)
399{
400 return (unsigned long)sys_call_table[nr];
395} 401}
396 402
397int __init init_ftrace_syscalls(void) 403int __init init_ftrace_syscalls(void)
@@ -423,12 +429,12 @@ core_initcall(init_ftrace_syscalls);
423 429
424#ifdef CONFIG_PERF_EVENTS 430#ifdef CONFIG_PERF_EVENTS
425 431
426static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 432static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
427static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); 433static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
428static int sys_prof_refcount_enter; 434static int sys_perf_refcount_enter;
429static int sys_prof_refcount_exit; 435static int sys_perf_refcount_exit;
430 436
431static void prof_syscall_enter(struct pt_regs *regs, long id) 437static void perf_syscall_enter(struct pt_regs *regs, long id)
432{ 438{
433 struct syscall_metadata *sys_data; 439 struct syscall_metadata *sys_data;
434 struct syscall_trace_enter *rec; 440 struct syscall_trace_enter *rec;
@@ -438,7 +444,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
438 int size; 444 int size;
439 445
440 syscall_nr = syscall_get_nr(current, regs); 446 syscall_nr = syscall_get_nr(current, regs);
441 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 447 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
442 return; 448 return;
443 449
444 sys_data = syscall_nr_to_meta(syscall_nr); 450 sys_data = syscall_nr_to_meta(syscall_nr);
@@ -450,11 +456,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
450 size = ALIGN(size + sizeof(u32), sizeof(u64)); 456 size = ALIGN(size + sizeof(u32), sizeof(u64));
451 size -= sizeof(u32); 457 size -= sizeof(u32);
452 458
453 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 459 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
454 "profile buffer not large enough")) 460 "perf buffer not large enough"))
455 return; 461 return;
456 462
457 rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size, 463 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
458 sys_data->enter_event->id, &rctx, &flags); 464 sys_data->enter_event->id, &rctx, &flags);
459 if (!rec) 465 if (!rec)
460 return; 466 return;
@@ -462,10 +468,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
462 rec->nr = syscall_nr; 468 rec->nr = syscall_nr;
463 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 469 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
464 (unsigned long *)&rec->args); 470 (unsigned long *)&rec->args);
465 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); 471 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
466} 472}
467 473
468int prof_sysenter_enable(struct ftrace_event_call *call) 474int perf_sysenter_enable(struct ftrace_event_call *call)
469{ 475{
470 int ret = 0; 476 int ret = 0;
471 int num; 477 int num;
@@ -473,34 +479,34 @@ int prof_sysenter_enable(struct ftrace_event_call *call)
473 num = ((struct syscall_metadata *)call->data)->syscall_nr; 479 num = ((struct syscall_metadata *)call->data)->syscall_nr;
474 480
475 mutex_lock(&syscall_trace_lock); 481 mutex_lock(&syscall_trace_lock);
476 if (!sys_prof_refcount_enter) 482 if (!sys_perf_refcount_enter)
477 ret = register_trace_sys_enter(prof_syscall_enter); 483 ret = register_trace_sys_enter(perf_syscall_enter);
478 if (ret) { 484 if (ret) {
479 pr_info("event trace: Could not activate" 485 pr_info("event trace: Could not activate"
480 "syscall entry trace point"); 486 "syscall entry trace point");
481 } else { 487 } else {
482 set_bit(num, enabled_prof_enter_syscalls); 488 set_bit(num, enabled_perf_enter_syscalls);
483 sys_prof_refcount_enter++; 489 sys_perf_refcount_enter++;
484 } 490 }
485 mutex_unlock(&syscall_trace_lock); 491 mutex_unlock(&syscall_trace_lock);
486 return ret; 492 return ret;
487} 493}
488 494
489void prof_sysenter_disable(struct ftrace_event_call *call) 495void perf_sysenter_disable(struct ftrace_event_call *call)
490{ 496{
491 int num; 497 int num;
492 498
493 num = ((struct syscall_metadata *)call->data)->syscall_nr; 499 num = ((struct syscall_metadata *)call->data)->syscall_nr;
494 500
495 mutex_lock(&syscall_trace_lock); 501 mutex_lock(&syscall_trace_lock);
496 sys_prof_refcount_enter--; 502 sys_perf_refcount_enter--;
497 clear_bit(num, enabled_prof_enter_syscalls); 503 clear_bit(num, enabled_perf_enter_syscalls);
498 if (!sys_prof_refcount_enter) 504 if (!sys_perf_refcount_enter)
499 unregister_trace_sys_enter(prof_syscall_enter); 505 unregister_trace_sys_enter(perf_syscall_enter);
500 mutex_unlock(&syscall_trace_lock); 506 mutex_unlock(&syscall_trace_lock);
501} 507}
502 508
503static void prof_syscall_exit(struct pt_regs *regs, long ret) 509static void perf_syscall_exit(struct pt_regs *regs, long ret)
504{ 510{
505 struct syscall_metadata *sys_data; 511 struct syscall_metadata *sys_data;
506 struct syscall_trace_exit *rec; 512 struct syscall_trace_exit *rec;
@@ -510,7 +516,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
510 int size; 516 int size;
511 517
512 syscall_nr = syscall_get_nr(current, regs); 518 syscall_nr = syscall_get_nr(current, regs);
513 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 519 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
514 return; 520 return;
515 521
516 sys_data = syscall_nr_to_meta(syscall_nr); 522 sys_data = syscall_nr_to_meta(syscall_nr);
@@ -525,11 +531,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
525 * Impossible, but be paranoid with the future 531 * Impossible, but be paranoid with the future
526 * How to put this check outside runtime? 532 * How to put this check outside runtime?
527 */ 533 */
528 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 534 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
529 "exit event has grown above profile buffer size")) 535 "exit event has grown above perf buffer size"))
530 return; 536 return;
531 537
532 rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size, 538 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
533 sys_data->exit_event->id, &rctx, &flags); 539 sys_data->exit_event->id, &rctx, &flags);
534 if (!rec) 540 if (!rec)
535 return; 541 return;
@@ -537,10 +543,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
537 rec->nr = syscall_nr; 543 rec->nr = syscall_nr;
538 rec->ret = syscall_get_return_value(current, regs); 544 rec->ret = syscall_get_return_value(current, regs);
539 545
540 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); 546 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
541} 547}
542 548
543int prof_sysexit_enable(struct ftrace_event_call *call) 549int perf_sysexit_enable(struct ftrace_event_call *call)
544{ 550{
545 int ret = 0; 551 int ret = 0;
546 int num; 552 int num;
@@ -548,30 +554,30 @@ int prof_sysexit_enable(struct ftrace_event_call *call)
548 num = ((struct syscall_metadata *)call->data)->syscall_nr; 554 num = ((struct syscall_metadata *)call->data)->syscall_nr;
549 555
550 mutex_lock(&syscall_trace_lock); 556 mutex_lock(&syscall_trace_lock);
551 if (!sys_prof_refcount_exit) 557 if (!sys_perf_refcount_exit)
552 ret = register_trace_sys_exit(prof_syscall_exit); 558 ret = register_trace_sys_exit(perf_syscall_exit);
553 if (ret) { 559 if (ret) {
554 pr_info("event trace: Could not activate" 560 pr_info("event trace: Could not activate"
555 "syscall entry trace point"); 561 "syscall exit trace point");
556 } else { 562 } else {
557 set_bit(num, enabled_prof_exit_syscalls); 563 set_bit(num, enabled_perf_exit_syscalls);
558 sys_prof_refcount_exit++; 564 sys_perf_refcount_exit++;
559 } 565 }
560 mutex_unlock(&syscall_trace_lock); 566 mutex_unlock(&syscall_trace_lock);
561 return ret; 567 return ret;
562} 568}
563 569
564void prof_sysexit_disable(struct ftrace_event_call *call) 570void perf_sysexit_disable(struct ftrace_event_call *call)
565{ 571{
566 int num; 572 int num;
567 573
568 num = ((struct syscall_metadata *)call->data)->syscall_nr; 574 num = ((struct syscall_metadata *)call->data)->syscall_nr;
569 575
570 mutex_lock(&syscall_trace_lock); 576 mutex_lock(&syscall_trace_lock);
571 sys_prof_refcount_exit--; 577 sys_perf_refcount_exit--;
572 clear_bit(num, enabled_prof_exit_syscalls); 578 clear_bit(num, enabled_perf_exit_syscalls);
573 if (!sys_prof_refcount_exit) 579 if (!sys_perf_refcount_exit)
574 unregister_trace_sys_exit(prof_syscall_exit); 580 unregister_trace_sys_exit(perf_syscall_exit);
575 mutex_unlock(&syscall_trace_lock); 581 mutex_unlock(&syscall_trace_lock);
576} 582}
577 583
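
For reference, __set_enter_print_fmt() above builds strings of the following shape. For a hypothetical two-argument syscall with arguments fd and buf on a 64-bit kernel (sizeof(unsigned long) == 8), the stored print_fmt would be roughly:

    "fd: 0x%08lx, buf: 0x%08lx", ((unsigned long)(REC->fd)), ((unsigned long)(REC->buf))

Exit events keep the constant "0x%lx", REC->ret format and therefore never allocate; free_syscall_print_fmt() only frees the enter event's string for the same reason.
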
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 40cafb07dffd..cc2d2faa7d9e 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -9,6 +9,7 @@
9#include <trace/events/workqueue.h> 9#include <trace/events/workqueue.h>
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include <linux/slab.h>
12#include <linux/kref.h> 13#include <linux/kref.h>
13#include "trace_stat.h" 14#include "trace_stat.h"
14#include "trace.h" 15#include "trace.h"