aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
authorJonathan Herman <hermanjl@cs.unc.edu>2013-01-17 16:15:55 -0500
committerJonathan Herman <hermanjl@cs.unc.edu>2013-01-17 16:15:55 -0500
commit8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
treea8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /kernel/trace
parent406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig67
-rw-r--r--kernel/trace/Makefile18
-rw-r--r--kernel/trace/blktrace.c21
-rw-r--r--kernel/trace/ftrace.c1292
-rw-r--r--kernel/trace/ring_buffer.c934
-rw-r--r--kernel/trace/rpm-traces.c20
-rw-r--r--kernel/trace/trace.c1342
-rw-r--r--kernel/trace/trace.h93
-rw-r--r--kernel/trace/trace_branch.c4
-rw-r--r--kernel/trace/trace_clock.c12
-rw-r--r--kernel/trace/trace_entries.h70
-rw-r--r--kernel/trace/trace_event_perf.c209
-rw-r--r--kernel/trace/trace_events.c184
-rw-r--r--kernel/trace/trace_events_filter.c1172
-rw-r--r--kernel/trace/trace_events_filter_test.h50
-rw-r--r--kernel/trace/trace_export.c67
-rw-r--r--kernel/trace/trace_functions.c44
-rw-r--r--kernel/trace/trace_functions_graph.c13
-rw-r--r--kernel/trace/trace_irqsoff.c44
-rw-r--r--kernel/trace/trace_kprobe.c923
-rw-r--r--kernel/trace/trace_output.c111
-rw-r--r--kernel/trace/trace_printk.c23
-rw-r--r--kernel/trace/trace_probe.c839
-rw-r--r--kernel/trace/trace_probe.h161
-rw-r--r--kernel/trace/trace_sched_switch.c4
-rw-r--r--kernel/trace/trace_sched_wakeup.c30
-rw-r--r--kernel/trace/trace_selftest.c305
-rw-r--r--kernel/trace/trace_stack.c38
-rw-r--r--kernel/trace/trace_syscalls.c94
-rw-r--r--kernel/trace/trace_uprobe.c788
30 files changed, 2813 insertions, 6159 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5d89335a485..93168c0f991 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -49,11 +49,6 @@ config HAVE_SYSCALL_TRACEPOINTS
49 help 49 help
50 See Documentation/trace/ftrace-design.txt 50 See Documentation/trace/ftrace-design.txt
51 51
52config HAVE_FENTRY
53 bool
54 help
55 Arch supports the gcc options -pg with -mfentry
56
57config HAVE_C_RECORDMCOUNT 52config HAVE_C_RECORDMCOUNT
58 bool 53 bool
59 help 54 help
@@ -62,12 +57,8 @@ config HAVE_C_RECORDMCOUNT
62config TRACER_MAX_TRACE 57config TRACER_MAX_TRACE
63 bool 58 bool
64 59
65config TRACE_CLOCK
66 bool
67
68config RING_BUFFER 60config RING_BUFFER
69 bool 61 bool
70 select TRACE_CLOCK
71 62
72config FTRACE_NMI_ENTER 63config FTRACE_NMI_ENTER
73 bool 64 bool
@@ -118,8 +109,6 @@ config TRACING
118 select NOP_TRACER 109 select NOP_TRACER
119 select BINARY_PRINTF 110 select BINARY_PRINTF
120 select EVENT_TRACING 111 select EVENT_TRACING
121 select TRACE_CLOCK
122 select IRQ_WORK
123 112
124config GENERIC_TRACER 113config GENERIC_TRACER
125 bool 114 bool
@@ -152,6 +141,7 @@ if FTRACE
152config FUNCTION_TRACER 141config FUNCTION_TRACER
153 bool "Kernel Function Tracer" 142 bool "Kernel Function Tracer"
154 depends on HAVE_FUNCTION_TRACER 143 depends on HAVE_FUNCTION_TRACER
144 select FRAME_POINTER if !ARM_UNWIND && !S390 && !MICROBLAZE
155 select KALLSYMS 145 select KALLSYMS
156 select GENERIC_TRACER 146 select GENERIC_TRACER
157 select CONTEXT_SWITCH_TRACER 147 select CONTEXT_SWITCH_TRACER
@@ -282,7 +272,7 @@ config PROFILE_ANNOTATED_BRANCHES
282 bool "Trace likely/unlikely profiler" 272 bool "Trace likely/unlikely profiler"
283 select TRACE_BRANCH_PROFILING 273 select TRACE_BRANCH_PROFILING
284 help 274 help
285 This tracer profiles all likely and unlikely macros 275 This tracer profiles all the the likely and unlikely macros
286 in the kernel. It will display the results in: 276 in the kernel. It will display the results in:
287 277
288 /sys/kernel/debug/tracing/trace_stat/branch_annotated 278 /sys/kernel/debug/tracing/trace_stat/branch_annotated
@@ -383,7 +373,6 @@ config KPROBE_EVENT
383 depends on HAVE_REGS_AND_STACK_ACCESS_API 373 depends on HAVE_REGS_AND_STACK_ACCESS_API
384 bool "Enable kprobes-based dynamic events" 374 bool "Enable kprobes-based dynamic events"
385 select TRACING 375 select TRACING
386 select PROBE_EVENTS
387 default y 376 default y
388 help 377 help
389 This allows the user to add tracing events (similar to tracepoints) 378 This allows the user to add tracing events (similar to tracepoints)
@@ -396,25 +385,6 @@ config KPROBE_EVENT
396 This option is also required by perf-probe subcommand of perf tools. 385 This option is also required by perf-probe subcommand of perf tools.
397 If you want to use perf tools, this option is strongly recommended. 386 If you want to use perf tools, this option is strongly recommended.
398 387
399config UPROBE_EVENT
400 bool "Enable uprobes-based dynamic events"
401 depends on ARCH_SUPPORTS_UPROBES
402 depends on MMU
403 select UPROBES
404 select PROBE_EVENTS
405 select TRACING
406 default n
407 help
408 This allows the user to add tracing events on top of userspace
409 dynamic events (similar to tracepoints) on the fly via the trace
410 events interface. Those events can be inserted wherever uprobes
411 can probe, and record various registers.
412 This option is required if you plan to use perf-probe subcommand
413 of perf tools on user space applications.
414
415config PROBE_EVENTS
416 def_bool n
417
418config DYNAMIC_FTRACE 388config DYNAMIC_FTRACE
419 bool "enable/disable ftrace tracepoints dynamically" 389 bool "enable/disable ftrace tracepoints dynamically"
420 depends on FUNCTION_TRACER 390 depends on FUNCTION_TRACER
@@ -517,6 +487,39 @@ config RING_BUFFER_BENCHMARK
517 487
518 If unsure, say N. 488 If unsure, say N.
519 489
490config TRACELEVEL
491 bool "Add capability to prioritize traces"
492 depends on EVENT_TRACING
493 help
494 This option allows subsystem programmers to add priorities to trace
495 events by calling to tracelevel_register. Traces of high priority
496 will automatically be enabled on kernel boot, and users can change
497 the the trace level in a kernel parameter.
498
499config TRACEDUMP
500 bool "Dumping functionality for ftrace"
501 depends on FUNCTION_TRACER
502 help
503 This option adds functionality to dump tracing data in several forms
504 Data can be dumped in ascii form or as raw pages from the tracing
505 ring buffers, along with the saved cmdlines. This is specified by
506 the module parameter tracedump_ascii. Data will be compressed
507 using zlib.
508
509config TRACEDUMP_PANIC
510 bool "Tracedump to console on panic"
511 depends on TRACEDUMP
512 help
513 With this option, tracedump will automatically dump to the console
514 on a kernel panic.
515
516config TRACEDUMP_PROCFS
517 bool "Tracedump via proc file"
518 depends on TRACEDUMP
519 help
520 With this option, tracedump can be dumped from user space by reading
521 from /proc/tracedump.
522
520endif # FTRACE 523endif # FTRACE
521 524
522endif # TRACING_SUPPORT 525endif # TRACING_SUPPORT
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index d7e2068e4b7..1360a1a90d5 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -5,21 +5,21 @@ ifdef CONFIG_FUNCTION_TRACER
5ORIG_CFLAGS := $(KBUILD_CFLAGS) 5ORIG_CFLAGS := $(KBUILD_CFLAGS)
6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) 6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
7 7
8ifdef CONFIG_FTRACE_SELFTEST
9# selftest needs instrumentation 8# selftest needs instrumentation
10CFLAGS_trace_selftest_dynamic.o = -pg 9CFLAGS_trace_selftest_dynamic.o = -pg
11obj-y += trace_selftest_dynamic.o 10obj-y += trace_selftest_dynamic.o
12endif 11endif
13endif
14 12
15# If unlikely tracing is enabled, do not trace these files 13# If unlikely tracing is enabled, do not trace these files
16ifdef CONFIG_TRACING_BRANCHES 14ifdef CONFIG_TRACING_BRANCHES
17KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING 15KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
18endif 16endif
19 17
20CFLAGS_trace_events_filter.o := -I$(src) 18#
21 19# Make the trace clocks available generally: it's infrastructure
22obj-$(CONFIG_TRACE_CLOCK) += trace_clock.o 20# relied on by ptrace for example:
21#
22obj-y += trace_clock.o
23 23
24obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o 24obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
25obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 25obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
@@ -39,6 +39,7 @@ obj-$(CONFIG_STACK_TRACER) += trace_stack.o
39obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o 39obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
40obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o 40obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
41obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 41obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
42obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
42obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 43obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
43ifeq ($(CONFIG_BLOCK),y) 44ifeq ($(CONFIG_BLOCK),y)
44obj-$(CONFIG_EVENT_TRACING) += blktrace.o 45obj-$(CONFIG_EVENT_TRACING) += blktrace.o
@@ -52,13 +53,10 @@ endif
52obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 53obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
53obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 54obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
54obj-$(CONFIG_TRACEPOINTS) += power-traces.o 55obj-$(CONFIG_TRACEPOINTS) += power-traces.o
55ifeq ($(CONFIG_PM_RUNTIME),y)
56obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
57endif
58ifeq ($(CONFIG_TRACING),y) 56ifeq ($(CONFIG_TRACING),y)
59obj-$(CONFIG_KGDB_KDB) += trace_kdb.o 57obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
60endif 58endif
61obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o 59obj-$(CONFIG_TRACELEVEL) += tracelevel.o
62obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o 60obj-$(CONFIG_TRACEDUMP) += tracedump.o
63 61
64libftrace-y := ftrace.o 62libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index c0bd0308741..7c910a5593a 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,7 +23,6 @@
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/debugfs.h> 25#include <linux/debugfs.h>
26#include <linux/export.h>
27#include <linux/time.h> 26#include <linux/time.h>
28#include <linux/uaccess.h> 27#include <linux/uaccess.h>
29 28
@@ -311,6 +310,13 @@ int blk_trace_remove(struct request_queue *q)
311} 310}
312EXPORT_SYMBOL_GPL(blk_trace_remove); 311EXPORT_SYMBOL_GPL(blk_trace_remove);
313 312
313static int blk_dropped_open(struct inode *inode, struct file *filp)
314{
315 filp->private_data = inode->i_private;
316
317 return 0;
318}
319
314static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, 320static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
315 size_t count, loff_t *ppos) 321 size_t count, loff_t *ppos)
316{ 322{
@@ -324,11 +330,18 @@ static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
324 330
325static const struct file_operations blk_dropped_fops = { 331static const struct file_operations blk_dropped_fops = {
326 .owner = THIS_MODULE, 332 .owner = THIS_MODULE,
327 .open = simple_open, 333 .open = blk_dropped_open,
328 .read = blk_dropped_read, 334 .read = blk_dropped_read,
329 .llseek = default_llseek, 335 .llseek = default_llseek,
330}; 336};
331 337
338static int blk_msg_open(struct inode *inode, struct file *filp)
339{
340 filp->private_data = inode->i_private;
341
342 return 0;
343}
344
332static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, 345static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
333 size_t count, loff_t *ppos) 346 size_t count, loff_t *ppos)
334{ 347{
@@ -357,7 +370,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
357 370
358static const struct file_operations blk_msg_fops = { 371static const struct file_operations blk_msg_fops = {
359 .owner = THIS_MODULE, 372 .owner = THIS_MODULE,
360 .open = simple_open, 373 .open = blk_msg_open,
361 .write = blk_msg_write, 374 .write = blk_msg_write,
362 .llseek = noop_llseek, 375 .llseek = noop_llseek,
363}; 376};
@@ -388,7 +401,7 @@ static int blk_remove_buf_file_callback(struct dentry *dentry)
388 401
389static struct dentry *blk_create_buf_file_callback(const char *filename, 402static struct dentry *blk_create_buf_file_callback(const char *filename,
390 struct dentry *parent, 403 struct dentry *parent,
391 umode_t mode, 404 int mode,
392 struct rchan_buf *buf, 405 struct rchan_buf *buf,
393 int *is_global) 406 int *is_global)
394{ 407{
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3ffe4c5ad3f..798b16cd40f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -10,7 +10,7 @@
10 * Based on code in the latency_tracer, that is: 10 * Based on code in the latency_tracer, that is:
11 * 11 *
12 * Copyright (C) 2004-2006 Ingo Molnar 12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers 13 * Copyright (C) 2004 William Lee Irwin III
14 */ 14 */
15 15
16#include <linux/stop_machine.h> 16#include <linux/stop_machine.h>
@@ -22,13 +22,10 @@
22#include <linux/hardirq.h> 22#include <linux/hardirq.h>
23#include <linux/kthread.h> 23#include <linux/kthread.h>
24#include <linux/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/bsearch.h>
26#include <linux/module.h>
27#include <linux/ftrace.h> 25#include <linux/ftrace.h>
28#include <linux/sysctl.h> 26#include <linux/sysctl.h>
29#include <linux/slab.h> 27#include <linux/slab.h>
30#include <linux/ctype.h> 28#include <linux/ctype.h>
31#include <linux/sort.h>
32#include <linux/list.h> 29#include <linux/list.h>
33#include <linux/hash.h> 30#include <linux/hash.h>
34#include <linux/rcupdate.h> 31#include <linux/rcupdate.h>
@@ -62,22 +59,12 @@
62#define FTRACE_HASH_DEFAULT_BITS 10 59#define FTRACE_HASH_DEFAULT_BITS 10
63#define FTRACE_HASH_MAX_BITS 12 60#define FTRACE_HASH_MAX_BITS 12
64 61
65#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
66
67static struct ftrace_ops ftrace_list_end __read_mostly = {
68 .func = ftrace_stub,
69 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
70};
71
72/* ftrace_enabled is a method to turn ftrace on or off */ 62/* ftrace_enabled is a method to turn ftrace on or off */
73int ftrace_enabled __read_mostly; 63int ftrace_enabled __read_mostly;
74static int last_ftrace_enabled; 64static int last_ftrace_enabled;
75 65
76/* Quick disabling of function tracer. */ 66/* Quick disabling of function tracer. */
77int function_trace_stop __read_mostly; 67int function_trace_stop;
78
79/* Current function tracing op */
80struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end;
81 68
82/* List for set_ftrace_pid's pids. */ 69/* List for set_ftrace_pid's pids. */
83LIST_HEAD(ftrace_pids); 70LIST_HEAD(ftrace_pids);
@@ -94,43 +81,20 @@ static int ftrace_disabled __read_mostly;
94 81
95static DEFINE_MUTEX(ftrace_lock); 82static DEFINE_MUTEX(ftrace_lock);
96 83
84static struct ftrace_ops ftrace_list_end __read_mostly = {
85 .func = ftrace_stub,
86};
87
97static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; 88static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
98static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
99static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; 89static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
100ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; 90ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
91static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub;
92ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
101ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 93ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
102static struct ftrace_ops global_ops; 94static struct ftrace_ops global_ops;
103static struct ftrace_ops control_ops;
104
105#if ARCH_SUPPORTS_FTRACE_OPS
106static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
107 struct ftrace_ops *op, struct pt_regs *regs);
108#else
109/* See comment below, where ftrace_ops_list_func is defined */
110static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
111#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
112#endif
113
114/**
115 * ftrace_nr_registered_ops - return number of ops registered
116 *
117 * Returns the number of ftrace_ops registered and tracing functions
118 */
119int ftrace_nr_registered_ops(void)
120{
121 struct ftrace_ops *ops;
122 int cnt = 0;
123
124 mutex_lock(&ftrace_lock);
125 95
126 for (ops = ftrace_ops_list; 96static void
127 ops != &ftrace_list_end; ops = ops->next) 97ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
128 cnt++;
129
130 mutex_unlock(&ftrace_lock);
131
132 return cnt;
133}
134 98
135/* 99/*
136 * Traverse the ftrace_global_list, invoking all entries. The reason that we 100 * Traverse the ftrace_global_list, invoking all entries. The reason that we
@@ -141,29 +105,29 @@ int ftrace_nr_registered_ops(void)
141 * 105 *
142 * Silly Alpha and silly pointer-speculation compiler optimizations! 106 * Silly Alpha and silly pointer-speculation compiler optimizations!
143 */ 107 */
144static void 108static void ftrace_global_list_func(unsigned long ip,
145ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, 109 unsigned long parent_ip)
146 struct ftrace_ops *op, struct pt_regs *regs)
147{ 110{
111 struct ftrace_ops *op;
112
148 if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT))) 113 if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
149 return; 114 return;
150 115
151 trace_recursion_set(TRACE_GLOBAL_BIT); 116 trace_recursion_set(TRACE_GLOBAL_BIT);
152 op = rcu_dereference_raw(ftrace_global_list); /*see above*/ 117 op = rcu_dereference_raw(ftrace_global_list); /*see above*/
153 while (op != &ftrace_list_end) { 118 while (op != &ftrace_list_end) {
154 op->func(ip, parent_ip, op, regs); 119 op->func(ip, parent_ip);
155 op = rcu_dereference_raw(op->next); /*see above*/ 120 op = rcu_dereference_raw(op->next); /*see above*/
156 }; 121 };
157 trace_recursion_clear(TRACE_GLOBAL_BIT); 122 trace_recursion_clear(TRACE_GLOBAL_BIT);
158} 123}
159 124
160static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, 125static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
161 struct ftrace_ops *op, struct pt_regs *regs)
162{ 126{
163 if (!test_tsk_trace_trace(current)) 127 if (!test_tsk_trace_trace(current))
164 return; 128 return;
165 129
166 ftrace_pid_function(ip, parent_ip, op, regs); 130 ftrace_pid_function(ip, parent_ip);
167} 131}
168 132
169static void set_ftrace_pid_function(ftrace_func_t func) 133static void set_ftrace_pid_function(ftrace_func_t func)
@@ -182,34 +146,24 @@ static void set_ftrace_pid_function(ftrace_func_t func)
182void clear_ftrace_function(void) 146void clear_ftrace_function(void)
183{ 147{
184 ftrace_trace_function = ftrace_stub; 148 ftrace_trace_function = ftrace_stub;
149 __ftrace_trace_function = ftrace_stub;
150 __ftrace_trace_function_delay = ftrace_stub;
185 ftrace_pid_function = ftrace_stub; 151 ftrace_pid_function = ftrace_stub;
186} 152}
187 153
188static void control_ops_disable_all(struct ftrace_ops *ops) 154#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
189{ 155/*
190 int cpu; 156 * For those archs that do not test ftrace_trace_stop in their
191 157 * mcount call site, we need to do it from C.
192 for_each_possible_cpu(cpu) 158 */
193 *per_cpu_ptr(ops->disabled, cpu) = 1; 159static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
194}
195
196static int control_ops_alloc(struct ftrace_ops *ops)
197{ 160{
198 int __percpu *disabled; 161 if (function_trace_stop)
199 162 return;
200 disabled = alloc_percpu(int);
201 if (!disabled)
202 return -ENOMEM;
203 163
204 ops->disabled = disabled; 164 __ftrace_trace_function(ip, parent_ip);
205 control_ops_disable_all(ops);
206 return 0;
207}
208
209static void control_ops_free(struct ftrace_ops *ops)
210{
211 free_percpu(ops->disabled);
212} 165}
166#endif
213 167
214static void update_global_ops(void) 168static void update_global_ops(void)
215{ 169{
@@ -243,27 +197,27 @@ static void update_ftrace_function(void)
243 197
244 /* 198 /*
245 * If we are at the end of the list and this ops is 199 * If we are at the end of the list and this ops is
246 * recursion safe and not dynamic and the arch supports passing ops, 200 * not dynamic, then have the mcount trampoline call
247 * then have the mcount trampoline call the function directly. 201 * the function directly
248 */ 202 */
249 if (ftrace_ops_list == &ftrace_list_end || 203 if (ftrace_ops_list == &ftrace_list_end ||
250 (ftrace_ops_list->next == &ftrace_list_end && 204 (ftrace_ops_list->next == &ftrace_list_end &&
251 !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) && 205 !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC)))
252 (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) &&
253 !FTRACE_FORCE_LIST_FUNC)) {
254 /* Set the ftrace_ops that the arch callback uses */
255 if (ftrace_ops_list == &global_ops)
256 function_trace_op = ftrace_global_list;
257 else
258 function_trace_op = ftrace_ops_list;
259 func = ftrace_ops_list->func; 206 func = ftrace_ops_list->func;
260 } else { 207 else
261 /* Just use the default ftrace_ops */
262 function_trace_op = &ftrace_list_end;
263 func = ftrace_ops_list_func; 208 func = ftrace_ops_list_func;
264 }
265 209
210#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
266 ftrace_trace_function = func; 211 ftrace_trace_function = func;
212#else
213#ifdef CONFIG_DYNAMIC_FTRACE
214 /* do not update till all functions have been modified */
215 __ftrace_trace_function_delay = func;
216#else
217 __ftrace_trace_function = func;
218#endif
219 ftrace_trace_function = ftrace_test_stop_func;
220#endif
267} 221}
268 222
269static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) 223static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
@@ -302,29 +256,9 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
302 return 0; 256 return 0;
303} 257}
304 258
305static void add_ftrace_list_ops(struct ftrace_ops **list,
306 struct ftrace_ops *main_ops,
307 struct ftrace_ops *ops)
308{
309 int first = *list == &ftrace_list_end;
310 add_ftrace_ops(list, ops);
311 if (first)
312 add_ftrace_ops(&ftrace_ops_list, main_ops);
313}
314
315static int remove_ftrace_list_ops(struct ftrace_ops **list,
316 struct ftrace_ops *main_ops,
317 struct ftrace_ops *ops)
318{
319 int ret = remove_ftrace_ops(list, ops);
320 if (!ret && *list == &ftrace_list_end)
321 ret = remove_ftrace_ops(&ftrace_ops_list, main_ops);
322 return ret;
323}
324
325static int __register_ftrace_function(struct ftrace_ops *ops) 259static int __register_ftrace_function(struct ftrace_ops *ops)
326{ 260{
327 if (unlikely(ftrace_disabled)) 261 if (ftrace_disabled)
328 return -ENODEV; 262 return -ENODEV;
329 263
330 if (FTRACE_WARN_ON(ops == &global_ops)) 264 if (FTRACE_WARN_ON(ops == &global_ops))
@@ -333,34 +267,15 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
333 if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) 267 if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
334 return -EBUSY; 268 return -EBUSY;
335 269
336 /* We don't support both control and global flags set. */
337 if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
338 return -EINVAL;
339
340#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
341 /*
342 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
343 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
344 * Setting SAVE_REGS_IF_SUPPORTED makes SAVE_REGS irrelevant.
345 */
346 if (ops->flags & FTRACE_OPS_FL_SAVE_REGS &&
347 !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED))
348 return -EINVAL;
349
350 if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)
351 ops->flags |= FTRACE_OPS_FL_SAVE_REGS;
352#endif
353
354 if (!core_kernel_data((unsigned long)ops)) 270 if (!core_kernel_data((unsigned long)ops))
355 ops->flags |= FTRACE_OPS_FL_DYNAMIC; 271 ops->flags |= FTRACE_OPS_FL_DYNAMIC;
356 272
357 if (ops->flags & FTRACE_OPS_FL_GLOBAL) { 273 if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
358 add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops); 274 int first = ftrace_global_list == &ftrace_list_end;
275 add_ftrace_ops(&ftrace_global_list, ops);
359 ops->flags |= FTRACE_OPS_FL_ENABLED; 276 ops->flags |= FTRACE_OPS_FL_ENABLED;
360 } else if (ops->flags & FTRACE_OPS_FL_CONTROL) { 277 if (first)
361 if (control_ops_alloc(ops)) 278 add_ftrace_ops(&ftrace_ops_list, &global_ops);
362 return -ENOMEM;
363 add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
364 } else 279 } else
365 add_ftrace_ops(&ftrace_ops_list, ops); 280 add_ftrace_ops(&ftrace_ops_list, ops);
366 281
@@ -384,23 +299,11 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
384 return -EINVAL; 299 return -EINVAL;
385 300
386 if (ops->flags & FTRACE_OPS_FL_GLOBAL) { 301 if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
387 ret = remove_ftrace_list_ops(&ftrace_global_list, 302 ret = remove_ftrace_ops(&ftrace_global_list, ops);
388 &global_ops, ops); 303 if (!ret && ftrace_global_list == &ftrace_list_end)
304 ret = remove_ftrace_ops(&ftrace_ops_list, &global_ops);
389 if (!ret) 305 if (!ret)
390 ops->flags &= ~FTRACE_OPS_FL_ENABLED; 306 ops->flags &= ~FTRACE_OPS_FL_ENABLED;
391 } else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
392 ret = remove_ftrace_list_ops(&ftrace_control_list,
393 &control_ops, ops);
394 if (!ret) {
395 /*
396 * The ftrace_ops is now removed from the list,
397 * so there'll be no new users. We must ensure
398 * all current users are done before we free
399 * the control data.
400 */
401 synchronize_sched();
402 control_ops_free(ops);
403 }
404 } else 307 } else
405 ret = remove_ftrace_ops(&ftrace_ops_list, ops); 308 ret = remove_ftrace_ops(&ftrace_ops_list, ops);
406 309
@@ -799,8 +702,7 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
799} 702}
800 703
801static void 704static void
802function_profile_call(unsigned long ip, unsigned long parent_ip, 705function_profile_call(unsigned long ip, unsigned long parent_ip)
803 struct ftrace_ops *ops, struct pt_regs *regs)
804{ 706{
805 struct ftrace_profile_stat *stat; 707 struct ftrace_profile_stat *stat;
806 struct ftrace_profile *rec; 708 struct ftrace_profile *rec;
@@ -830,7 +732,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip,
830#ifdef CONFIG_FUNCTION_GRAPH_TRACER 732#ifdef CONFIG_FUNCTION_GRAPH_TRACER
831static int profile_graph_entry(struct ftrace_graph_ent *trace) 733static int profile_graph_entry(struct ftrace_graph_ent *trace)
832{ 734{
833 function_profile_call(trace->func, 0, NULL, NULL); 735 function_profile_call(trace->func, 0);
834 return 1; 736 return 1;
835} 737}
836 738
@@ -890,7 +792,6 @@ static void unregister_ftrace_profiler(void)
890#else 792#else
891static struct ftrace_ops ftrace_profile_ops __read_mostly = { 793static struct ftrace_ops ftrace_profile_ops __read_mostly = {
892 .func = function_profile_call, 794 .func = function_profile_call,
893 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
894}; 795};
895 796
896static int register_ftrace_profiler(void) 797static int register_ftrace_profiler(void)
@@ -1045,6 +946,13 @@ struct ftrace_func_probe {
1045 struct rcu_head rcu; 946 struct rcu_head rcu;
1046}; 947};
1047 948
949enum {
950 FTRACE_UPDATE_CALLS = (1 << 0),
951 FTRACE_DISABLE_CALLS = (1 << 1),
952 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
953 FTRACE_START_FUNC_RET = (1 << 3),
954 FTRACE_STOP_FUNC_RET = (1 << 4),
955};
1048struct ftrace_func_entry { 956struct ftrace_func_entry {
1049 struct hlist_node hlist; 957 struct hlist_node hlist;
1050 unsigned long ip; 958 unsigned long ip;
@@ -1073,22 +981,20 @@ static struct ftrace_ops global_ops = {
1073 .func = ftrace_stub, 981 .func = ftrace_stub,
1074 .notrace_hash = EMPTY_HASH, 982 .notrace_hash = EMPTY_HASH,
1075 .filter_hash = EMPTY_HASH, 983 .filter_hash = EMPTY_HASH,
1076 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
1077}; 984};
1078 985
986static struct dyn_ftrace *ftrace_new_addrs;
987
1079static DEFINE_MUTEX(ftrace_regex_lock); 988static DEFINE_MUTEX(ftrace_regex_lock);
1080 989
1081struct ftrace_page { 990struct ftrace_page {
1082 struct ftrace_page *next; 991 struct ftrace_page *next;
1083 struct dyn_ftrace *records;
1084 int index; 992 int index;
1085 int size; 993 struct dyn_ftrace records[];
1086}; 994};
1087 995
1088static struct ftrace_page *ftrace_new_pgs; 996#define ENTRIES_PER_PAGE \
1089 997 ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
1090#define ENTRY_SIZE sizeof(struct dyn_ftrace)
1091#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)
1092 998
1093/* estimate from running different kernels */ 999/* estimate from running different kernels */
1094#define NR_TO_INIT 10000 1000#define NR_TO_INIT 10000
@@ -1096,10 +1002,7 @@ static struct ftrace_page *ftrace_new_pgs;
1096static struct ftrace_page *ftrace_pages_start; 1002static struct ftrace_page *ftrace_pages_start;
1097static struct ftrace_page *ftrace_pages; 1003static struct ftrace_page *ftrace_pages;
1098 1004
1099static bool ftrace_hash_empty(struct ftrace_hash *hash) 1005static struct dyn_ftrace *ftrace_free_records;
1100{
1101 return !hash || !hash->count;
1102}
1103 1006
1104static struct ftrace_func_entry * 1007static struct ftrace_func_entry *
1105ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) 1008ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
@@ -1109,7 +1012,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
1109 struct hlist_head *hhd; 1012 struct hlist_head *hhd;
1110 struct hlist_node *n; 1013 struct hlist_node *n;
1111 1014
1112 if (ftrace_hash_empty(hash)) 1015 if (!hash->count)
1113 return NULL; 1016 return NULL;
1114 1017
1115 if (hash->size_bits > 0) 1018 if (hash->size_bits > 0)
@@ -1216,12 +1119,6 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
1216 call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); 1119 call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu);
1217} 1120}
1218 1121
1219void ftrace_free_filter(struct ftrace_ops *ops)
1220{
1221 free_ftrace_hash(ops->filter_hash);
1222 free_ftrace_hash(ops->notrace_hash);
1223}
1224
1225static struct ftrace_hash *alloc_ftrace_hash(int size_bits) 1122static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
1226{ 1123{
1227 struct ftrace_hash *hash; 1124 struct ftrace_hash *hash;
@@ -1232,7 +1129,7 @@ static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
1232 return NULL; 1129 return NULL;
1233 1130
1234 size = 1 << size_bits; 1131 size = 1 << size_bits;
1235 hash->buckets = kcalloc(size, sizeof(*hash->buckets), GFP_KERNEL); 1132 hash->buckets = kzalloc(sizeof(*hash->buckets) * size, GFP_KERNEL);
1236 1133
1237 if (!hash->buckets) { 1134 if (!hash->buckets) {
1238 kfree(hash); 1135 kfree(hash);
@@ -1259,7 +1156,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
1259 return NULL; 1156 return NULL;
1260 1157
1261 /* Empty hash? */ 1158 /* Empty hash? */
1262 if (ftrace_hash_empty(hash)) 1159 if (!hash || !hash->count)
1263 return new_hash; 1160 return new_hash;
1264 1161
1265 size = 1 << hash->size_bits; 1162 size = 1 << hash->size_bits;
@@ -1313,9 +1210,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
1313 if (!src->count) { 1210 if (!src->count) {
1314 free_ftrace_hash_rcu(*dst); 1211 free_ftrace_hash_rcu(*dst);
1315 rcu_assign_pointer(*dst, EMPTY_HASH); 1212 rcu_assign_pointer(*dst, EMPTY_HASH);
1316 /* still need to update the function records */ 1213 return 0;
1317 ret = 0;
1318 goto out;
1319 } 1214 }
1320 1215
1321 /* 1216 /*
@@ -1384,9 +1279,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
1384 filter_hash = rcu_dereference_raw(ops->filter_hash); 1279 filter_hash = rcu_dereference_raw(ops->filter_hash);
1385 notrace_hash = rcu_dereference_raw(ops->notrace_hash); 1280 notrace_hash = rcu_dereference_raw(ops->notrace_hash);
1386 1281
1387 if ((ftrace_hash_empty(filter_hash) || 1282 if ((!filter_hash || !filter_hash->count ||
1388 ftrace_lookup_ip(filter_hash, ip)) && 1283 ftrace_lookup_ip(filter_hash, ip)) &&
1389 (ftrace_hash_empty(notrace_hash) || 1284 (!notrace_hash || !notrace_hash->count ||
1390 !ftrace_lookup_ip(notrace_hash, ip))) 1285 !ftrace_lookup_ip(notrace_hash, ip)))
1391 ret = 1; 1286 ret = 1;
1392 else 1287 else
@@ -1409,76 +1304,6 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
1409 } \ 1304 } \
1410 } 1305 }
1411 1306
1412
1413static int ftrace_cmp_recs(const void *a, const void *b)
1414{
1415 const struct dyn_ftrace *key = a;
1416 const struct dyn_ftrace *rec = b;
1417
1418 if (key->flags < rec->ip)
1419 return -1;
1420 if (key->ip >= rec->ip + MCOUNT_INSN_SIZE)
1421 return 1;
1422 return 0;
1423}
1424
1425static unsigned long ftrace_location_range(unsigned long start, unsigned long end)
1426{
1427 struct ftrace_page *pg;
1428 struct dyn_ftrace *rec;
1429 struct dyn_ftrace key;
1430
1431 key.ip = start;
1432 key.flags = end; /* overload flags, as it is unsigned long */
1433
1434 for (pg = ftrace_pages_start; pg; pg = pg->next) {
1435 if (end < pg->records[0].ip ||
1436 start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
1437 continue;
1438 rec = bsearch(&key, pg->records, pg->index,
1439 sizeof(struct dyn_ftrace),
1440 ftrace_cmp_recs);
1441 if (rec)
1442 return rec->ip;
1443 }
1444
1445 return 0;
1446}
1447
1448/**
1449 * ftrace_location - return true if the ip giving is a traced location
1450 * @ip: the instruction pointer to check
1451 *
1452 * Returns rec->ip if @ip given is a pointer to a ftrace location.
1453 * That is, the instruction that is either a NOP or call to
1454 * the function tracer. It checks the ftrace internal tables to
1455 * determine if the address belongs or not.
1456 */
1457unsigned long ftrace_location(unsigned long ip)
1458{
1459 return ftrace_location_range(ip, ip);
1460}
1461
1462/**
1463 * ftrace_text_reserved - return true if range contains an ftrace location
1464 * @start: start of range to search
1465 * @end: end of range to search (inclusive). @end points to the last byte to check.
1466 *
1467 * Returns 1 if @start and @end contains a ftrace location.
1468 * That is, the instruction that is either a NOP or call to
1469 * the function tracer. It checks the ftrace internal tables to
1470 * determine if the address belongs or not.
1471 */
1472int ftrace_text_reserved(void *start, void *end)
1473{
1474 unsigned long ret;
1475
1476 ret = ftrace_location_range((unsigned long)start,
1477 (unsigned long)end);
1478
1479 return (int)!!ret;
1480}
1481
1482static void __ftrace_hash_rec_update(struct ftrace_ops *ops, 1307static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
1483 int filter_hash, 1308 int filter_hash,
1484 bool inc) 1309 bool inc)
@@ -1508,7 +1333,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
1508 if (filter_hash) { 1333 if (filter_hash) {
1509 hash = ops->filter_hash; 1334 hash = ops->filter_hash;
1510 other_hash = ops->notrace_hash; 1335 other_hash = ops->notrace_hash;
1511 if (ftrace_hash_empty(hash)) 1336 if (!hash || !hash->count)
1512 all = 1; 1337 all = 1;
1513 } else { 1338 } else {
1514 inc = !inc; 1339 inc = !inc;
@@ -1518,7 +1343,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
1518 * If the notrace hash has no items, 1343 * If the notrace hash has no items,
1519 * then there's nothing to do. 1344 * then there's nothing to do.
1520 */ 1345 */
1521 if (ftrace_hash_empty(hash)) 1346 if (hash && !hash->count)
1522 return; 1347 return;
1523 } 1348 }
1524 1349
@@ -1535,8 +1360,8 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
1535 if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip)) 1360 if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip))
1536 match = 1; 1361 match = 1;
1537 } else { 1362 } else {
1538 in_hash = !!ftrace_lookup_ip(hash, rec->ip); 1363 in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip);
1539 in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip); 1364 in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip);
1540 1365
1541 /* 1366 /*
1542 * 1367 *
@@ -1544,7 +1369,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
1544 if (filter_hash && in_hash && !in_other_hash) 1369 if (filter_hash && in_hash && !in_other_hash)
1545 match = 1; 1370 match = 1;
1546 else if (!filter_hash && in_hash && 1371 else if (!filter_hash && in_hash &&
1547 (in_other_hash || ftrace_hash_empty(other_hash))) 1372 (in_other_hash || !other_hash->count))
1548 match = 1; 1373 match = 1;
1549 } 1374 }
1550 if (!match) 1375 if (!match)
@@ -1554,12 +1379,6 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
1554 rec->flags++; 1379 rec->flags++;
1555 if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX)) 1380 if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX))
1556 return; 1381 return;
1557 /*
1558 * If any ops wants regs saved for this function
1559 * then all ops will get saved regs.
1560 */
1561 if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
1562 rec->flags |= FTRACE_FL_REGS;
1563 } else { 1382 } else {
1564 if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0)) 1383 if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0))
1565 return; 1384 return;
@@ -1584,6 +1403,65 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
1584 __ftrace_hash_rec_update(ops, filter_hash, 1); 1403 __ftrace_hash_rec_update(ops, filter_hash, 1);
1585} 1404}
1586 1405
1406static void ftrace_free_rec(struct dyn_ftrace *rec)
1407{
1408 rec->freelist = ftrace_free_records;
1409 ftrace_free_records = rec;
1410 rec->flags |= FTRACE_FL_FREE;
1411}
1412
1413static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
1414{
1415 struct dyn_ftrace *rec;
1416
1417 /* First check for freed records */
1418 if (ftrace_free_records) {
1419 rec = ftrace_free_records;
1420
1421 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
1422 FTRACE_WARN_ON_ONCE(1);
1423 ftrace_free_records = NULL;
1424 return NULL;
1425 }
1426
1427 ftrace_free_records = rec->freelist;
1428 memset(rec, 0, sizeof(*rec));
1429 return rec;
1430 }
1431
1432 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
1433 if (!ftrace_pages->next) {
1434 /* allocate another page */
1435 ftrace_pages->next =
1436 (void *)get_zeroed_page(GFP_KERNEL);
1437 if (!ftrace_pages->next)
1438 return NULL;
1439 }
1440 ftrace_pages = ftrace_pages->next;
1441 }
1442
1443 return &ftrace_pages->records[ftrace_pages->index++];
1444}
1445
1446static struct dyn_ftrace *
1447ftrace_record_ip(unsigned long ip)
1448{
1449 struct dyn_ftrace *rec;
1450
1451 if (ftrace_disabled)
1452 return NULL;
1453
1454 rec = ftrace_alloc_dyn_node(ip);
1455 if (!rec)
1456 return NULL;
1457
1458 rec->ip = ip;
1459 rec->newlist = ftrace_new_addrs;
1460 ftrace_new_addrs = rec;
1461
1462 return rec;
1463}
1464
1587static void print_ip_ins(const char *fmt, unsigned char *p) 1465static void print_ip_ins(const char *fmt, unsigned char *p)
1588{ 1466{
1589 int i; 1467 int i;
@@ -1594,19 +1472,7 @@ static void print_ip_ins(const char *fmt, unsigned char *p)
1594 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); 1472 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
1595} 1473}
1596 1474
1597/** 1475static void ftrace_bug(int failed, unsigned long ip)
1598 * ftrace_bug - report and shutdown function tracer
1599 * @failed: The failed type (EFAULT, EINVAL, EPERM)
1600 * @ip: The address that failed
1601 *
1602 * The arch code that enables or disables the function tracing
1603 * can call ftrace_bug() when it has detected a problem in
1604 * modifying the code. @failed should be one of either:
1605 * EFAULT - if the problem happens on reading the @ip address
1606 * EINVAL - if what is read at @ip is not what was expected
1607 * EPERM - if the problem happens on writting to the @ip address
1608 */
1609void ftrace_bug(int failed, unsigned long ip)
1610{ 1476{
1611 switch (failed) { 1477 switch (failed) {
1612 case -EFAULT: 1478 case -EFAULT:
@@ -1633,10 +1499,30 @@ void ftrace_bug(int failed, unsigned long ip)
1633 } 1499 }
1634} 1500}
1635 1501
1636static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) 1502
1503/* Return 1 if the address range is reserved for ftrace */
1504int ftrace_text_reserved(void *start, void *end)
1505{
1506 struct dyn_ftrace *rec;
1507 struct ftrace_page *pg;
1508
1509 do_for_each_ftrace_rec(pg, rec) {
1510 if (rec->ip <= (unsigned long)end &&
1511 rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
1512 return 1;
1513 } while_for_each_ftrace_rec();
1514 return 0;
1515}
1516
1517
1518static int
1519__ftrace_replace_code(struct dyn_ftrace *rec, int update)
1637{ 1520{
1521 unsigned long ftrace_addr;
1638 unsigned long flag = 0UL; 1522 unsigned long flag = 0UL;
1639 1523
1524 ftrace_addr = (unsigned long)FTRACE_ADDR;
1525
1640 /* 1526 /*
1641 * If we are updating calls: 1527 * If we are updating calls:
1642 * 1528 *
@@ -1648,131 +1534,23 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
1648 * If we are disabling calls, then disable all records that 1534 * If we are disabling calls, then disable all records that
1649 * are enabled. 1535 * are enabled.
1650 */ 1536 */
1651 if (enable && (rec->flags & ~FTRACE_FL_MASK)) 1537 if (update && (rec->flags & ~FTRACE_FL_MASK))
1652 flag = FTRACE_FL_ENABLED; 1538 flag = FTRACE_FL_ENABLED;
1653 1539
1654 /*
1655 * If enabling and the REGS flag does not match the REGS_EN, then
1656 * do not ignore this record. Set flags to fail the compare against
1657 * ENABLED.
1658 */
1659 if (flag &&
1660 (!(rec->flags & FTRACE_FL_REGS) != !(rec->flags & FTRACE_FL_REGS_EN)))
1661 flag |= FTRACE_FL_REGS;
1662
1663 /* If the state of this record hasn't changed, then do nothing */ 1540 /* If the state of this record hasn't changed, then do nothing */
1664 if ((rec->flags & FTRACE_FL_ENABLED) == flag) 1541 if ((rec->flags & FTRACE_FL_ENABLED) == flag)
1665 return FTRACE_UPDATE_IGNORE;
1666
1667 if (flag) {
1668 /* Save off if rec is being enabled (for return value) */
1669 flag ^= rec->flags & FTRACE_FL_ENABLED;
1670
1671 if (update) {
1672 rec->flags |= FTRACE_FL_ENABLED;
1673 if (flag & FTRACE_FL_REGS) {
1674 if (rec->flags & FTRACE_FL_REGS)
1675 rec->flags |= FTRACE_FL_REGS_EN;
1676 else
1677 rec->flags &= ~FTRACE_FL_REGS_EN;
1678 }
1679 }
1680
1681 /*
1682 * If this record is being updated from a nop, then
1683 * return UPDATE_MAKE_CALL.
1684 * Otherwise, if the EN flag is set, then return
1685 * UPDATE_MODIFY_CALL_REGS to tell the caller to convert
1686 * from the non-save regs, to a save regs function.
1687 * Otherwise,
1688 * return UPDATE_MODIFY_CALL to tell the caller to convert
1689 * from the save regs, to a non-save regs function.
1690 */
1691 if (flag & FTRACE_FL_ENABLED)
1692 return FTRACE_UPDATE_MAKE_CALL;
1693 else if (rec->flags & FTRACE_FL_REGS_EN)
1694 return FTRACE_UPDATE_MODIFY_CALL_REGS;
1695 else
1696 return FTRACE_UPDATE_MODIFY_CALL;
1697 }
1698
1699 if (update) {
1700 /* If there's no more users, clear all flags */
1701 if (!(rec->flags & ~FTRACE_FL_MASK))
1702 rec->flags = 0;
1703 else
1704 /* Just disable the record (keep REGS state) */
1705 rec->flags &= ~FTRACE_FL_ENABLED;
1706 }
1707
1708 return FTRACE_UPDATE_MAKE_NOP;
1709}
1710
1711/**
1712 * ftrace_update_record, set a record that now is tracing or not
1713 * @rec: the record to update
1714 * @enable: set to 1 if the record is tracing, zero to force disable
1715 *
1716 * The records that represent all functions that can be traced need
1717 * to be updated when tracing has been enabled.
1718 */
1719int ftrace_update_record(struct dyn_ftrace *rec, int enable)
1720{
1721 return ftrace_check_record(rec, enable, 1);
1722}
1723
1724/**
1725 * ftrace_test_record, check if the record has been enabled or not
1726 * @rec: the record to test
1727 * @enable: set to 1 to check if enabled, 0 if it is disabled
1728 *
1729 * The arch code may need to test if a record is already set to
1730 * tracing to determine how to modify the function code that it
1731 * represents.
1732 */
1733int ftrace_test_record(struct dyn_ftrace *rec, int enable)
1734{
1735 return ftrace_check_record(rec, enable, 0);
1736}
1737
1738static int
1739__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1740{
1741 unsigned long ftrace_old_addr;
1742 unsigned long ftrace_addr;
1743 int ret;
1744
1745 ret = ftrace_update_record(rec, enable);
1746
1747 if (rec->flags & FTRACE_FL_REGS)
1748 ftrace_addr = (unsigned long)FTRACE_REGS_ADDR;
1749 else
1750 ftrace_addr = (unsigned long)FTRACE_ADDR;
1751
1752 switch (ret) {
1753 case FTRACE_UPDATE_IGNORE:
1754 return 0; 1542 return 0;
1755 1543
1756 case FTRACE_UPDATE_MAKE_CALL: 1544 if (flag) {
1545 rec->flags |= FTRACE_FL_ENABLED;
1757 return ftrace_make_call(rec, ftrace_addr); 1546 return ftrace_make_call(rec, ftrace_addr);
1758
1759 case FTRACE_UPDATE_MAKE_NOP:
1760 return ftrace_make_nop(NULL, rec, ftrace_addr);
1761
1762 case FTRACE_UPDATE_MODIFY_CALL_REGS:
1763 case FTRACE_UPDATE_MODIFY_CALL:
1764 if (rec->flags & FTRACE_FL_REGS)
1765 ftrace_old_addr = (unsigned long)FTRACE_ADDR;
1766 else
1767 ftrace_old_addr = (unsigned long)FTRACE_REGS_ADDR;
1768
1769 return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
1770 } 1547 }
1771 1548
1772 return -1; /* unknow ftrace bug */ 1549 rec->flags &= ~FTRACE_FL_ENABLED;
1550 return ftrace_make_nop(NULL, rec, ftrace_addr);
1773} 1551}
1774 1552
1775void __weak ftrace_replace_code(int enable) 1553static void ftrace_replace_code(int update)
1776{ 1554{
1777 struct dyn_ftrace *rec; 1555 struct dyn_ftrace *rec;
1778 struct ftrace_page *pg; 1556 struct ftrace_page *pg;
@@ -1782,7 +1560,11 @@ void __weak ftrace_replace_code(int enable)
1782 return; 1560 return;
1783 1561
1784 do_for_each_ftrace_rec(pg, rec) { 1562 do_for_each_ftrace_rec(pg, rec) {
1785 failed = __ftrace_replace_code(rec, enable); 1563 /* Skip over free records */
1564 if (rec->flags & FTRACE_FL_FREE)
1565 continue;
1566
1567 failed = __ftrace_replace_code(rec, update);
1786 if (failed) { 1568 if (failed) {
1787 ftrace_bug(failed, rec->ip); 1569 ftrace_bug(failed, rec->ip);
1788 /* Stop processing */ 1570 /* Stop processing */
@@ -1791,78 +1573,6 @@ void __weak ftrace_replace_code(int enable)
1791 } while_for_each_ftrace_rec(); 1573 } while_for_each_ftrace_rec();
1792} 1574}
1793 1575
1794struct ftrace_rec_iter {
1795 struct ftrace_page *pg;
1796 int index;
1797};
1798
1799/**
1800 * ftrace_rec_iter_start, start up iterating over traced functions
1801 *
1802 * Returns an iterator handle that is used to iterate over all
1803 * the records that represent address locations where functions
1804 * are traced.
1805 *
1806 * May return NULL if no records are available.
1807 */
1808struct ftrace_rec_iter *ftrace_rec_iter_start(void)
1809{
1810 /*
1811 * We only use a single iterator.
1812 * Protected by the ftrace_lock mutex.
1813 */
1814 static struct ftrace_rec_iter ftrace_rec_iter;
1815 struct ftrace_rec_iter *iter = &ftrace_rec_iter;
1816
1817 iter->pg = ftrace_pages_start;
1818 iter->index = 0;
1819
1820 /* Could have empty pages */
1821 while (iter->pg && !iter->pg->index)
1822 iter->pg = iter->pg->next;
1823
1824 if (!iter->pg)
1825 return NULL;
1826
1827 return iter;
1828}
1829
1830/**
1831 * ftrace_rec_iter_next, get the next record to process.
1832 * @iter: The handle to the iterator.
1833 *
1834 * Returns the next iterator after the given iterator @iter.
1835 */
1836struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter)
1837{
1838 iter->index++;
1839
1840 if (iter->index >= iter->pg->index) {
1841 iter->pg = iter->pg->next;
1842 iter->index = 0;
1843
1844 /* Could have empty pages */
1845 while (iter->pg && !iter->pg->index)
1846 iter->pg = iter->pg->next;
1847 }
1848
1849 if (!iter->pg)
1850 return NULL;
1851
1852 return iter;
1853}
1854
1855/**
1856 * ftrace_rec_iter_record, get the record at the iterator location
1857 * @iter: The current iterator location
1858 *
1859 * Returns the record that the current @iter is at.
1860 */
1861struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter)
1862{
1863 return &iter->pg->records[iter->index];
1864}
1865
1866static int 1576static int
1867ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) 1577ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
1868{ 1578{
@@ -1900,55 +1610,44 @@ int __weak ftrace_arch_code_modify_post_process(void)
1900 return 0; 1610 return 0;
1901} 1611}
1902 1612
1903void ftrace_modify_all_code(int command) 1613static int __ftrace_modify_code(void *data)
1904{ 1614{
1905 if (command & FTRACE_UPDATE_CALLS) 1615 int *command = data;
1616
1617 /*
1618 * Do not call function tracer while we update the code.
1619 * We are in stop machine, no worrying about races.
1620 */
1621 function_trace_stop++;
1622
1623 if (*command & FTRACE_UPDATE_CALLS)
1906 ftrace_replace_code(1); 1624 ftrace_replace_code(1);
1907 else if (command & FTRACE_DISABLE_CALLS) 1625 else if (*command & FTRACE_DISABLE_CALLS)
1908 ftrace_replace_code(0); 1626 ftrace_replace_code(0);
1909 1627
1910 if (command & FTRACE_UPDATE_TRACE_FUNC) 1628 if (*command & FTRACE_UPDATE_TRACE_FUNC)
1911 ftrace_update_ftrace_func(ftrace_trace_function); 1629 ftrace_update_ftrace_func(ftrace_trace_function);
1912 1630
1913 if (command & FTRACE_START_FUNC_RET) 1631 if (*command & FTRACE_START_FUNC_RET)
1914 ftrace_enable_ftrace_graph_caller(); 1632 ftrace_enable_ftrace_graph_caller();
1915 else if (command & FTRACE_STOP_FUNC_RET) 1633 else if (*command & FTRACE_STOP_FUNC_RET)
1916 ftrace_disable_ftrace_graph_caller(); 1634 ftrace_disable_ftrace_graph_caller();
1917}
1918
1919static int __ftrace_modify_code(void *data)
1920{
1921 int *command = data;
1922 1635
1923 ftrace_modify_all_code(*command); 1636#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
1637 /*
1638 * For archs that call ftrace_test_stop_func(), we must
1639 * wait till after we update all the function callers
1640 * before we update the callback. This keeps different
1641 * ops that record different functions from corrupting
1642 * each other.
1643 */
1644 __ftrace_trace_function = __ftrace_trace_function_delay;
1645#endif
1646 function_trace_stop--;
1924 1647
1925 return 0; 1648 return 0;
1926} 1649}
1927 1650
1928/**
1929 * ftrace_run_stop_machine, go back to the stop machine method
1930 * @command: The command to tell ftrace what to do
1931 *
1932 * If an arch needs to fall back to the stop machine method, the
1933 * it can call this function.
1934 */
1935void ftrace_run_stop_machine(int command)
1936{
1937 stop_machine(__ftrace_modify_code, &command, NULL);
1938}
1939
1940/**
1941 * arch_ftrace_update_code, modify the code to trace or not trace
1942 * @command: The command that needs to be done
1943 *
1944 * Archs can override this function if it does not need to
1945 * run stop_machine() to modify code.
1946 */
1947void __weak arch_ftrace_update_code(int command)
1948{
1949 ftrace_run_stop_machine(command);
1950}
1951
1952static void ftrace_run_update_code(int command) 1651static void ftrace_run_update_code(int command)
1953{ 1652{
1954 int ret; 1653 int ret;
@@ -1957,21 +1656,8 @@ static void ftrace_run_update_code(int command)
1957 FTRACE_WARN_ON(ret); 1656 FTRACE_WARN_ON(ret);
1958 if (ret) 1657 if (ret)
1959 return; 1658 return;
1960 /*
1961 * Do not call function tracer while we update the code.
1962 * We are in stop machine.
1963 */
1964 function_trace_stop++;
1965 1659
1966 /* 1660 stop_machine(__ftrace_modify_code, &command, NULL);
1967 * By default we use stop_machine() to modify the code.
1968 * But archs can do what ever they want as long as it
1969 * is safe. The stop_machine() is the safest, but also
1970 * produces the most overhead.
1971 */
1972 arch_ftrace_update_code(command);
1973
1974 function_trace_stop--;
1975 1661
1976 ret = ftrace_arch_code_modify_post_process(); 1662 ret = ftrace_arch_code_modify_post_process();
1977 FTRACE_WARN_ON(ret); 1663 FTRACE_WARN_ON(ret);
@@ -2098,16 +1784,14 @@ static int ops_traces_mod(struct ftrace_ops *ops)
2098 struct ftrace_hash *hash; 1784 struct ftrace_hash *hash;
2099 1785
2100 hash = ops->filter_hash; 1786 hash = ops->filter_hash;
2101 return ftrace_hash_empty(hash); 1787 return !!(!hash || !hash->count);
2102} 1788}
2103 1789
2104static int ftrace_update_code(struct module *mod) 1790static int ftrace_update_code(struct module *mod)
2105{ 1791{
2106 struct ftrace_page *pg;
2107 struct dyn_ftrace *p; 1792 struct dyn_ftrace *p;
2108 cycle_t start, stop; 1793 cycle_t start, stop;
2109 unsigned long ref = 0; 1794 unsigned long ref = 0;
2110 int i;
2111 1795
2112 /* 1796 /*
2113 * When adding a module, we need to check if tracers are 1797 * When adding a module, we need to check if tracers are
@@ -2129,44 +1813,46 @@ static int ftrace_update_code(struct module *mod)
2129 start = ftrace_now(raw_smp_processor_id()); 1813 start = ftrace_now(raw_smp_processor_id());
2130 ftrace_update_cnt = 0; 1814 ftrace_update_cnt = 0;
2131 1815
2132 for (pg = ftrace_new_pgs; pg; pg = pg->next) { 1816 while (ftrace_new_addrs) {
2133 1817
2134 for (i = 0; i < pg->index; i++) { 1818 /* If something went wrong, bail without enabling anything */
2135 /* If something went wrong, bail without enabling anything */ 1819 if (unlikely(ftrace_disabled))
2136 if (unlikely(ftrace_disabled)) 1820 return -1;
2137 return -1;
2138 1821
2139 p = &pg->records[i]; 1822 p = ftrace_new_addrs;
2140 p->flags = ref; 1823 ftrace_new_addrs = p->newlist;
1824 p->flags = ref;
2141 1825
2142 /* 1826 /*
2143 * Do the initial record conversion from mcount jump 1827 * Do the initial record conversion from mcount jump
2144 * to the NOP instructions. 1828 * to the NOP instructions.
2145 */ 1829 */
2146 if (!ftrace_code_disable(mod, p)) 1830 if (!ftrace_code_disable(mod, p)) {
2147 break; 1831 ftrace_free_rec(p);
1832 /* Game over */
1833 break;
1834 }
2148 1835
2149 ftrace_update_cnt++; 1836 ftrace_update_cnt++;
2150 1837
2151 /* 1838 /*
2152 * If the tracing is enabled, go ahead and enable the record. 1839 * If the tracing is enabled, go ahead and enable the record.
2153 * 1840 *
2154 * The reason not to enable the record immediatelly is the 1841 * The reason not to enable the record immediatelly is the
2155 * inherent check of ftrace_make_nop/ftrace_make_call for 1842 * inherent check of ftrace_make_nop/ftrace_make_call for
2156 * correct previous instructions. Making first the NOP 1843 * correct previous instructions. Making first the NOP
2157 * conversion puts the module to the correct state, thus 1844 * conversion puts the module to the correct state, thus
2158 * passing the ftrace_make_call check. 1845 * passing the ftrace_make_call check.
2159 */ 1846 */
2160 if (ftrace_start_up && ref) { 1847 if (ftrace_start_up && ref) {
2161 int failed = __ftrace_replace_code(p, 1); 1848 int failed = __ftrace_replace_code(p, 1);
2162 if (failed) 1849 if (failed) {
2163 ftrace_bug(failed, p->ip); 1850 ftrace_bug(failed, p->ip);
1851 ftrace_free_rec(p);
2164 } 1852 }
2165 } 1853 }
2166 } 1854 }
2167 1855
2168 ftrace_new_pgs = NULL;
2169
2170 stop = ftrace_now(raw_smp_processor_id()); 1856 stop = ftrace_now(raw_smp_processor_id());
2171 ftrace_update_time = stop - start; 1857 ftrace_update_time = stop - start;
2172 ftrace_update_tot_cnt += ftrace_update_cnt; 1858 ftrace_update_tot_cnt += ftrace_update_cnt;
@@ -2174,109 +1860,58 @@ static int ftrace_update_code(struct module *mod)
2174 return 0; 1860 return 0;
2175} 1861}
2176 1862
2177static int ftrace_allocate_records(struct ftrace_page *pg, int count) 1863static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
2178{ 1864{
2179 int order; 1865 struct ftrace_page *pg;
2180 int cnt; 1866 int cnt;
1867 int i;
2181 1868
2182 if (WARN_ON(!count)) 1869 /* allocate a few pages */
2183 return -EINVAL; 1870 ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
2184 1871 if (!ftrace_pages_start)
2185 order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); 1872 return -1;
2186 1873
2187 /* 1874 /*
2188 * We want to fill as much as possible. No more than a page 1875 * Allocate a few more pages.
2189 * may be empty. 1876 *
1877 * TODO: have some parser search vmlinux before
1878 * final linking to find all calls to ftrace.
1879 * Then we can:
1880 * a) know how many pages to allocate.
1881 * and/or
1882 * b) set up the table then.
1883 *
1884 * The dynamic code is still necessary for
1885 * modules.
2190 */ 1886 */
2191 while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE)
2192 order--;
2193
2194 again:
2195 pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
2196 1887
2197 if (!pg->records) { 1888 pg = ftrace_pages = ftrace_pages_start;
2198 /* if we can't allocate this size, try something smaller */
2199 if (!order)
2200 return -ENOMEM;
2201 order >>= 1;
2202 goto again;
2203 }
2204
2205 cnt = (PAGE_SIZE << order) / ENTRY_SIZE;
2206 pg->size = cnt;
2207 1889
2208 if (cnt > count) 1890 cnt = num_to_init / ENTRIES_PER_PAGE;
2209 cnt = count; 1891 pr_info("ftrace: allocating %ld entries in %d pages\n",
2210 1892 num_to_init, cnt + 1);
2211 return cnt;
2212}
2213
2214static struct ftrace_page *
2215ftrace_allocate_pages(unsigned long num_to_init)
2216{
2217 struct ftrace_page *start_pg;
2218 struct ftrace_page *pg;
2219 int order;
2220 int cnt;
2221
2222 if (!num_to_init)
2223 return 0;
2224
2225 start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL);
2226 if (!pg)
2227 return NULL;
2228
2229 /*
2230 * Try to allocate as much as possible in one continues
2231 * location that fills in all of the space. We want to
2232 * waste as little space as possible.
2233 */
2234 for (;;) {
2235 cnt = ftrace_allocate_records(pg, num_to_init);
2236 if (cnt < 0)
2237 goto free_pages;
2238 1893
2239 num_to_init -= cnt; 1894 for (i = 0; i < cnt; i++) {
2240 if (!num_to_init) 1895 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
2241 break;
2242 1896
2243 pg->next = kzalloc(sizeof(*pg), GFP_KERNEL); 1897 /* If we fail, we'll try later anyway */
2244 if (!pg->next) 1898 if (!pg->next)
2245 goto free_pages; 1899 break;
2246 1900
2247 pg = pg->next; 1901 pg = pg->next;
2248 } 1902 }
2249 1903
2250 return start_pg;
2251
2252 free_pages:
2253 while (start_pg) {
2254 order = get_count_order(pg->size / ENTRIES_PER_PAGE);
2255 free_pages((unsigned long)pg->records, order);
2256 start_pg = pg->next;
2257 kfree(pg);
2258 pg = start_pg;
2259 }
2260 pr_info("ftrace: FAILED to allocate memory for functions\n");
2261 return NULL;
2262}
2263
2264static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
2265{
2266 int cnt;
2267
2268 if (!num_to_init) {
2269 pr_info("ftrace: No functions to be traced?\n");
2270 return -1;
2271 }
2272
2273 cnt = num_to_init / ENTRIES_PER_PAGE;
2274 pr_info("ftrace: allocating %ld entries in %d pages\n",
2275 num_to_init, cnt + 1);
2276
2277 return 0; 1904 return 0;
2278} 1905}
2279 1906
1907enum {
1908 FTRACE_ITER_FILTER = (1 << 0),
1909 FTRACE_ITER_NOTRACE = (1 << 1),
1910 FTRACE_ITER_PRINTALL = (1 << 2),
1911 FTRACE_ITER_HASH = (1 << 3),
1912 FTRACE_ITER_ENABLED = (1 << 4),
1913};
1914
2280#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 1915#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
2281 1916
2282struct ftrace_iterator { 1917struct ftrace_iterator {
@@ -2341,9 +1976,6 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
2341 void *p = NULL; 1976 void *p = NULL;
2342 loff_t l; 1977 loff_t l;
2343 1978
2344 if (!(iter->flags & FTRACE_ITER_DO_HASH))
2345 return NULL;
2346
2347 if (iter->func_pos > *pos) 1979 if (iter->func_pos > *pos)
2348 return NULL; 1980 return NULL;
2349 1981
@@ -2387,7 +2019,7 @@ static void *
2387t_next(struct seq_file *m, void *v, loff_t *pos) 2019t_next(struct seq_file *m, void *v, loff_t *pos)
2388{ 2020{
2389 struct ftrace_iterator *iter = m->private; 2021 struct ftrace_iterator *iter = m->private;
2390 struct ftrace_ops *ops = iter->ops; 2022 struct ftrace_ops *ops = &global_ops;
2391 struct dyn_ftrace *rec = NULL; 2023 struct dyn_ftrace *rec = NULL;
2392 2024
2393 if (unlikely(ftrace_disabled)) 2025 if (unlikely(ftrace_disabled))
@@ -2411,7 +2043,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
2411 } 2043 }
2412 } else { 2044 } else {
2413 rec = &iter->pg->records[iter->idx++]; 2045 rec = &iter->pg->records[iter->idx++];
2414 if (((iter->flags & FTRACE_ITER_FILTER) && 2046 if ((rec->flags & FTRACE_FL_FREE) ||
2047
2048 ((iter->flags & FTRACE_ITER_FILTER) &&
2415 !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) || 2049 !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) ||
2416 2050
2417 ((iter->flags & FTRACE_ITER_NOTRACE) && 2051 ((iter->flags & FTRACE_ITER_NOTRACE) &&
@@ -2437,13 +2071,13 @@ static void reset_iter_read(struct ftrace_iterator *iter)
2437{ 2071{
2438 iter->pos = 0; 2072 iter->pos = 0;
2439 iter->func_pos = 0; 2073 iter->func_pos = 0;
2440 iter->flags &= ~(FTRACE_ITER_PRINTALL | FTRACE_ITER_HASH); 2074 iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH);
2441} 2075}
2442 2076
2443static void *t_start(struct seq_file *m, loff_t *pos) 2077static void *t_start(struct seq_file *m, loff_t *pos)
2444{ 2078{
2445 struct ftrace_iterator *iter = m->private; 2079 struct ftrace_iterator *iter = m->private;
2446 struct ftrace_ops *ops = iter->ops; 2080 struct ftrace_ops *ops = &global_ops;
2447 void *p = NULL; 2081 void *p = NULL;
2448 loff_t l; 2082 loff_t l;
2449 2083
@@ -2463,8 +2097,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
2463 * off, we can short cut and just print out that all 2097 * off, we can short cut and just print out that all
2464 * functions are enabled. 2098 * functions are enabled.
2465 */ 2099 */
2466 if (iter->flags & FTRACE_ITER_FILTER && 2100 if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) {
2467 ftrace_hash_empty(ops->filter_hash)) {
2468 if (*pos > 0) 2101 if (*pos > 0)
2469 return t_hash_start(m, pos); 2102 return t_hash_start(m, pos);
2470 iter->flags |= FTRACE_ITER_PRINTALL; 2103 iter->flags |= FTRACE_ITER_PRINTALL;
@@ -2489,8 +2122,12 @@ static void *t_start(struct seq_file *m, loff_t *pos)
2489 break; 2122 break;
2490 } 2123 }
2491 2124
2492 if (!p) 2125 if (!p) {
2493 return t_hash_start(m, pos); 2126 if (iter->flags & FTRACE_ITER_FILTER)
2127 return t_hash_start(m, pos);
2128
2129 return NULL;
2130 }
2494 2131
2495 return iter; 2132 return iter;
2496} 2133}
@@ -2520,9 +2157,8 @@ static int t_show(struct seq_file *m, void *v)
2520 2157
2521 seq_printf(m, "%ps", (void *)rec->ip); 2158 seq_printf(m, "%ps", (void *)rec->ip);
2522 if (iter->flags & FTRACE_ITER_ENABLED) 2159 if (iter->flags & FTRACE_ITER_ENABLED)
2523 seq_printf(m, " (%ld)%s", 2160 seq_printf(m, " (%ld)",
2524 rec->flags & ~FTRACE_FL_MASK, 2161 rec->flags & ~FTRACE_FL_MASK);
2525 rec->flags & FTRACE_FL_REGS ? " R" : "");
2526 seq_printf(m, "\n"); 2162 seq_printf(m, "\n");
2527 2163
2528 return 0; 2164 return 0;
@@ -2539,35 +2175,55 @@ static int
2539ftrace_avail_open(struct inode *inode, struct file *file) 2175ftrace_avail_open(struct inode *inode, struct file *file)
2540{ 2176{
2541 struct ftrace_iterator *iter; 2177 struct ftrace_iterator *iter;
2178 int ret;
2542 2179
2543 if (unlikely(ftrace_disabled)) 2180 if (unlikely(ftrace_disabled))
2544 return -ENODEV; 2181 return -ENODEV;
2545 2182
2546 iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); 2183 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2547 if (iter) { 2184 if (!iter)
2548 iter->pg = ftrace_pages_start; 2185 return -ENOMEM;
2549 iter->ops = &global_ops; 2186
2187 iter->pg = ftrace_pages_start;
2188
2189 ret = seq_open(file, &show_ftrace_seq_ops);
2190 if (!ret) {
2191 struct seq_file *m = file->private_data;
2192
2193 m->private = iter;
2194 } else {
2195 kfree(iter);
2550 } 2196 }
2551 2197
2552 return iter ? 0 : -ENOMEM; 2198 return ret;
2553} 2199}
2554 2200
2555static int 2201static int
2556ftrace_enabled_open(struct inode *inode, struct file *file) 2202ftrace_enabled_open(struct inode *inode, struct file *file)
2557{ 2203{
2558 struct ftrace_iterator *iter; 2204 struct ftrace_iterator *iter;
2205 int ret;
2559 2206
2560 if (unlikely(ftrace_disabled)) 2207 if (unlikely(ftrace_disabled))
2561 return -ENODEV; 2208 return -ENODEV;
2562 2209
2563 iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); 2210 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2564 if (iter) { 2211 if (!iter)
2565 iter->pg = ftrace_pages_start; 2212 return -ENOMEM;
2566 iter->flags = FTRACE_ITER_ENABLED; 2213
2567 iter->ops = &global_ops; 2214 iter->pg = ftrace_pages_start;
2215 iter->flags = FTRACE_ITER_ENABLED;
2216
2217 ret = seq_open(file, &show_ftrace_seq_ops);
2218 if (!ret) {
2219 struct seq_file *m = file->private_data;
2220
2221 m->private = iter;
2222 } else {
2223 kfree(iter);
2568 } 2224 }
2569 2225
2570 return iter ? 0 : -ENOMEM; 2226 return ret;
2571} 2227}
2572 2228
2573static void ftrace_filter_reset(struct ftrace_hash *hash) 2229static void ftrace_filter_reset(struct ftrace_hash *hash)
@@ -2577,23 +2233,7 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
2577 mutex_unlock(&ftrace_lock); 2233 mutex_unlock(&ftrace_lock);
2578} 2234}
2579 2235
2580/** 2236static int
2581 * ftrace_regex_open - initialize function tracer filter files
2582 * @ops: The ftrace_ops that hold the hash filters
2583 * @flag: The type of filter to process
2584 * @inode: The inode, usually passed in to your open routine
2585 * @file: The file, usually passed in to your open routine
2586 *
2587 * ftrace_regex_open() initializes the filter files for the
2588 * @ops. Depending on @flag it may process the filter hash or
2589 * the notrace hash of @ops. With this called from the open
2590 * routine, you can use ftrace_filter_write() for the write
2591 * routine if @flag has FTRACE_ITER_FILTER set, or
2592 * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
2593 * ftrace_regex_lseek() should be used as the lseek routine, and
2594 * release must call ftrace_regex_release().
2595 */
2596int
2597ftrace_regex_open(struct ftrace_ops *ops, int flag, 2237ftrace_regex_open(struct ftrace_ops *ops, int flag,
2598 struct inode *inode, struct file *file) 2238 struct inode *inode, struct file *file)
2599{ 2239{
@@ -2662,9 +2302,8 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
2662static int 2302static int
2663ftrace_filter_open(struct inode *inode, struct file *file) 2303ftrace_filter_open(struct inode *inode, struct file *file)
2664{ 2304{
2665 return ftrace_regex_open(&global_ops, 2305 return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER,
2666 FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH, 2306 inode, file);
2667 inode, file);
2668} 2307}
2669 2308
2670static int 2309static int
@@ -2674,13 +2313,13 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
2674 inode, file); 2313 inode, file);
2675} 2314}
2676 2315
2677loff_t 2316static loff_t
2678ftrace_regex_lseek(struct file *file, loff_t offset, int whence) 2317ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
2679{ 2318{
2680 loff_t ret; 2319 loff_t ret;
2681 2320
2682 if (file->f_mode & FMODE_READ) 2321 if (file->f_mode & FMODE_READ)
2683 ret = seq_lseek(file, offset, whence); 2322 ret = seq_lseek(file, offset, origin);
2684 else 2323 else
2685 file->f_pos = ret = 1; 2324 file->f_pos = ret = 1;
2686 2325
@@ -2783,6 +2422,7 @@ match_records(struct ftrace_hash *hash, char *buff,
2783 goto out_unlock; 2422 goto out_unlock;
2784 2423
2785 do_for_each_ftrace_rec(pg, rec) { 2424 do_for_each_ftrace_rec(pg, rec) {
2425
2786 if (ftrace_match_record(rec, mod, search, search_len, type)) { 2426 if (ftrace_match_record(rec, mod, search, search_len, type)) {
2787 ret = enter_record(hash, rec, not); 2427 ret = enter_record(hash, rec, not);
2788 if (ret < 0) { 2428 if (ret < 0) {
@@ -2868,10 +2508,10 @@ static int __init ftrace_mod_cmd_init(void)
2868{ 2508{
2869 return register_ftrace_command(&ftrace_mod_cmd); 2509 return register_ftrace_command(&ftrace_mod_cmd);
2870} 2510}
2871core_initcall(ftrace_mod_cmd_init); 2511device_initcall(ftrace_mod_cmd_init);
2872 2512
2873static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, 2513static void
2874 struct ftrace_ops *op, struct pt_regs *pt_regs) 2514function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
2875{ 2515{
2876 struct ftrace_func_probe *entry; 2516 struct ftrace_func_probe *entry;
2877 struct hlist_head *hhd; 2517 struct hlist_head *hhd;
@@ -3227,14 +2867,14 @@ out_unlock:
3227 return ret; 2867 return ret;
3228} 2868}
3229 2869
3230ssize_t 2870static ssize_t
3231ftrace_filter_write(struct file *file, const char __user *ubuf, 2871ftrace_filter_write(struct file *file, const char __user *ubuf,
3232 size_t cnt, loff_t *ppos) 2872 size_t cnt, loff_t *ppos)
3233{ 2873{
3234 return ftrace_regex_write(file, ubuf, cnt, ppos, 1); 2874 return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
3235} 2875}
3236 2876
3237ssize_t 2877static ssize_t
3238ftrace_notrace_write(struct file *file, const char __user *ubuf, 2878ftrace_notrace_write(struct file *file, const char __user *ubuf,
3239 size_t cnt, loff_t *ppos) 2879 size_t cnt, loff_t *ppos)
3240{ 2880{
@@ -3242,27 +2882,8 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf,
3242} 2882}
3243 2883
3244static int 2884static int
3245ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) 2885ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
3246{ 2886 int reset, int enable)
3247 struct ftrace_func_entry *entry;
3248
3249 if (!ftrace_location(ip))
3250 return -EINVAL;
3251
3252 if (remove) {
3253 entry = ftrace_lookup_ip(hash, ip);
3254 if (!entry)
3255 return -ENOENT;
3256 free_hash_entry(hash, entry);
3257 return 0;
3258 }
3259
3260 return add_hash_entry(hash, ip);
3261}
3262
3263static int
3264ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
3265 unsigned long ip, int remove, int reset, int enable)
3266{ 2887{
3267 struct ftrace_hash **orig_hash; 2888 struct ftrace_hash **orig_hash;
3268 struct ftrace_hash *hash; 2889 struct ftrace_hash *hash;
@@ -3287,15 +2908,8 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
3287 mutex_lock(&ftrace_regex_lock); 2908 mutex_lock(&ftrace_regex_lock);
3288 if (reset) 2909 if (reset)
3289 ftrace_filter_reset(hash); 2910 ftrace_filter_reset(hash);
3290 if (buf && !ftrace_match_records(hash, buf, len)) { 2911 if (buf)
3291 ret = -EINVAL; 2912 ftrace_match_records(hash, buf, len);
3292 goto out_regex_unlock;
3293 }
3294 if (ip) {
3295 ret = ftrace_match_addr(hash, ip, remove);
3296 if (ret < 0)
3297 goto out_regex_unlock;
3298 }
3299 2913
3300 mutex_lock(&ftrace_lock); 2914 mutex_lock(&ftrace_lock);
3301 ret = ftrace_hash_move(ops, enable, orig_hash, hash); 2915 ret = ftrace_hash_move(ops, enable, orig_hash, hash);
@@ -3305,44 +2919,12 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
3305 2919
3306 mutex_unlock(&ftrace_lock); 2920 mutex_unlock(&ftrace_lock);
3307 2921
3308 out_regex_unlock:
3309 mutex_unlock(&ftrace_regex_lock); 2922 mutex_unlock(&ftrace_regex_lock);
3310 2923
3311 free_ftrace_hash(hash); 2924 free_ftrace_hash(hash);
3312 return ret; 2925 return ret;
3313} 2926}
3314 2927
3315static int
3316ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove,
3317 int reset, int enable)
3318{
3319 return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable);
3320}
3321
3322/**
3323 * ftrace_set_filter_ip - set a function to filter on in ftrace by address
3324 * @ops - the ops to set the filter with
3325 * @ip - the address to add to or remove from the filter.
3326 * @remove - non zero to remove the ip from the filter
3327 * @reset - non zero to reset all filters before applying this filter.
3328 *
3329 * Filters denote which functions should be enabled when tracing is enabled
3330 * If @ip is NULL, it failes to update filter.
3331 */
3332int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
3333 int remove, int reset)
3334{
3335 return ftrace_set_addr(ops, ip, remove, reset, 1);
3336}
3337EXPORT_SYMBOL_GPL(ftrace_set_filter_ip);
3338
3339static int
3340ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
3341 int reset, int enable)
3342{
3343 return ftrace_set_hash(ops, buf, len, 0, 0, reset, enable);
3344}
3345
3346/** 2928/**
3347 * ftrace_set_filter - set a function to filter on in ftrace 2929 * ftrace_set_filter - set a function to filter on in ftrace
3348 * @ops - the ops to set the filter with 2930 * @ops - the ops to set the filter with
@@ -3353,10 +2935,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
3353 * Filters denote which functions should be enabled when tracing is enabled. 2935 * Filters denote which functions should be enabled when tracing is enabled.
3354 * If @buf is NULL and reset is set, all functions will be enabled for tracing. 2936 * If @buf is NULL and reset is set, all functions will be enabled for tracing.
3355 */ 2937 */
3356int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, 2938void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
3357 int len, int reset) 2939 int len, int reset)
3358{ 2940{
3359 return ftrace_set_regex(ops, buf, len, reset, 1); 2941 ftrace_set_regex(ops, buf, len, reset, 1);
3360} 2942}
3361EXPORT_SYMBOL_GPL(ftrace_set_filter); 2943EXPORT_SYMBOL_GPL(ftrace_set_filter);
3362 2944
@@ -3371,10 +2953,10 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter);
3371 * is enabled. If @buf is NULL and reset is set, all functions will be enabled 2953 * is enabled. If @buf is NULL and reset is set, all functions will be enabled
3372 * for tracing. 2954 * for tracing.
3373 */ 2955 */
3374int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, 2956void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
3375 int len, int reset) 2957 int len, int reset)
3376{ 2958{
3377 return ftrace_set_regex(ops, buf, len, reset, 0); 2959 ftrace_set_regex(ops, buf, len, reset, 0);
3378} 2960}
3379EXPORT_SYMBOL_GPL(ftrace_set_notrace); 2961EXPORT_SYMBOL_GPL(ftrace_set_notrace);
3380/** 2962/**
@@ -3459,8 +3041,8 @@ static void __init set_ftrace_early_graph(char *buf)
3459} 3041}
3460#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 3042#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
3461 3043
3462void __init 3044static void __init
3463ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable) 3045set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
3464{ 3046{
3465 char *func; 3047 char *func;
3466 3048
@@ -3473,16 +3055,17 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable)
3473static void __init set_ftrace_early_filters(void) 3055static void __init set_ftrace_early_filters(void)
3474{ 3056{
3475 if (ftrace_filter_buf[0]) 3057 if (ftrace_filter_buf[0])
3476 ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1); 3058 set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1);
3477 if (ftrace_notrace_buf[0]) 3059 if (ftrace_notrace_buf[0])
3478 ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0); 3060 set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0);
3479#ifdef CONFIG_FUNCTION_GRAPH_TRACER 3061#ifdef CONFIG_FUNCTION_GRAPH_TRACER
3480 if (ftrace_graph_buf[0]) 3062 if (ftrace_graph_buf[0])
3481 set_ftrace_early_graph(ftrace_graph_buf); 3063 set_ftrace_early_graph(ftrace_graph_buf);
3482#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 3064#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
3483} 3065}
3484 3066
3485int ftrace_regex_release(struct inode *inode, struct file *file) 3067static int
3068ftrace_regex_release(struct inode *inode, struct file *file)
3486{ 3069{
3487 struct seq_file *m = (struct seq_file *)file->private_data; 3070 struct seq_file *m = (struct seq_file *)file->private_data;
3488 struct ftrace_iterator *iter; 3071 struct ftrace_iterator *iter;
@@ -3683,6 +3266,9 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
3683 3266
3684 do_for_each_ftrace_rec(pg, rec) { 3267 do_for_each_ftrace_rec(pg, rec) {
3685 3268
3269 if (rec->flags & FTRACE_FL_FREE)
3270 continue;
3271
3686 if (ftrace_match_record(rec, NULL, search, search_len, type)) { 3272 if (ftrace_match_record(rec, NULL, search, search_len, type)) {
3687 /* if it is in the array */ 3273 /* if it is in the array */
3688 exists = false; 3274 exists = false;
@@ -3791,80 +3377,16 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
3791 return 0; 3377 return 0;
3792} 3378}
3793 3379
3794static int ftrace_cmp_ips(const void *a, const void *b)
3795{
3796 const unsigned long *ipa = a;
3797 const unsigned long *ipb = b;
3798
3799 if (*ipa > *ipb)
3800 return 1;
3801 if (*ipa < *ipb)
3802 return -1;
3803 return 0;
3804}
3805
3806static void ftrace_swap_ips(void *a, void *b, int size)
3807{
3808 unsigned long *ipa = a;
3809 unsigned long *ipb = b;
3810 unsigned long t;
3811
3812 t = *ipa;
3813 *ipa = *ipb;
3814 *ipb = t;
3815}
3816
3817static int ftrace_process_locs(struct module *mod, 3380static int ftrace_process_locs(struct module *mod,
3818 unsigned long *start, 3381 unsigned long *start,
3819 unsigned long *end) 3382 unsigned long *end)
3820{ 3383{
3821 struct ftrace_page *start_pg;
3822 struct ftrace_page *pg;
3823 struct dyn_ftrace *rec;
3824 unsigned long count;
3825 unsigned long *p; 3384 unsigned long *p;
3826 unsigned long addr; 3385 unsigned long addr;
3827 unsigned long flags = 0; /* Shut up gcc */ 3386 unsigned long flags = 0; /* Shut up gcc */
3828 int ret = -ENOMEM;
3829
3830 count = end - start;
3831
3832 if (!count)
3833 return 0;
3834
3835 sort(start, count, sizeof(*start),
3836 ftrace_cmp_ips, ftrace_swap_ips);
3837
3838 start_pg = ftrace_allocate_pages(count);
3839 if (!start_pg)
3840 return -ENOMEM;
3841 3387
3842 mutex_lock(&ftrace_lock); 3388 mutex_lock(&ftrace_lock);
3843
3844 /*
3845 * Core and each module needs their own pages, as
3846 * modules will free them when they are removed.
3847 * Force a new page to be allocated for modules.
3848 */
3849 if (!mod) {
3850 WARN_ON(ftrace_pages || ftrace_pages_start);
3851 /* First initialization */
3852 ftrace_pages = ftrace_pages_start = start_pg;
3853 } else {
3854 if (!ftrace_pages)
3855 goto out;
3856
3857 if (WARN_ON(ftrace_pages->next)) {
3858 /* Hmm, we have free pages? */
3859 while (ftrace_pages->next)
3860 ftrace_pages = ftrace_pages->next;
3861 }
3862
3863 ftrace_pages->next = start_pg;
3864 }
3865
3866 p = start; 3389 p = start;
3867 pg = start_pg;
3868 while (p < end) { 3390 while (p < end) {
3869 addr = ftrace_call_adjust(*p++); 3391 addr = ftrace_call_adjust(*p++);
3870 /* 3392 /*
@@ -3875,27 +3397,9 @@ static int ftrace_process_locs(struct module *mod,
3875 */ 3397 */
3876 if (!addr) 3398 if (!addr)
3877 continue; 3399 continue;
3878 3400 ftrace_record_ip(addr);
3879 if (pg->index == pg->size) {
3880 /* We should have allocated enough */
3881 if (WARN_ON(!pg->next))
3882 break;
3883 pg = pg->next;
3884 }
3885
3886 rec = &pg->records[pg->index++];
3887 rec->ip = addr;
3888 } 3401 }
3889 3402
3890 /* We should have used all pages */
3891 WARN_ON(pg->next);
3892
3893 /* Assign the last page to ftrace_pages */
3894 ftrace_pages = pg;
3895
3896 /* These new locations need to be initialized */
3897 ftrace_new_pgs = start_pg;
3898
3899 /* 3403 /*
3900 * We only need to disable interrupts on start up 3404 * We only need to disable interrupts on start up
3901 * because we are modifying code that an interrupt 3405 * because we are modifying code that an interrupt
@@ -3909,55 +3413,32 @@ static int ftrace_process_locs(struct module *mod,
3909 ftrace_update_code(mod); 3413 ftrace_update_code(mod);
3910 if (!mod) 3414 if (!mod)
3911 local_irq_restore(flags); 3415 local_irq_restore(flags);
3912 ret = 0;
3913 out:
3914 mutex_unlock(&ftrace_lock); 3416 mutex_unlock(&ftrace_lock);
3915 3417
3916 return ret; 3418 return 0;
3917} 3419}
3918 3420
3919#ifdef CONFIG_MODULES 3421#ifdef CONFIG_MODULES
3920
3921#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
3922
3923void ftrace_release_mod(struct module *mod) 3422void ftrace_release_mod(struct module *mod)
3924{ 3423{
3925 struct dyn_ftrace *rec; 3424 struct dyn_ftrace *rec;
3926 struct ftrace_page **last_pg;
3927 struct ftrace_page *pg; 3425 struct ftrace_page *pg;
3928 int order;
3929 3426
3930 mutex_lock(&ftrace_lock); 3427 mutex_lock(&ftrace_lock);
3931 3428
3932 if (ftrace_disabled) 3429 if (ftrace_disabled)
3933 goto out_unlock; 3430 goto out_unlock;
3934 3431
3935 /* 3432 do_for_each_ftrace_rec(pg, rec) {
3936 * Each module has its own ftrace_pages, remove
3937 * them from the list.
3938 */
3939 last_pg = &ftrace_pages_start;
3940 for (pg = ftrace_pages_start; pg; pg = *last_pg) {
3941 rec = &pg->records[0];
3942 if (within_module_core(rec->ip, mod)) { 3433 if (within_module_core(rec->ip, mod)) {
3943 /* 3434 /*
3944 * As core pages are first, the first 3435 * rec->ip is changed in ftrace_free_rec()
3945 * page should never be a module page. 3436 * It should not between s and e if record was freed.
3946 */ 3437 */
3947 if (WARN_ON(pg == ftrace_pages_start)) 3438 FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
3948 goto out_unlock; 3439 ftrace_free_rec(rec);
3949 3440 }
3950 /* Check if we are deleting the last page */ 3441 } while_for_each_ftrace_rec();
3951 if (pg == ftrace_pages)
3952 ftrace_pages = next_to_ftrace_page(last_pg);
3953
3954 *last_pg = pg->next;
3955 order = get_count_order(pg->size / ENTRIES_PER_PAGE);
3956 free_pages((unsigned long)pg->records, order);
3957 kfree(pg);
3958 } else
3959 last_pg = &pg->next;
3960 }
3961 out_unlock: 3442 out_unlock:
3962 mutex_unlock(&ftrace_lock); 3443 mutex_unlock(&ftrace_lock);
3963} 3444}
@@ -4047,7 +3528,6 @@ void __init ftrace_init(void)
4047 3528
4048static struct ftrace_ops global_ops = { 3529static struct ftrace_ops global_ops = {
4049 .func = ftrace_stub, 3530 .func = ftrace_stub,
4050 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
4051}; 3531};
4052 3532
4053static int __init ftrace_nodyn_init(void) 3533static int __init ftrace_nodyn_init(void)
@@ -4055,7 +3535,7 @@ static int __init ftrace_nodyn_init(void)
4055 ftrace_enabled = 1; 3535 ftrace_enabled = 1;
4056 return 0; 3536 return 0;
4057} 3537}
4058core_initcall(ftrace_nodyn_init); 3538device_initcall(ftrace_nodyn_init);
4059 3539
4060static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } 3540static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
4061static inline void ftrace_startup_enable(int command) { } 3541static inline void ftrace_startup_enable(int command) { }
@@ -4078,44 +3558,10 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
4078#endif /* CONFIG_DYNAMIC_FTRACE */ 3558#endif /* CONFIG_DYNAMIC_FTRACE */
4079 3559
4080static void 3560static void
4081ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, 3561ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
4082 struct ftrace_ops *op, struct pt_regs *regs)
4083{
4084 if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT)))
4085 return;
4086
4087 /*
4088 * Some of the ops may be dynamically allocated,
4089 * they must be freed after a synchronize_sched().
4090 */
4091 preempt_disable_notrace();
4092 trace_recursion_set(TRACE_CONTROL_BIT);
4093 op = rcu_dereference_raw(ftrace_control_list);
4094 while (op != &ftrace_list_end) {
4095 if (!ftrace_function_local_disabled(op) &&
4096 ftrace_ops_test(op, ip))
4097 op->func(ip, parent_ip, op, regs);
4098
4099 op = rcu_dereference_raw(op->next);
4100 };
4101 trace_recursion_clear(TRACE_CONTROL_BIT);
4102 preempt_enable_notrace();
4103}
4104
4105static struct ftrace_ops control_ops = {
4106 .func = ftrace_ops_control_func,
4107 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
4108};
4109
4110static inline void
4111__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
4112 struct ftrace_ops *ignored, struct pt_regs *regs)
4113{ 3562{
4114 struct ftrace_ops *op; 3563 struct ftrace_ops *op;
4115 3564
4116 if (function_trace_stop)
4117 return;
4118
4119 if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT))) 3565 if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
4120 return; 3566 return;
4121 3567
@@ -4128,39 +3574,13 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
4128 op = rcu_dereference_raw(ftrace_ops_list); 3574 op = rcu_dereference_raw(ftrace_ops_list);
4129 while (op != &ftrace_list_end) { 3575 while (op != &ftrace_list_end) {
4130 if (ftrace_ops_test(op, ip)) 3576 if (ftrace_ops_test(op, ip))
4131 op->func(ip, parent_ip, op, regs); 3577 op->func(ip, parent_ip);
4132 op = rcu_dereference_raw(op->next); 3578 op = rcu_dereference_raw(op->next);
4133 }; 3579 };
4134 preempt_enable_notrace(); 3580 preempt_enable_notrace();
4135 trace_recursion_clear(TRACE_INTERNAL_BIT); 3581 trace_recursion_clear(TRACE_INTERNAL_BIT);
4136} 3582}
4137 3583
4138/*
4139 * Some archs only support passing ip and parent_ip. Even though
4140 * the list function ignores the op parameter, we do not want any
4141 * C side effects, where a function is called without the caller
4142 * sending a third parameter.
4143 * Archs are to support both the regs and ftrace_ops at the same time.
4144 * If they support ftrace_ops, it is assumed they support regs.
4145 * If call backs want to use regs, they must either check for regs
4146 * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS.
4147 * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved.
4148 * An architecture can pass partial regs with ftrace_ops and still
4149 * set the ARCH_SUPPORT_FTARCE_OPS.
4150 */
4151#if ARCH_SUPPORTS_FTRACE_OPS
4152static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
4153 struct ftrace_ops *op, struct pt_regs *regs)
4154{
4155 __ftrace_ops_list_func(ip, parent_ip, NULL, regs);
4156}
4157#else
4158static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
4159{
4160 __ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
4161}
4162#endif
4163
4164static void clear_ftrace_swapper(void) 3584static void clear_ftrace_swapper(void)
4165{ 3585{
4166 struct task_struct *p; 3586 struct task_struct *p;
@@ -4381,7 +3801,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
4381 if (strlen(tmp) == 0) 3801 if (strlen(tmp) == 0)
4382 return 1; 3802 return 1;
4383 3803
4384 ret = kstrtol(tmp, 10, &val); 3804 ret = strict_strtol(tmp, 10, &val);
4385 if (ret < 0) 3805 if (ret < 0)
4386 return ret; 3806 return ret;
4387 3807
@@ -4441,14 +3861,6 @@ void ftrace_kill(void)
4441} 3861}
4442 3862
4443/** 3863/**
4444 * Test if ftrace is dead or not.
4445 */
4446int ftrace_is_dead(void)
4447{
4448 return ftrace_disabled;
4449}
4450
4451/**
4452 * register_ftrace_function - register a function for profiling 3864 * register_ftrace_function - register a function for profiling
4453 * @ops - ops structure that holds the function for profiling. 3865 * @ops - ops structure that holds the function for profiling.
4454 * 3866 *
@@ -4465,12 +3877,16 @@ int register_ftrace_function(struct ftrace_ops *ops)
4465 3877
4466 mutex_lock(&ftrace_lock); 3878 mutex_lock(&ftrace_lock);
4467 3879
3880 if (unlikely(ftrace_disabled))
3881 goto out_unlock;
3882
4468 ret = __register_ftrace_function(ops); 3883 ret = __register_ftrace_function(ops);
4469 if (!ret) 3884 if (!ret)
4470 ret = ftrace_startup(ops, 0); 3885 ret = ftrace_startup(ops, 0);
4471 3886
4472 mutex_unlock(&ftrace_lock);
4473 3887
3888 out_unlock:
3889 mutex_unlock(&ftrace_lock);
4474 return ret; 3890 return ret;
4475} 3891}
4476EXPORT_SYMBOL_GPL(register_ftrace_function); 3892EXPORT_SYMBOL_GPL(register_ftrace_function);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index ce8514feedc..731201bf4ac 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -23,8 +23,6 @@
23#include <asm/local.h> 23#include <asm/local.h>
24#include "trace.h" 24#include "trace.h"
25 25
26static void update_pages_handler(struct work_struct *work);
27
28/* 26/*
29 * The ring buffer header is special. We must manually up keep it. 27 * The ring buffer header is special. We must manually up keep it.
30 */ 28 */
@@ -156,12 +154,35 @@ enum {
156 154
157static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; 155static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
158 156
159/* Used for individual buffers (after the counter) */
160#define RB_BUFFER_OFF (1 << 20)
161
162#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) 157#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
163 158
164/** 159/**
160 * tracing_on - enable all tracing buffers
161 *
162 * This function enables all tracing buffers that may have been
163 * disabled with tracing_off.
164 */
165void tracing_on(void)
166{
167 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
168}
169EXPORT_SYMBOL_GPL(tracing_on);
170
171/**
172 * tracing_off - turn off all tracing buffers
173 *
174 * This function stops all tracing buffers from recording data.
175 * It does not disable any overhead the tracers themselves may
176 * be causing. This function simply causes all recording to
177 * the ring buffers to fail.
178 */
179void tracing_off(void)
180{
181 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
182}
183EXPORT_SYMBOL_GPL(tracing_off);
184
185/**
165 * tracing_off_permanent - permanently disable ring buffers 186 * tracing_off_permanent - permanently disable ring buffers
166 * 187 *
167 * This function, once called, will disable all ring buffers 188 * This function, once called, will disable all ring buffers
@@ -172,6 +193,15 @@ void tracing_off_permanent(void)
172 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); 193 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
173} 194}
174 195
196/**
197 * tracing_is_on - show state of ring buffers enabled
198 */
199int tracing_is_on(void)
200{
201 return ring_buffer_flags == RB_BUFFERS_ON;
202}
203EXPORT_SYMBOL_GPL(tracing_is_on);
204
175#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) 205#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
176#define RB_ALIGNMENT 4U 206#define RB_ALIGNMENT 4U
177#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 207#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
@@ -448,10 +478,9 @@ struct ring_buffer_per_cpu {
448 int cpu; 478 int cpu;
449 atomic_t record_disabled; 479 atomic_t record_disabled;
450 struct ring_buffer *buffer; 480 struct ring_buffer *buffer;
451 raw_spinlock_t reader_lock; /* serialize readers */ 481 spinlock_t reader_lock; /* serialize readers */
452 arch_spinlock_t lock; 482 arch_spinlock_t lock;
453 struct lock_class_key lock_key; 483 struct lock_class_key lock_key;
454 unsigned int nr_pages;
455 struct list_head *pages; 484 struct list_head *pages;
456 struct buffer_page *head_page; /* read from head */ 485 struct buffer_page *head_page; /* read from head */
457 struct buffer_page *tail_page; /* write to tail */ 486 struct buffer_page *tail_page; /* write to tail */
@@ -459,29 +488,21 @@ struct ring_buffer_per_cpu {
459 struct buffer_page *reader_page; 488 struct buffer_page *reader_page;
460 unsigned long lost_events; 489 unsigned long lost_events;
461 unsigned long last_overrun; 490 unsigned long last_overrun;
462 local_t entries_bytes;
463 local_t entries;
464 local_t overrun;
465 local_t commit_overrun; 491 local_t commit_overrun;
466 local_t dropped_events; 492 local_t overrun;
493 local_t entries;
467 local_t committing; 494 local_t committing;
468 local_t commits; 495 local_t commits;
469 unsigned long read; 496 unsigned long read;
470 unsigned long read_bytes;
471 u64 write_stamp; 497 u64 write_stamp;
472 u64 read_stamp; 498 u64 read_stamp;
473 /* ring buffer pages to update, > 0 to add, < 0 to remove */
474 int nr_pages_to_update;
475 struct list_head new_pages; /* new pages to add */
476 struct work_struct update_pages_work;
477 struct completion update_done;
478}; 499};
479 500
480struct ring_buffer { 501struct ring_buffer {
502 unsigned pages;
481 unsigned flags; 503 unsigned flags;
482 int cpus; 504 int cpus;
483 atomic_t record_disabled; 505 atomic_t record_disabled;
484 atomic_t resize_disabled;
485 cpumask_var_t cpumask; 506 cpumask_var_t cpumask;
486 507
487 struct lock_class_key *reader_lock_key; 508 struct lock_class_key *reader_lock_key;
@@ -946,10 +967,6 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
946 struct list_head *head = cpu_buffer->pages; 967 struct list_head *head = cpu_buffer->pages;
947 struct buffer_page *bpage, *tmp; 968 struct buffer_page *bpage, *tmp;
948 969
949 /* Reset the head page if it exists */
950 if (cpu_buffer->head_page)
951 rb_set_head_page(cpu_buffer);
952
953 rb_head_page_deactivate(cpu_buffer); 970 rb_head_page_deactivate(cpu_buffer);
954 971
955 if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) 972 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
@@ -976,10 +993,14 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
976 return 0; 993 return 0;
977} 994}
978 995
979static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu) 996static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
997 unsigned nr_pages)
980{ 998{
981 int i;
982 struct buffer_page *bpage, *tmp; 999 struct buffer_page *bpage, *tmp;
1000 LIST_HEAD(pages);
1001 unsigned i;
1002
1003 WARN_ON(!nr_pages);
983 1004
984 for (i = 0; i < nr_pages; i++) { 1005 for (i = 0; i < nr_pages; i++) {
985 struct page *page; 1006 struct page *page;
@@ -990,13 +1011,15 @@ static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu)
990 */ 1011 */
991 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1012 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
992 GFP_KERNEL | __GFP_NORETRY, 1013 GFP_KERNEL | __GFP_NORETRY,
993 cpu_to_node(cpu)); 1014 cpu_to_node(cpu_buffer->cpu));
994 if (!bpage) 1015 if (!bpage)
995 goto free_pages; 1016 goto free_pages;
996 1017
997 list_add(&bpage->list, pages); 1018 rb_check_bpage(cpu_buffer, bpage);
998 1019
999 page = alloc_pages_node(cpu_to_node(cpu), 1020 list_add(&bpage->list, &pages);
1021
1022 page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
1000 GFP_KERNEL | __GFP_NORETRY, 0); 1023 GFP_KERNEL | __GFP_NORETRY, 0);
1001 if (!page) 1024 if (!page)
1002 goto free_pages; 1025 goto free_pages;
@@ -1004,27 +1027,6 @@ static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu)
1004 rb_init_page(bpage->page); 1027 rb_init_page(bpage->page);
1005 } 1028 }
1006 1029
1007 return 0;
1008
1009free_pages:
1010 list_for_each_entry_safe(bpage, tmp, pages, list) {
1011 list_del_init(&bpage->list);
1012 free_buffer_page(bpage);
1013 }
1014
1015 return -ENOMEM;
1016}
1017
1018static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1019 unsigned nr_pages)
1020{
1021 LIST_HEAD(pages);
1022
1023 WARN_ON(!nr_pages);
1024
1025 if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
1026 return -ENOMEM;
1027
1028 /* 1030 /*
1029 * The ring buffer page list is a circular list that does not 1031 * The ring buffer page list is a circular list that does not
1030 * start and end with a list head. All page list items point to 1032 * start and end with a list head. All page list items point to
@@ -1033,15 +1035,20 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1033 cpu_buffer->pages = pages.next; 1035 cpu_buffer->pages = pages.next;
1034 list_del(&pages); 1036 list_del(&pages);
1035 1037
1036 cpu_buffer->nr_pages = nr_pages;
1037
1038 rb_check_pages(cpu_buffer); 1038 rb_check_pages(cpu_buffer);
1039 1039
1040 return 0; 1040 return 0;
1041
1042 free_pages:
1043 list_for_each_entry_safe(bpage, tmp, &pages, list) {
1044 list_del_init(&bpage->list);
1045 free_buffer_page(bpage);
1046 }
1047 return -ENOMEM;
1041} 1048}
1042 1049
1043static struct ring_buffer_per_cpu * 1050static struct ring_buffer_per_cpu *
1044rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu) 1051rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
1045{ 1052{
1046 struct ring_buffer_per_cpu *cpu_buffer; 1053 struct ring_buffer_per_cpu *cpu_buffer;
1047 struct buffer_page *bpage; 1054 struct buffer_page *bpage;
@@ -1055,11 +1062,9 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
1055 1062
1056 cpu_buffer->cpu = cpu; 1063 cpu_buffer->cpu = cpu;
1057 cpu_buffer->buffer = buffer; 1064 cpu_buffer->buffer = buffer;
1058 raw_spin_lock_init(&cpu_buffer->reader_lock); 1065 spin_lock_init(&cpu_buffer->reader_lock);
1059 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); 1066 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1060 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 1067 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1061 INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
1062 init_completion(&cpu_buffer->update_done);
1063 1068
1064 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1069 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1065 GFP_KERNEL, cpu_to_node(cpu)); 1070 GFP_KERNEL, cpu_to_node(cpu));
@@ -1076,9 +1081,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
1076 rb_init_page(bpage->page); 1081 rb_init_page(bpage->page);
1077 1082
1078 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 1083 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1079 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1080 1084
1081 ret = rb_allocate_pages(cpu_buffer, nr_pages); 1085 ret = rb_allocate_pages(cpu_buffer, buffer->pages);
1082 if (ret < 0) 1086 if (ret < 0)
1083 goto fail_free_reader; 1087 goto fail_free_reader;
1084 1088
@@ -1139,7 +1143,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1139{ 1143{
1140 struct ring_buffer *buffer; 1144 struct ring_buffer *buffer;
1141 int bsize; 1145 int bsize;
1142 int cpu, nr_pages; 1146 int cpu;
1143 1147
1144 /* keep it in its own cache line */ 1148 /* keep it in its own cache line */
1145 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), 1149 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
@@ -1150,14 +1154,14 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1150 if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) 1154 if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
1151 goto fail_free_buffer; 1155 goto fail_free_buffer;
1152 1156
1153 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 1157 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1154 buffer->flags = flags; 1158 buffer->flags = flags;
1155 buffer->clock = trace_clock_local; 1159 buffer->clock = trace_clock_local;
1156 buffer->reader_lock_key = key; 1160 buffer->reader_lock_key = key;
1157 1161
1158 /* need at least two pages */ 1162 /* need at least two pages */
1159 if (nr_pages < 2) 1163 if (buffer->pages < 2)
1160 nr_pages = 2; 1164 buffer->pages = 2;
1161 1165
1162 /* 1166 /*
1163 * In case of non-hotplug cpu, if the ring-buffer is allocated 1167 * In case of non-hotplug cpu, if the ring-buffer is allocated
@@ -1180,7 +1184,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1180 1184
1181 for_each_buffer_cpu(buffer, cpu) { 1185 for_each_buffer_cpu(buffer, cpu) {
1182 buffer->buffers[cpu] = 1186 buffer->buffers[cpu] =
1183 rb_allocate_cpu_buffer(buffer, nr_pages, cpu); 1187 rb_allocate_cpu_buffer(buffer, cpu);
1184 if (!buffer->buffers[cpu]) 1188 if (!buffer->buffers[cpu])
1185 goto fail_free_buffers; 1189 goto fail_free_buffers;
1186 } 1190 }
@@ -1248,223 +1252,58 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,
1248 1252
1249static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); 1253static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
1250 1254
1251static inline unsigned long rb_page_entries(struct buffer_page *bpage) 1255static void
1252{ 1256rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1253 return local_read(&bpage->entries) & RB_WRITE_MASK;
1254}
1255
1256static inline unsigned long rb_page_write(struct buffer_page *bpage)
1257{
1258 return local_read(&bpage->write) & RB_WRITE_MASK;
1259}
1260
1261static int
1262rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages)
1263{ 1257{
1264 struct list_head *tail_page, *to_remove, *next_page; 1258 struct buffer_page *bpage;
1265 struct buffer_page *to_remove_page, *tmp_iter_page; 1259 struct list_head *p;
1266 struct buffer_page *last_page, *first_page; 1260 unsigned i;
1267 unsigned int nr_removed;
1268 unsigned long head_bit;
1269 int page_entries;
1270
1271 head_bit = 0;
1272
1273 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1274 atomic_inc(&cpu_buffer->record_disabled);
1275 /*
1276 * We don't race with the readers since we have acquired the reader
1277 * lock. We also don't race with writers after disabling recording.
1278 * This makes it easy to figure out the first and the last page to be
1279 * removed from the list. We unlink all the pages in between including
1280 * the first and last pages. This is done in a busy loop so that we
1281 * lose the least number of traces.
1282 * The pages are freed after we restart recording and unlock readers.
1283 */
1284 tail_page = &cpu_buffer->tail_page->list;
1285
1286 /*
1287 * tail page might be on reader page, we remove the next page
1288 * from the ring buffer
1289 */
1290 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
1291 tail_page = rb_list_head(tail_page->next);
1292 to_remove = tail_page;
1293 1261
1294 /* start of pages to remove */ 1262 spin_lock_irq(&cpu_buffer->reader_lock);
1295 first_page = list_entry(rb_list_head(to_remove->next), 1263 rb_head_page_deactivate(cpu_buffer);
1296 struct buffer_page, list);
1297 1264
1298 for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) { 1265 for (i = 0; i < nr_pages; i++) {
1299 to_remove = rb_list_head(to_remove)->next; 1266 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1300 head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD; 1267 goto out;
1268 p = cpu_buffer->pages->next;
1269 bpage = list_entry(p, struct buffer_page, list);
1270 list_del_init(&bpage->list);
1271 free_buffer_page(bpage);
1301 } 1272 }
1273 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1274 goto out;
1302 1275
1303 next_page = rb_list_head(to_remove)->next; 1276 rb_reset_cpu(cpu_buffer);
1304 1277 rb_check_pages(cpu_buffer);
1305 /*
1306 * Now we remove all pages between tail_page and next_page.
1307 * Make sure that we have head_bit value preserved for the
1308 * next page
1309 */
1310 tail_page->next = (struct list_head *)((unsigned long)next_page |
1311 head_bit);
1312 next_page = rb_list_head(next_page);
1313 next_page->prev = tail_page;
1314
1315 /* make sure pages points to a valid page in the ring buffer */
1316 cpu_buffer->pages = next_page;
1317
1318 /* update head page */
1319 if (head_bit)
1320 cpu_buffer->head_page = list_entry(next_page,
1321 struct buffer_page, list);
1322
1323 /*
1324 * change read pointer to make sure any read iterators reset
1325 * themselves
1326 */
1327 cpu_buffer->read = 0;
1328
1329 /* pages are removed, resume tracing and then free the pages */
1330 atomic_dec(&cpu_buffer->record_disabled);
1331 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1332
1333 RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
1334
1335 /* last buffer page to remove */
1336 last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
1337 list);
1338 tmp_iter_page = first_page;
1339
1340 do {
1341 to_remove_page = tmp_iter_page;
1342 rb_inc_page(cpu_buffer, &tmp_iter_page);
1343
1344 /* update the counters */
1345 page_entries = rb_page_entries(to_remove_page);
1346 if (page_entries) {
1347 /*
1348 * If something was added to this page, it was full
1349 * since it is not the tail page. So we deduct the
1350 * bytes consumed in ring buffer from here.
1351 * Increment overrun to account for the lost events.
1352 */
1353 local_add(page_entries, &cpu_buffer->overrun);
1354 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1355 }
1356
1357 /*
1358 * We have already removed references to this list item, just
1359 * free up the buffer_page and its page
1360 */
1361 free_buffer_page(to_remove_page);
1362 nr_removed--;
1363
1364 } while (to_remove_page != last_page);
1365
1366 RB_WARN_ON(cpu_buffer, nr_removed);
1367 1278
1368 return nr_removed == 0; 1279out:
1280 spin_unlock_irq(&cpu_buffer->reader_lock);
1369} 1281}
1370 1282
1371static int 1283static void
1372rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) 1284rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1285 struct list_head *pages, unsigned nr_pages)
1373{ 1286{
1374 struct list_head *pages = &cpu_buffer->new_pages; 1287 struct buffer_page *bpage;
1375 int retries, success; 1288 struct list_head *p;
1376 1289 unsigned i;
1377 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1378 /*
1379 * We are holding the reader lock, so the reader page won't be swapped
1380 * in the ring buffer. Now we are racing with the writer trying to
1381 * move head page and the tail page.
1382 * We are going to adapt the reader page update process where:
1383 * 1. We first splice the start and end of list of new pages between
1384 * the head page and its previous page.
1385 * 2. We cmpxchg the prev_page->next to point from head page to the
1386 * start of new pages list.
1387 * 3. Finally, we update the head->prev to the end of new list.
1388 *
1389 * We will try this process 10 times, to make sure that we don't keep
1390 * spinning.
1391 */
1392 retries = 10;
1393 success = 0;
1394 while (retries--) {
1395 struct list_head *head_page, *prev_page, *r;
1396 struct list_head *last_page, *first_page;
1397 struct list_head *head_page_with_bit;
1398
1399 head_page = &rb_set_head_page(cpu_buffer)->list;
1400 if (!head_page)
1401 break;
1402 prev_page = head_page->prev;
1403
1404 first_page = pages->next;
1405 last_page = pages->prev;
1406
1407 head_page_with_bit = (struct list_head *)
1408 ((unsigned long)head_page | RB_PAGE_HEAD);
1409
1410 last_page->next = head_page_with_bit;
1411 first_page->prev = prev_page;
1412
1413 r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
1414 1290
1415 if (r == head_page_with_bit) { 1291 spin_lock_irq(&cpu_buffer->reader_lock);
1416 /* 1292 rb_head_page_deactivate(cpu_buffer);
1417 * yay, we replaced the page pointer to our new list,
1418 * now, we just have to update to head page's prev
1419 * pointer to point to end of list
1420 */
1421 head_page->prev = last_page;
1422 success = 1;
1423 break;
1424 }
1425 }
1426 1293
1427 if (success) 1294 for (i = 0; i < nr_pages; i++) {
1428 INIT_LIST_HEAD(pages); 1295 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
1429 /* 1296 goto out;
1430 * If we weren't successful in adding in new pages, warn and stop 1297 p = pages->next;
1431 * tracing 1298 bpage = list_entry(p, struct buffer_page, list);
1432 */ 1299 list_del_init(&bpage->list);
1433 RB_WARN_ON(cpu_buffer, !success); 1300 list_add_tail(&bpage->list, cpu_buffer->pages);
1434 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1435
1436 /* free pages if they weren't inserted */
1437 if (!success) {
1438 struct buffer_page *bpage, *tmp;
1439 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1440 list) {
1441 list_del_init(&bpage->list);
1442 free_buffer_page(bpage);
1443 }
1444 } 1301 }
1445 return success; 1302 rb_reset_cpu(cpu_buffer);
1446} 1303 rb_check_pages(cpu_buffer);
1447
1448static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
1449{
1450 int success;
1451
1452 if (cpu_buffer->nr_pages_to_update > 0)
1453 success = rb_insert_pages(cpu_buffer);
1454 else
1455 success = rb_remove_pages(cpu_buffer,
1456 -cpu_buffer->nr_pages_to_update);
1457
1458 if (success)
1459 cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
1460}
1461 1304
1462static void update_pages_handler(struct work_struct *work) 1305out:
1463{ 1306 spin_unlock_irq(&cpu_buffer->reader_lock);
1464 struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
1465 struct ring_buffer_per_cpu, update_pages_work);
1466 rb_update_pages(cpu_buffer);
1467 complete(&cpu_buffer->update_done);
1468} 1307}
1469 1308
1470/** 1309/**
@@ -1474,14 +1313,16 @@ static void update_pages_handler(struct work_struct *work)
1474 * 1313 *
1475 * Minimum size is 2 * BUF_PAGE_SIZE. 1314 * Minimum size is 2 * BUF_PAGE_SIZE.
1476 * 1315 *
1477 * Returns 0 on success and < 0 on failure. 1316 * Returns -1 on failure.
1478 */ 1317 */
1479int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, 1318int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1480 int cpu_id)
1481{ 1319{
1482 struct ring_buffer_per_cpu *cpu_buffer; 1320 struct ring_buffer_per_cpu *cpu_buffer;
1483 unsigned nr_pages; 1321 unsigned nr_pages, rm_pages, new_pages;
1484 int cpu, err = 0; 1322 struct buffer_page *bpage, *tmp;
1323 unsigned long buffer_size;
1324 LIST_HEAD(pages);
1325 int i, cpu;
1485 1326
1486 /* 1327 /*
1487 * Always succeed at resizing a non-existent buffer: 1328 * Always succeed at resizing a non-existent buffer:
@@ -1489,165 +1330,115 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
1489 if (!buffer) 1330 if (!buffer)
1490 return size; 1331 return size;
1491 1332
1492 /* Make sure the requested buffer exists */
1493 if (cpu_id != RING_BUFFER_ALL_CPUS &&
1494 !cpumask_test_cpu(cpu_id, buffer->cpumask))
1495 return size;
1496
1497 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 1333 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1498 size *= BUF_PAGE_SIZE; 1334 size *= BUF_PAGE_SIZE;
1335 buffer_size = buffer->pages * BUF_PAGE_SIZE;
1499 1336
1500 /* we need a minimum of two pages */ 1337 /* we need a minimum of two pages */
1501 if (size < BUF_PAGE_SIZE * 2) 1338 if (size < BUF_PAGE_SIZE * 2)
1502 size = BUF_PAGE_SIZE * 2; 1339 size = BUF_PAGE_SIZE * 2;
1503 1340
1504 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 1341 if (size == buffer_size)
1342 return size;
1505 1343
1506 /* 1344 atomic_inc(&buffer->record_disabled);
1507 * Don't succeed if resizing is disabled, as a reader might be 1345
1508 * manipulating the ring buffer and is expecting a sane state while 1346 /* Make sure all writers are done with this buffer. */
1509 * this is true. 1347 synchronize_sched();
1510 */
1511 if (atomic_read(&buffer->resize_disabled))
1512 return -EBUSY;
1513 1348
1514 /* prevent another thread from changing buffer sizes */
1515 mutex_lock(&buffer->mutex); 1349 mutex_lock(&buffer->mutex);
1350 get_online_cpus();
1516 1351
1517 if (cpu_id == RING_BUFFER_ALL_CPUS) { 1352 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1518 /* calculate the pages to update */
1519 for_each_buffer_cpu(buffer, cpu) {
1520 cpu_buffer = buffer->buffers[cpu];
1521 1353
1522 cpu_buffer->nr_pages_to_update = nr_pages - 1354 if (size < buffer_size) {
1523 cpu_buffer->nr_pages;
1524 /*
1525 * nothing more to do for removing pages or no update
1526 */
1527 if (cpu_buffer->nr_pages_to_update <= 0)
1528 continue;
1529 /*
1530 * to add pages, make sure all new pages can be
1531 * allocated without receiving ENOMEM
1532 */
1533 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1534 if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1535 &cpu_buffer->new_pages, cpu)) {
1536 /* not enough memory for new pages */
1537 err = -ENOMEM;
1538 goto out_err;
1539 }
1540 }
1541 1355
1542 get_online_cpus(); 1356 /* easy case, just free pages */
1543 /* 1357 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages))
1544 * Fire off all the required work handlers 1358 goto out_fail;
1545 * We can't schedule on offline CPUs, but it's not necessary 1359
1546 * since we can change their buffer sizes without any race. 1360 rm_pages = buffer->pages - nr_pages;
1547 */
1548 for_each_buffer_cpu(buffer, cpu) {
1549 cpu_buffer = buffer->buffers[cpu];
1550 if (!cpu_buffer->nr_pages_to_update)
1551 continue;
1552
1553 if (cpu_online(cpu))
1554 schedule_work_on(cpu,
1555 &cpu_buffer->update_pages_work);
1556 else
1557 rb_update_pages(cpu_buffer);
1558 }
1559 1361
1560 /* wait for all the updates to complete */
1561 for_each_buffer_cpu(buffer, cpu) { 1362 for_each_buffer_cpu(buffer, cpu) {
1562 cpu_buffer = buffer->buffers[cpu]; 1363 cpu_buffer = buffer->buffers[cpu];
1563 if (!cpu_buffer->nr_pages_to_update) 1364 rb_remove_pages(cpu_buffer, rm_pages);
1564 continue;
1565
1566 if (cpu_online(cpu))
1567 wait_for_completion(&cpu_buffer->update_done);
1568 cpu_buffer->nr_pages_to_update = 0;
1569 } 1365 }
1366 goto out;
1367 }
1570 1368
1571 put_online_cpus(); 1369 /*
1572 } else { 1370 * This is a bit more difficult. We only want to add pages
1573 /* Make sure this CPU has been intitialized */ 1371 * when we can allocate enough for all CPUs. We do this
1574 if (!cpumask_test_cpu(cpu_id, buffer->cpumask)) 1372 * by allocating all the pages and storing them on a local
1575 goto out; 1373 * link list. If we succeed in our allocation, then we
1576 1374 * add these pages to the cpu_buffers. Otherwise we just free
1577 cpu_buffer = buffer->buffers[cpu_id]; 1375 * them all and return -ENOMEM;
1578 1376 */
1579 if (nr_pages == cpu_buffer->nr_pages) 1377 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages))
1580 goto out; 1378 goto out_fail;
1581 1379
1582 cpu_buffer->nr_pages_to_update = nr_pages - 1380 new_pages = nr_pages - buffer->pages;
1583 cpu_buffer->nr_pages;
1584 1381
1585 INIT_LIST_HEAD(&cpu_buffer->new_pages); 1382 for_each_buffer_cpu(buffer, cpu) {
1586 if (cpu_buffer->nr_pages_to_update > 0 && 1383 for (i = 0; i < new_pages; i++) {
1587 __rb_allocate_pages(cpu_buffer->nr_pages_to_update, 1384 struct page *page;
1588 &cpu_buffer->new_pages, cpu_id)) { 1385 /*
1589 err = -ENOMEM; 1386 * __GFP_NORETRY flag makes sure that the allocation
1590 goto out_err; 1387 * fails gracefully without invoking oom-killer and
1388 * the system is not destabilized.
1389 */
1390 bpage = kzalloc_node(ALIGN(sizeof(*bpage),
1391 cache_line_size()),
1392 GFP_KERNEL | __GFP_NORETRY,
1393 cpu_to_node(cpu));
1394 if (!bpage)
1395 goto free_pages;
1396 list_add(&bpage->list, &pages);
1397 page = alloc_pages_node(cpu_to_node(cpu),
1398 GFP_KERNEL | __GFP_NORETRY, 0);
1399 if (!page)
1400 goto free_pages;
1401 bpage->page = page_address(page);
1402 rb_init_page(bpage->page);
1591 } 1403 }
1592
1593 get_online_cpus();
1594
1595 if (cpu_online(cpu_id)) {
1596 schedule_work_on(cpu_id,
1597 &cpu_buffer->update_pages_work);
1598 wait_for_completion(&cpu_buffer->update_done);
1599 } else
1600 rb_update_pages(cpu_buffer);
1601
1602 cpu_buffer->nr_pages_to_update = 0;
1603 put_online_cpus();
1604 } 1404 }
1605 1405
1606 out: 1406 for_each_buffer_cpu(buffer, cpu) {
1607 /* 1407 cpu_buffer = buffer->buffers[cpu];
1608 * The ring buffer resize can happen with the ring buffer 1408 rb_insert_pages(cpu_buffer, &pages, new_pages);
1609 * enabled, so that the update disturbs the tracing as little
1610 * as possible. But if the buffer is disabled, we do not need
1611 * to worry about that, and we can take the time to verify
1612 * that the buffer is not corrupt.
1613 */
1614 if (atomic_read(&buffer->record_disabled)) {
1615 atomic_inc(&buffer->record_disabled);
1616 /*
1617 * Even though the buffer was disabled, we must make sure
1618 * that it is truly disabled before calling rb_check_pages.
1619 * There could have been a race between checking
1620 * record_disable and incrementing it.
1621 */
1622 synchronize_sched();
1623 for_each_buffer_cpu(buffer, cpu) {
1624 cpu_buffer = buffer->buffers[cpu];
1625 rb_check_pages(cpu_buffer);
1626 }
1627 atomic_dec(&buffer->record_disabled);
1628 } 1409 }
1629 1410
1630 mutex_unlock(&buffer->mutex); 1411 if (RB_WARN_ON(buffer, !list_empty(&pages)))
1631 return size; 1412 goto out_fail;
1632 1413
1633 out_err: 1414 out:
1634 for_each_buffer_cpu(buffer, cpu) { 1415 buffer->pages = nr_pages;
1635 struct buffer_page *bpage, *tmp; 1416 put_online_cpus();
1417 mutex_unlock(&buffer->mutex);
1636 1418
1637 cpu_buffer = buffer->buffers[cpu]; 1419 atomic_dec(&buffer->record_disabled);
1638 cpu_buffer->nr_pages_to_update = 0;
1639 1420
1640 if (list_empty(&cpu_buffer->new_pages)) 1421 return size;
1641 continue;
1642 1422
1643 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, 1423 free_pages:
1644 list) { 1424 list_for_each_entry_safe(bpage, tmp, &pages, list) {
1645 list_del_init(&bpage->list); 1425 list_del_init(&bpage->list);
1646 free_buffer_page(bpage); 1426 free_buffer_page(bpage);
1647 }
1648 } 1427 }
1428 put_online_cpus();
1649 mutex_unlock(&buffer->mutex); 1429 mutex_unlock(&buffer->mutex);
1650 return err; 1430 atomic_dec(&buffer->record_disabled);
1431 return -ENOMEM;
1432
1433 /*
1434 * Something went totally wrong, and we are too paranoid
1435 * to even clean up the mess.
1436 */
1437 out_fail:
1438 put_online_cpus();
1439 mutex_unlock(&buffer->mutex);
1440 atomic_dec(&buffer->record_disabled);
1441 return -1;
1651} 1442}
1652EXPORT_SYMBOL_GPL(ring_buffer_resize); 1443EXPORT_SYMBOL_GPL(ring_buffer_resize);
1653 1444
@@ -1686,11 +1477,21 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
1686 return __rb_page_index(iter->head_page, iter->head); 1477 return __rb_page_index(iter->head_page, iter->head);
1687} 1478}
1688 1479
1480static inline unsigned long rb_page_write(struct buffer_page *bpage)
1481{
1482 return local_read(&bpage->write) & RB_WRITE_MASK;
1483}
1484
1689static inline unsigned rb_page_commit(struct buffer_page *bpage) 1485static inline unsigned rb_page_commit(struct buffer_page *bpage)
1690{ 1486{
1691 return local_read(&bpage->page->commit); 1487 return local_read(&bpage->page->commit);
1692} 1488}
1693 1489
1490static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1491{
1492 return local_read(&bpage->entries) & RB_WRITE_MASK;
1493}
1494
1694/* Size is determined by what has been committed */ 1495/* Size is determined by what has been committed */
1695static inline unsigned rb_page_size(struct buffer_page *bpage) 1496static inline unsigned rb_page_size(struct buffer_page *bpage)
1696{ 1497{
@@ -1739,7 +1540,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1739 * assign the commit to the tail. 1540 * assign the commit to the tail.
1740 */ 1541 */
1741 again: 1542 again:
1742 max_count = cpu_buffer->nr_pages * 100; 1543 max_count = cpu_buffer->buffer->pages * 100;
1743 1544
1744 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 1545 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
1745 if (RB_WARN_ON(cpu_buffer, !(--max_count))) 1546 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
@@ -1823,7 +1624,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
1823} 1624}
1824 1625
1825/** 1626/**
1826 * rb_update_event - update event type and data 1627 * ring_buffer_update_event - update event type and data
1827 * @event: the even to update 1628 * @event: the even to update
1828 * @type: the type of event 1629 * @type: the type of event
1829 * @length: the size of the event field in the ring buffer 1630 * @length: the size of the event field in the ring buffer
@@ -1907,7 +1708,6 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
1907 * the counters. 1708 * the counters.
1908 */ 1709 */
1909 local_add(entries, &cpu_buffer->overrun); 1710 local_add(entries, &cpu_buffer->overrun);
1910 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1911 1711
1912 /* 1712 /*
1913 * The entries will be zeroed out when we move the 1713 * The entries will be zeroed out when we move the
@@ -2063,9 +1863,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
2063 event = __rb_page_index(tail_page, tail); 1863 event = __rb_page_index(tail_page, tail);
2064 kmemcheck_annotate_bitfield(event, bitfield); 1864 kmemcheck_annotate_bitfield(event, bitfield);
2065 1865
2066 /* account for padding bytes */
2067 local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
2068
2069 /* 1866 /*
2070 * Save the original length to the meta data. 1867 * Save the original length to the meta data.
2071 * This will be used by the reader to add lost event 1868 * This will be used by the reader to add lost event
@@ -2158,10 +1955,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2158 * If we are not in overwrite mode, 1955 * If we are not in overwrite mode,
2159 * this is easy, just stop here. 1956 * this is easy, just stop here.
2160 */ 1957 */
2161 if (!(buffer->flags & RB_FL_OVERWRITE)) { 1958 if (!(buffer->flags & RB_FL_OVERWRITE))
2162 local_inc(&cpu_buffer->dropped_events);
2163 goto out_reset; 1959 goto out_reset;
2164 }
2165 1960
2166 ret = rb_handle_head_page(cpu_buffer, 1961 ret = rb_handle_head_page(cpu_buffer,
2167 tail_page, 1962 tail_page,
@@ -2259,9 +2054,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
2259 if (!tail) 2054 if (!tail)
2260 tail_page->page->time_stamp = ts; 2055 tail_page->page->time_stamp = ts;
2261 2056
2262 /* account for these added bytes */
2263 local_add(length, &cpu_buffer->entries_bytes);
2264
2265 return event; 2057 return event;
2266} 2058}
2267 2059
@@ -2284,7 +2076,6 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2284 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { 2076 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2285 unsigned long write_mask = 2077 unsigned long write_mask =
2286 local_read(&bpage->write) & ~RB_WRITE_MASK; 2078 local_read(&bpage->write) & ~RB_WRITE_MASK;
2287 unsigned long event_length = rb_event_length(event);
2288 /* 2079 /*
2289 * This is on the tail page. It is possible that 2080 * This is on the tail page. It is possible that
2290 * a write could come in and move the tail page 2081 * a write could come in and move the tail page
@@ -2294,11 +2085,8 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2294 old_index += write_mask; 2085 old_index += write_mask;
2295 new_index += write_mask; 2086 new_index += write_mask;
2296 index = local_cmpxchg(&bpage->write, old_index, new_index); 2087 index = local_cmpxchg(&bpage->write, old_index, new_index);
2297 if (index == old_index) { 2088 if (index == old_index)
2298 /* update counters */
2299 local_sub(event_length, &cpu_buffer->entries_bytes);
2300 return 1; 2089 return 1;
2301 }
2302 } 2090 }
2303 2091
2304 /* could not discard */ 2092 /* could not discard */
@@ -2725,8 +2513,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
2725 * and not the length of the event which would hold the header. 2513 * and not the length of the event which would hold the header.
2726 */ 2514 */
2727int ring_buffer_write(struct ring_buffer *buffer, 2515int ring_buffer_write(struct ring_buffer *buffer,
2728 unsigned long length, 2516 unsigned long length,
2729 void *data) 2517 void *data)
2730{ 2518{
2731 struct ring_buffer_per_cpu *cpu_buffer; 2519 struct ring_buffer_per_cpu *cpu_buffer;
2732 struct ring_buffer_event *event; 2520 struct ring_buffer_event *event;
@@ -2818,63 +2606,6 @@ void ring_buffer_record_enable(struct ring_buffer *buffer)
2818EXPORT_SYMBOL_GPL(ring_buffer_record_enable); 2606EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
2819 2607
2820/** 2608/**
2821 * ring_buffer_record_off - stop all writes into the buffer
2822 * @buffer: The ring buffer to stop writes to.
2823 *
2824 * This prevents all writes to the buffer. Any attempt to write
2825 * to the buffer after this will fail and return NULL.
2826 *
2827 * This is different than ring_buffer_record_disable() as
2828 * it works like an on/off switch, where as the disable() version
2829 * must be paired with a enable().
2830 */
2831void ring_buffer_record_off(struct ring_buffer *buffer)
2832{
2833 unsigned int rd;
2834 unsigned int new_rd;
2835
2836 do {
2837 rd = atomic_read(&buffer->record_disabled);
2838 new_rd = rd | RB_BUFFER_OFF;
2839 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
2840}
2841EXPORT_SYMBOL_GPL(ring_buffer_record_off);
2842
2843/**
2844 * ring_buffer_record_on - restart writes into the buffer
2845 * @buffer: The ring buffer to start writes to.
2846 *
2847 * This enables all writes to the buffer that was disabled by
2848 * ring_buffer_record_off().
2849 *
2850 * This is different than ring_buffer_record_enable() as
2851 * it works like an on/off switch, where as the enable() version
2852 * must be paired with a disable().
2853 */
2854void ring_buffer_record_on(struct ring_buffer *buffer)
2855{
2856 unsigned int rd;
2857 unsigned int new_rd;
2858
2859 do {
2860 rd = atomic_read(&buffer->record_disabled);
2861 new_rd = rd & ~RB_BUFFER_OFF;
2862 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
2863}
2864EXPORT_SYMBOL_GPL(ring_buffer_record_on);
2865
2866/**
2867 * ring_buffer_record_is_on - return true if the ring buffer can write
2868 * @buffer: The ring buffer to see if write is enabled
2869 *
2870 * Returns true if the ring buffer is in a state that it accepts writes.
2871 */
2872int ring_buffer_record_is_on(struct ring_buffer *buffer)
2873{
2874 return !atomic_read(&buffer->record_disabled);
2875}
2876
2877/**
2878 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer 2609 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
2879 * @buffer: The ring buffer to stop writes to. 2610 * @buffer: The ring buffer to stop writes to.
2880 * @cpu: The CPU buffer to stop 2611 * @cpu: The CPU buffer to stop
@@ -2930,59 +2661,6 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
2930} 2661}
2931 2662
2932/** 2663/**
2933 * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer
2934 * @buffer: The ring buffer
2935 * @cpu: The per CPU buffer to read from.
2936 */
2937u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
2938{
2939 unsigned long flags;
2940 struct ring_buffer_per_cpu *cpu_buffer;
2941 struct buffer_page *bpage;
2942 u64 ret = 0;
2943
2944 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2945 return 0;
2946
2947 cpu_buffer = buffer->buffers[cpu];
2948 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2949 /*
2950 * if the tail is on reader_page, oldest time stamp is on the reader
2951 * page
2952 */
2953 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
2954 bpage = cpu_buffer->reader_page;
2955 else
2956 bpage = rb_set_head_page(cpu_buffer);
2957 if (bpage)
2958 ret = bpage->page->time_stamp;
2959 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2960
2961 return ret;
2962}
2963EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
2964
2965/**
2966 * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
2967 * @buffer: The ring buffer
2968 * @cpu: The per CPU buffer to read from.
2969 */
2970unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu)
2971{
2972 struct ring_buffer_per_cpu *cpu_buffer;
2973 unsigned long ret;
2974
2975 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2976 return 0;
2977
2978 cpu_buffer = buffer->buffers[cpu];
2979 ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
2980
2981 return ret;
2982}
2983EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
2984
2985/**
2986 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer 2664 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
2987 * @buffer: The ring buffer 2665 * @buffer: The ring buffer
2988 * @cpu: The per CPU buffer to get the entries from. 2666 * @cpu: The per CPU buffer to get the entries from.
@@ -3001,8 +2679,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
3001EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); 2679EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
3002 2680
3003/** 2681/**
3004 * ring_buffer_overrun_cpu - get the number of overruns caused by the ring 2682 * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
3005 * buffer wrapping around (only if RB_FL_OVERWRITE is on).
3006 * @buffer: The ring buffer 2683 * @buffer: The ring buffer
3007 * @cpu: The per CPU buffer to get the number of overruns from 2684 * @cpu: The per CPU buffer to get the number of overruns from
3008 */ 2685 */
@@ -3022,9 +2699,7 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
3022EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); 2699EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
3023 2700
3024/** 2701/**
3025 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by 2702 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
3026 * commits failing due to the buffer wrapping around while there are uncommitted
3027 * events, such as during an interrupt storm.
3028 * @buffer: The ring buffer 2703 * @buffer: The ring buffer
3029 * @cpu: The per CPU buffer to get the number of overruns from 2704 * @cpu: The per CPU buffer to get the number of overruns from
3030 */ 2705 */
@@ -3045,28 +2720,6 @@ ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
3045EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu); 2720EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
3046 2721
3047/** 2722/**
3048 * ring_buffer_dropped_events_cpu - get the number of dropped events caused by
3049 * the ring buffer filling up (only if RB_FL_OVERWRITE is off).
3050 * @buffer: The ring buffer
3051 * @cpu: The per CPU buffer to get the number of overruns from
3052 */
3053unsigned long
3054ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
3055{
3056 struct ring_buffer_per_cpu *cpu_buffer;
3057 unsigned long ret;
3058
3059 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3060 return 0;
3061
3062 cpu_buffer = buffer->buffers[cpu];
3063 ret = local_read(&cpu_buffer->dropped_events);
3064
3065 return ret;
3066}
3067EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
3068
3069/**
3070 * ring_buffer_entries - get the number of entries in a buffer 2723 * ring_buffer_entries - get the number of entries in a buffer
3071 * @buffer: The ring buffer 2724 * @buffer: The ring buffer
3072 * 2725 *
@@ -3151,9 +2804,9 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
3151 2804
3152 cpu_buffer = iter->cpu_buffer; 2805 cpu_buffer = iter->cpu_buffer;
3153 2806
3154 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2807 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3155 rb_iter_reset(iter); 2808 rb_iter_reset(iter);
3156 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2809 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3157} 2810}
3158EXPORT_SYMBOL_GPL(ring_buffer_iter_reset); 2811EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
3159 2812
@@ -3274,10 +2927,6 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
3274 if (cpu_buffer->commit_page == cpu_buffer->reader_page) 2927 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
3275 goto out; 2928 goto out;
3276 2929
3277 /* Don't bother swapping if the ring buffer is empty */
3278 if (rb_num_of_entries(cpu_buffer) == 0)
3279 goto out;
3280
3281 /* 2930 /*
3282 * Reset the reader page to size zero. 2931 * Reset the reader page to size zero.
3283 */ 2932 */
@@ -3291,8 +2940,6 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
3291 * Splice the empty reader page into the list around the head. 2940 * Splice the empty reader page into the list around the head.
3292 */ 2941 */
3293 reader = rb_set_head_page(cpu_buffer); 2942 reader = rb_set_head_page(cpu_buffer);
3294 if (!reader)
3295 goto out;
3296 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next); 2943 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
3297 cpu_buffer->reader_page->list.prev = reader->list.prev; 2944 cpu_buffer->reader_page->list.prev = reader->list.prev;
3298 2945
@@ -3618,12 +3265,12 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
3618 again: 3265 again:
3619 local_irq_save(flags); 3266 local_irq_save(flags);
3620 if (dolock) 3267 if (dolock)
3621 raw_spin_lock(&cpu_buffer->reader_lock); 3268 spin_lock(&cpu_buffer->reader_lock);
3622 event = rb_buffer_peek(cpu_buffer, ts, lost_events); 3269 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3623 if (event && event->type_len == RINGBUF_TYPE_PADDING) 3270 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3624 rb_advance_reader(cpu_buffer); 3271 rb_advance_reader(cpu_buffer);
3625 if (dolock) 3272 if (dolock)
3626 raw_spin_unlock(&cpu_buffer->reader_lock); 3273 spin_unlock(&cpu_buffer->reader_lock);
3627 local_irq_restore(flags); 3274 local_irq_restore(flags);
3628 3275
3629 if (event && event->type_len == RINGBUF_TYPE_PADDING) 3276 if (event && event->type_len == RINGBUF_TYPE_PADDING)
@@ -3648,9 +3295,9 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3648 unsigned long flags; 3295 unsigned long flags;
3649 3296
3650 again: 3297 again:
3651 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3298 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3652 event = rb_iter_peek(iter, ts); 3299 event = rb_iter_peek(iter, ts);
3653 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3300 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3654 3301
3655 if (event && event->type_len == RINGBUF_TYPE_PADDING) 3302 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3656 goto again; 3303 goto again;
@@ -3690,7 +3337,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
3690 cpu_buffer = buffer->buffers[cpu]; 3337 cpu_buffer = buffer->buffers[cpu];
3691 local_irq_save(flags); 3338 local_irq_save(flags);
3692 if (dolock) 3339 if (dolock)
3693 raw_spin_lock(&cpu_buffer->reader_lock); 3340 spin_lock(&cpu_buffer->reader_lock);
3694 3341
3695 event = rb_buffer_peek(cpu_buffer, ts, lost_events); 3342 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3696 if (event) { 3343 if (event) {
@@ -3699,7 +3346,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
3699 } 3346 }
3700 3347
3701 if (dolock) 3348 if (dolock)
3702 raw_spin_unlock(&cpu_buffer->reader_lock); 3349 spin_unlock(&cpu_buffer->reader_lock);
3703 local_irq_restore(flags); 3350 local_irq_restore(flags);
3704 3351
3705 out: 3352 out:
@@ -3749,7 +3396,6 @@ ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
3749 3396
3750 iter->cpu_buffer = cpu_buffer; 3397 iter->cpu_buffer = cpu_buffer;
3751 3398
3752 atomic_inc(&buffer->resize_disabled);
3753 atomic_inc(&cpu_buffer->record_disabled); 3399 atomic_inc(&cpu_buffer->record_disabled);
3754 3400
3755 return iter; 3401 return iter;
@@ -3792,11 +3438,11 @@ ring_buffer_read_start(struct ring_buffer_iter *iter)
3792 3438
3793 cpu_buffer = iter->cpu_buffer; 3439 cpu_buffer = iter->cpu_buffer;
3794 3440
3795 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3441 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3796 arch_spin_lock(&cpu_buffer->lock); 3442 arch_spin_lock(&cpu_buffer->lock);
3797 rb_iter_reset(iter); 3443 rb_iter_reset(iter);
3798 arch_spin_unlock(&cpu_buffer->lock); 3444 arch_spin_unlock(&cpu_buffer->lock);
3799 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3445 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3800} 3446}
3801EXPORT_SYMBOL_GPL(ring_buffer_read_start); 3447EXPORT_SYMBOL_GPL(ring_buffer_read_start);
3802 3448
@@ -3811,20 +3457,8 @@ void
3811ring_buffer_read_finish(struct ring_buffer_iter *iter) 3457ring_buffer_read_finish(struct ring_buffer_iter *iter)
3812{ 3458{
3813 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 3459 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3814 unsigned long flags;
3815
3816 /*
3817 * Ring buffer is disabled from recording, here's a good place
3818 * to check the integrity of the ring buffer.
3819 * Must prevent readers from trying to read, as the check
3820 * clears the HEAD page and readers require it.
3821 */
3822 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3823 rb_check_pages(cpu_buffer);
3824 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3825 3460
3826 atomic_dec(&cpu_buffer->record_disabled); 3461 atomic_dec(&cpu_buffer->record_disabled);
3827 atomic_dec(&cpu_buffer->buffer->resize_disabled);
3828 kfree(iter); 3462 kfree(iter);
3829} 3463}
3830EXPORT_SYMBOL_GPL(ring_buffer_read_finish); 3464EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
@@ -3843,7 +3477,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
3843 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 3477 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3844 unsigned long flags; 3478 unsigned long flags;
3845 3479
3846 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3480 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3847 again: 3481 again:
3848 event = rb_iter_peek(iter, ts); 3482 event = rb_iter_peek(iter, ts);
3849 if (!event) 3483 if (!event)
@@ -3854,7 +3488,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
3854 3488
3855 rb_advance_iter(iter); 3489 rb_advance_iter(iter);
3856 out: 3490 out:
3857 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3491 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3858 3492
3859 return event; 3493 return event;
3860} 3494}
@@ -3864,18 +3498,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_read);
3864 * ring_buffer_size - return the size of the ring buffer (in bytes) 3498 * ring_buffer_size - return the size of the ring buffer (in bytes)
3865 * @buffer: The ring buffer. 3499 * @buffer: The ring buffer.
3866 */ 3500 */
3867unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu) 3501unsigned long ring_buffer_size(struct ring_buffer *buffer)
3868{ 3502{
3869 /* 3503 return BUF_PAGE_SIZE * buffer->pages;
3870 * Earlier, this method returned
3871 * BUF_PAGE_SIZE * buffer->nr_pages
3872 * Since the nr_pages field is now removed, we have converted this to
3873 * return the per cpu buffer value.
3874 */
3875 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3876 return 0;
3877
3878 return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
3879} 3504}
3880EXPORT_SYMBOL_GPL(ring_buffer_size); 3505EXPORT_SYMBOL_GPL(ring_buffer_size);
3881 3506
@@ -3896,21 +3521,17 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
3896 cpu_buffer->commit_page = cpu_buffer->head_page; 3521 cpu_buffer->commit_page = cpu_buffer->head_page;
3897 3522
3898 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 3523 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
3899 INIT_LIST_HEAD(&cpu_buffer->new_pages);
3900 local_set(&cpu_buffer->reader_page->write, 0); 3524 local_set(&cpu_buffer->reader_page->write, 0);
3901 local_set(&cpu_buffer->reader_page->entries, 0); 3525 local_set(&cpu_buffer->reader_page->entries, 0);
3902 local_set(&cpu_buffer->reader_page->page->commit, 0); 3526 local_set(&cpu_buffer->reader_page->page->commit, 0);
3903 cpu_buffer->reader_page->read = 0; 3527 cpu_buffer->reader_page->read = 0;
3904 3528
3905 local_set(&cpu_buffer->entries_bytes, 0);
3906 local_set(&cpu_buffer->overrun, 0);
3907 local_set(&cpu_buffer->commit_overrun, 0); 3529 local_set(&cpu_buffer->commit_overrun, 0);
3908 local_set(&cpu_buffer->dropped_events, 0); 3530 local_set(&cpu_buffer->overrun, 0);
3909 local_set(&cpu_buffer->entries, 0); 3531 local_set(&cpu_buffer->entries, 0);
3910 local_set(&cpu_buffer->committing, 0); 3532 local_set(&cpu_buffer->committing, 0);
3911 local_set(&cpu_buffer->commits, 0); 3533 local_set(&cpu_buffer->commits, 0);
3912 cpu_buffer->read = 0; 3534 cpu_buffer->read = 0;
3913 cpu_buffer->read_bytes = 0;
3914 3535
3915 cpu_buffer->write_stamp = 0; 3536 cpu_buffer->write_stamp = 0;
3916 cpu_buffer->read_stamp = 0; 3537 cpu_buffer->read_stamp = 0;
@@ -3934,13 +3555,9 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
3934 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3555 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3935 return; 3556 return;
3936 3557
3937 atomic_inc(&buffer->resize_disabled);
3938 atomic_inc(&cpu_buffer->record_disabled); 3558 atomic_inc(&cpu_buffer->record_disabled);
3939 3559
3940 /* Make sure all commits have finished */ 3560 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3941 synchronize_sched();
3942
3943 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3944 3561
3945 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) 3562 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3946 goto out; 3563 goto out;
@@ -3952,10 +3569,9 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
3952 arch_spin_unlock(&cpu_buffer->lock); 3569 arch_spin_unlock(&cpu_buffer->lock);
3953 3570
3954 out: 3571 out:
3955 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3572 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3956 3573
3957 atomic_dec(&cpu_buffer->record_disabled); 3574 atomic_dec(&cpu_buffer->record_disabled);
3958 atomic_dec(&buffer->resize_disabled);
3959} 3575}
3960EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); 3576EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
3961 3577
@@ -3991,10 +3607,10 @@ int ring_buffer_empty(struct ring_buffer *buffer)
3991 cpu_buffer = buffer->buffers[cpu]; 3607 cpu_buffer = buffer->buffers[cpu];
3992 local_irq_save(flags); 3608 local_irq_save(flags);
3993 if (dolock) 3609 if (dolock)
3994 raw_spin_lock(&cpu_buffer->reader_lock); 3610 spin_lock(&cpu_buffer->reader_lock);
3995 ret = rb_per_cpu_empty(cpu_buffer); 3611 ret = rb_per_cpu_empty(cpu_buffer);
3996 if (dolock) 3612 if (dolock)
3997 raw_spin_unlock(&cpu_buffer->reader_lock); 3613 spin_unlock(&cpu_buffer->reader_lock);
3998 local_irq_restore(flags); 3614 local_irq_restore(flags);
3999 3615
4000 if (!ret) 3616 if (!ret)
@@ -4025,10 +3641,10 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
4025 cpu_buffer = buffer->buffers[cpu]; 3641 cpu_buffer = buffer->buffers[cpu];
4026 local_irq_save(flags); 3642 local_irq_save(flags);
4027 if (dolock) 3643 if (dolock)
4028 raw_spin_lock(&cpu_buffer->reader_lock); 3644 spin_lock(&cpu_buffer->reader_lock);
4029 ret = rb_per_cpu_empty(cpu_buffer); 3645 ret = rb_per_cpu_empty(cpu_buffer);
4030 if (dolock) 3646 if (dolock)
4031 raw_spin_unlock(&cpu_buffer->reader_lock); 3647 spin_unlock(&cpu_buffer->reader_lock);
4032 local_irq_restore(flags); 3648 local_irq_restore(flags);
4033 3649
4034 return ret; 3650 return ret;
@@ -4057,11 +3673,8 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
4057 !cpumask_test_cpu(cpu, buffer_b->cpumask)) 3673 !cpumask_test_cpu(cpu, buffer_b->cpumask))
4058 goto out; 3674 goto out;
4059 3675
4060 cpu_buffer_a = buffer_a->buffers[cpu];
4061 cpu_buffer_b = buffer_b->buffers[cpu];
4062
4063 /* At least make sure the two buffers are somewhat the same */ 3676 /* At least make sure the two buffers are somewhat the same */
4064 if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages) 3677 if (buffer_a->pages != buffer_b->pages)
4065 goto out; 3678 goto out;
4066 3679
4067 ret = -EAGAIN; 3680 ret = -EAGAIN;
@@ -4075,6 +3688,9 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
4075 if (atomic_read(&buffer_b->record_disabled)) 3688 if (atomic_read(&buffer_b->record_disabled))
4076 goto out; 3689 goto out;
4077 3690
3691 cpu_buffer_a = buffer_a->buffers[cpu];
3692 cpu_buffer_b = buffer_b->buffers[cpu];
3693
4078 if (atomic_read(&cpu_buffer_a->record_disabled)) 3694 if (atomic_read(&cpu_buffer_a->record_disabled))
4079 goto out; 3695 goto out;
4080 3696
@@ -4225,7 +3841,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
4225 if (!bpage) 3841 if (!bpage)
4226 goto out; 3842 goto out;
4227 3843
4228 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3844 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4229 3845
4230 reader = rb_get_reader_page(cpu_buffer); 3846 reader = rb_get_reader_page(cpu_buffer);
4231 if (!reader) 3847 if (!reader)
@@ -4302,7 +3918,6 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
4302 } else { 3918 } else {
4303 /* update the entry counter */ 3919 /* update the entry counter */
4304 cpu_buffer->read += rb_page_entries(reader); 3920 cpu_buffer->read += rb_page_entries(reader);
4305 cpu_buffer->read_bytes += BUF_PAGE_SIZE;
4306 3921
4307 /* swap the pages */ 3922 /* swap the pages */
4308 rb_init_page(bpage); 3923 rb_init_page(bpage);
@@ -4349,13 +3964,75 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
4349 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit); 3964 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
4350 3965
4351 out_unlock: 3966 out_unlock:
4352 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3967 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4353 3968
4354 out: 3969 out:
4355 return ret; 3970 return ret;
4356} 3971}
4357EXPORT_SYMBOL_GPL(ring_buffer_read_page); 3972EXPORT_SYMBOL_GPL(ring_buffer_read_page);
4358 3973
3974#ifdef CONFIG_TRACING
3975static ssize_t
3976rb_simple_read(struct file *filp, char __user *ubuf,
3977 size_t cnt, loff_t *ppos)
3978{
3979 unsigned long *p = filp->private_data;
3980 char buf[64];
3981 int r;
3982
3983 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
3984 r = sprintf(buf, "permanently disabled\n");
3985 else
3986 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
3987
3988 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3989}
3990
3991static ssize_t
3992rb_simple_write(struct file *filp, const char __user *ubuf,
3993 size_t cnt, loff_t *ppos)
3994{
3995 unsigned long *p = filp->private_data;
3996 unsigned long val;
3997 int ret;
3998
3999 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4000 if (ret)
4001 return ret;
4002
4003 if (val)
4004 set_bit(RB_BUFFERS_ON_BIT, p);
4005 else
4006 clear_bit(RB_BUFFERS_ON_BIT, p);
4007
4008 (*ppos)++;
4009
4010 return cnt;
4011}
4012
4013static const struct file_operations rb_simple_fops = {
4014 .open = tracing_open_generic,
4015 .read = rb_simple_read,
4016 .write = rb_simple_write,
4017 .llseek = default_llseek,
4018};
4019
4020
4021static __init int rb_init_debugfs(void)
4022{
4023 struct dentry *d_tracer;
4024
4025 d_tracer = tracing_init_dentry();
4026
4027 trace_create_file("tracing_on", 0644, d_tracer,
4028 &ring_buffer_flags, &rb_simple_fops);
4029
4030 return 0;
4031}
4032
4033fs_initcall(rb_init_debugfs);
4034#endif
4035
4359#ifdef CONFIG_HOTPLUG_CPU 4036#ifdef CONFIG_HOTPLUG_CPU
4360static int rb_cpu_notify(struct notifier_block *self, 4037static int rb_cpu_notify(struct notifier_block *self,
4361 unsigned long action, void *hcpu) 4038 unsigned long action, void *hcpu)
@@ -4363,8 +4040,6 @@ static int rb_cpu_notify(struct notifier_block *self,
4363 struct ring_buffer *buffer = 4040 struct ring_buffer *buffer =
4364 container_of(self, struct ring_buffer, cpu_notify); 4041 container_of(self, struct ring_buffer, cpu_notify);
4365 long cpu = (long)hcpu; 4042 long cpu = (long)hcpu;
4366 int cpu_i, nr_pages_same;
4367 unsigned int nr_pages;
4368 4043
4369 switch (action) { 4044 switch (action) {
4370 case CPU_UP_PREPARE: 4045 case CPU_UP_PREPARE:
@@ -4372,23 +4047,8 @@ static int rb_cpu_notify(struct notifier_block *self,
4372 if (cpumask_test_cpu(cpu, buffer->cpumask)) 4047 if (cpumask_test_cpu(cpu, buffer->cpumask))
4373 return NOTIFY_OK; 4048 return NOTIFY_OK;
4374 4049
4375 nr_pages = 0;
4376 nr_pages_same = 1;
4377 /* check if all cpu sizes are same */
4378 for_each_buffer_cpu(buffer, cpu_i) {
4379 /* fill in the size from first enabled cpu */
4380 if (nr_pages == 0)
4381 nr_pages = buffer->buffers[cpu_i]->nr_pages;
4382 if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
4383 nr_pages_same = 0;
4384 break;
4385 }
4386 }
4387 /* allocate minimum pages, user can later expand it */
4388 if (!nr_pages_same)
4389 nr_pages = 2;
4390 buffer->buffers[cpu] = 4050 buffer->buffers[cpu] =
4391 rb_allocate_cpu_buffer(buffer, nr_pages, cpu); 4051 rb_allocate_cpu_buffer(buffer, cpu);
4392 if (!buffer->buffers[cpu]) { 4052 if (!buffer->buffers[cpu]) {
4393 WARN(1, "failed to allocate ring buffer on CPU %ld\n", 4053 WARN(1, "failed to allocate ring buffer on CPU %ld\n",
4394 cpu); 4054 cpu);
diff --git a/kernel/trace/rpm-traces.c b/kernel/trace/rpm-traces.c
deleted file mode 100644
index 4b3b5eaf94d..00000000000
--- a/kernel/trace/rpm-traces.c
+++ /dev/null
@@ -1,20 +0,0 @@
1/*
2 * Power trace points
3 *
4 * Copyright (C) 2009 Ming Lei <ming.lei@canonical.com>
5 */
6
7#include <linux/string.h>
8#include <linux/types.h>
9#include <linux/workqueue.h>
10#include <linux/sched.h>
11#include <linux/module.h>
12#include <linux/usb.h>
13
14#define CREATE_TRACE_POINTS
15#include <trace/events/rpm.h>
16
17EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_return_int);
18EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_idle);
19EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_suspend);
20EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_resume);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3c13e46d7d2..17a2d44e1af 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9,7 +9,7 @@
9 * 9 *
10 * Based on code from the latency_tracer, that is: 10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar 11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 Nadia Yvette Chambers 12 * Copyright (C) 2004 William Lee Irwin III
13 */ 13 */
14#include <linux/ring_buffer.h> 14#include <linux/ring_buffer.h>
15#include <generated/utsrelease.h> 15#include <generated/utsrelease.h>
@@ -19,7 +19,6 @@
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/notifier.h> 20#include <linux/notifier.h>
21#include <linux/irqflags.h> 21#include <linux/irqflags.h>
22#include <linux/irq_work.h>
23#include <linux/debugfs.h> 22#include <linux/debugfs.h>
24#include <linux/pagemap.h> 23#include <linux/pagemap.h>
25#include <linux/hardirq.h> 24#include <linux/hardirq.h>
@@ -37,7 +36,6 @@
37#include <linux/ctype.h> 36#include <linux/ctype.h>
38#include <linux/init.h> 37#include <linux/init.h>
39#include <linux/poll.h> 38#include <linux/poll.h>
40#include <linux/nmi.h>
41#include <linux/fs.h> 39#include <linux/fs.h>
42 40
43#include "trace.h" 41#include "trace.h"
@@ -79,21 +77,6 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
79} 77}
80 78
81/* 79/*
82 * To prevent the comm cache from being overwritten when no
83 * tracing is active, only save the comm when a trace event
84 * occurred.
85 */
86static DEFINE_PER_CPU(bool, trace_cmdline_save);
87
88/*
89 * When a reader is waiting for data, then this variable is
90 * set to true.
91 */
92static bool trace_wakeup_needed;
93
94static struct irq_work trace_work_wakeup;
95
96/*
97 * Kill all tracing for good (never come back). 80 * Kill all tracing for good (never come back).
98 * It is initialized to 1 but will turn to zero if the initialization 81 * It is initialized to 1 but will turn to zero if the initialization
99 * of the tracer is successful. But that is the only place that sets 82 * of the tracer is successful. But that is the only place that sets
@@ -103,6 +86,18 @@ static int tracing_disabled = 1;
103 86
104DEFINE_PER_CPU(int, ftrace_cpu_disabled); 87DEFINE_PER_CPU(int, ftrace_cpu_disabled);
105 88
89static inline void ftrace_disable_cpu(void)
90{
91 preempt_disable();
92 __this_cpu_inc(ftrace_cpu_disabled);
93}
94
95static inline void ftrace_enable_cpu(void)
96{
97 __this_cpu_dec(ftrace_cpu_disabled);
98 preempt_enable();
99}
100
106cpumask_var_t __read_mostly tracing_buffer_mask; 101cpumask_var_t __read_mostly tracing_buffer_mask;
107 102
108/* 103/*
@@ -155,18 +150,6 @@ static int __init set_ftrace_dump_on_oops(char *str)
155} 150}
156__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); 151__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
157 152
158
159static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
160static char *trace_boot_options __initdata;
161
162static int __init set_trace_boot_options(char *str)
163{
164 strncpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
165 trace_boot_options = trace_boot_options_buf;
166 return 0;
167}
168__setup("trace_options=", set_trace_boot_options);
169
170unsigned long long ns2usecs(cycle_t nsec) 153unsigned long long ns2usecs(cycle_t nsec)
171{ 154{
172 nsec += 500; 155 nsec += 500;
@@ -226,9 +209,20 @@ static struct trace_array max_tr;
226 209
227static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data); 210static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
228 211
212/* tracer_enabled is used to toggle activation of a tracer */
213static int tracer_enabled = 1;
214
215/**
216 * tracing_is_enabled - return tracer_enabled status
217 *
218 * This function is used by other tracers to know the status
219 * of the tracer_enabled flag. Tracers may use this function
220 * to know if it should enable their features when starting
221 * up. See irqsoff tracer for an example (start_irqsoff_tracer).
222 */
229int tracing_is_enabled(void) 223int tracing_is_enabled(void)
230{ 224{
231 return tracing_is_on(); 225 return tracer_enabled;
232} 226}
233 227
234/* 228/*
@@ -344,77 +338,33 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
344/* trace_flags holds trace_options default values */ 338/* trace_flags holds trace_options default values */
345unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 339unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
346 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | 340 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
347 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | 341 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE;
348 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS;
349 342
350static int trace_stop_count; 343static int trace_stop_count;
351static DEFINE_RAW_SPINLOCK(tracing_start_lock); 344static DEFINE_SPINLOCK(tracing_start_lock);
352 345
353/** 346static void wakeup_work_handler(struct work_struct *work)
354 * trace_wake_up - wake up tasks waiting for trace input
355 *
356 * Schedules a delayed work to wake up any task that is blocked on the
357 * trace_wait queue. These is used with trace_poll for tasks polling the
358 * trace.
359 */
360static void trace_wake_up(struct irq_work *work)
361{ 347{
362 wake_up_all(&trace_wait); 348 wake_up(&trace_wait);
363
364} 349}
365 350
366/** 351static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler);
367 * tracing_on - enable tracing buffers
368 *
369 * This function enables tracing buffers that may have been
370 * disabled with tracing_off.
371 */
372void tracing_on(void)
373{
374 if (global_trace.buffer)
375 ring_buffer_record_on(global_trace.buffer);
376 /*
377 * This flag is only looked at when buffers haven't been
378 * allocated yet. We don't really care about the race
379 * between setting this flag and actually turning
380 * on the buffer.
381 */
382 global_trace.buffer_disabled = 0;
383}
384EXPORT_SYMBOL_GPL(tracing_on);
385 352
386/** 353/**
387 * tracing_off - turn off tracing buffers 354 * trace_wake_up - wake up tasks waiting for trace input
388 * 355 *
389 * This function stops the tracing buffers from recording data. 356 * Schedules a delayed work to wake up any task that is blocked on the
390 * It does not disable any overhead the tracers themselves may 357 * trace_wait queue. These is used with trace_poll for tasks polling the
391 * be causing. This function simply causes all recording to 358 * trace.
392 * the ring buffers to fail.
393 */ 359 */
394void tracing_off(void) 360void trace_wake_up(void)
395{ 361{
396 if (global_trace.buffer) 362 const unsigned long delay = msecs_to_jiffies(2);
397 ring_buffer_record_off(global_trace.buffer);
398 /*
399 * This flag is only looked at when buffers haven't been
400 * allocated yet. We don't really care about the race
401 * between setting this flag and actually turning
402 * on the buffer.
403 */
404 global_trace.buffer_disabled = 1;
405}
406EXPORT_SYMBOL_GPL(tracing_off);
407 363
408/** 364 if (trace_flags & TRACE_ITER_BLOCK)
409 * tracing_is_on - show state of ring buffers enabled 365 return;
410 */ 366 schedule_delayed_work(&wakeup_work, delay);
411int tracing_is_on(void)
412{
413 if (global_trace.buffer)
414 return ring_buffer_record_is_on(global_trace.buffer);
415 return !global_trace.buffer_disabled;
416} 367}
417EXPORT_SYMBOL_GPL(tracing_is_on);
418 368
419static int __init set_buf_size(char *str) 369static int __init set_buf_size(char *str)
420{ 370{
@@ -433,15 +383,15 @@ __setup("trace_buf_size=", set_buf_size);
433 383
434static int __init set_tracing_thresh(char *str) 384static int __init set_tracing_thresh(char *str)
435{ 385{
436 unsigned long threshold; 386 unsigned long threshhold;
437 int ret; 387 int ret;
438 388
439 if (!str) 389 if (!str)
440 return 0; 390 return 0;
441 ret = kstrtoul(str, 0, &threshold); 391 ret = strict_strtoul(str, 0, &threshhold);
442 if (ret < 0) 392 if (ret < 0)
443 return 0; 393 return 0;
444 tracing_thresh = threshold * 1000; 394 tracing_thresh = threshhold * 1000;
445 return 1; 395 return 1;
446} 396}
447__setup("tracing_thresh=", set_tracing_thresh); 397__setup("tracing_thresh=", set_tracing_thresh);
@@ -476,20 +426,15 @@ static const char *trace_options[] = {
476 "record-cmd", 426 "record-cmd",
477 "overwrite", 427 "overwrite",
478 "disable_on_free", 428 "disable_on_free",
479 "irq-info",
480 "markers",
481 NULL 429 NULL
482}; 430};
483 431
484static struct { 432static struct {
485 u64 (*func)(void); 433 u64 (*func)(void);
486 const char *name; 434 const char *name;
487 int in_ns; /* is this clock in nanoseconds? */
488} trace_clocks[] = { 435} trace_clocks[] = {
489 { trace_clock_local, "local", 1 }, 436 { trace_clock_local, "local" },
490 { trace_clock_global, "global", 1 }, 437 { trace_clock_global, "global" },
491 { trace_clock_counter, "counter", 0 },
492 ARCH_TRACE_CLOCKS
493}; 438};
494 439
495int trace_clock_id; 440int trace_clock_id;
@@ -627,6 +572,7 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
627static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) 572static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
628{ 573{
629 int len; 574 int len;
575 void *ret;
630 576
631 if (s->len <= s->readpos) 577 if (s->len <= s->readpos)
632 return -EBUSY; 578 return -EBUSY;
@@ -634,7 +580,9 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
634 len = s->len - s->readpos; 580 len = s->len - s->readpos;
635 if (cnt > len) 581 if (cnt > len)
636 cnt = len; 582 cnt = len;
637 memcpy(buf, s->buffer + s->readpos, cnt); 583 ret = memcpy(buf, s->buffer + s->readpos, cnt);
584 if (!ret)
585 return -EFAULT;
638 586
639 s->readpos += cnt; 587 s->readpos += cnt;
640 return cnt; 588 return cnt;
@@ -746,6 +694,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
746 694
747 arch_spin_lock(&ftrace_max_lock); 695 arch_spin_lock(&ftrace_max_lock);
748 696
697 ftrace_disable_cpu();
698
749 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); 699 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
750 700
751 if (ret == -EBUSY) { 701 if (ret == -EBUSY) {
@@ -759,6 +709,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
759 "Failed to swap buffers due to commit in progress\n"); 709 "Failed to swap buffers due to commit in progress\n");
760 } 710 }
761 711
712 ftrace_enable_cpu();
713
762 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 714 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
763 715
764 __update_max_tr(tr, tsk, cpu); 716 __update_max_tr(tr, tsk, cpu);
@@ -766,40 +718,6 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
766} 718}
767#endif /* CONFIG_TRACER_MAX_TRACE */ 719#endif /* CONFIG_TRACER_MAX_TRACE */
768 720
769static void default_wait_pipe(struct trace_iterator *iter)
770{
771 DEFINE_WAIT(wait);
772
773 prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
774
775 /*
776 * The events can happen in critical sections where
777 * checking a work queue can cause deadlocks.
778 * After adding a task to the queue, this flag is set
779 * only to notify events to try to wake up the queue
780 * using irq_work.
781 *
782 * We don't clear it even if the buffer is no longer
783 * empty. The flag only causes the next event to run
784 * irq_work to do the work queue wake up. The worse
785 * that can happen if we race with !trace_empty() is that
786 * an event will cause an irq_work to try to wake up
787 * an empty queue.
788 *
789 * There's no reason to protect this flag either, as
790 * the work queue and irq_work logic will do the necessary
791 * synchronization for the wake ups. The only thing
792 * that is necessary is that the wake up happens after
793 * a task has been queued. It's OK for spurious wake ups.
794 */
795 trace_wakeup_needed = true;
796
797 if (trace_empty(iter))
798 schedule();
799
800 finish_wait(&trace_wait, &wait);
801}
802
803/** 721/**
804 * register_tracer - register a tracer with the ftrace system. 722 * register_tracer - register a tracer with the ftrace system.
805 * @type - the plugin for the tracer 723 * @type - the plugin for the tracer
@@ -807,6 +725,8 @@ static void default_wait_pipe(struct trace_iterator *iter)
807 * Register a new plugin tracer. 725 * Register a new plugin tracer.
808 */ 726 */
809int register_tracer(struct tracer *type) 727int register_tracer(struct tracer *type)
728__releases(kernel_lock)
729__acquires(kernel_lock)
810{ 730{
811 struct tracer *t; 731 struct tracer *t;
812 int ret = 0; 732 int ret = 0;
@@ -864,8 +784,7 @@ int register_tracer(struct tracer *type)
864 784
865 /* If we expanded the buffers, make sure the max is expanded too */ 785 /* If we expanded the buffers, make sure the max is expanded too */
866 if (ring_buffer_expanded && type->use_max_tr) 786 if (ring_buffer_expanded && type->use_max_tr)
867 ring_buffer_resize(max_tr.buffer, trace_buf_size, 787 ring_buffer_resize(max_tr.buffer, trace_buf_size);
868 RING_BUFFER_ALL_CPUS);
869 788
870 /* the test is responsible for initializing and enabling */ 789 /* the test is responsible for initializing and enabling */
871 pr_info("Testing tracer %s: ", type->name); 790 pr_info("Testing tracer %s: ", type->name);
@@ -874,8 +793,6 @@ int register_tracer(struct tracer *type)
874 current_trace = saved_tracer; 793 current_trace = saved_tracer;
875 if (ret) { 794 if (ret) {
876 printk(KERN_CONT "FAILED!\n"); 795 printk(KERN_CONT "FAILED!\n");
877 /* Add the warning after printing 'FAILED' */
878 WARN_ON(1);
879 goto out; 796 goto out;
880 } 797 }
881 /* Only reset on passing, to avoid touching corrupted buffers */ 798 /* Only reset on passing, to avoid touching corrupted buffers */
@@ -883,8 +800,7 @@ int register_tracer(struct tracer *type)
883 800
884 /* Shrink the max buffer again */ 801 /* Shrink the max buffer again */
885 if (ring_buffer_expanded && type->use_max_tr) 802 if (ring_buffer_expanded && type->use_max_tr)
886 ring_buffer_resize(max_tr.buffer, 1, 803 ring_buffer_resize(max_tr.buffer, 1);
887 RING_BUFFER_ALL_CPUS);
888 804
889 printk(KERN_CONT "PASSED\n"); 805 printk(KERN_CONT "PASSED\n");
890 } 806 }
@@ -918,6 +834,39 @@ int register_tracer(struct tracer *type)
918 return ret; 834 return ret;
919} 835}
920 836
837void unregister_tracer(struct tracer *type)
838{
839 struct tracer **t;
840
841 mutex_lock(&trace_types_lock);
842 for (t = &trace_types; *t; t = &(*t)->next) {
843 if (*t == type)
844 goto found;
845 }
846 pr_info("Tracer %s not registered\n", type->name);
847 goto out;
848
849 found:
850 *t = (*t)->next;
851
852 if (type == current_trace && tracer_enabled) {
853 tracer_enabled = 0;
854 tracing_stop();
855 if (current_trace->stop)
856 current_trace->stop(&global_trace);
857 current_trace = &nop_trace;
858 }
859out:
860 mutex_unlock(&trace_types_lock);
861}
862
863static void __tracing_reset(struct ring_buffer *buffer, int cpu)
864{
865 ftrace_disable_cpu();
866 ring_buffer_reset_cpu(buffer, cpu);
867 ftrace_enable_cpu();
868}
869
921void tracing_reset(struct trace_array *tr, int cpu) 870void tracing_reset(struct trace_array *tr, int cpu)
922{ 871{
923 struct ring_buffer *buffer = tr->buffer; 872 struct ring_buffer *buffer = tr->buffer;
@@ -926,7 +875,7 @@ void tracing_reset(struct trace_array *tr, int cpu)
926 875
927 /* Make sure all commits have finished */ 876 /* Make sure all commits have finished */
928 synchronize_sched(); 877 synchronize_sched();
929 ring_buffer_reset_cpu(buffer, cpu); 878 __tracing_reset(buffer, cpu);
930 879
931 ring_buffer_record_enable(buffer); 880 ring_buffer_record_enable(buffer);
932} 881}
@@ -944,7 +893,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
944 tr->time_start = ftrace_now(tr->cpu); 893 tr->time_start = ftrace_now(tr->cpu);
945 894
946 for_each_online_cpu(cpu) 895 for_each_online_cpu(cpu)
947 ring_buffer_reset_cpu(buffer, cpu); 896 __tracing_reset(buffer, cpu);
948 897
949 ring_buffer_record_enable(buffer); 898 ring_buffer_record_enable(buffer);
950} 899}
@@ -1011,7 +960,7 @@ void tracing_start(void)
1011 if (tracing_disabled) 960 if (tracing_disabled)
1012 return; 961 return;
1013 962
1014 raw_spin_lock_irqsave(&tracing_start_lock, flags); 963 spin_lock_irqsave(&tracing_start_lock, flags);
1015 if (--trace_stop_count) { 964 if (--trace_stop_count) {
1016 if (trace_stop_count < 0) { 965 if (trace_stop_count < 0) {
1017 /* Someone screwed up their debugging */ 966 /* Someone screwed up their debugging */
@@ -1036,7 +985,7 @@ void tracing_start(void)
1036 985
1037 ftrace_start(); 986 ftrace_start();
1038 out: 987 out:
1039 raw_spin_unlock_irqrestore(&tracing_start_lock, flags); 988 spin_unlock_irqrestore(&tracing_start_lock, flags);
1040} 989}
1041 990
1042/** 991/**
@@ -1051,7 +1000,7 @@ void tracing_stop(void)
1051 unsigned long flags; 1000 unsigned long flags;
1052 1001
1053 ftrace_stop(); 1002 ftrace_stop();
1054 raw_spin_lock_irqsave(&tracing_start_lock, flags); 1003 spin_lock_irqsave(&tracing_start_lock, flags);
1055 if (trace_stop_count++) 1004 if (trace_stop_count++)
1056 goto out; 1005 goto out;
1057 1006
@@ -1069,7 +1018,7 @@ void tracing_stop(void)
1069 arch_spin_unlock(&ftrace_max_lock); 1018 arch_spin_unlock(&ftrace_max_lock);
1070 1019
1071 out: 1020 out:
1072 raw_spin_unlock_irqrestore(&tracing_start_lock, flags); 1021 spin_unlock_irqrestore(&tracing_start_lock, flags);
1073} 1022}
1074 1023
1075void trace_stop_cmdline_recording(void); 1024void trace_stop_cmdline_recording(void);
@@ -1148,14 +1097,10 @@ void trace_find_cmdline(int pid, char comm[])
1148 1097
1149void tracing_record_cmdline(struct task_struct *tsk) 1098void tracing_record_cmdline(struct task_struct *tsk)
1150{ 1099{
1151 if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on()) 1100 if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled ||
1101 !tracing_is_on())
1152 return; 1102 return;
1153 1103
1154 if (!__this_cpu_read(trace_cmdline_save))
1155 return;
1156
1157 __this_cpu_write(trace_cmdline_save, false);
1158
1159 trace_save_cmdline(tsk); 1104 trace_save_cmdline(tsk);
1160} 1105}
1161 1106
@@ -1199,36 +1144,27 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer,
1199 return event; 1144 return event;
1200} 1145}
1201 1146
1202void
1203__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1204{
1205 __this_cpu_write(trace_cmdline_save, true);
1206 if (trace_wakeup_needed) {
1207 trace_wakeup_needed = false;
1208 /* irq_work_queue() supplies it's own memory barriers */
1209 irq_work_queue(&trace_work_wakeup);
1210 }
1211 ring_buffer_unlock_commit(buffer, event);
1212}
1213
1214static inline void 1147static inline void
1215__trace_buffer_unlock_commit(struct ring_buffer *buffer, 1148__trace_buffer_unlock_commit(struct ring_buffer *buffer,
1216 struct ring_buffer_event *event, 1149 struct ring_buffer_event *event,
1217 unsigned long flags, int pc) 1150 unsigned long flags, int pc,
1151 int wake)
1218{ 1152{
1219 __buffer_unlock_commit(buffer, event); 1153 ring_buffer_unlock_commit(buffer, event);
1220 1154
1221 ftrace_trace_stack(buffer, flags, 6, pc); 1155 ftrace_trace_stack(buffer, flags, 6, pc);
1222 ftrace_trace_userstack(buffer, flags, pc); 1156 ftrace_trace_userstack(buffer, flags, pc);
1157
1158 if (wake)
1159 trace_wake_up();
1223} 1160}
1224 1161
1225void trace_buffer_unlock_commit(struct ring_buffer *buffer, 1162void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1226 struct ring_buffer_event *event, 1163 struct ring_buffer_event *event,
1227 unsigned long flags, int pc) 1164 unsigned long flags, int pc)
1228{ 1165{
1229 __trace_buffer_unlock_commit(buffer, event, flags, pc); 1166 __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
1230} 1167}
1231EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1232 1168
1233struct ring_buffer_event * 1169struct ring_buffer_event *
1234trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, 1170trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
@@ -1245,21 +1181,29 @@ void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1245 struct ring_buffer_event *event, 1181 struct ring_buffer_event *event,
1246 unsigned long flags, int pc) 1182 unsigned long flags, int pc)
1247{ 1183{
1248 __trace_buffer_unlock_commit(buffer, event, flags, pc); 1184 __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
1249} 1185}
1250EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); 1186EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1251 1187
1252void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, 1188void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
1253 struct ring_buffer_event *event, 1189 struct ring_buffer_event *event,
1254 unsigned long flags, int pc, 1190 unsigned long flags, int pc)
1255 struct pt_regs *regs) 1191{
1192 __trace_buffer_unlock_commit(buffer, event, flags, pc, 0);
1193}
1194EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
1195
1196void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1197 struct ring_buffer_event *event,
1198 unsigned long flags, int pc,
1199 struct pt_regs *regs)
1256{ 1200{
1257 __buffer_unlock_commit(buffer, event); 1201 ring_buffer_unlock_commit(buffer, event);
1258 1202
1259 ftrace_trace_stack_regs(buffer, flags, 0, pc, regs); 1203 ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1260 ftrace_trace_userstack(buffer, flags, pc); 1204 ftrace_trace_userstack(buffer, flags, pc);
1261} 1205}
1262EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs); 1206EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs);
1263 1207
1264void trace_current_buffer_discard_commit(struct ring_buffer *buffer, 1208void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1265 struct ring_buffer_event *event) 1209 struct ring_buffer_event *event)
@@ -1291,7 +1235,7 @@ trace_function(struct trace_array *tr,
1291 entry->parent_ip = parent_ip; 1235 entry->parent_ip = parent_ip;
1292 1236
1293 if (!filter_check_discard(call, entry, buffer, event)) 1237 if (!filter_check_discard(call, entry, buffer, event))
1294 __buffer_unlock_commit(buffer, event); 1238 ring_buffer_unlock_commit(buffer, event);
1295} 1239}
1296 1240
1297void 1241void
@@ -1384,7 +1328,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
1384 entry->size = trace.nr_entries; 1328 entry->size = trace.nr_entries;
1385 1329
1386 if (!filter_check_discard(call, entry, buffer, event)) 1330 if (!filter_check_discard(call, entry, buffer, event))
1387 __buffer_unlock_commit(buffer, event); 1331 ring_buffer_unlock_commit(buffer, event);
1388 1332
1389 out: 1333 out:
1390 /* Again, don't let gcc optimize things here */ 1334 /* Again, don't let gcc optimize things here */
@@ -1480,7 +1424,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1480 1424
1481 save_stack_trace_user(&trace); 1425 save_stack_trace_user(&trace);
1482 if (!filter_check_discard(call, entry, buffer, event)) 1426 if (!filter_check_discard(call, entry, buffer, event))
1483 __buffer_unlock_commit(buffer, event); 1427 ring_buffer_unlock_commit(buffer, event);
1484 1428
1485 out_drop_count: 1429 out_drop_count:
1486 __this_cpu_dec(user_stack_count); 1430 __this_cpu_dec(user_stack_count);
@@ -1497,150 +1441,25 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1497 1441
1498#endif /* CONFIG_STACKTRACE */ 1442#endif /* CONFIG_STACKTRACE */
1499 1443
1500/* created for use with alloc_percpu */
1501struct trace_buffer_struct {
1502 char buffer[TRACE_BUF_SIZE];
1503};
1504
1505static struct trace_buffer_struct *trace_percpu_buffer;
1506static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1507static struct trace_buffer_struct *trace_percpu_irq_buffer;
1508static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1509
1510/*
1511 * The buffer used is dependent on the context. There is a per cpu
1512 * buffer for normal context, softirq contex, hard irq context and
1513 * for NMI context. Thise allows for lockless recording.
1514 *
1515 * Note, if the buffers failed to be allocated, then this returns NULL
1516 */
1517static char *get_trace_buf(void)
1518{
1519 struct trace_buffer_struct *percpu_buffer;
1520 struct trace_buffer_struct *buffer;
1521
1522 /*
1523 * If we have allocated per cpu buffers, then we do not
1524 * need to do any locking.
1525 */
1526 if (in_nmi())
1527 percpu_buffer = trace_percpu_nmi_buffer;
1528 else if (in_irq())
1529 percpu_buffer = trace_percpu_irq_buffer;
1530 else if (in_softirq())
1531 percpu_buffer = trace_percpu_sirq_buffer;
1532 else
1533 percpu_buffer = trace_percpu_buffer;
1534
1535 if (!percpu_buffer)
1536 return NULL;
1537
1538 buffer = per_cpu_ptr(percpu_buffer, smp_processor_id());
1539
1540 return buffer->buffer;
1541}
1542
1543static int alloc_percpu_trace_buffer(void)
1544{
1545 struct trace_buffer_struct *buffers;
1546 struct trace_buffer_struct *sirq_buffers;
1547 struct trace_buffer_struct *irq_buffers;
1548 struct trace_buffer_struct *nmi_buffers;
1549
1550 buffers = alloc_percpu(struct trace_buffer_struct);
1551 if (!buffers)
1552 goto err_warn;
1553
1554 sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1555 if (!sirq_buffers)
1556 goto err_sirq;
1557
1558 irq_buffers = alloc_percpu(struct trace_buffer_struct);
1559 if (!irq_buffers)
1560 goto err_irq;
1561
1562 nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1563 if (!nmi_buffers)
1564 goto err_nmi;
1565
1566 trace_percpu_buffer = buffers;
1567 trace_percpu_sirq_buffer = sirq_buffers;
1568 trace_percpu_irq_buffer = irq_buffers;
1569 trace_percpu_nmi_buffer = nmi_buffers;
1570
1571 return 0;
1572
1573 err_nmi:
1574 free_percpu(irq_buffers);
1575 err_irq:
1576 free_percpu(sirq_buffers);
1577 err_sirq:
1578 free_percpu(buffers);
1579 err_warn:
1580 WARN(1, "Could not allocate percpu trace_printk buffer");
1581 return -ENOMEM;
1582}
1583
1584static int buffers_allocated;
1585
1586void trace_printk_init_buffers(void)
1587{
1588 if (buffers_allocated)
1589 return;
1590
1591 if (alloc_percpu_trace_buffer())
1592 return;
1593
1594 pr_info("ftrace: Allocated trace_printk buffers\n");
1595
1596 /* Expand the buffers to set size */
1597 tracing_update_buffers();
1598
1599 buffers_allocated = 1;
1600
1601 /*
1602 * trace_printk_init_buffers() can be called by modules.
1603 * If that happens, then we need to start cmdline recording
1604 * directly here. If the global_trace.buffer is already
1605 * allocated here, then this was called by module code.
1606 */
1607 if (global_trace.buffer)
1608 tracing_start_cmdline_record();
1609}
1610
1611void trace_printk_start_comm(void)
1612{
1613 /* Start tracing comms if trace printk is set */
1614 if (!buffers_allocated)
1615 return;
1616 tracing_start_cmdline_record();
1617}
1618
1619static void trace_printk_start_stop_comm(int enabled)
1620{
1621 if (!buffers_allocated)
1622 return;
1623
1624 if (enabled)
1625 tracing_start_cmdline_record();
1626 else
1627 tracing_stop_cmdline_record();
1628}
1629
1630/** 1444/**
1631 * trace_vbprintk - write binary msg to tracing buffer 1445 * trace_vbprintk - write binary msg to tracing buffer
1632 * 1446 *
1633 */ 1447 */
1634int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 1448int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1635{ 1449{
1450 static arch_spinlock_t trace_buf_lock =
1451 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1452 static u32 trace_buf[TRACE_BUF_SIZE];
1453
1636 struct ftrace_event_call *call = &event_bprint; 1454 struct ftrace_event_call *call = &event_bprint;
1637 struct ring_buffer_event *event; 1455 struct ring_buffer_event *event;
1638 struct ring_buffer *buffer; 1456 struct ring_buffer *buffer;
1639 struct trace_array *tr = &global_trace; 1457 struct trace_array *tr = &global_trace;
1458 struct trace_array_cpu *data;
1640 struct bprint_entry *entry; 1459 struct bprint_entry *entry;
1641 unsigned long flags; 1460 unsigned long flags;
1642 char *tbuffer; 1461 int disable;
1643 int len = 0, size, pc; 1462 int cpu, len = 0, size, pc;
1644 1463
1645 if (unlikely(tracing_selftest_running || tracing_disabled)) 1464 if (unlikely(tracing_selftest_running || tracing_disabled))
1646 return 0; 1465 return 0;
@@ -1650,36 +1469,43 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1650 1469
1651 pc = preempt_count(); 1470 pc = preempt_count();
1652 preempt_disable_notrace(); 1471 preempt_disable_notrace();
1472 cpu = raw_smp_processor_id();
1473 data = tr->data[cpu];
1653 1474
1654 tbuffer = get_trace_buf(); 1475 disable = atomic_inc_return(&data->disabled);
1655 if (!tbuffer) { 1476 if (unlikely(disable != 1))
1656 len = 0;
1657 goto out; 1477 goto out;
1658 }
1659 1478
1660 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); 1479 /* Lockdep uses trace_printk for lock tracing */
1480 local_irq_save(flags);
1481 arch_spin_lock(&trace_buf_lock);
1482 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1661 1483
1662 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) 1484 if (len > TRACE_BUF_SIZE || len < 0)
1663 goto out; 1485 goto out_unlock;
1664 1486
1665 local_save_flags(flags);
1666 size = sizeof(*entry) + sizeof(u32) * len; 1487 size = sizeof(*entry) + sizeof(u32) * len;
1667 buffer = tr->buffer; 1488 buffer = tr->buffer;
1668 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, 1489 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
1669 flags, pc); 1490 flags, pc);
1670 if (!event) 1491 if (!event)
1671 goto out; 1492 goto out_unlock;
1672 entry = ring_buffer_event_data(event); 1493 entry = ring_buffer_event_data(event);
1673 entry->ip = ip; 1494 entry->ip = ip;
1674 entry->fmt = fmt; 1495 entry->fmt = fmt;
1675 1496
1676 memcpy(entry->buf, tbuffer, sizeof(u32) * len); 1497 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1677 if (!filter_check_discard(call, entry, buffer, event)) { 1498 if (!filter_check_discard(call, entry, buffer, event)) {
1678 __buffer_unlock_commit(buffer, event); 1499 ring_buffer_unlock_commit(buffer, event);
1679 ftrace_trace_stack(buffer, flags, 6, pc); 1500 ftrace_trace_stack(buffer, flags, 6, pc);
1680 } 1501 }
1681 1502
1503out_unlock:
1504 arch_spin_unlock(&trace_buf_lock);
1505 local_irq_restore(flags);
1506
1682out: 1507out:
1508 atomic_dec_return(&data->disabled);
1683 preempt_enable_notrace(); 1509 preempt_enable_notrace();
1684 unpause_graph_tracing(); 1510 unpause_graph_tracing();
1685 1511
@@ -1705,53 +1531,58 @@ int trace_array_printk(struct trace_array *tr,
1705int trace_array_vprintk(struct trace_array *tr, 1531int trace_array_vprintk(struct trace_array *tr,
1706 unsigned long ip, const char *fmt, va_list args) 1532 unsigned long ip, const char *fmt, va_list args)
1707{ 1533{
1534 static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1535 static char trace_buf[TRACE_BUF_SIZE];
1536
1708 struct ftrace_event_call *call = &event_print; 1537 struct ftrace_event_call *call = &event_print;
1709 struct ring_buffer_event *event; 1538 struct ring_buffer_event *event;
1710 struct ring_buffer *buffer; 1539 struct ring_buffer *buffer;
1711 int len = 0, size, pc; 1540 struct trace_array_cpu *data;
1541 int cpu, len = 0, size, pc;
1712 struct print_entry *entry; 1542 struct print_entry *entry;
1713 unsigned long flags; 1543 unsigned long irq_flags;
1714 char *tbuffer; 1544 int disable;
1715 1545
1716 if (tracing_disabled || tracing_selftest_running) 1546 if (tracing_disabled || tracing_selftest_running)
1717 return 0; 1547 return 0;
1718 1548
1719 /* Don't pollute graph traces with trace_vprintk internals */
1720 pause_graph_tracing();
1721
1722 pc = preempt_count(); 1549 pc = preempt_count();
1723 preempt_disable_notrace(); 1550 preempt_disable_notrace();
1551 cpu = raw_smp_processor_id();
1552 data = tr->data[cpu];
1724 1553
1725 1554 disable = atomic_inc_return(&data->disabled);
1726 tbuffer = get_trace_buf(); 1555 if (unlikely(disable != 1))
1727 if (!tbuffer) {
1728 len = 0;
1729 goto out; 1556 goto out;
1730 }
1731 1557
1732 len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); 1558 pause_graph_tracing();
1733 if (len > TRACE_BUF_SIZE) 1559 raw_local_irq_save(irq_flags);
1734 goto out; 1560 arch_spin_lock(&trace_buf_lock);
1561 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1735 1562
1736 local_save_flags(flags);
1737 size = sizeof(*entry) + len + 1; 1563 size = sizeof(*entry) + len + 1;
1738 buffer = tr->buffer; 1564 buffer = tr->buffer;
1739 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 1565 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
1740 flags, pc); 1566 irq_flags, pc);
1741 if (!event) 1567 if (!event)
1742 goto out; 1568 goto out_unlock;
1743 entry = ring_buffer_event_data(event); 1569 entry = ring_buffer_event_data(event);
1744 entry->ip = ip; 1570 entry->ip = ip;
1745 1571
1746 memcpy(&entry->buf, tbuffer, len); 1572 memcpy(&entry->buf, trace_buf, len);
1747 entry->buf[len] = '\0'; 1573 entry->buf[len] = '\0';
1748 if (!filter_check_discard(call, entry, buffer, event)) { 1574 if (!filter_check_discard(call, entry, buffer, event)) {
1749 __buffer_unlock_commit(buffer, event); 1575 ring_buffer_unlock_commit(buffer, event);
1750 ftrace_trace_stack(buffer, flags, 6, pc); 1576 ftrace_trace_stack(buffer, irq_flags, 6, pc);
1751 } 1577 }
1578
1579 out_unlock:
1580 arch_spin_unlock(&trace_buf_lock);
1581 raw_local_irq_restore(irq_flags);
1582 unpause_graph_tracing();
1752 out: 1583 out:
1584 atomic_dec_return(&data->disabled);
1753 preempt_enable_notrace(); 1585 preempt_enable_notrace();
1754 unpause_graph_tracing();
1755 1586
1756 return len; 1587 return len;
1757} 1588}
@@ -1764,11 +1595,14 @@ EXPORT_SYMBOL_GPL(trace_vprintk);
1764 1595
1765static void trace_iterator_increment(struct trace_iterator *iter) 1596static void trace_iterator_increment(struct trace_iterator *iter)
1766{ 1597{
1767 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); 1598 /* Don't allow ftrace to trace into the ring buffers */
1599 ftrace_disable_cpu();
1768 1600
1769 iter->idx++; 1601 iter->idx++;
1770 if (buf_iter) 1602 if (iter->buffer_iter[iter->cpu])
1771 ring_buffer_read(buf_iter, NULL); 1603 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1604
1605 ftrace_enable_cpu();
1772} 1606}
1773 1607
1774static struct trace_entry * 1608static struct trace_entry *
@@ -1776,7 +1610,10 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
1776 unsigned long *lost_events) 1610 unsigned long *lost_events)
1777{ 1611{
1778 struct ring_buffer_event *event; 1612 struct ring_buffer_event *event;
1779 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); 1613 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
1614
1615 /* Don't allow ftrace to trace into the ring buffers */
1616 ftrace_disable_cpu();
1780 1617
1781 if (buf_iter) 1618 if (buf_iter)
1782 event = ring_buffer_iter_peek(buf_iter, ts); 1619 event = ring_buffer_iter_peek(buf_iter, ts);
@@ -1784,6 +1621,8 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
1784 event = ring_buffer_peek(iter->tr->buffer, cpu, ts, 1621 event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
1785 lost_events); 1622 lost_events);
1786 1623
1624 ftrace_enable_cpu();
1625
1787 if (event) { 1626 if (event) {
1788 iter->ent_size = ring_buffer_event_length(event); 1627 iter->ent_size = ring_buffer_event_length(event);
1789 return ring_buffer_event_data(event); 1628 return ring_buffer_event_data(event);
@@ -1802,7 +1641,6 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
1802 int cpu_file = iter->cpu_file; 1641 int cpu_file = iter->cpu_file;
1803 u64 next_ts = 0, ts; 1642 u64 next_ts = 0, ts;
1804 int next_cpu = -1; 1643 int next_cpu = -1;
1805 int next_size = 0;
1806 int cpu; 1644 int cpu;
1807 1645
1808 /* 1646 /*
@@ -1834,12 +1672,9 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
1834 next_cpu = cpu; 1672 next_cpu = cpu;
1835 next_ts = ts; 1673 next_ts = ts;
1836 next_lost = lost_events; 1674 next_lost = lost_events;
1837 next_size = iter->ent_size;
1838 } 1675 }
1839 } 1676 }
1840 1677
1841 iter->ent_size = next_size;
1842
1843 if (ent_cpu) 1678 if (ent_cpu)
1844 *ent_cpu = next_cpu; 1679 *ent_cpu = next_cpu;
1845 1680
@@ -1873,8 +1708,11 @@ void *trace_find_next_entry_inc(struct trace_iterator *iter)
1873 1708
1874static void trace_consume(struct trace_iterator *iter) 1709static void trace_consume(struct trace_iterator *iter)
1875{ 1710{
1711 /* Don't allow ftrace to trace into the ring buffers */
1712 ftrace_disable_cpu();
1876 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts, 1713 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
1877 &iter->lost_events); 1714 &iter->lost_events);
1715 ftrace_enable_cpu();
1878} 1716}
1879 1717
1880static void *s_next(struct seq_file *m, void *v, loff_t *pos) 1718static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1914,10 +1752,10 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1914 1752
1915 tr->data[cpu]->skipped_entries = 0; 1753 tr->data[cpu]->skipped_entries = 0;
1916 1754
1917 buf_iter = trace_buffer_iter(iter, cpu); 1755 if (!iter->buffer_iter[cpu])
1918 if (!buf_iter)
1919 return; 1756 return;
1920 1757
1758 buf_iter = iter->buffer_iter[cpu];
1921 ring_buffer_iter_reset(buf_iter); 1759 ring_buffer_iter_reset(buf_iter);
1922 1760
1923 /* 1761 /*
@@ -1963,12 +1801,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1963 iter->cpu = 0; 1801 iter->cpu = 0;
1964 iter->idx = -1; 1802 iter->idx = -1;
1965 1803
1804 ftrace_disable_cpu();
1805
1966 if (cpu_file == TRACE_PIPE_ALL_CPU) { 1806 if (cpu_file == TRACE_PIPE_ALL_CPU) {
1967 for_each_tracing_cpu(cpu) 1807 for_each_tracing_cpu(cpu)
1968 tracing_iter_reset(iter, cpu); 1808 tracing_iter_reset(iter, cpu);
1969 } else 1809 } else
1970 tracing_iter_reset(iter, cpu_file); 1810 tracing_iter_reset(iter, cpu_file);
1971 1811
1812 ftrace_enable_cpu();
1813
1972 iter->leftover = 0; 1814 iter->leftover = 0;
1973 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1815 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1974 ; 1816 ;
@@ -2000,33 +1842,6 @@ static void s_stop(struct seq_file *m, void *p)
2000 trace_event_read_unlock(); 1842 trace_event_read_unlock();
2001} 1843}
2002 1844
2003static void
2004get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *entries)
2005{
2006 unsigned long count;
2007 int cpu;
2008
2009 *total = 0;
2010 *entries = 0;
2011
2012 for_each_tracing_cpu(cpu) {
2013 count = ring_buffer_entries_cpu(tr->buffer, cpu);
2014 /*
2015 * If this buffer has skipped entries, then we hold all
2016 * entries for the trace and we need to ignore the
2017 * ones before the time stamp.
2018 */
2019 if (tr->data[cpu]->skipped_entries) {
2020 count -= tr->data[cpu]->skipped_entries;
2021 /* total is the same as the entries */
2022 *total += count;
2023 } else
2024 *total += count +
2025 ring_buffer_overrun_cpu(tr->buffer, cpu);
2026 *entries += count;
2027 }
2028}
2029
2030static void print_lat_help_header(struct seq_file *m) 1845static void print_lat_help_header(struct seq_file *m)
2031{ 1846{
2032 seq_puts(m, "# _------=> CPU# \n"); 1847 seq_puts(m, "# _------=> CPU# \n");
@@ -2039,35 +1854,12 @@ static void print_lat_help_header(struct seq_file *m)
2039 seq_puts(m, "# \\ / ||||| \\ | / \n"); 1854 seq_puts(m, "# \\ / ||||| \\ | / \n");
2040} 1855}
2041 1856
2042static void print_event_info(struct trace_array *tr, struct seq_file *m) 1857static void print_func_help_header(struct seq_file *m)
2043{
2044 unsigned long total;
2045 unsigned long entries;
2046
2047 get_total_entries(tr, &total, &entries);
2048 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
2049 entries, total, num_online_cpus());
2050 seq_puts(m, "#\n");
2051}
2052
2053static void print_func_help_header(struct trace_array *tr, struct seq_file *m)
2054{ 1858{
2055 print_event_info(tr, m); 1859 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
2056 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
2057 seq_puts(m, "# | | | | |\n"); 1860 seq_puts(m, "# | | | | |\n");
2058} 1861}
2059 1862
2060static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m)
2061{
2062 print_event_info(tr, m);
2063 seq_puts(m, "# _-----=> irqs-off\n");
2064 seq_puts(m, "# / _----=> need-resched\n");
2065 seq_puts(m, "# | / _---=> hardirq/softirq\n");
2066 seq_puts(m, "# || / _--=> preempt-depth\n");
2067 seq_puts(m, "# ||| / delay\n");
2068 seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n");
2069 seq_puts(m, "# | | | |||| | |\n");
2070}
2071 1863
2072void 1864void
2073print_trace_header(struct seq_file *m, struct trace_iterator *iter) 1865print_trace_header(struct seq_file *m, struct trace_iterator *iter)
@@ -2076,14 +1868,32 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2076 struct trace_array *tr = iter->tr; 1868 struct trace_array *tr = iter->tr;
2077 struct trace_array_cpu *data = tr->data[tr->cpu]; 1869 struct trace_array_cpu *data = tr->data[tr->cpu];
2078 struct tracer *type = current_trace; 1870 struct tracer *type = current_trace;
2079 unsigned long entries; 1871 unsigned long entries = 0;
2080 unsigned long total; 1872 unsigned long total = 0;
1873 unsigned long count;
2081 const char *name = "preemption"; 1874 const char *name = "preemption";
1875 int cpu;
2082 1876
2083 if (type) 1877 if (type)
2084 name = type->name; 1878 name = type->name;
2085 1879
2086 get_total_entries(tr, &total, &entries); 1880
1881 for_each_tracing_cpu(cpu) {
1882 count = ring_buffer_entries_cpu(tr->buffer, cpu);
1883 /*
1884 * If this buffer has skipped entries, then we hold all
1885 * entries for the trace and we need to ignore the
1886 * ones before the time stamp.
1887 */
1888 if (tr->data[cpu]->skipped_entries) {
1889 count -= tr->data[cpu]->skipped_entries;
1890 /* total is the same as the entries */
1891 total += count;
1892 } else
1893 total += count +
1894 ring_buffer_overrun_cpu(tr->buffer, cpu);
1895 entries += count;
1896 }
2087 1897
2088 seq_printf(m, "# %s latency trace v1.1.5 on %s\n", 1898 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2089 name, UTS_RELEASE); 1899 name, UTS_RELEASE);
@@ -2114,8 +1924,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2114 seq_puts(m, "# -----------------\n"); 1924 seq_puts(m, "# -----------------\n");
2115 seq_printf(m, "# | task: %.16s-%d " 1925 seq_printf(m, "# | task: %.16s-%d "
2116 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", 1926 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2117 data->comm, data->pid, 1927 data->comm, data->pid, data->uid, data->nice,
2118 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2119 data->policy, data->rt_priority); 1928 data->policy, data->rt_priority);
2120 seq_puts(m, "# -----------------\n"); 1929 seq_puts(m, "# -----------------\n");
2121 1930
@@ -2264,15 +2073,13 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2264 2073
2265int trace_empty(struct trace_iterator *iter) 2074int trace_empty(struct trace_iterator *iter)
2266{ 2075{
2267 struct ring_buffer_iter *buf_iter;
2268 int cpu; 2076 int cpu;
2269 2077
2270 /* If we are looking at one CPU buffer, only check that one */ 2078 /* If we are looking at one CPU buffer, only check that one */
2271 if (iter->cpu_file != TRACE_PIPE_ALL_CPU) { 2079 if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {
2272 cpu = iter->cpu_file; 2080 cpu = iter->cpu_file;
2273 buf_iter = trace_buffer_iter(iter, cpu); 2081 if (iter->buffer_iter[cpu]) {
2274 if (buf_iter) { 2082 if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
2275 if (!ring_buffer_iter_empty(buf_iter))
2276 return 0; 2083 return 0;
2277 } else { 2084 } else {
2278 if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) 2085 if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
@@ -2282,9 +2089,8 @@ int trace_empty(struct trace_iterator *iter)
2282 } 2089 }
2283 2090
2284 for_each_tracing_cpu(cpu) { 2091 for_each_tracing_cpu(cpu) {
2285 buf_iter = trace_buffer_iter(iter, cpu); 2092 if (iter->buffer_iter[cpu]) {
2286 if (buf_iter) { 2093 if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
2287 if (!ring_buffer_iter_empty(buf_iter))
2288 return 0; 2094 return 0;
2289 } else { 2095 } else {
2290 if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) 2096 if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
@@ -2333,21 +2139,6 @@ enum print_line_t print_trace_line(struct trace_iterator *iter)
2333 return print_trace_fmt(iter); 2139 return print_trace_fmt(iter);
2334} 2140}
2335 2141
2336void trace_latency_header(struct seq_file *m)
2337{
2338 struct trace_iterator *iter = m->private;
2339
2340 /* print nothing if the buffers are empty */
2341 if (trace_empty(iter))
2342 return;
2343
2344 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2345 print_trace_header(m, iter);
2346
2347 if (!(trace_flags & TRACE_ITER_VERBOSE))
2348 print_lat_help_header(m);
2349}
2350
2351void trace_default_header(struct seq_file *m) 2142void trace_default_header(struct seq_file *m)
2352{ 2143{
2353 struct trace_iterator *iter = m->private; 2144 struct trace_iterator *iter = m->private;
@@ -2363,23 +2154,11 @@ void trace_default_header(struct seq_file *m)
2363 if (!(trace_flags & TRACE_ITER_VERBOSE)) 2154 if (!(trace_flags & TRACE_ITER_VERBOSE))
2364 print_lat_help_header(m); 2155 print_lat_help_header(m);
2365 } else { 2156 } else {
2366 if (!(trace_flags & TRACE_ITER_VERBOSE)) { 2157 if (!(trace_flags & TRACE_ITER_VERBOSE))
2367 if (trace_flags & TRACE_ITER_IRQ_INFO) 2158 print_func_help_header(m);
2368 print_func_help_header_irq(iter->tr, m);
2369 else
2370 print_func_help_header(iter->tr, m);
2371 }
2372 } 2159 }
2373} 2160}
2374 2161
2375static void test_ftrace_alive(struct seq_file *m)
2376{
2377 if (!ftrace_is_dead())
2378 return;
2379 seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2380 seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n");
2381}
2382
2383static int s_show(struct seq_file *m, void *v) 2162static int s_show(struct seq_file *m, void *v)
2384{ 2163{
2385 struct trace_iterator *iter = v; 2164 struct trace_iterator *iter = v;
@@ -2389,7 +2168,6 @@ static int s_show(struct seq_file *m, void *v)
2389 if (iter->tr) { 2168 if (iter->tr) {
2390 seq_printf(m, "# tracer: %s\n", iter->trace->name); 2169 seq_printf(m, "# tracer: %s\n", iter->trace->name);
2391 seq_puts(m, "#\n"); 2170 seq_puts(m, "#\n");
2392 test_ftrace_alive(m);
2393 } 2171 }
2394 if (iter->trace && iter->trace->print_header) 2172 if (iter->trace && iter->trace->print_header)
2395 iter->trace->print_header(m); 2173 iter->trace->print_header(m);
@@ -2433,21 +2211,18 @@ static struct trace_iterator *
2433__tracing_open(struct inode *inode, struct file *file) 2211__tracing_open(struct inode *inode, struct file *file)
2434{ 2212{
2435 long cpu_file = (long) inode->i_private; 2213 long cpu_file = (long) inode->i_private;
2214 void *fail_ret = ERR_PTR(-ENOMEM);
2436 struct trace_iterator *iter; 2215 struct trace_iterator *iter;
2437 int cpu; 2216 struct seq_file *m;
2217 int cpu, ret;
2438 2218
2439 if (tracing_disabled) 2219 if (tracing_disabled)
2440 return ERR_PTR(-ENODEV); 2220 return ERR_PTR(-ENODEV);
2441 2221
2442 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter)); 2222 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2443 if (!iter) 2223 if (!iter)
2444 return ERR_PTR(-ENOMEM); 2224 return ERR_PTR(-ENOMEM);
2445 2225
2446 iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2447 GFP_KERNEL);
2448 if (!iter->buffer_iter)
2449 goto release;
2450
2451 /* 2226 /*
2452 * We make a copy of the current tracer to avoid concurrent 2227 * We make a copy of the current tracer to avoid concurrent
2453 * changes on it while we are reading. 2228 * changes on it while we are reading.
@@ -2479,10 +2254,6 @@ __tracing_open(struct inode *inode, struct file *file)
2479 if (ring_buffer_overruns(iter->tr->buffer)) 2254 if (ring_buffer_overruns(iter->tr->buffer))
2480 iter->iter_flags |= TRACE_FILE_ANNOTATE; 2255 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2481 2256
2482 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2483 if (trace_clocks[trace_clock_id].in_ns)
2484 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2485
2486 /* stop the trace while dumping */ 2257 /* stop the trace while dumping */
2487 tracing_stop(); 2258 tracing_stop();
2488 2259
@@ -2505,17 +2276,32 @@ __tracing_open(struct inode *inode, struct file *file)
2505 tracing_iter_reset(iter, cpu); 2276 tracing_iter_reset(iter, cpu);
2506 } 2277 }
2507 2278
2279 ret = seq_open(file, &tracer_seq_ops);
2280 if (ret < 0) {
2281 fail_ret = ERR_PTR(ret);
2282 goto fail_buffer;
2283 }
2284
2285 m = file->private_data;
2286 m->private = iter;
2287
2508 mutex_unlock(&trace_types_lock); 2288 mutex_unlock(&trace_types_lock);
2509 2289
2510 return iter; 2290 return iter;
2511 2291
2292 fail_buffer:
2293 for_each_tracing_cpu(cpu) {
2294 if (iter->buffer_iter[cpu])
2295 ring_buffer_read_finish(iter->buffer_iter[cpu]);
2296 }
2297 free_cpumask_var(iter->started);
2298 tracing_start();
2512 fail: 2299 fail:
2513 mutex_unlock(&trace_types_lock); 2300 mutex_unlock(&trace_types_lock);
2514 kfree(iter->trace); 2301 kfree(iter->trace);
2515 kfree(iter->buffer_iter); 2302 kfree(iter);
2516release: 2303
2517 seq_release_private(inode, file); 2304 return fail_ret;
2518 return ERR_PTR(-ENOMEM);
2519} 2305}
2520 2306
2521int tracing_open_generic(struct inode *inode, struct file *filp) 2307int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -2551,11 +2337,11 @@ static int tracing_release(struct inode *inode, struct file *file)
2551 tracing_start(); 2337 tracing_start();
2552 mutex_unlock(&trace_types_lock); 2338 mutex_unlock(&trace_types_lock);
2553 2339
2340 seq_release(inode, file);
2554 mutex_destroy(&iter->mutex); 2341 mutex_destroy(&iter->mutex);
2555 free_cpumask_var(iter->started); 2342 free_cpumask_var(iter->started);
2556 kfree(iter->trace); 2343 kfree(iter->trace);
2557 kfree(iter->buffer_iter); 2344 kfree(iter);
2558 seq_release_private(inode, file);
2559 return 0; 2345 return 0;
2560} 2346}
2561 2347
@@ -2741,12 +2527,10 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2741 if (cpumask_test_cpu(cpu, tracing_cpumask) && 2527 if (cpumask_test_cpu(cpu, tracing_cpumask) &&
2742 !cpumask_test_cpu(cpu, tracing_cpumask_new)) { 2528 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2743 atomic_inc(&global_trace.data[cpu]->disabled); 2529 atomic_inc(&global_trace.data[cpu]->disabled);
2744 ring_buffer_record_disable_cpu(global_trace.buffer, cpu);
2745 } 2530 }
2746 if (!cpumask_test_cpu(cpu, tracing_cpumask) && 2531 if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
2747 cpumask_test_cpu(cpu, tracing_cpumask_new)) { 2532 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2748 atomic_dec(&global_trace.data[cpu]->disabled); 2533 atomic_dec(&global_trace.data[cpu]->disabled);
2749 ring_buffer_record_enable_cpu(global_trace.buffer, cpu);
2750 } 2534 }
2751 } 2535 }
2752 arch_spin_unlock(&ftrace_max_lock); 2536 arch_spin_unlock(&ftrace_max_lock);
@@ -2851,19 +2635,26 @@ static void set_tracer_flags(unsigned int mask, int enabled)
2851 2635
2852 if (mask == TRACE_ITER_OVERWRITE) 2636 if (mask == TRACE_ITER_OVERWRITE)
2853 ring_buffer_change_overwrite(global_trace.buffer, enabled); 2637 ring_buffer_change_overwrite(global_trace.buffer, enabled);
2854
2855 if (mask == TRACE_ITER_PRINTK)
2856 trace_printk_start_stop_comm(enabled);
2857} 2638}
2858 2639
2859static int trace_set_options(char *option) 2640static ssize_t
2641tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2642 size_t cnt, loff_t *ppos)
2860{ 2643{
2644 char buf[64];
2861 char *cmp; 2645 char *cmp;
2862 int neg = 0; 2646 int neg = 0;
2863 int ret = 0; 2647 int ret;
2864 int i; 2648 int i;
2865 2649
2866 cmp = strstrip(option); 2650 if (cnt >= sizeof(buf))
2651 return -EINVAL;
2652
2653 if (copy_from_user(&buf, ubuf, cnt))
2654 return -EFAULT;
2655
2656 buf[cnt] = 0;
2657 cmp = strstrip(buf);
2867 2658
2868 if (strncmp(cmp, "no", 2) == 0) { 2659 if (strncmp(cmp, "no", 2) == 0) {
2869 neg = 1; 2660 neg = 1;
@@ -2882,27 +2673,10 @@ static int trace_set_options(char *option)
2882 mutex_lock(&trace_types_lock); 2673 mutex_lock(&trace_types_lock);
2883 ret = set_tracer_option(current_trace, cmp, neg); 2674 ret = set_tracer_option(current_trace, cmp, neg);
2884 mutex_unlock(&trace_types_lock); 2675 mutex_unlock(&trace_types_lock);
2676 if (ret)
2677 return ret;
2885 } 2678 }
2886 2679
2887 return ret;
2888}
2889
2890static ssize_t
2891tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2892 size_t cnt, loff_t *ppos)
2893{
2894 char buf[64];
2895
2896 if (cnt >= sizeof(buf))
2897 return -EINVAL;
2898
2899 if (copy_from_user(&buf, ubuf, cnt))
2900 return -EFAULT;
2901
2902 buf[cnt] = 0;
2903
2904 trace_set_options(buf);
2905
2906 *ppos += cnt; 2680 *ppos += cnt;
2907 2681
2908 return cnt; 2682 return cnt;
@@ -2927,18 +2701,18 @@ static const char readme_msg[] =
2927 "tracing mini-HOWTO:\n\n" 2701 "tracing mini-HOWTO:\n\n"
2928 "# mount -t debugfs nodev /sys/kernel/debug\n\n" 2702 "# mount -t debugfs nodev /sys/kernel/debug\n\n"
2929 "# cat /sys/kernel/debug/tracing/available_tracers\n" 2703 "# cat /sys/kernel/debug/tracing/available_tracers\n"
2930 "wakeup wakeup_rt preemptirqsoff preemptoff irqsoff function nop\n\n" 2704 "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
2931 "# cat /sys/kernel/debug/tracing/current_tracer\n" 2705 "# cat /sys/kernel/debug/tracing/current_tracer\n"
2932 "nop\n" 2706 "nop\n"
2933 "# echo wakeup > /sys/kernel/debug/tracing/current_tracer\n" 2707 "# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n"
2934 "# cat /sys/kernel/debug/tracing/current_tracer\n" 2708 "# cat /sys/kernel/debug/tracing/current_tracer\n"
2935 "wakeup\n" 2709 "sched_switch\n"
2936 "# cat /sys/kernel/debug/tracing/trace_options\n" 2710 "# cat /sys/kernel/debug/tracing/trace_options\n"
2937 "noprint-parent nosym-offset nosym-addr noverbose\n" 2711 "noprint-parent nosym-offset nosym-addr noverbose\n"
2938 "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" 2712 "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
2939 "# echo 1 > /sys/kernel/debug/tracing/tracing_on\n" 2713 "# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n"
2940 "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n" 2714 "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
2941 "# echo 0 > /sys/kernel/debug/tracing/tracing_on\n" 2715 "# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n"
2942; 2716;
2943 2717
2944static ssize_t 2718static ssize_t
@@ -3007,6 +2781,56 @@ static const struct file_operations tracing_saved_cmdlines_fops = {
3007}; 2781};
3008 2782
3009static ssize_t 2783static ssize_t
2784tracing_ctrl_read(struct file *filp, char __user *ubuf,
2785 size_t cnt, loff_t *ppos)
2786{
2787 char buf[64];
2788 int r;
2789
2790 r = sprintf(buf, "%u\n", tracer_enabled);
2791 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2792}
2793
2794static ssize_t
2795tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2796 size_t cnt, loff_t *ppos)
2797{
2798 struct trace_array *tr = filp->private_data;
2799 unsigned long val;
2800 int ret;
2801
2802 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
2803 if (ret)
2804 return ret;
2805
2806 val = !!val;
2807
2808 mutex_lock(&trace_types_lock);
2809 if (tracer_enabled ^ val) {
2810
2811 /* Only need to warn if this is used to change the state */
2812 WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on");
2813
2814 if (val) {
2815 tracer_enabled = 1;
2816 if (current_trace->start)
2817 current_trace->start(tr);
2818 tracing_start();
2819 } else {
2820 tracer_enabled = 0;
2821 tracing_stop();
2822 if (current_trace->stop)
2823 current_trace->stop(tr);
2824 }
2825 }
2826 mutex_unlock(&trace_types_lock);
2827
2828 *ppos += cnt;
2829
2830 return cnt;
2831}
2832
2833static ssize_t
3010tracing_set_trace_read(struct file *filp, char __user *ubuf, 2834tracing_set_trace_read(struct file *filp, char __user *ubuf,
3011 size_t cnt, loff_t *ppos) 2835 size_t cnt, loff_t *ppos)
3012{ 2836{
@@ -3029,39 +2853,7 @@ int tracer_init(struct tracer *t, struct trace_array *tr)
3029 return t->init(tr); 2853 return t->init(tr);
3030} 2854}
3031 2855
3032static void set_buffer_entries(struct trace_array *tr, unsigned long val) 2856static int __tracing_resize_ring_buffer(unsigned long size)
3033{
3034 int cpu;
3035 for_each_tracing_cpu(cpu)
3036 tr->data[cpu]->entries = val;
3037}
3038
3039/* resize @tr's buffer to the size of @size_tr's entries */
3040static int resize_buffer_duplicate_size(struct trace_array *tr,
3041 struct trace_array *size_tr, int cpu_id)
3042{
3043 int cpu, ret = 0;
3044
3045 if (cpu_id == RING_BUFFER_ALL_CPUS) {
3046 for_each_tracing_cpu(cpu) {
3047 ret = ring_buffer_resize(tr->buffer,
3048 size_tr->data[cpu]->entries, cpu);
3049 if (ret < 0)
3050 break;
3051 tr->data[cpu]->entries = size_tr->data[cpu]->entries;
3052 }
3053 } else {
3054 ret = ring_buffer_resize(tr->buffer,
3055 size_tr->data[cpu_id]->entries, cpu_id);
3056 if (ret == 0)
3057 tr->data[cpu_id]->entries =
3058 size_tr->data[cpu_id]->entries;
3059 }
3060
3061 return ret;
3062}
3063
3064static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
3065{ 2857{
3066 int ret; 2858 int ret;
3067 2859
@@ -3072,21 +2864,19 @@ static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
3072 */ 2864 */
3073 ring_buffer_expanded = 1; 2865 ring_buffer_expanded = 1;
3074 2866
3075 /* May be called before buffers are initialized */ 2867 ret = ring_buffer_resize(global_trace.buffer, size);
3076 if (!global_trace.buffer)
3077 return 0;
3078
3079 ret = ring_buffer_resize(global_trace.buffer, size, cpu);
3080 if (ret < 0) 2868 if (ret < 0)
3081 return ret; 2869 return ret;
3082 2870
3083 if (!current_trace->use_max_tr) 2871 if (!current_trace->use_max_tr)
3084 goto out; 2872 goto out;
3085 2873
3086 ret = ring_buffer_resize(max_tr.buffer, size, cpu); 2874 ret = ring_buffer_resize(max_tr.buffer, size);
3087 if (ret < 0) { 2875 if (ret < 0) {
3088 int r = resize_buffer_duplicate_size(&global_trace, 2876 int r;
3089 &global_trace, cpu); 2877
2878 r = ring_buffer_resize(global_trace.buffer,
2879 global_trace.entries);
3090 if (r < 0) { 2880 if (r < 0) {
3091 /* 2881 /*
3092 * AARGH! We are left with different 2882 * AARGH! We are left with different
@@ -3108,39 +2898,43 @@ static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
3108 return ret; 2898 return ret;
3109 } 2899 }
3110 2900
3111 if (cpu == RING_BUFFER_ALL_CPUS) 2901 max_tr.entries = size;
3112 set_buffer_entries(&max_tr, size);
3113 else
3114 max_tr.data[cpu]->entries = size;
3115
3116 out: 2902 out:
3117 if (cpu == RING_BUFFER_ALL_CPUS) 2903 global_trace.entries = size;
3118 set_buffer_entries(&global_trace, size);
3119 else
3120 global_trace.data[cpu]->entries = size;
3121 2904
3122 return ret; 2905 return ret;
3123} 2906}
3124 2907
3125static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id) 2908static ssize_t tracing_resize_ring_buffer(unsigned long size)
3126{ 2909{
3127 int ret = size; 2910 int cpu, ret = size;
3128 2911
3129 mutex_lock(&trace_types_lock); 2912 mutex_lock(&trace_types_lock);
3130 2913
3131 if (cpu_id != RING_BUFFER_ALL_CPUS) { 2914 tracing_stop();
3132 /* make sure, this cpu is enabled in the mask */ 2915
3133 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) { 2916 /* disable all cpu buffers */
3134 ret = -EINVAL; 2917 for_each_tracing_cpu(cpu) {
3135 goto out; 2918 if (global_trace.data[cpu])
3136 } 2919 atomic_inc(&global_trace.data[cpu]->disabled);
2920 if (max_tr.data[cpu])
2921 atomic_inc(&max_tr.data[cpu]->disabled);
3137 } 2922 }
3138 2923
3139 ret = __tracing_resize_ring_buffer(size, cpu_id); 2924 if (size != global_trace.entries)
2925 ret = __tracing_resize_ring_buffer(size);
2926
3140 if (ret < 0) 2927 if (ret < 0)
3141 ret = -ENOMEM; 2928 ret = -ENOMEM;
3142 2929
3143out: 2930 for_each_tracing_cpu(cpu) {
2931 if (global_trace.data[cpu])
2932 atomic_dec(&global_trace.data[cpu]->disabled);
2933 if (max_tr.data[cpu])
2934 atomic_dec(&max_tr.data[cpu]->disabled);
2935 }
2936
2937 tracing_start();
3144 mutex_unlock(&trace_types_lock); 2938 mutex_unlock(&trace_types_lock);
3145 2939
3146 return ret; 2940 return ret;
@@ -3163,8 +2957,7 @@ int tracing_update_buffers(void)
3163 2957
3164 mutex_lock(&trace_types_lock); 2958 mutex_lock(&trace_types_lock);
3165 if (!ring_buffer_expanded) 2959 if (!ring_buffer_expanded)
3166 ret = __tracing_resize_ring_buffer(trace_buf_size, 2960 ret = __tracing_resize_ring_buffer(trace_buf_size);
3167 RING_BUFFER_ALL_CPUS);
3168 mutex_unlock(&trace_types_lock); 2961 mutex_unlock(&trace_types_lock);
3169 2962
3170 return ret; 2963 return ret;
@@ -3188,8 +2981,7 @@ static int tracing_set_tracer(const char *buf)
3188 mutex_lock(&trace_types_lock); 2981 mutex_lock(&trace_types_lock);
3189 2982
3190 if (!ring_buffer_expanded) { 2983 if (!ring_buffer_expanded) {
3191 ret = __tracing_resize_ring_buffer(trace_buf_size, 2984 ret = __tracing_resize_ring_buffer(trace_buf_size);
3192 RING_BUFFER_ALL_CPUS);
3193 if (ret < 0) 2985 if (ret < 0)
3194 goto out; 2986 goto out;
3195 ret = 0; 2987 ret = 0;
@@ -3215,20 +3007,19 @@ static int tracing_set_tracer(const char *buf)
3215 * The max_tr ring buffer has some state (e.g. ring->clock) and 3007 * The max_tr ring buffer has some state (e.g. ring->clock) and
3216 * we want preserve it. 3008 * we want preserve it.
3217 */ 3009 */
3218 ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS); 3010 ring_buffer_resize(max_tr.buffer, 1);
3219 set_buffer_entries(&max_tr, 1); 3011 max_tr.entries = 1;
3220 } 3012 }
3221 destroy_trace_option_files(topts); 3013 destroy_trace_option_files(topts);
3222 3014
3223 current_trace = &nop_trace; 3015 current_trace = t;
3224 3016
3225 topts = create_trace_option_files(t); 3017 topts = create_trace_option_files(current_trace);
3226 if (t->use_max_tr) { 3018 if (current_trace->use_max_tr) {
3227 /* we need to make per cpu buffer sizes equivalent */ 3019 ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
3228 ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
3229 RING_BUFFER_ALL_CPUS);
3230 if (ret < 0) 3020 if (ret < 0)
3231 goto out; 3021 goto out;
3022 max_tr.entries = global_trace.entries;
3232 } 3023 }
3233 3024
3234 if (t->init) { 3025 if (t->init) {
@@ -3237,7 +3028,6 @@ static int tracing_set_tracer(const char *buf)
3237 goto out; 3028 goto out;
3238 } 3029 }
3239 3030
3240 current_trace = t;
3241 trace_branch_enable(tr); 3031 trace_branch_enable(tr);
3242 out: 3032 out:
3243 mutex_unlock(&trace_types_lock); 3033 mutex_unlock(&trace_types_lock);
@@ -3350,10 +3140,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
3350 if (trace_flags & TRACE_ITER_LATENCY_FMT) 3140 if (trace_flags & TRACE_ITER_LATENCY_FMT)
3351 iter->iter_flags |= TRACE_FILE_LAT_FMT; 3141 iter->iter_flags |= TRACE_FILE_LAT_FMT;
3352 3142
3353 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3354 if (trace_clocks[trace_clock_id].in_ns)
3355 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3356
3357 iter->cpu_file = cpu_file; 3143 iter->cpu_file = cpu_file;
3358 iter->tr = &global_trace; 3144 iter->tr = &global_trace;
3359 mutex_init(&iter->mutex); 3145 mutex_init(&iter->mutex);
@@ -3414,6 +3200,19 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
3414 } 3200 }
3415} 3201}
3416 3202
3203
3204void default_wait_pipe(struct trace_iterator *iter)
3205{
3206 DEFINE_WAIT(wait);
3207
3208 prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
3209
3210 if (trace_empty(iter))
3211 schedule();
3212
3213 finish_wait(&trace_wait, &wait);
3214}
3215
3417/* 3216/*
3418 * This is a make-shift waitqueue. 3217 * This is a make-shift waitqueue.
3419 * A tracer might use this callback on some rare cases: 3218 * A tracer might use this callback on some rare cases:
@@ -3462,7 +3261,7 @@ static int tracing_wait_pipe(struct file *filp)
3462 * 3261 *
3463 * iter->pos will be 0 if we haven't read anything. 3262 * iter->pos will be 0 if we haven't read anything.
3464 */ 3263 */
3465 if (!tracing_is_enabled() && iter->pos) 3264 if (!tracer_enabled && iter->pos)
3466 break; 3265 break;
3467 } 3266 }
3468 3267
@@ -3643,7 +3442,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3643 .pages = pages_def, 3442 .pages = pages_def,
3644 .partial = partial_def, 3443 .partial = partial_def,
3645 .nr_pages = 0, /* This gets updated below. */ 3444 .nr_pages = 0, /* This gets updated below. */
3646 .nr_pages_max = PIPE_DEF_BUFFERS,
3647 .flags = flags, 3445 .flags = flags,
3648 .ops = &tracing_pipe_buf_ops, 3446 .ops = &tracing_pipe_buf_ops,
3649 .spd_release = tracing_spd_release_pipe, 3447 .spd_release = tracing_spd_release_pipe,
@@ -3715,7 +3513,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3715 3513
3716 ret = splice_to_pipe(pipe, &spd); 3514 ret = splice_to_pipe(pipe, &spd);
3717out: 3515out:
3718 splice_shrink_spd(&spd); 3516 splice_shrink_spd(pipe, &spd);
3719 return ret; 3517 return ret;
3720 3518
3721out_err: 3519out_err:
@@ -3723,82 +3521,30 @@ out_err:
3723 goto out; 3521 goto out;
3724} 3522}
3725 3523
3726struct ftrace_entries_info {
3727 struct trace_array *tr;
3728 int cpu;
3729};
3730
3731static int tracing_entries_open(struct inode *inode, struct file *filp)
3732{
3733 struct ftrace_entries_info *info;
3734
3735 if (tracing_disabled)
3736 return -ENODEV;
3737
3738 info = kzalloc(sizeof(*info), GFP_KERNEL);
3739 if (!info)
3740 return -ENOMEM;
3741
3742 info->tr = &global_trace;
3743 info->cpu = (unsigned long)inode->i_private;
3744
3745 filp->private_data = info;
3746
3747 return 0;
3748}
3749
3750static ssize_t 3524static ssize_t
3751tracing_entries_read(struct file *filp, char __user *ubuf, 3525tracing_entries_read(struct file *filp, char __user *ubuf,
3752 size_t cnt, loff_t *ppos) 3526 size_t cnt, loff_t *ppos)
3753{ 3527{
3754 struct ftrace_entries_info *info = filp->private_data; 3528 struct trace_array *tr = filp->private_data;
3755 struct trace_array *tr = info->tr; 3529 char buf[96];
3756 char buf[64]; 3530 int r;
3757 int r = 0;
3758 ssize_t ret;
3759 3531
3760 mutex_lock(&trace_types_lock); 3532 mutex_lock(&trace_types_lock);
3761 3533 if (!ring_buffer_expanded)
3762 if (info->cpu == RING_BUFFER_ALL_CPUS) { 3534 r = sprintf(buf, "%lu (expanded: %lu)\n",
3763 int cpu, buf_size_same; 3535 tr->entries >> 10,
3764 unsigned long size; 3536 trace_buf_size >> 10);
3765 3537 else
3766 size = 0; 3538 r = sprintf(buf, "%lu\n", tr->entries >> 10);
3767 buf_size_same = 1;
3768 /* check if all cpu sizes are same */
3769 for_each_tracing_cpu(cpu) {
3770 /* fill in the size from first enabled cpu */
3771 if (size == 0)
3772 size = tr->data[cpu]->entries;
3773 if (size != tr->data[cpu]->entries) {
3774 buf_size_same = 0;
3775 break;
3776 }
3777 }
3778
3779 if (buf_size_same) {
3780 if (!ring_buffer_expanded)
3781 r = sprintf(buf, "%lu (expanded: %lu)\n",
3782 size >> 10,
3783 trace_buf_size >> 10);
3784 else
3785 r = sprintf(buf, "%lu\n", size >> 10);
3786 } else
3787 r = sprintf(buf, "X\n");
3788 } else
3789 r = sprintf(buf, "%lu\n", tr->data[info->cpu]->entries >> 10);
3790
3791 mutex_unlock(&trace_types_lock); 3539 mutex_unlock(&trace_types_lock);
3792 3540
3793 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3541 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3794 return ret;
3795} 3542}
3796 3543
3797static ssize_t 3544static ssize_t
3798tracing_entries_write(struct file *filp, const char __user *ubuf, 3545tracing_entries_write(struct file *filp, const char __user *ubuf,
3799 size_t cnt, loff_t *ppos) 3546 size_t cnt, loff_t *ppos)
3800{ 3547{
3801 struct ftrace_entries_info *info = filp->private_data;
3802 unsigned long val; 3548 unsigned long val;
3803 int ret; 3549 int ret;
3804 3550
@@ -3813,7 +3559,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3813 /* value is in KB */ 3559 /* value is in KB */
3814 val <<= 10; 3560 val <<= 10;
3815 3561
3816 ret = tracing_resize_ring_buffer(val, info->cpu); 3562 ret = tracing_resize_ring_buffer(val);
3817 if (ret < 0) 3563 if (ret < 0)
3818 return ret; 3564 return ret;
3819 3565
@@ -3822,40 +3568,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3822 return cnt; 3568 return cnt;
3823} 3569}
3824 3570
3825static int
3826tracing_entries_release(struct inode *inode, struct file *filp)
3827{
3828 struct ftrace_entries_info *info = filp->private_data;
3829
3830 kfree(info);
3831
3832 return 0;
3833}
3834
3835static ssize_t
3836tracing_total_entries_read(struct file *filp, char __user *ubuf,
3837 size_t cnt, loff_t *ppos)
3838{
3839 struct trace_array *tr = filp->private_data;
3840 char buf[64];
3841 int r, cpu;
3842 unsigned long size = 0, expanded_size = 0;
3843
3844 mutex_lock(&trace_types_lock);
3845 for_each_tracing_cpu(cpu) {
3846 size += tr->data[cpu]->entries >> 10;
3847 if (!ring_buffer_expanded)
3848 expanded_size += trace_buf_size >> 10;
3849 }
3850 if (ring_buffer_expanded)
3851 r = sprintf(buf, "%lu\n", size);
3852 else
3853 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
3854 mutex_unlock(&trace_types_lock);
3855
3856 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3857}
3858
3859static ssize_t 3571static ssize_t
3860tracing_free_buffer_write(struct file *filp, const char __user *ubuf, 3572tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
3861 size_t cnt, loff_t *ppos) 3573 size_t cnt, loff_t *ppos)
@@ -3877,112 +3589,56 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
3877 if (trace_flags & TRACE_ITER_STOP_ON_FREE) 3589 if (trace_flags & TRACE_ITER_STOP_ON_FREE)
3878 tracing_off(); 3590 tracing_off();
3879 /* resize the ring buffer to 0 */ 3591 /* resize the ring buffer to 0 */
3880 tracing_resize_ring_buffer(0, RING_BUFFER_ALL_CPUS); 3592 tracing_resize_ring_buffer(0);
3881 3593
3882 return 0; 3594 return 0;
3883} 3595}
3884 3596
3597static int mark_printk(const char *fmt, ...)
3598{
3599 int ret;
3600 va_list args;
3601 va_start(args, fmt);
3602 ret = trace_vprintk(0, fmt, args);
3603 va_end(args);
3604 return ret;
3605}
3606
3885static ssize_t 3607static ssize_t
3886tracing_mark_write(struct file *filp, const char __user *ubuf, 3608tracing_mark_write(struct file *filp, const char __user *ubuf,
3887 size_t cnt, loff_t *fpos) 3609 size_t cnt, loff_t *fpos)
3888{ 3610{
3889 unsigned long addr = (unsigned long)ubuf; 3611 char *buf;
3890 struct ring_buffer_event *event; 3612 size_t written;
3891 struct ring_buffer *buffer;
3892 struct print_entry *entry;
3893 unsigned long irq_flags;
3894 struct page *pages[2];
3895 void *map_page[2];
3896 int nr_pages = 1;
3897 ssize_t written;
3898 int offset;
3899 int size;
3900 int len;
3901 int ret;
3902 int i;
3903 3613
3904 if (tracing_disabled) 3614 if (tracing_disabled)
3905 return -EINVAL; 3615 return -EINVAL;
3906 3616
3907 if (!(trace_flags & TRACE_ITER_MARKERS))
3908 return -EINVAL;
3909
3910 if (cnt > TRACE_BUF_SIZE) 3617 if (cnt > TRACE_BUF_SIZE)
3911 cnt = TRACE_BUF_SIZE; 3618 cnt = TRACE_BUF_SIZE;
3912 3619
3913 /* 3620 buf = kmalloc(cnt + 2, GFP_KERNEL);
3914 * Userspace is injecting traces into the kernel trace buffer. 3621 if (buf == NULL)
3915 * We want to be as non intrusive as possible. 3622 return -ENOMEM;
3916 * To do so, we do not want to allocate any special buffers
3917 * or take any locks, but instead write the userspace data
3918 * straight into the ring buffer.
3919 *
3920 * First we need to pin the userspace buffer into memory,
3921 * which, most likely it is, because it just referenced it.
3922 * But there's no guarantee that it is. By using get_user_pages_fast()
3923 * and kmap_atomic/kunmap_atomic() we can get access to the
3924 * pages directly. We then write the data directly into the
3925 * ring buffer.
3926 */
3927 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
3928
3929 /* check if we cross pages */
3930 if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
3931 nr_pages = 2;
3932
3933 offset = addr & (PAGE_SIZE - 1);
3934 addr &= PAGE_MASK;
3935
3936 ret = get_user_pages_fast(addr, nr_pages, 0, pages);
3937 if (ret < nr_pages) {
3938 while (--ret >= 0)
3939 put_page(pages[ret]);
3940 written = -EFAULT;
3941 goto out;
3942 }
3943
3944 for (i = 0; i < nr_pages; i++)
3945 map_page[i] = kmap_atomic(pages[i]);
3946 3623
3947 local_save_flags(irq_flags); 3624 if (copy_from_user(buf, ubuf, cnt)) {
3948 size = sizeof(*entry) + cnt + 2; /* possible \n added */ 3625 kfree(buf);
3949 buffer = global_trace.buffer; 3626 return -EFAULT;
3950 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3951 irq_flags, preempt_count());
3952 if (!event) {
3953 /* Ring buffer disabled, return as if not open for write */
3954 written = -EBADF;
3955 goto out_unlock;
3956 } 3627 }
3957 3628 if (buf[cnt-1] != '\n') {
3958 entry = ring_buffer_event_data(event); 3629 buf[cnt] = '\n';
3959 entry->ip = _THIS_IP_; 3630 buf[cnt+1] = '\0';
3960
3961 if (nr_pages == 2) {
3962 len = PAGE_SIZE - offset;
3963 memcpy(&entry->buf, map_page[0] + offset, len);
3964 memcpy(&entry->buf[len], map_page[1], cnt - len);
3965 } else
3966 memcpy(&entry->buf, map_page[0] + offset, cnt);
3967
3968 if (entry->buf[cnt - 1] != '\n') {
3969 entry->buf[cnt] = '\n';
3970 entry->buf[cnt + 1] = '\0';
3971 } else 3631 } else
3972 entry->buf[cnt] = '\0'; 3632 buf[cnt] = '\0';
3973
3974 __buffer_unlock_commit(buffer, event);
3975
3976 written = cnt;
3977 3633
3634 written = mark_printk("%s", buf);
3635 kfree(buf);
3978 *fpos += written; 3636 *fpos += written;
3979 3637
3980 out_unlock: 3638 /* don't tell userspace we wrote more - it might confuse them */
3981 for (i = 0; i < nr_pages; i++){ 3639 if (written > cnt)
3982 kunmap_atomic(map_page[i]); 3640 written = cnt;
3983 put_page(pages[i]); 3641
3984 }
3985 out:
3986 return written; 3642 return written;
3987} 3643}
3988 3644
@@ -4032,14 +3688,6 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4032 if (max_tr.buffer) 3688 if (max_tr.buffer)
4033 ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func); 3689 ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
4034 3690
4035 /*
4036 * New clock may not be consistent with the previous clock.
4037 * Reset the buffer so that it doesn't have incomparable timestamps.
4038 */
4039 tracing_reset_online_cpus(&global_trace);
4040 if (max_tr.buffer)
4041 tracing_reset_online_cpus(&max_tr);
4042
4043 mutex_unlock(&trace_types_lock); 3691 mutex_unlock(&trace_types_lock);
4044 3692
4045 *fpos += cnt; 3693 *fpos += cnt;
@@ -4061,6 +3709,13 @@ static const struct file_operations tracing_max_lat_fops = {
4061 .llseek = generic_file_llseek, 3709 .llseek = generic_file_llseek,
4062}; 3710};
4063 3711
3712static const struct file_operations tracing_ctrl_fops = {
3713 .open = tracing_open_generic,
3714 .read = tracing_ctrl_read,
3715 .write = tracing_ctrl_write,
3716 .llseek = generic_file_llseek,
3717};
3718
4064static const struct file_operations set_tracer_fops = { 3719static const struct file_operations set_tracer_fops = {
4065 .open = tracing_open_generic, 3720 .open = tracing_open_generic,
4066 .read = tracing_set_trace_read, 3721 .read = tracing_set_trace_read,
@@ -4078,16 +3733,9 @@ static const struct file_operations tracing_pipe_fops = {
4078}; 3733};
4079 3734
4080static const struct file_operations tracing_entries_fops = { 3735static const struct file_operations tracing_entries_fops = {
4081 .open = tracing_entries_open, 3736 .open = tracing_open_generic,
4082 .read = tracing_entries_read, 3737 .read = tracing_entries_read,
4083 .write = tracing_entries_write, 3738 .write = tracing_entries_write,
4084 .release = tracing_entries_release,
4085 .llseek = generic_file_llseek,
4086};
4087
4088static const struct file_operations tracing_total_entries_fops = {
4089 .open = tracing_open_generic,
4090 .read = tracing_total_entries_read,
4091 .llseek = generic_file_llseek, 3739 .llseek = generic_file_llseek,
4092}; 3740};
4093 3741
@@ -4217,6 +3865,12 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
4217 buf->private = 0; 3865 buf->private = 0;
4218} 3866}
4219 3867
3868static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe,
3869 struct pipe_buffer *buf)
3870{
3871 return 1;
3872}
3873
4220static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, 3874static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
4221 struct pipe_buffer *buf) 3875 struct pipe_buffer *buf)
4222{ 3876{
@@ -4232,7 +3886,7 @@ static const struct pipe_buf_operations buffer_pipe_buf_ops = {
4232 .unmap = generic_pipe_buf_unmap, 3886 .unmap = generic_pipe_buf_unmap,
4233 .confirm = generic_pipe_buf_confirm, 3887 .confirm = generic_pipe_buf_confirm,
4234 .release = buffer_pipe_buf_release, 3888 .release = buffer_pipe_buf_release,
4235 .steal = generic_pipe_buf_steal, 3889 .steal = buffer_pipe_buf_steal,
4236 .get = buffer_pipe_buf_get, 3890 .get = buffer_pipe_buf_get,
4237}; 3891};
4238 3892
@@ -4264,7 +3918,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4264 struct splice_pipe_desc spd = { 3918 struct splice_pipe_desc spd = {
4265 .pages = pages_def, 3919 .pages = pages_def,
4266 .partial = partial_def, 3920 .partial = partial_def,
4267 .nr_pages_max = PIPE_DEF_BUFFERS,
4268 .flags = flags, 3921 .flags = flags,
4269 .ops = &buffer_pipe_buf_ops, 3922 .ops = &buffer_pipe_buf_ops,
4270 .spd_release = buffer_spd_release, 3923 .spd_release = buffer_spd_release,
@@ -4277,11 +3930,13 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4277 return -ENOMEM; 3930 return -ENOMEM;
4278 3931
4279 if (*ppos & (PAGE_SIZE - 1)) { 3932 if (*ppos & (PAGE_SIZE - 1)) {
3933 WARN_ONCE(1, "Ftrace: previous read must page-align\n");
4280 ret = -EINVAL; 3934 ret = -EINVAL;
4281 goto out; 3935 goto out;
4282 } 3936 }
4283 3937
4284 if (len & (PAGE_SIZE - 1)) { 3938 if (len & (PAGE_SIZE - 1)) {
3939 WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
4285 if (len < PAGE_SIZE) { 3940 if (len < PAGE_SIZE) {
4286 ret = -EINVAL; 3941 ret = -EINVAL;
4287 goto out; 3942 goto out;
@@ -4350,7 +4005,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4350 } 4005 }
4351 4006
4352 ret = splice_to_pipe(pipe, &spd); 4007 ret = splice_to_pipe(pipe, &spd);
4353 splice_shrink_spd(&spd); 4008 splice_shrink_spd(pipe, &spd);
4354out: 4009out:
4355 return ret; 4010 return ret;
4356} 4011}
@@ -4371,8 +4026,6 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
4371 struct trace_array *tr = &global_trace; 4026 struct trace_array *tr = &global_trace;
4372 struct trace_seq *s; 4027 struct trace_seq *s;
4373 unsigned long cnt; 4028 unsigned long cnt;
4374 unsigned long long t;
4375 unsigned long usec_rem;
4376 4029
4377 s = kmalloc(sizeof(*s), GFP_KERNEL); 4030 s = kmalloc(sizeof(*s), GFP_KERNEL);
4378 if (!s) 4031 if (!s)
@@ -4389,31 +4042,6 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
4389 cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); 4042 cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
4390 trace_seq_printf(s, "commit overrun: %ld\n", cnt); 4043 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
4391 4044
4392 cnt = ring_buffer_bytes_cpu(tr->buffer, cpu);
4393 trace_seq_printf(s, "bytes: %ld\n", cnt);
4394
4395 if (trace_clocks[trace_clock_id].in_ns) {
4396 /* local or global for trace_clock */
4397 t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu));
4398 usec_rem = do_div(t, USEC_PER_SEC);
4399 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
4400 t, usec_rem);
4401
4402 t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu));
4403 usec_rem = do_div(t, USEC_PER_SEC);
4404 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
4405 } else {
4406 /* counter or tsc mode for trace_clock */
4407 trace_seq_printf(s, "oldest event ts: %llu\n",
4408 ring_buffer_oldest_event_ts(tr->buffer, cpu));
4409
4410 trace_seq_printf(s, "now ts: %llu\n",
4411 ring_buffer_time_stamp(tr->buffer, cpu));
4412 }
4413
4414 cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
4415 trace_seq_printf(s, "dropped events: %ld\n", cnt);
4416
4417 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); 4045 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
4418 4046
4419 kfree(s); 4047 kfree(s);
@@ -4520,9 +4148,6 @@ static void tracing_init_debugfs_percpu(long cpu)
4520 struct dentry *d_cpu; 4148 struct dentry *d_cpu;
4521 char cpu_dir[30]; /* 30 characters should be more than enough */ 4149 char cpu_dir[30]; /* 30 characters should be more than enough */
4522 4150
4523 if (!d_percpu)
4524 return;
4525
4526 snprintf(cpu_dir, 30, "cpu%ld", cpu); 4151 snprintf(cpu_dir, 30, "cpu%ld", cpu);
4527 d_cpu = debugfs_create_dir(cpu_dir, d_percpu); 4152 d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
4528 if (!d_cpu) { 4153 if (!d_cpu) {
@@ -4543,9 +4168,6 @@ static void tracing_init_debugfs_percpu(long cpu)
4543 4168
4544 trace_create_file("stats", 0444, d_cpu, 4169 trace_create_file("stats", 0444, d_cpu,
4545 (void *) cpu, &tracing_stats_fops); 4170 (void *) cpu, &tracing_stats_fops);
4546
4547 trace_create_file("buffer_size_kb", 0444, d_cpu,
4548 (void *) cpu, &tracing_entries_fops);
4549} 4171}
4550 4172
4551#ifdef CONFIG_FTRACE_SELFTEST 4173#ifdef CONFIG_FTRACE_SELFTEST
@@ -4655,7 +4277,7 @@ static const struct file_operations trace_options_core_fops = {
4655}; 4277};
4656 4278
4657struct dentry *trace_create_file(const char *name, 4279struct dentry *trace_create_file(const char *name,
4658 umode_t mode, 4280 mode_t mode,
4659 struct dentry *parent, 4281 struct dentry *parent,
4660 void *data, 4282 void *data,
4661 const struct file_operations *fops) 4283 const struct file_operations *fops)
@@ -4784,64 +4406,6 @@ static __init void create_trace_options_dir(void)
4784 create_trace_option_core_file(trace_options[i], i); 4406 create_trace_option_core_file(trace_options[i], i);
4785} 4407}
4786 4408
4787static ssize_t
4788rb_simple_read(struct file *filp, char __user *ubuf,
4789 size_t cnt, loff_t *ppos)
4790{
4791 struct trace_array *tr = filp->private_data;
4792 struct ring_buffer *buffer = tr->buffer;
4793 char buf[64];
4794 int r;
4795
4796 if (buffer)
4797 r = ring_buffer_record_is_on(buffer);
4798 else
4799 r = 0;
4800
4801 r = sprintf(buf, "%d\n", r);
4802
4803 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4804}
4805
4806static ssize_t
4807rb_simple_write(struct file *filp, const char __user *ubuf,
4808 size_t cnt, loff_t *ppos)
4809{
4810 struct trace_array *tr = filp->private_data;
4811 struct ring_buffer *buffer = tr->buffer;
4812 unsigned long val;
4813 int ret;
4814
4815 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4816 if (ret)
4817 return ret;
4818
4819 if (buffer) {
4820 mutex_lock(&trace_types_lock);
4821 if (val) {
4822 ring_buffer_record_on(buffer);
4823 if (current_trace->start)
4824 current_trace->start(tr);
4825 } else {
4826 ring_buffer_record_off(buffer);
4827 if (current_trace->stop)
4828 current_trace->stop(tr);
4829 }
4830 mutex_unlock(&trace_types_lock);
4831 }
4832
4833 (*ppos)++;
4834
4835 return cnt;
4836}
4837
4838static const struct file_operations rb_simple_fops = {
4839 .open = tracing_open_generic,
4840 .read = rb_simple_read,
4841 .write = rb_simple_write,
4842 .llseek = default_llseek,
4843};
4844
4845static __init int tracer_init_debugfs(void) 4409static __init int tracer_init_debugfs(void)
4846{ 4410{
4847 struct dentry *d_tracer; 4411 struct dentry *d_tracer;
@@ -4851,6 +4415,9 @@ static __init int tracer_init_debugfs(void)
4851 4415
4852 d_tracer = tracing_init_dentry(); 4416 d_tracer = tracing_init_dentry();
4853 4417
4418 trace_create_file("tracing_enabled", 0644, d_tracer,
4419 &global_trace, &tracing_ctrl_fops);
4420
4854 trace_create_file("trace_options", 0644, d_tracer, 4421 trace_create_file("trace_options", 0644, d_tracer,
4855 NULL, &tracing_iter_fops); 4422 NULL, &tracing_iter_fops);
4856 4423
@@ -4881,10 +4448,7 @@ static __init int tracer_init_debugfs(void)
4881 (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops); 4448 (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
4882 4449
4883 trace_create_file("buffer_size_kb", 0644, d_tracer, 4450 trace_create_file("buffer_size_kb", 0644, d_tracer,
4884 (void *) RING_BUFFER_ALL_CPUS, &tracing_entries_fops); 4451 &global_trace, &tracing_entries_fops);
4885
4886 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
4887 &global_trace, &tracing_total_entries_fops);
4888 4452
4889 trace_create_file("free_buffer", 0644, d_tracer, 4453 trace_create_file("free_buffer", 0644, d_tracer,
4890 &global_trace, &tracing_free_buffer_fops); 4454 &global_trace, &tracing_free_buffer_fops);
@@ -4898,9 +4462,6 @@ static __init int tracer_init_debugfs(void)
4898 trace_create_file("trace_clock", 0644, d_tracer, NULL, 4462 trace_create_file("trace_clock", 0644, d_tracer, NULL,
4899 &trace_clock_fops); 4463 &trace_clock_fops);
4900 4464
4901 trace_create_file("tracing_on", 0644, d_tracer,
4902 &global_trace, &rb_simple_fops);
4903
4904#ifdef CONFIG_DYNAMIC_FTRACE 4465#ifdef CONFIG_DYNAMIC_FTRACE
4905 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, 4466 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
4906 &ftrace_update_tot_cnt, &tracing_dyn_info_fops); 4467 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
@@ -5005,12 +4566,6 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
5005 4566
5006 tracing_off(); 4567 tracing_off();
5007 4568
5008 /* Did function tracer already get disabled? */
5009 if (ftrace_is_dead()) {
5010 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
5011 printk("# MAY BE MISSING FUNCTION EVENTS\n");
5012 }
5013
5014 if (disable_tracing) 4569 if (disable_tracing)
5015 ftrace_kill(); 4570 ftrace_kill();
5016 4571
@@ -5073,7 +4628,6 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
5073 if (ret != TRACE_TYPE_NO_CONSUME) 4628 if (ret != TRACE_TYPE_NO_CONSUME)
5074 trace_consume(&iter); 4629 trace_consume(&iter);
5075 } 4630 }
5076 touch_nmi_watchdog();
5077 4631
5078 trace_printk_seq(&iter.seq); 4632 trace_printk_seq(&iter.seq);
5079 } 4633 }
@@ -5104,7 +4658,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
5104{ 4658{
5105 __ftrace_dump(true, oops_dump_mode); 4659 __ftrace_dump(true, oops_dump_mode);
5106} 4660}
5107EXPORT_SYMBOL_GPL(ftrace_dump);
5108 4661
5109__init static int tracer_alloc_buffers(void) 4662__init static int tracer_alloc_buffers(void)
5110{ 4663{
@@ -5120,11 +4673,6 @@ __init static int tracer_alloc_buffers(void)
5120 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) 4673 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
5121 goto out_free_buffer_mask; 4674 goto out_free_buffer_mask;
5122 4675
5123 /* Only allocate trace_printk buffers if a trace_printk exists */
5124 if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
5125 /* Must be called before global_trace.buffer is allocated */
5126 trace_printk_init_buffers();
5127
5128 /* To save memory, keep the ring buffer size to its minimum */ 4676 /* To save memory, keep the ring buffer size to its minimum */
5129 if (ring_buffer_expanded) 4677 if (ring_buffer_expanded)
5130 ring_buf_size = trace_buf_size; 4678 ring_buf_size = trace_buf_size;
@@ -5143,8 +4691,7 @@ __init static int tracer_alloc_buffers(void)
5143 WARN_ON(1); 4691 WARN_ON(1);
5144 goto out_free_cpumask; 4692 goto out_free_cpumask;
5145 } 4693 }
5146 if (global_trace.buffer_disabled) 4694 global_trace.entries = ring_buffer_size(global_trace.buffer);
5147 tracing_off();
5148 4695
5149 4696
5150#ifdef CONFIG_TRACER_MAX_TRACE 4697#ifdef CONFIG_TRACER_MAX_TRACE
@@ -5155,6 +4702,7 @@ __init static int tracer_alloc_buffers(void)
5155 ring_buffer_free(global_trace.buffer); 4702 ring_buffer_free(global_trace.buffer);
5156 goto out_free_cpumask; 4703 goto out_free_cpumask;
5157 } 4704 }
4705 max_tr.entries = 1;
5158#endif 4706#endif
5159 4707
5160 /* Allocate the first page for all buffers */ 4708 /* Allocate the first page for all buffers */
@@ -5163,14 +4711,7 @@ __init static int tracer_alloc_buffers(void)
5163 max_tr.data[i] = &per_cpu(max_tr_data, i); 4711 max_tr.data[i] = &per_cpu(max_tr_data, i);
5164 } 4712 }
5165 4713
5166 set_buffer_entries(&global_trace,
5167 ring_buffer_size(global_trace.buffer, 0));
5168#ifdef CONFIG_TRACER_MAX_TRACE
5169 set_buffer_entries(&max_tr, 1);
5170#endif
5171
5172 trace_init_cmdlines(); 4714 trace_init_cmdlines();
5173 init_irq_work(&trace_work_wakeup, trace_wake_up);
5174 4715
5175 register_tracer(&nop_trace); 4716 register_tracer(&nop_trace);
5176 current_trace = &nop_trace; 4717 current_trace = &nop_trace;
@@ -5182,13 +4723,6 @@ __init static int tracer_alloc_buffers(void)
5182 4723
5183 register_die_notifier(&trace_die_notifier); 4724 register_die_notifier(&trace_die_notifier);
5184 4725
5185 while (trace_boot_options) {
5186 char *option;
5187
5188 option = strsep(&trace_boot_options, ",");
5189 trace_set_options(option);
5190 }
5191
5192 return 0; 4726 return 0;
5193 4727
5194out_free_cpumask: 4728out_free_cpumask:
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c75d7988902..616846bcfee 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -56,23 +56,17 @@ enum trace_type {
56#define F_STRUCT(args...) args 56#define F_STRUCT(args...) args
57 57
58#undef FTRACE_ENTRY 58#undef FTRACE_ENTRY
59#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ 59#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
60 struct struct_name { \ 60 struct struct_name { \
61 struct trace_entry ent; \ 61 struct trace_entry ent; \
62 tstruct \ 62 tstruct \
63 } 63 }
64 64
65#undef TP_ARGS 65#undef TP_ARGS
66#define TP_ARGS(args...) args 66#define TP_ARGS(args...) args
67 67
68#undef FTRACE_ENTRY_DUP 68#undef FTRACE_ENTRY_DUP
69#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk, filter) 69#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk)
70
71#undef FTRACE_ENTRY_REG
72#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \
73 filter, regfn) \
74 FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
75 filter)
76 70
77#include "trace_entries.h" 71#include "trace_entries.h"
78 72
@@ -103,11 +97,6 @@ struct kretprobe_trace_entry_head {
103 unsigned long ret_ip; 97 unsigned long ret_ip;
104}; 98};
105 99
106struct uprobe_trace_entry_head {
107 struct trace_entry ent;
108 unsigned long ip;
109};
110
111/* 100/*
112 * trace_flag_type is an enumeration that holds different 101 * trace_flag_type is an enumeration that holds different
113 * states when a trace occurs. These are: 102 * states when a trace occurs. These are:
@@ -136,7 +125,6 @@ struct trace_array_cpu {
136 atomic_t disabled; 125 atomic_t disabled;
137 void *buffer_page; /* ring buffer spare */ 126 void *buffer_page; /* ring buffer spare */
138 127
139 unsigned long entries;
140 unsigned long saved_latency; 128 unsigned long saved_latency;
141 unsigned long critical_start; 129 unsigned long critical_start;
142 unsigned long critical_end; 130 unsigned long critical_end;
@@ -147,7 +135,7 @@ struct trace_array_cpu {
147 unsigned long skipped_entries; 135 unsigned long skipped_entries;
148 cycle_t preempt_timestamp; 136 cycle_t preempt_timestamp;
149 pid_t pid; 137 pid_t pid;
150 kuid_t uid; 138 uid_t uid;
151 char comm[TASK_COMM_LEN]; 139 char comm[TASK_COMM_LEN];
152}; 140};
153 141
@@ -158,8 +146,8 @@ struct trace_array_cpu {
158 */ 146 */
159struct trace_array { 147struct trace_array {
160 struct ring_buffer *buffer; 148 struct ring_buffer *buffer;
149 unsigned long entries;
161 int cpu; 150 int cpu;
162 int buffer_disabled;
163 cycle_t time_start; 151 cycle_t time_start;
164 struct task_struct *waiter; 152 struct task_struct *waiter;
165 struct trace_array_cpu *data[NR_CPUS]; 153 struct trace_array_cpu *data[NR_CPUS];
@@ -285,8 +273,8 @@ struct tracer {
285 int (*set_flag)(u32 old_flags, u32 bit, int set); 273 int (*set_flag)(u32 old_flags, u32 bit, int set);
286 struct tracer *next; 274 struct tracer *next;
287 struct tracer_flags *flags; 275 struct tracer_flags *flags;
288 bool print_max; 276 int print_max;
289 bool use_max_tr; 277 int use_max_tr;
290}; 278};
291 279
292 280
@@ -300,8 +288,6 @@ struct tracer {
300/* for function tracing recursion */ 288/* for function tracing recursion */
301#define TRACE_INTERNAL_BIT (1<<11) 289#define TRACE_INTERNAL_BIT (1<<11)
302#define TRACE_GLOBAL_BIT (1<<12) 290#define TRACE_GLOBAL_BIT (1<<12)
303#define TRACE_CONTROL_BIT (1<<13)
304
305/* 291/*
306 * Abuse of the trace_recursion. 292 * Abuse of the trace_recursion.
307 * As we need a way to maintain state if we are tracing the function 293 * As we need a way to maintain state if we are tracing the function
@@ -317,23 +303,16 @@ struct tracer {
317 303
318#define TRACE_PIPE_ALL_CPU -1 304#define TRACE_PIPE_ALL_CPU -1
319 305
320static inline struct ring_buffer_iter *
321trace_buffer_iter(struct trace_iterator *iter, int cpu)
322{
323 if (iter->buffer_iter && iter->buffer_iter[cpu])
324 return iter->buffer_iter[cpu];
325 return NULL;
326}
327
328int tracer_init(struct tracer *t, struct trace_array *tr); 306int tracer_init(struct tracer *t, struct trace_array *tr);
329int tracing_is_enabled(void); 307int tracing_is_enabled(void);
308void trace_wake_up(void);
330void tracing_reset(struct trace_array *tr, int cpu); 309void tracing_reset(struct trace_array *tr, int cpu);
331void tracing_reset_online_cpus(struct trace_array *tr); 310void tracing_reset_online_cpus(struct trace_array *tr);
332void tracing_reset_current(int cpu); 311void tracing_reset_current(int cpu);
333void tracing_reset_current_online_cpus(void); 312void tracing_reset_current_online_cpus(void);
334int tracing_open_generic(struct inode *inode, struct file *filp); 313int tracing_open_generic(struct inode *inode, struct file *filp);
335struct dentry *trace_create_file(const char *name, 314struct dentry *trace_create_file(const char *name,
336 umode_t mode, 315 mode_t mode,
337 struct dentry *parent, 316 struct dentry *parent,
338 void *data, 317 void *data,
339 const struct file_operations *fops); 318 const struct file_operations *fops);
@@ -348,6 +327,9 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer,
348 unsigned long len, 327 unsigned long len,
349 unsigned long flags, 328 unsigned long flags,
350 int pc); 329 int pc);
330void trace_buffer_unlock_commit(struct ring_buffer *buffer,
331 struct ring_buffer_event *event,
332 unsigned long flags, int pc);
351 333
352struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, 334struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
353 struct trace_array_cpu *data); 335 struct trace_array_cpu *data);
@@ -355,9 +337,6 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
355struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 337struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
356 int *ent_cpu, u64 *ent_ts); 338 int *ent_cpu, u64 *ent_ts);
357 339
358void __buffer_unlock_commit(struct ring_buffer *buffer,
359 struct ring_buffer_event *event);
360
361int trace_empty(struct trace_iterator *iter); 340int trace_empty(struct trace_iterator *iter);
362 341
363void *trace_find_next_entry_inc(struct trace_iterator *iter); 342void *trace_find_next_entry_inc(struct trace_iterator *iter);
@@ -366,6 +345,7 @@ void trace_init_global_iter(struct trace_iterator *iter);
366 345
367void tracing_iter_reset(struct trace_iterator *iter, int cpu); 346void tracing_iter_reset(struct trace_iterator *iter, int cpu);
368 347
348void default_wait_pipe(struct trace_iterator *iter);
369void poll_wait_pipe(struct trace_iterator *iter); 349void poll_wait_pipe(struct trace_iterator *iter);
370 350
371void ftrace(struct trace_array *tr, 351void ftrace(struct trace_array *tr,
@@ -390,7 +370,6 @@ void trace_graph_function(struct trace_array *tr,
390 unsigned long ip, 370 unsigned long ip,
391 unsigned long parent_ip, 371 unsigned long parent_ip,
392 unsigned long flags, int pc); 372 unsigned long flags, int pc);
393void trace_latency_header(struct seq_file *m);
394void trace_default_header(struct seq_file *m); 373void trace_default_header(struct seq_file *m);
395void print_trace_header(struct seq_file *m, struct trace_iterator *iter); 374void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
396int trace_empty(struct trace_iterator *iter); 375int trace_empty(struct trace_iterator *iter);
@@ -405,7 +384,12 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr);
405void tracing_stop_sched_switch_record(void); 384void tracing_stop_sched_switch_record(void);
406void tracing_start_sched_switch_record(void); 385void tracing_start_sched_switch_record(void);
407int register_tracer(struct tracer *type); 386int register_tracer(struct tracer *type);
387void unregister_tracer(struct tracer *type);
408int is_tracing_stopped(void); 388int is_tracing_stopped(void);
389enum trace_file_type {
390 TRACE_FILE_LAT_FMT = 1,
391 TRACE_FILE_ANNOTATE = 2,
392};
409 393
410extern cpumask_var_t __read_mostly tracing_buffer_mask; 394extern cpumask_var_t __read_mostly tracing_buffer_mask;
411 395
@@ -465,11 +449,11 @@ extern void trace_find_cmdline(int pid, char comm[]);
465 449
466#ifdef CONFIG_DYNAMIC_FTRACE 450#ifdef CONFIG_DYNAMIC_FTRACE
467extern unsigned long ftrace_update_tot_cnt; 451extern unsigned long ftrace_update_tot_cnt;
468#endif
469#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func 452#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
470extern int DYN_FTRACE_TEST_NAME(void); 453extern int DYN_FTRACE_TEST_NAME(void);
471#define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2 454#define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2
472extern int DYN_FTRACE_TEST_NAME2(void); 455extern int DYN_FTRACE_TEST_NAME2(void);
456#endif
473 457
474extern int ring_buffer_expanded; 458extern int ring_buffer_expanded;
475extern bool tracing_selftest_disabled; 459extern bool tracing_selftest_disabled;
@@ -595,17 +579,13 @@ static inline int ftrace_trace_task(struct task_struct *task)
595 579
596 return test_tsk_trace_trace(task); 580 return test_tsk_trace_trace(task);
597} 581}
598extern int ftrace_is_dead(void);
599#else 582#else
600static inline int ftrace_trace_task(struct task_struct *task) 583static inline int ftrace_trace_task(struct task_struct *task)
601{ 584{
602 return 1; 585 return 1;
603} 586}
604static inline int ftrace_is_dead(void) { return 0; }
605#endif 587#endif
606 588
607int ftrace_event_is_function(struct ftrace_event_call *call);
608
609/* 589/*
610 * struct trace_parser - servers for reading the user input separated by spaces 590 * struct trace_parser - servers for reading the user input separated by spaces
611 * @cont: set if the input is not complete - no final space char was found 591 * @cont: set if the input is not complete - no final space char was found
@@ -672,8 +652,6 @@ enum trace_iterator_flags {
672 TRACE_ITER_RECORD_CMD = 0x100000, 652 TRACE_ITER_RECORD_CMD = 0x100000,
673 TRACE_ITER_OVERWRITE = 0x200000, 653 TRACE_ITER_OVERWRITE = 0x200000,
674 TRACE_ITER_STOP_ON_FREE = 0x400000, 654 TRACE_ITER_STOP_ON_FREE = 0x400000,
675 TRACE_ITER_IRQ_INFO = 0x800000,
676 TRACE_ITER_MARKERS = 0x1000000,
677}; 655};
678 656
679/* 657/*
@@ -783,8 +761,16 @@ struct filter_pred {
783 filter_pred_fn_t fn; 761 filter_pred_fn_t fn;
784 u64 val; 762 u64 val;
785 struct regex regex; 763 struct regex regex;
786 unsigned short *ops; 764 /*
787 struct ftrace_event_field *field; 765 * Leaf nodes use field_name, ops is used by AND and OR
766 * nodes. The field_name is always freed when freeing a pred.
767 * We can overload field_name for ops and have it freed
768 * as well.
769 */
770 union {
771 char *field_name;
772 unsigned short *ops;
773 };
788 int offset; 774 int offset;
789 int not; 775 int not;
790 int op; 776 int op;
@@ -833,24 +819,13 @@ extern struct list_head ftrace_events;
833extern const char *__start___trace_bprintk_fmt[]; 819extern const char *__start___trace_bprintk_fmt[];
834extern const char *__stop___trace_bprintk_fmt[]; 820extern const char *__stop___trace_bprintk_fmt[];
835 821
836void trace_printk_init_buffers(void);
837void trace_printk_start_comm(void);
838
839#undef FTRACE_ENTRY 822#undef FTRACE_ENTRY
840#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ 823#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
841 extern struct ftrace_event_call \ 824 extern struct ftrace_event_call \
842 __attribute__((__aligned__(4))) event_##call; 825 __attribute__((__aligned__(4))) event_##call;
843#undef FTRACE_ENTRY_DUP 826#undef FTRACE_ENTRY_DUP
844#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \ 827#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
845 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \ 828 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
846 filter)
847#include "trace_entries.h" 829#include "trace_entries.h"
848 830
849#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_FUNCTION_TRACER)
850int perf_ftrace_event_register(struct ftrace_event_call *call,
851 enum trace_reg type, void *data);
852#else
853#define perf_ftrace_event_register NULL
854#endif
855
856#endif /* _LINUX_KERNEL_TRACE_H */ 831#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 95e96842ed2..8d3538b4ea5 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -77,7 +77,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
77 entry->correct = val == expect; 77 entry->correct = val == expect;
78 78
79 if (!filter_check_discard(call, entry, buffer, event)) 79 if (!filter_check_discard(call, entry, buffer, event))
80 __buffer_unlock_commit(buffer, event); 80 ring_buffer_unlock_commit(buffer, event);
81 81
82 out: 82 out:
83 atomic_dec(&tr->data[cpu]->disabled); 83 atomic_dec(&tr->data[cpu]->disabled);
@@ -199,7 +199,7 @@ __init static int init_branch_tracer(void)
199 } 199 }
200 return register_tracer(&branch_trace); 200 return register_tracer(&branch_trace);
201} 201}
202core_initcall(init_branch_tracer); 202device_initcall(init_branch_tracer);
203 203
204#else 204#else
205static inline 205static inline
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 394783531cb..6302747a139 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -113,15 +113,3 @@ u64 notrace trace_clock_global(void)
113 113
114 return now; 114 return now;
115} 115}
116
117static atomic64_t trace_counter;
118
119/*
120 * trace_clock_counter(): simply an atomic counter.
121 * Use the trace_counter "counter" for cases where you do not care
122 * about timings, but are interested in strict ordering.
123 */
124u64 notrace trace_clock_counter(void)
125{
126 return atomic64_add_return(1, &trace_counter);
127}
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 4108e1250ca..93365907f21 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -55,7 +55,7 @@
55/* 55/*
56 * Function trace entry - function address and parent function address: 56 * Function trace entry - function address and parent function address:
57 */ 57 */
58FTRACE_ENTRY_REG(function, ftrace_entry, 58FTRACE_ENTRY(function, ftrace_entry,
59 59
60 TRACE_FN, 60 TRACE_FN,
61 61
@@ -64,11 +64,7 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
64 __field( unsigned long, parent_ip ) 64 __field( unsigned long, parent_ip )
65 ), 65 ),
66 66
67 F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip), 67 F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip)
68
69 FILTER_TRACE_FN,
70
71 perf_ftrace_event_register
72); 68);
73 69
74/* Function call entry */ 70/* Function call entry */
@@ -82,9 +78,7 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
82 __field_desc( int, graph_ent, depth ) 78 __field_desc( int, graph_ent, depth )
83 ), 79 ),
84 80
85 F_printk("--> %lx (%d)", __entry->func, __entry->depth), 81 F_printk("--> %lx (%d)", __entry->func, __entry->depth)
86
87 FILTER_OTHER
88); 82);
89 83
90/* Function return entry */ 84/* Function return entry */
@@ -104,9 +98,7 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
104 F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d", 98 F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d",
105 __entry->func, __entry->depth, 99 __entry->func, __entry->depth,
106 __entry->calltime, __entry->rettime, 100 __entry->calltime, __entry->rettime,
107 __entry->depth), 101 __entry->depth)
108
109 FILTER_OTHER
110); 102);
111 103
112/* 104/*
@@ -135,9 +127,8 @@ FTRACE_ENTRY(context_switch, ctx_switch_entry,
135 F_printk("%u:%u:%u ==> %u:%u:%u [%03u]", 127 F_printk("%u:%u:%u ==> %u:%u:%u [%03u]",
136 __entry->prev_pid, __entry->prev_prio, __entry->prev_state, 128 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
137 __entry->next_pid, __entry->next_prio, __entry->next_state, 129 __entry->next_pid, __entry->next_prio, __entry->next_state,
138 __entry->next_cpu), 130 __entry->next_cpu
139 131 )
140 FILTER_OTHER
141); 132);
142 133
143/* 134/*
@@ -155,9 +146,8 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
155 F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]", 146 F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]",
156 __entry->prev_pid, __entry->prev_prio, __entry->prev_state, 147 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
157 __entry->next_pid, __entry->next_prio, __entry->next_state, 148 __entry->next_pid, __entry->next_prio, __entry->next_state,
158 __entry->next_cpu), 149 __entry->next_cpu
159 150 )
160 FILTER_OTHER
161); 151);
162 152
163/* 153/*
@@ -166,12 +156,6 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
166 156
167#define FTRACE_STACK_ENTRIES 8 157#define FTRACE_STACK_ENTRIES 8
168 158
169#ifndef CONFIG_64BIT
170# define IP_FMT "%08lx"
171#else
172# define IP_FMT "%016lx"
173#endif
174
175FTRACE_ENTRY(kernel_stack, stack_entry, 159FTRACE_ENTRY(kernel_stack, stack_entry,
176 160
177 TRACE_STACK, 161 TRACE_STACK,
@@ -181,14 +165,11 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
181 __dynamic_array(unsigned long, caller ) 165 __dynamic_array(unsigned long, caller )
182 ), 166 ),
183 167
184 F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" 168 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
185 "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" 169 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
186 "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n",
187 __entry->caller[0], __entry->caller[1], __entry->caller[2], 170 __entry->caller[0], __entry->caller[1], __entry->caller[2],
188 __entry->caller[3], __entry->caller[4], __entry->caller[5], 171 __entry->caller[3], __entry->caller[4], __entry->caller[5],
189 __entry->caller[6], __entry->caller[7]), 172 __entry->caller[6], __entry->caller[7])
190
191 FILTER_OTHER
192); 173);
193 174
194FTRACE_ENTRY(user_stack, userstack_entry, 175FTRACE_ENTRY(user_stack, userstack_entry,
@@ -200,14 +181,11 @@ FTRACE_ENTRY(user_stack, userstack_entry,
200 __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) 181 __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
201 ), 182 ),
202 183
203 F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" 184 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
204 "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" 185 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
205 "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n",
206 __entry->caller[0], __entry->caller[1], __entry->caller[2], 186 __entry->caller[0], __entry->caller[1], __entry->caller[2],
207 __entry->caller[3], __entry->caller[4], __entry->caller[5], 187 __entry->caller[3], __entry->caller[4], __entry->caller[5],
208 __entry->caller[6], __entry->caller[7]), 188 __entry->caller[6], __entry->caller[7])
209
210 FILTER_OTHER
211); 189);
212 190
213/* 191/*
@@ -224,9 +202,7 @@ FTRACE_ENTRY(bprint, bprint_entry,
224 ), 202 ),
225 203
226 F_printk("%08lx fmt:%p", 204 F_printk("%08lx fmt:%p",
227 __entry->ip, __entry->fmt), 205 __entry->ip, __entry->fmt)
228
229 FILTER_OTHER
230); 206);
231 207
232FTRACE_ENTRY(print, print_entry, 208FTRACE_ENTRY(print, print_entry,
@@ -239,9 +215,7 @@ FTRACE_ENTRY(print, print_entry,
239 ), 215 ),
240 216
241 F_printk("%08lx %s", 217 F_printk("%08lx %s",
242 __entry->ip, __entry->buf), 218 __entry->ip, __entry->buf)
243
244 FILTER_OTHER
245); 219);
246 220
247FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, 221FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
@@ -260,9 +234,7 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
260 234
261 F_printk("%lx %lx %lx %d %x %x", 235 F_printk("%lx %lx %lx %d %x %x",
262 (unsigned long)__entry->phys, __entry->value, __entry->pc, 236 (unsigned long)__entry->phys, __entry->value, __entry->pc,
263 __entry->map_id, __entry->opcode, __entry->width), 237 __entry->map_id, __entry->opcode, __entry->width)
264
265 FILTER_OTHER
266); 238);
267 239
268FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, 240FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
@@ -280,9 +252,7 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
280 252
281 F_printk("%lx %lx %lx %d %x", 253 F_printk("%lx %lx %lx %d %x",
282 (unsigned long)__entry->phys, __entry->virt, __entry->len, 254 (unsigned long)__entry->phys, __entry->virt, __entry->len,
283 __entry->map_id, __entry->opcode), 255 __entry->map_id, __entry->opcode)
284
285 FILTER_OTHER
286); 256);
287 257
288 258
@@ -302,8 +272,6 @@ FTRACE_ENTRY(branch, trace_branch,
302 272
303 F_printk("%u:%s:%s (%u)", 273 F_printk("%u:%s:%s (%u)",
304 __entry->line, 274 __entry->line,
305 __entry->func, __entry->file, __entry->correct), 275 __entry->func, __entry->file, __entry->correct)
306
307 FILTER_OTHER
308); 276);
309 277
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 84b1e045fab..19a359d5e6d 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -24,11 +24,6 @@ static int total_ref_count;
24static int perf_trace_event_perm(struct ftrace_event_call *tp_event, 24static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event) 25 struct perf_event *p_event)
26{ 26{
27 /* The ftrace function trace is allowed only for root. */
28 if (ftrace_event_is_function(tp_event) &&
29 perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
30 return -EPERM;
31
32 /* No tracing, just counting, so no obvious leak */ 27 /* No tracing, just counting, so no obvious leak */
33 if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) 28 if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
34 return 0; 29 return 0;
@@ -49,17 +44,23 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
49 return 0; 44 return 0;
50} 45}
51 46
52static int perf_trace_event_reg(struct ftrace_event_call *tp_event, 47static int perf_trace_event_init(struct ftrace_event_call *tp_event,
53 struct perf_event *p_event) 48 struct perf_event *p_event)
54{ 49{
55 struct hlist_head __percpu *list; 50 struct hlist_head __percpu *list;
56 int ret = -ENOMEM; 51 int ret;
57 int cpu; 52 int cpu;
58 53
54 ret = perf_trace_event_perm(tp_event, p_event);
55 if (ret)
56 return ret;
57
59 p_event->tp_event = tp_event; 58 p_event->tp_event = tp_event;
60 if (tp_event->perf_refcount++ > 0) 59 if (tp_event->perf_refcount++ > 0)
61 return 0; 60 return 0;
62 61
62 ret = -ENOMEM;
63
63 list = alloc_percpu(struct hlist_head); 64 list = alloc_percpu(struct hlist_head);
64 if (!list) 65 if (!list)
65 goto fail; 66 goto fail;
@@ -82,7 +83,7 @@ static int perf_trace_event_reg(struct ftrace_event_call *tp_event,
82 } 83 }
83 } 84 }
84 85
85 ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL); 86 ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
86 if (ret) 87 if (ret)
87 goto fail; 88 goto fail;
88 89
@@ -107,69 +108,6 @@ fail:
107 return ret; 108 return ret;
108} 109}
109 110
110static void perf_trace_event_unreg(struct perf_event *p_event)
111{
112 struct ftrace_event_call *tp_event = p_event->tp_event;
113 int i;
114
115 if (--tp_event->perf_refcount > 0)
116 goto out;
117
118 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);
119
120 /*
121 * Ensure our callback won't be called anymore. The buffers
122 * will be freed after that.
123 */
124 tracepoint_synchronize_unregister();
125
126 free_percpu(tp_event->perf_events);
127 tp_event->perf_events = NULL;
128
129 if (!--total_ref_count) {
130 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
131 free_percpu(perf_trace_buf[i]);
132 perf_trace_buf[i] = NULL;
133 }
134 }
135out:
136 module_put(tp_event->mod);
137}
138
139static int perf_trace_event_open(struct perf_event *p_event)
140{
141 struct ftrace_event_call *tp_event = p_event->tp_event;
142 return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event);
143}
144
145static void perf_trace_event_close(struct perf_event *p_event)
146{
147 struct ftrace_event_call *tp_event = p_event->tp_event;
148 tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event);
149}
150
151static int perf_trace_event_init(struct ftrace_event_call *tp_event,
152 struct perf_event *p_event)
153{
154 int ret;
155
156 ret = perf_trace_event_perm(tp_event, p_event);
157 if (ret)
158 return ret;
159
160 ret = perf_trace_event_reg(tp_event, p_event);
161 if (ret)
162 return ret;
163
164 ret = perf_trace_event_open(p_event);
165 if (ret) {
166 perf_trace_event_unreg(p_event);
167 return ret;
168 }
169
170 return 0;
171}
172
173int perf_trace_init(struct perf_event *p_event) 111int perf_trace_init(struct perf_event *p_event)
174{ 112{
175 struct ftrace_event_call *tp_event; 113 struct ftrace_event_call *tp_event;
@@ -192,14 +130,6 @@ int perf_trace_init(struct perf_event *p_event)
192 return ret; 130 return ret;
193} 131}
194 132
195void perf_trace_destroy(struct perf_event *p_event)
196{
197 mutex_lock(&event_mutex);
198 perf_trace_event_close(p_event);
199 perf_trace_event_unreg(p_event);
200 mutex_unlock(&event_mutex);
201}
202
203int perf_trace_add(struct perf_event *p_event, int flags) 133int perf_trace_add(struct perf_event *p_event, int flags)
204{ 134{
205 struct ftrace_event_call *tp_event = p_event->tp_event; 135 struct ftrace_event_call *tp_event = p_event->tp_event;
@@ -216,14 +146,43 @@ int perf_trace_add(struct perf_event *p_event, int flags)
216 list = this_cpu_ptr(pcpu_list); 146 list = this_cpu_ptr(pcpu_list);
217 hlist_add_head_rcu(&p_event->hlist_entry, list); 147 hlist_add_head_rcu(&p_event->hlist_entry, list);
218 148
219 return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event); 149 return 0;
220} 150}
221 151
222void perf_trace_del(struct perf_event *p_event, int flags) 152void perf_trace_del(struct perf_event *p_event, int flags)
223{ 153{
224 struct ftrace_event_call *tp_event = p_event->tp_event;
225 hlist_del_rcu(&p_event->hlist_entry); 154 hlist_del_rcu(&p_event->hlist_entry);
226 tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); 155}
156
157void perf_trace_destroy(struct perf_event *p_event)
158{
159 struct ftrace_event_call *tp_event = p_event->tp_event;
160 int i;
161
162 mutex_lock(&event_mutex);
163 if (--tp_event->perf_refcount > 0)
164 goto out;
165
166 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
167
168 /*
169 * Ensure our callback won't be called anymore. The buffers
170 * will be freed after that.
171 */
172 tracepoint_synchronize_unregister();
173
174 free_percpu(tp_event->perf_events);
175 tp_event->perf_events = NULL;
176
177 if (!--total_ref_count) {
178 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
179 free_percpu(perf_trace_buf[i]);
180 perf_trace_buf[i] = NULL;
181 }
182 }
183out:
184 module_put(tp_event->mod);
185 mutex_unlock(&event_mutex);
227} 186}
228 187
229__kprobes void *perf_trace_buf_prepare(int size, unsigned short type, 188__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
@@ -255,87 +214,3 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
255 return raw_data; 214 return raw_data;
256} 215}
257EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); 216EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
258
259#ifdef CONFIG_FUNCTION_TRACER
260static void
261perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
262 struct ftrace_ops *ops, struct pt_regs *pt_regs)
263{
264 struct ftrace_entry *entry;
265 struct hlist_head *head;
266 struct pt_regs regs;
267 int rctx;
268
269#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
270 sizeof(u64)) - sizeof(u32))
271
272 BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);
273
274 perf_fetch_caller_regs(&regs);
275
276 entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx);
277 if (!entry)
278 return;
279
280 entry->ip = ip;
281 entry->parent_ip = parent_ip;
282
283 head = this_cpu_ptr(event_function.perf_events);
284 perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
285 1, &regs, head, NULL);
286
287#undef ENTRY_SIZE
288}
289
290static int perf_ftrace_function_register(struct perf_event *event)
291{
292 struct ftrace_ops *ops = &event->ftrace_ops;
293
294 ops->flags |= FTRACE_OPS_FL_CONTROL;
295 ops->func = perf_ftrace_function_call;
296 return register_ftrace_function(ops);
297}
298
299static int perf_ftrace_function_unregister(struct perf_event *event)
300{
301 struct ftrace_ops *ops = &event->ftrace_ops;
302 int ret = unregister_ftrace_function(ops);
303 ftrace_free_filter(ops);
304 return ret;
305}
306
307static void perf_ftrace_function_enable(struct perf_event *event)
308{
309 ftrace_function_local_enable(&event->ftrace_ops);
310}
311
312static void perf_ftrace_function_disable(struct perf_event *event)
313{
314 ftrace_function_local_disable(&event->ftrace_ops);
315}
316
317int perf_ftrace_event_register(struct ftrace_event_call *call,
318 enum trace_reg type, void *data)
319{
320 switch (type) {
321 case TRACE_REG_REGISTER:
322 case TRACE_REG_UNREGISTER:
323 break;
324 case TRACE_REG_PERF_REGISTER:
325 case TRACE_REG_PERF_UNREGISTER:
326 return 0;
327 case TRACE_REG_PERF_OPEN:
328 return perf_ftrace_function_register(data);
329 case TRACE_REG_PERF_CLOSE:
330 return perf_ftrace_function_unregister(data);
331 case TRACE_REG_PERF_ADD:
332 perf_ftrace_function_enable(data);
333 return 0;
334 case TRACE_REG_PERF_DEL:
335 perf_ftrace_function_disable(data);
336 return 0;
337 }
338
339 return -EINVAL;
340}
341#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 880073d0b94..c212a7f934e 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -147,8 +147,7 @@ int trace_event_raw_init(struct ftrace_event_call *call)
147} 147}
148EXPORT_SYMBOL_GPL(trace_event_raw_init); 148EXPORT_SYMBOL_GPL(trace_event_raw_init);
149 149
150int ftrace_event_reg(struct ftrace_event_call *call, 150int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
151 enum trace_reg type, void *data)
152{ 151{
153 switch (type) { 152 switch (type) {
154 case TRACE_REG_REGISTER: 153 case TRACE_REG_REGISTER:
@@ -171,11 +170,6 @@ int ftrace_event_reg(struct ftrace_event_call *call,
171 call->class->perf_probe, 170 call->class->perf_probe,
172 call); 171 call);
173 return 0; 172 return 0;
174 case TRACE_REG_PERF_OPEN:
175 case TRACE_REG_PERF_CLOSE:
176 case TRACE_REG_PERF_ADD:
177 case TRACE_REG_PERF_DEL:
178 return 0;
179#endif 173#endif
180 } 174 }
181 return 0; 175 return 0;
@@ -215,7 +209,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
215 tracing_stop_cmdline_record(); 209 tracing_stop_cmdline_record();
216 call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; 210 call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
217 } 211 }
218 call->class->reg(call, TRACE_REG_UNREGISTER, NULL); 212 call->class->reg(call, TRACE_REG_UNREGISTER);
219 } 213 }
220 break; 214 break;
221 case 1: 215 case 1:
@@ -224,7 +218,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
224 tracing_start_cmdline_record(); 218 tracing_start_cmdline_record();
225 call->flags |= TRACE_EVENT_FL_RECORDED_CMD; 219 call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
226 } 220 }
227 ret = call->class->reg(call, TRACE_REG_REGISTER, NULL); 221 ret = call->class->reg(call, TRACE_REG_REGISTER);
228 if (ret) { 222 if (ret) {
229 tracing_stop_cmdline_record(); 223 tracing_stop_cmdline_record();
230 pr_info("event trace: Could not enable event " 224 pr_info("event trace: Could not enable event "
@@ -294,9 +288,6 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
294 if (!call->name || !call->class || !call->class->reg) 288 if (!call->name || !call->class || !call->class->reg)
295 continue; 289 continue;
296 290
297 if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
298 continue;
299
300 if (match && 291 if (match &&
301 strcmp(match, call->name) != 0 && 292 strcmp(match, call->name) != 0 &&
302 strcmp(match, call->class->system) != 0) 293 strcmp(match, call->class->system) != 0)
@@ -491,6 +482,19 @@ static void t_stop(struct seq_file *m, void *p)
491 mutex_unlock(&event_mutex); 482 mutex_unlock(&event_mutex);
492} 483}
493 484
485static int
486ftrace_event_seq_open(struct inode *inode, struct file *file)
487{
488 const struct seq_operations *seq_ops;
489
490 if ((file->f_mode & FMODE_WRITE) &&
491 (file->f_flags & O_TRUNC))
492 ftrace_clear_events();
493
494 seq_ops = inode->i_private;
495 return seq_open(file, seq_ops);
496}
497
494static ssize_t 498static ssize_t
495event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, 499event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
496 loff_t *ppos) 500 loff_t *ppos)
@@ -967,9 +971,6 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
967 return r; 971 return r;
968} 972}
969 973
970static int ftrace_event_avail_open(struct inode *inode, struct file *file);
971static int ftrace_event_set_open(struct inode *inode, struct file *file);
972
973static const struct seq_operations show_event_seq_ops = { 974static const struct seq_operations show_event_seq_ops = {
974 .start = t_start, 975 .start = t_start,
975 .next = t_next, 976 .next = t_next,
@@ -985,14 +986,14 @@ static const struct seq_operations show_set_event_seq_ops = {
985}; 986};
986 987
987static const struct file_operations ftrace_avail_fops = { 988static const struct file_operations ftrace_avail_fops = {
988 .open = ftrace_event_avail_open, 989 .open = ftrace_event_seq_open,
989 .read = seq_read, 990 .read = seq_read,
990 .llseek = seq_lseek, 991 .llseek = seq_lseek,
991 .release = seq_release, 992 .release = seq_release,
992}; 993};
993 994
994static const struct file_operations ftrace_set_event_fops = { 995static const struct file_operations ftrace_set_event_fops = {
995 .open = ftrace_event_set_open, 996 .open = ftrace_event_seq_open,
996 .read = seq_read, 997 .read = seq_read,
997 .write = ftrace_event_write, 998 .write = ftrace_event_write,
998 .llseek = seq_lseek, 999 .llseek = seq_lseek,
@@ -1068,26 +1069,6 @@ static struct dentry *event_trace_events_dir(void)
1068 return d_events; 1069 return d_events;
1069} 1070}
1070 1071
1071static int
1072ftrace_event_avail_open(struct inode *inode, struct file *file)
1073{
1074 const struct seq_operations *seq_ops = &show_event_seq_ops;
1075
1076 return seq_open(file, seq_ops);
1077}
1078
1079static int
1080ftrace_event_set_open(struct inode *inode, struct file *file)
1081{
1082 const struct seq_operations *seq_ops = &show_set_event_seq_ops;
1083
1084 if ((file->f_mode & FMODE_WRITE) &&
1085 (file->f_flags & O_TRUNC))
1086 ftrace_clear_events();
1087
1088 return seq_open(file, seq_ops);
1089}
1090
1091static struct dentry * 1072static struct dentry *
1092event_subsystem_dir(const char *name, struct dentry *d_events) 1073event_subsystem_dir(const char *name, struct dentry *d_events)
1093{ 1074{
@@ -1177,7 +1158,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
1177 return -1; 1158 return -1;
1178 } 1159 }
1179 1160
1180 if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) 1161 if (call->class->reg)
1181 trace_create_file("enable", 0644, call->dir, call, 1162 trace_create_file("enable", 0644, call->dir, call,
1182 enable); 1163 enable);
1183 1164
@@ -1209,31 +1190,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
1209 return 0; 1190 return 0;
1210} 1191}
1211 1192
1212static void event_remove(struct ftrace_event_call *call)
1213{
1214 ftrace_event_enable_disable(call, 0);
1215 if (call->event.funcs)
1216 __unregister_ftrace_event(&call->event);
1217 list_del(&call->list);
1218}
1219
1220static int event_init(struct ftrace_event_call *call)
1221{
1222 int ret = 0;
1223
1224 if (WARN_ON(!call->name))
1225 return -EINVAL;
1226
1227 if (call->class->raw_init) {
1228 ret = call->class->raw_init(call);
1229 if (ret < 0 && ret != -ENOSYS)
1230 pr_warn("Could not initialize trace events/%s\n",
1231 call->name);
1232 }
1233
1234 return ret;
1235}
1236
1237static int 1193static int
1238__trace_add_event_call(struct ftrace_event_call *call, struct module *mod, 1194__trace_add_event_call(struct ftrace_event_call *call, struct module *mod,
1239 const struct file_operations *id, 1195 const struct file_operations *id,
@@ -1244,9 +1200,19 @@ __trace_add_event_call(struct ftrace_event_call *call, struct module *mod,
1244 struct dentry *d_events; 1200 struct dentry *d_events;
1245 int ret; 1201 int ret;
1246 1202
1247 ret = event_init(call); 1203 /* The linker may leave blanks */
1248 if (ret < 0) 1204 if (!call->name)
1249 return ret; 1205 return -EINVAL;
1206
1207 if (call->class->raw_init) {
1208 ret = call->class->raw_init(call);
1209 if (ret < 0) {
1210 if (ret != -ENOSYS)
1211 pr_warning("Could not initialize trace events/%s\n",
1212 call->name);
1213 return ret;
1214 }
1215 }
1250 1216
1251 d_events = event_trace_events_dir(); 1217 d_events = event_trace_events_dir();
1252 if (!d_events) 1218 if (!d_events)
@@ -1297,10 +1263,13 @@ static void remove_subsystem_dir(const char *name)
1297 */ 1263 */
1298static void __trace_remove_event_call(struct ftrace_event_call *call) 1264static void __trace_remove_event_call(struct ftrace_event_call *call)
1299{ 1265{
1300 event_remove(call); 1266 ftrace_event_enable_disable(call, 0);
1267 if (call->event.funcs)
1268 __unregister_ftrace_event(&call->event);
1269 debugfs_remove_recursive(call->dir);
1270 list_del(&call->list);
1301 trace_destroy_fields(call); 1271 trace_destroy_fields(call);
1302 destroy_preds(call); 1272 destroy_preds(call);
1303 debugfs_remove_recursive(call->dir);
1304 remove_subsystem_dir(call->class->system); 1273 remove_subsystem_dir(call->class->system);
1305} 1274}
1306 1275
@@ -1472,59 +1441,30 @@ static __init int setup_trace_event(char *str)
1472} 1441}
1473__setup("trace_event=", setup_trace_event); 1442__setup("trace_event=", setup_trace_event);
1474 1443
1475static __init int event_trace_enable(void)
1476{
1477 struct ftrace_event_call **iter, *call;
1478 char *buf = bootup_event_buf;
1479 char *token;
1480 int ret;
1481
1482 for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
1483
1484 call = *iter;
1485 ret = event_init(call);
1486 if (!ret)
1487 list_add(&call->list, &ftrace_events);
1488 }
1489
1490 while (true) {
1491 token = strsep(&buf, ",");
1492
1493 if (!token)
1494 break;
1495 if (!*token)
1496 continue;
1497
1498 ret = ftrace_set_clr_event(token, 1);
1499 if (ret)
1500 pr_warn("Failed to enable trace event: %s\n", token);
1501 }
1502
1503 trace_printk_start_comm();
1504
1505 return 0;
1506}
1507
1508static __init int event_trace_init(void) 1444static __init int event_trace_init(void)
1509{ 1445{
1510 struct ftrace_event_call *call; 1446 struct ftrace_event_call **call;
1511 struct dentry *d_tracer; 1447 struct dentry *d_tracer;
1512 struct dentry *entry; 1448 struct dentry *entry;
1513 struct dentry *d_events; 1449 struct dentry *d_events;
1514 int ret; 1450 int ret;
1451 char *buf = bootup_event_buf;
1452 char *token;
1515 1453
1516 d_tracer = tracing_init_dentry(); 1454 d_tracer = tracing_init_dentry();
1517 if (!d_tracer) 1455 if (!d_tracer)
1518 return 0; 1456 return 0;
1519 1457
1520 entry = debugfs_create_file("available_events", 0444, d_tracer, 1458 entry = debugfs_create_file("available_events", 0444, d_tracer,
1521 NULL, &ftrace_avail_fops); 1459 (void *)&show_event_seq_ops,
1460 &ftrace_avail_fops);
1522 if (!entry) 1461 if (!entry)
1523 pr_warning("Could not create debugfs " 1462 pr_warning("Could not create debugfs "
1524 "'available_events' entry\n"); 1463 "'available_events' entry\n");
1525 1464
1526 entry = debugfs_create_file("set_event", 0644, d_tracer, 1465 entry = debugfs_create_file("set_event", 0644, d_tracer,
1527 NULL, &ftrace_set_event_fops); 1466 (void *)&show_set_event_seq_ops,
1467 &ftrace_set_event_fops);
1528 if (!entry) 1468 if (!entry)
1529 pr_warning("Could not create debugfs " 1469 pr_warning("Could not create debugfs "
1530 "'set_event' entry\n"); 1470 "'set_event' entry\n");
@@ -1548,19 +1488,24 @@ static __init int event_trace_init(void)
1548 if (trace_define_common_fields()) 1488 if (trace_define_common_fields())
1549 pr_warning("tracing: Failed to allocate common fields"); 1489 pr_warning("tracing: Failed to allocate common fields");
1550 1490
1551 /* 1491 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1552 * Early initialization already enabled ftrace event. 1492 __trace_add_event_call(*call, NULL, &ftrace_event_id_fops,
1553 * Now it's only necessary to create the event directory.
1554 */
1555 list_for_each_entry(call, &ftrace_events, list) {
1556
1557 ret = event_create_dir(call, d_events,
1558 &ftrace_event_id_fops,
1559 &ftrace_enable_fops, 1493 &ftrace_enable_fops,
1560 &ftrace_event_filter_fops, 1494 &ftrace_event_filter_fops,
1561 &ftrace_event_format_fops); 1495 &ftrace_event_format_fops);
1562 if (ret < 0) 1496 }
1563 event_remove(call); 1497
1498 while (true) {
1499 token = strsep(&buf, ",");
1500
1501 if (!token)
1502 break;
1503 if (!*token)
1504 continue;
1505
1506 ret = ftrace_set_clr_event(token, 1);
1507 if (ret)
1508 pr_warning("Failed to enable trace event: %s\n", token);
1564 } 1509 }
1565 1510
1566 ret = register_module_notifier(&trace_module_nb); 1511 ret = register_module_notifier(&trace_module_nb);
@@ -1569,7 +1514,6 @@ static __init int event_trace_init(void)
1569 1514
1570 return 0; 1515 return 0;
1571} 1516}
1572core_initcall(event_trace_enable);
1573fs_initcall(event_trace_init); 1517fs_initcall(event_trace_init);
1574 1518
1575#ifdef CONFIG_FTRACE_STARTUP_TEST 1519#ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -1693,11 +1637,9 @@ static __init void event_trace_self_tests(void)
1693 event_test_stuff(); 1637 event_test_stuff();
1694 1638
1695 ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0); 1639 ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
1696 if (WARN_ON_ONCE(ret)) { 1640 if (WARN_ON_ONCE(ret))
1697 pr_warning("error disabling system %s\n", 1641 pr_warning("error disabling system %s\n",
1698 system->name); 1642 system->name);
1699 continue;
1700 }
1701 1643
1702 pr_cont("OK\n"); 1644 pr_cont("OK\n");
1703 } 1645 }
@@ -1730,8 +1672,7 @@ static __init void event_trace_self_tests(void)
1730static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); 1672static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
1731 1673
1732static void 1674static void
1733function_test_events_call(unsigned long ip, unsigned long parent_ip, 1675function_test_events_call(unsigned long ip, unsigned long parent_ip)
1734 struct ftrace_ops *op, struct pt_regs *pt_regs)
1735{ 1676{
1736 struct ring_buffer_event *event; 1677 struct ring_buffer_event *event;
1737 struct ring_buffer *buffer; 1678 struct ring_buffer *buffer;
@@ -1760,7 +1701,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip,
1760 entry->ip = ip; 1701 entry->ip = ip;
1761 entry->parent_ip = parent_ip; 1702 entry->parent_ip = parent_ip;
1762 1703
1763 trace_buffer_unlock_commit(buffer, event, flags, pc); 1704 trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
1764 1705
1765 out: 1706 out:
1766 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); 1707 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
@@ -1770,7 +1711,6 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip,
1770static struct ftrace_ops trace_ops __initdata = 1711static struct ftrace_ops trace_ops __initdata =
1771{ 1712{
1772 .func = function_test_events_call, 1713 .func = function_test_events_call,
1773 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
1774}; 1714};
1775 1715
1776static __init void event_trace_self_test_with_function(void) 1716static __init void event_trace_self_test_with_function(void)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e5b0ca8b8d4..bd3c6369f80 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -27,12 +27,6 @@
27#include "trace.h" 27#include "trace.h"
28#include "trace_output.h" 28#include "trace_output.h"
29 29
30#define DEFAULT_SYS_FILTER_MESSAGE \
31 "### global filter ###\n" \
32 "# Use this to set filters for multiple events.\n" \
33 "# Only events with the given fields will be affected.\n" \
34 "# If no events are modified, an error message will be displayed here"
35
36enum filter_op_ids 30enum filter_op_ids
37{ 31{
38 OP_OR, 32 OP_OR,
@@ -81,7 +75,6 @@ enum {
81 FILT_ERR_TOO_MANY_PREDS, 75 FILT_ERR_TOO_MANY_PREDS,
82 FILT_ERR_MISSING_FIELD, 76 FILT_ERR_MISSING_FIELD,
83 FILT_ERR_INVALID_FILTER, 77 FILT_ERR_INVALID_FILTER,
84 FILT_ERR_IP_FIELD_ONLY,
85}; 78};
86 79
87static char *err_text[] = { 80static char *err_text[] = {
@@ -97,7 +90,6 @@ static char *err_text[] = {
97 "Too many terms in predicate expression", 90 "Too many terms in predicate expression",
98 "Missing field name and/or value", 91 "Missing field name and/or value",
99 "Meaningless filter expression", 92 "Meaningless filter expression",
100 "Only 'ip' field is supported for function trace",
101}; 93};
102 94
103struct opstack_op { 95struct opstack_op {
@@ -389,63 +381,6 @@ get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
389 return pred; 381 return pred;
390} 382}
391 383
392enum walk_return {
393 WALK_PRED_ABORT,
394 WALK_PRED_PARENT,
395 WALK_PRED_DEFAULT,
396};
397
398typedef int (*filter_pred_walkcb_t) (enum move_type move,
399 struct filter_pred *pred,
400 int *err, void *data);
401
402static int walk_pred_tree(struct filter_pred *preds,
403 struct filter_pred *root,
404 filter_pred_walkcb_t cb, void *data)
405{
406 struct filter_pred *pred = root;
407 enum move_type move = MOVE_DOWN;
408 int done = 0;
409
410 if (!preds)
411 return -EINVAL;
412
413 do {
414 int err = 0, ret;
415
416 ret = cb(move, pred, &err, data);
417 if (ret == WALK_PRED_ABORT)
418 return err;
419 if (ret == WALK_PRED_PARENT)
420 goto get_parent;
421
422 switch (move) {
423 case MOVE_DOWN:
424 if (pred->left != FILTER_PRED_INVALID) {
425 pred = &preds[pred->left];
426 continue;
427 }
428 goto get_parent;
429 case MOVE_UP_FROM_LEFT:
430 pred = &preds[pred->right];
431 move = MOVE_DOWN;
432 continue;
433 case MOVE_UP_FROM_RIGHT:
434 get_parent:
435 if (pred == root)
436 break;
437 pred = get_pred_parent(pred, preds,
438 pred->parent,
439 &move);
440 continue;
441 }
442 done = 1;
443 } while (!done);
444
445 /* We are fine. */
446 return 0;
447}
448
449/* 384/*
450 * A series of AND or ORs where found together. Instead of 385 * A series of AND or ORs where found together. Instead of
451 * climbing up and down the tree branches, an array of the 386 * climbing up and down the tree branches, an array of the
@@ -475,91 +410,99 @@ static int process_ops(struct filter_pred *preds,
475 410
476 for (i = 0; i < op->val; i++) { 411 for (i = 0; i < op->val; i++) {
477 pred = &preds[op->ops[i]]; 412 pred = &preds[op->ops[i]];
478 if (!WARN_ON_ONCE(!pred->fn)) 413 match = pred->fn(pred, rec);
479 match = pred->fn(pred, rec);
480 if (!!match == type) 414 if (!!match == type)
481 return match; 415 return match;
482 } 416 }
483 return match; 417 return match;
484} 418}
485 419
486struct filter_match_preds_data {
487 struct filter_pred *preds;
488 int match;
489 void *rec;
490};
491
492static int filter_match_preds_cb(enum move_type move, struct filter_pred *pred,
493 int *err, void *data)
494{
495 struct filter_match_preds_data *d = data;
496
497 *err = 0;
498 switch (move) {
499 case MOVE_DOWN:
500 /* only AND and OR have children */
501 if (pred->left != FILTER_PRED_INVALID) {
502 /* If ops is set, then it was folded. */
503 if (!pred->ops)
504 return WALK_PRED_DEFAULT;
505 /* We can treat folded ops as a leaf node */
506 d->match = process_ops(d->preds, pred, d->rec);
507 } else {
508 if (!WARN_ON_ONCE(!pred->fn))
509 d->match = pred->fn(pred, d->rec);
510 }
511
512 return WALK_PRED_PARENT;
513 case MOVE_UP_FROM_LEFT:
514 /*
515 * Check for short circuits.
516 *
517 * Optimization: !!match == (pred->op == OP_OR)
518 * is the same as:
519 * if ((match && pred->op == OP_OR) ||
520 * (!match && pred->op == OP_AND))
521 */
522 if (!!d->match == (pred->op == OP_OR))
523 return WALK_PRED_PARENT;
524 break;
525 case MOVE_UP_FROM_RIGHT:
526 break;
527 }
528
529 return WALK_PRED_DEFAULT;
530}
531
532/* return 1 if event matches, 0 otherwise (discard) */ 420/* return 1 if event matches, 0 otherwise (discard) */
533int filter_match_preds(struct event_filter *filter, void *rec) 421int filter_match_preds(struct event_filter *filter, void *rec)
534{ 422{
423 int match = -1;
424 enum move_type move = MOVE_DOWN;
535 struct filter_pred *preds; 425 struct filter_pred *preds;
426 struct filter_pred *pred;
536 struct filter_pred *root; 427 struct filter_pred *root;
537 struct filter_match_preds_data data = { 428 int n_preds;
538 /* match is currently meaningless */ 429 int done = 0;
539 .match = -1,
540 .rec = rec,
541 };
542 int n_preds, ret;
543 430
544 /* no filter is considered a match */ 431 /* no filter is considered a match */
545 if (!filter) 432 if (!filter)
546 return 1; 433 return 1;
547 434
548 n_preds = filter->n_preds; 435 n_preds = filter->n_preds;
436
549 if (!n_preds) 437 if (!n_preds)
550 return 1; 438 return 1;
551 439
552 /* 440 /*
553 * n_preds, root and filter->preds are protect with preemption disabled. 441 * n_preds, root and filter->preds are protect with preemption disabled.
554 */ 442 */
443 preds = rcu_dereference_sched(filter->preds);
555 root = rcu_dereference_sched(filter->root); 444 root = rcu_dereference_sched(filter->root);
556 if (!root) 445 if (!root)
557 return 1; 446 return 1;
558 447
559 data.preds = preds = rcu_dereference_sched(filter->preds); 448 pred = root;
560 ret = walk_pred_tree(preds, root, filter_match_preds_cb, &data); 449
561 WARN_ON(ret); 450 /* match is currently meaningless */
562 return data.match; 451 match = -1;
452
453 do {
454 switch (move) {
455 case MOVE_DOWN:
456 /* only AND and OR have children */
457 if (pred->left != FILTER_PRED_INVALID) {
458 /* If ops is set, then it was folded. */
459 if (!pred->ops) {
460 /* keep going to down the left side */
461 pred = &preds[pred->left];
462 continue;
463 }
464 /* We can treat folded ops as a leaf node */
465 match = process_ops(preds, pred, rec);
466 } else
467 match = pred->fn(pred, rec);
468 /* If this pred is the only pred */
469 if (pred == root)
470 break;
471 pred = get_pred_parent(pred, preds,
472 pred->parent, &move);
473 continue;
474 case MOVE_UP_FROM_LEFT:
475 /*
476 * Check for short circuits.
477 *
478 * Optimization: !!match == (pred->op == OP_OR)
479 * is the same as:
480 * if ((match && pred->op == OP_OR) ||
481 * (!match && pred->op == OP_AND))
482 */
483 if (!!match == (pred->op == OP_OR)) {
484 if (pred == root)
485 break;
486 pred = get_pred_parent(pred, preds,
487 pred->parent, &move);
488 continue;
489 }
490 /* now go down the right side of the tree. */
491 pred = &preds[pred->right];
492 move = MOVE_DOWN;
493 continue;
494 case MOVE_UP_FROM_RIGHT:
495 /* We finished this equation. */
496 if (pred == root)
497 break;
498 pred = get_pred_parent(pred, preds,
499 pred->parent, &move);
500 continue;
501 }
502 done = 1;
503 } while (!done);
504
505 return match;
563} 506}
564EXPORT_SYMBOL_GPL(filter_match_preds); 507EXPORT_SYMBOL_GPL(filter_match_preds);
565 508
@@ -654,7 +597,7 @@ void print_subsystem_event_filter(struct event_subsystem *system,
654 if (filter && filter->filter_string) 597 if (filter && filter->filter_string)
655 trace_seq_printf(s, "%s\n", filter->filter_string); 598 trace_seq_printf(s, "%s\n", filter->filter_string);
656 else 599 else
657 trace_seq_printf(s, DEFAULT_SYS_FILTER_MESSAGE "\n"); 600 trace_seq_printf(s, "none\n");
658 mutex_unlock(&event_mutex); 601 mutex_unlock(&event_mutex);
659} 602}
660 603
@@ -685,9 +628,25 @@ find_event_field(struct ftrace_event_call *call, char *name)
685 return __find_event_field(head, name); 628 return __find_event_field(head, name);
686} 629}
687 630
631static void filter_free_pred(struct filter_pred *pred)
632{
633 if (!pred)
634 return;
635
636 kfree(pred->field_name);
637 kfree(pred);
638}
639
640static void filter_clear_pred(struct filter_pred *pred)
641{
642 kfree(pred->field_name);
643 pred->field_name = NULL;
644 pred->regex.len = 0;
645}
646
688static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) 647static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
689{ 648{
690 stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL); 649 stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
691 if (!stack->preds) 650 if (!stack->preds)
692 return -ENOMEM; 651 return -ENOMEM;
693 stack->index = n_preds; 652 stack->index = n_preds;
@@ -730,13 +689,20 @@ __pop_pred_stack(struct pred_stack *stack)
730static int filter_set_pred(struct event_filter *filter, 689static int filter_set_pred(struct event_filter *filter,
731 int idx, 690 int idx,
732 struct pred_stack *stack, 691 struct pred_stack *stack,
733 struct filter_pred *src) 692 struct filter_pred *src,
693 filter_pred_fn_t fn)
734{ 694{
735 struct filter_pred *dest = &filter->preds[idx]; 695 struct filter_pred *dest = &filter->preds[idx];
736 struct filter_pred *left; 696 struct filter_pred *left;
737 struct filter_pred *right; 697 struct filter_pred *right;
738 698
739 *dest = *src; 699 *dest = *src;
700 if (src->field_name) {
701 dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
702 if (!dest->field_name)
703 return -ENOMEM;
704 }
705 dest->fn = fn;
740 dest->index = idx; 706 dest->index = idx;
741 707
742 if (dest->op == OP_OR || dest->op == OP_AND) { 708 if (dest->op == OP_OR || dest->op == OP_AND) {
@@ -777,7 +743,11 @@ static int filter_set_pred(struct event_filter *filter,
777 743
778static void __free_preds(struct event_filter *filter) 744static void __free_preds(struct event_filter *filter)
779{ 745{
746 int i;
747
780 if (filter->preds) { 748 if (filter->preds) {
749 for (i = 0; i < filter->a_preds; i++)
750 kfree(filter->preds[i].field_name);
781 kfree(filter->preds); 751 kfree(filter->preds);
782 filter->preds = NULL; 752 filter->preds = NULL;
783 } 753 }
@@ -828,7 +798,8 @@ static int __alloc_preds(struct event_filter *filter, int n_preds)
828 if (filter->preds) 798 if (filter->preds)
829 __free_preds(filter); 799 __free_preds(filter);
830 800
831 filter->preds = kcalloc(n_preds, sizeof(*filter->preds), GFP_KERNEL); 801 filter->preds =
802 kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL);
832 803
833 if (!filter->preds) 804 if (!filter->preds)
834 return -ENOMEM; 805 return -ENOMEM;
@@ -869,19 +840,23 @@ static void filter_free_subsystem_filters(struct event_subsystem *system)
869 } 840 }
870} 841}
871 842
872static int filter_add_pred(struct filter_parse_state *ps, 843static int filter_add_pred_fn(struct filter_parse_state *ps,
873 struct event_filter *filter, 844 struct ftrace_event_call *call,
874 struct filter_pred *pred, 845 struct event_filter *filter,
875 struct pred_stack *stack) 846 struct filter_pred *pred,
847 struct pred_stack *stack,
848 filter_pred_fn_t fn)
876{ 849{
877 int err; 850 int idx, err;
878 851
879 if (WARN_ON(filter->n_preds == filter->a_preds)) { 852 if (WARN_ON(filter->n_preds == filter->a_preds)) {
880 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); 853 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
881 return -ENOSPC; 854 return -ENOSPC;
882 } 855 }
883 856
884 err = filter_set_pred(filter, filter->n_preds, stack, pred); 857 idx = filter->n_preds;
858 filter_clear_pred(&filter->preds[idx]);
859 err = filter_set_pred(filter, idx, stack, pred, fn);
885 if (err) 860 if (err)
886 return err; 861 return err;
887 862
@@ -901,11 +876,6 @@ int filter_assign_type(const char *type)
901 return FILTER_OTHER; 876 return FILTER_OTHER;
902} 877}
903 878
904static bool is_function_field(struct ftrace_event_field *field)
905{
906 return field->filter_type == FILTER_TRACE_FN;
907}
908
909static bool is_string_field(struct ftrace_event_field *field) 879static bool is_string_field(struct ftrace_event_field *field)
910{ 880{
911 return field->filter_type == FILTER_DYN_STRING || 881 return field->filter_type == FILTER_DYN_STRING ||
@@ -967,15 +937,31 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
967 return fn; 937 return fn;
968} 938}
969 939
970static int init_pred(struct filter_parse_state *ps, 940static int filter_add_pred(struct filter_parse_state *ps,
971 struct ftrace_event_field *field, 941 struct ftrace_event_call *call,
972 struct filter_pred *pred) 942 struct event_filter *filter,
973 943 struct filter_pred *pred,
944 struct pred_stack *stack,
945 bool dry_run)
974{ 946{
975 filter_pred_fn_t fn = filter_pred_none; 947 struct ftrace_event_field *field;
948 filter_pred_fn_t fn;
976 unsigned long long val; 949 unsigned long long val;
977 int ret; 950 int ret;
978 951
952 fn = pred->fn = filter_pred_none;
953
954 if (pred->op == OP_AND)
955 goto add_pred_fn;
956 else if (pred->op == OP_OR)
957 goto add_pred_fn;
958
959 field = find_event_field(call, pred->field_name);
960 if (!field) {
961 parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
962 return -EINVAL;
963 }
964
979 pred->offset = field->offset; 965 pred->offset = field->offset;
980 966
981 if (!is_legal_op(field, pred->op)) { 967 if (!is_legal_op(field, pred->op)) {
@@ -993,16 +979,11 @@ static int init_pred(struct filter_parse_state *ps,
993 fn = filter_pred_strloc; 979 fn = filter_pred_strloc;
994 else 980 else
995 fn = filter_pred_pchar; 981 fn = filter_pred_pchar;
996 } else if (is_function_field(field)) {
997 if (strcmp(field->name, "ip")) {
998 parse_error(ps, FILT_ERR_IP_FIELD_ONLY, 0);
999 return -EINVAL;
1000 }
1001 } else { 982 } else {
1002 if (field->is_signed) 983 if (field->is_signed)
1003 ret = kstrtoll(pred->regex.pattern, 0, &val); 984 ret = strict_strtoll(pred->regex.pattern, 0, &val);
1004 else 985 else
1005 ret = kstrtoull(pred->regex.pattern, 0, &val); 986 ret = strict_strtoull(pred->regex.pattern, 0, &val);
1006 if (ret) { 987 if (ret) {
1007 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); 988 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
1008 return -EINVAL; 989 return -EINVAL;
@@ -1020,7 +1001,9 @@ static int init_pred(struct filter_parse_state *ps,
1020 if (pred->op == OP_NE) 1001 if (pred->op == OP_NE)
1021 pred->not = 1; 1002 pred->not = 1;
1022 1003
1023 pred->fn = fn; 1004add_pred_fn:
1005 if (!dry_run)
1006 return filter_add_pred_fn(ps, call, filter, pred, stack, fn);
1024 return 0; 1007 return 0;
1025} 1008}
1026 1009
@@ -1319,34 +1302,39 @@ parse_operand:
1319 return 0; 1302 return 0;
1320} 1303}
1321 1304
1322static struct filter_pred *create_pred(struct filter_parse_state *ps, 1305static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
1323 struct ftrace_event_call *call,
1324 int op, char *operand1, char *operand2)
1325{ 1306{
1326 struct ftrace_event_field *field; 1307 struct filter_pred *pred;
1327 static struct filter_pred pred;
1328
1329 memset(&pred, 0, sizeof(pred));
1330 pred.op = op;
1331 1308
1332 if (op == OP_AND || op == OP_OR) 1309 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
1333 return &pred; 1310 if (!pred)
1311 return NULL;
1334 1312
1335 if (!operand1 || !operand2) { 1313 pred->field_name = kstrdup(operand1, GFP_KERNEL);
1336 parse_error(ps, FILT_ERR_MISSING_FIELD, 0); 1314 if (!pred->field_name) {
1315 kfree(pred);
1337 return NULL; 1316 return NULL;
1338 } 1317 }
1339 1318
1340 field = find_event_field(call, operand1); 1319 strcpy(pred->regex.pattern, operand2);
1341 if (!field) { 1320 pred->regex.len = strlen(pred->regex.pattern);
1342 parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0); 1321
1322 pred->op = op;
1323
1324 return pred;
1325}
1326
1327static struct filter_pred *create_logical_pred(int op)
1328{
1329 struct filter_pred *pred;
1330
1331 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
1332 if (!pred)
1343 return NULL; 1333 return NULL;
1344 }
1345 1334
1346 strcpy(pred.regex.pattern, operand2); 1335 pred->op = op;
1347 pred.regex.len = strlen(pred.regex.pattern); 1336
1348 pred.field = field; 1337 return pred;
1349 return init_pred(ps, field, &pred) ? NULL : &pred;
1350} 1338}
1351 1339
1352static int check_preds(struct filter_parse_state *ps) 1340static int check_preds(struct filter_parse_state *ps)
@@ -1387,23 +1375,6 @@ static int count_preds(struct filter_parse_state *ps)
1387 return n_preds; 1375 return n_preds;
1388} 1376}
1389 1377
1390struct check_pred_data {
1391 int count;
1392 int max;
1393};
1394
1395static int check_pred_tree_cb(enum move_type move, struct filter_pred *pred,
1396 int *err, void *data)
1397{
1398 struct check_pred_data *d = data;
1399
1400 if (WARN_ON(d->count++ > d->max)) {
1401 *err = -EINVAL;
1402 return WALK_PRED_ABORT;
1403 }
1404 return WALK_PRED_DEFAULT;
1405}
1406
1407/* 1378/*
1408 * The tree is walked at filtering of an event. If the tree is not correctly 1379 * The tree is walked at filtering of an event. If the tree is not correctly
1409 * built, it may cause an infinite loop. Check here that the tree does 1380 * built, it may cause an infinite loop. Check here that the tree does
@@ -1412,76 +1383,107 @@ static int check_pred_tree_cb(enum move_type move, struct filter_pred *pred,
1412static int check_pred_tree(struct event_filter *filter, 1383static int check_pred_tree(struct event_filter *filter,
1413 struct filter_pred *root) 1384 struct filter_pred *root)
1414{ 1385{
1415 struct check_pred_data data = { 1386 struct filter_pred *preds;
1416 /* 1387 struct filter_pred *pred;
1417 * The max that we can hit a node is three times. 1388 enum move_type move = MOVE_DOWN;
1418 * Once going down, once coming up from left, and 1389 int count = 0;
1419 * once coming up from right. This is more than enough 1390 int done = 0;
1420 * since leafs are only hit a single time. 1391 int max;
1421 */
1422 .max = 3 * filter->n_preds,
1423 .count = 0,
1424 };
1425
1426 return walk_pred_tree(filter->preds, root,
1427 check_pred_tree_cb, &data);
1428}
1429 1392
1430static int count_leafs_cb(enum move_type move, struct filter_pred *pred, 1393 /*
1431 int *err, void *data) 1394 * The max that we can hit a node is three times.
1432{ 1395 * Once going down, once coming up from left, and
1433 int *count = data; 1396 * once coming up from right. This is more than enough
1397 * since leafs are only hit a single time.
1398 */
1399 max = 3 * filter->n_preds;
1434 1400
1435 if ((move == MOVE_DOWN) && 1401 preds = filter->preds;
1436 (pred->left == FILTER_PRED_INVALID)) 1402 if (!preds)
1437 (*count)++; 1403 return -EINVAL;
1404 pred = root;
1438 1405
1439 return WALK_PRED_DEFAULT; 1406 do {
1440} 1407 if (WARN_ON(count++ > max))
1408 return -EINVAL;
1441 1409
1442static int count_leafs(struct filter_pred *preds, struct filter_pred *root) 1410 switch (move) {
1443{ 1411 case MOVE_DOWN:
1444 int count = 0, ret; 1412 if (pred->left != FILTER_PRED_INVALID) {
1413 pred = &preds[pred->left];
1414 continue;
1415 }
1416 /* A leaf at the root is just a leaf in the tree */
1417 if (pred == root)
1418 break;
1419 pred = get_pred_parent(pred, preds,
1420 pred->parent, &move);
1421 continue;
1422 case MOVE_UP_FROM_LEFT:
1423 pred = &preds[pred->right];
1424 move = MOVE_DOWN;
1425 continue;
1426 case MOVE_UP_FROM_RIGHT:
1427 if (pred == root)
1428 break;
1429 pred = get_pred_parent(pred, preds,
1430 pred->parent, &move);
1431 continue;
1432 }
1433 done = 1;
1434 } while (!done);
1445 1435
1446 ret = walk_pred_tree(preds, root, count_leafs_cb, &count); 1436 /* We are fine. */
1447 WARN_ON(ret); 1437 return 0;
1448 return count;
1449} 1438}
1450 1439
1451struct fold_pred_data { 1440static int count_leafs(struct filter_pred *preds, struct filter_pred *root)
1452 struct filter_pred *root;
1453 int count;
1454 int children;
1455};
1456
1457static int fold_pred_cb(enum move_type move, struct filter_pred *pred,
1458 int *err, void *data)
1459{ 1441{
1460 struct fold_pred_data *d = data; 1442 struct filter_pred *pred;
1461 struct filter_pred *root = d->root; 1443 enum move_type move = MOVE_DOWN;
1444 int count = 0;
1445 int done = 0;
1462 1446
1463 if (move != MOVE_DOWN) 1447 pred = root;
1464 return WALK_PRED_DEFAULT;
1465 if (pred->left != FILTER_PRED_INVALID)
1466 return WALK_PRED_DEFAULT;
1467 1448
1468 if (WARN_ON(d->count == d->children)) { 1449 do {
1469 *err = -EINVAL; 1450 switch (move) {
1470 return WALK_PRED_ABORT; 1451 case MOVE_DOWN:
1471 } 1452 if (pred->left != FILTER_PRED_INVALID) {
1453 pred = &preds[pred->left];
1454 continue;
1455 }
1456 /* A leaf at the root is just a leaf in the tree */
1457 if (pred == root)
1458 return 1;
1459 count++;
1460 pred = get_pred_parent(pred, preds,
1461 pred->parent, &move);
1462 continue;
1463 case MOVE_UP_FROM_LEFT:
1464 pred = &preds[pred->right];
1465 move = MOVE_DOWN;
1466 continue;
1467 case MOVE_UP_FROM_RIGHT:
1468 if (pred == root)
1469 break;
1470 pred = get_pred_parent(pred, preds,
1471 pred->parent, &move);
1472 continue;
1473 }
1474 done = 1;
1475 } while (!done);
1472 1476
1473 pred->index &= ~FILTER_PRED_FOLD; 1477 return count;
1474 root->ops[d->count++] = pred->index;
1475 return WALK_PRED_DEFAULT;
1476} 1478}
1477 1479
1478static int fold_pred(struct filter_pred *preds, struct filter_pred *root) 1480static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
1479{ 1481{
1480 struct fold_pred_data data = { 1482 struct filter_pred *pred;
1481 .root = root, 1483 enum move_type move = MOVE_DOWN;
1482 .count = 0, 1484 int count = 0;
1483 };
1484 int children; 1485 int children;
1486 int done = 0;
1485 1487
1486 /* No need to keep the fold flag */ 1488 /* No need to keep the fold flag */
1487 root->index &= ~FILTER_PRED_FOLD; 1489 root->index &= ~FILTER_PRED_FOLD;
@@ -1494,31 +1496,42 @@ static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
1494 children = count_leafs(preds, &preds[root->left]); 1496 children = count_leafs(preds, &preds[root->left]);
1495 children += count_leafs(preds, &preds[root->right]); 1497 children += count_leafs(preds, &preds[root->right]);
1496 1498
1497 root->ops = kcalloc(children, sizeof(*root->ops), GFP_KERNEL); 1499 root->ops = kzalloc(sizeof(*root->ops) * children, GFP_KERNEL);
1498 if (!root->ops) 1500 if (!root->ops)
1499 return -ENOMEM; 1501 return -ENOMEM;
1500 1502
1501 root->val = children; 1503 root->val = children;
1502 data.children = children;
1503 return walk_pred_tree(preds, root, fold_pred_cb, &data);
1504}
1505
1506static int fold_pred_tree_cb(enum move_type move, struct filter_pred *pred,
1507 int *err, void *data)
1508{
1509 struct filter_pred *preds = data;
1510 1504
1511 if (move != MOVE_DOWN) 1505 pred = root;
1512 return WALK_PRED_DEFAULT; 1506 do {
1513 if (!(pred->index & FILTER_PRED_FOLD)) 1507 switch (move) {
1514 return WALK_PRED_DEFAULT; 1508 case MOVE_DOWN:
1515 1509 if (pred->left != FILTER_PRED_INVALID) {
1516 *err = fold_pred(preds, pred); 1510 pred = &preds[pred->left];
1517 if (*err) 1511 continue;
1518 return WALK_PRED_ABORT; 1512 }
1513 if (WARN_ON(count == children))
1514 return -EINVAL;
1515 pred->index &= ~FILTER_PRED_FOLD;
1516 root->ops[count++] = pred->index;
1517 pred = get_pred_parent(pred, preds,
1518 pred->parent, &move);
1519 continue;
1520 case MOVE_UP_FROM_LEFT:
1521 pred = &preds[pred->right];
1522 move = MOVE_DOWN;
1523 continue;
1524 case MOVE_UP_FROM_RIGHT:
1525 if (pred == root)
1526 break;
1527 pred = get_pred_parent(pred, preds,
1528 pred->parent, &move);
1529 continue;
1530 }
1531 done = 1;
1532 } while (!done);
1519 1533
1520 /* eveyrhing below is folded, continue with parent */ 1534 return 0;
1521 return WALK_PRED_PARENT;
1522} 1535}
1523 1536
1524/* 1537/*
@@ -1529,8 +1542,51 @@ static int fold_pred_tree_cb(enum move_type move, struct filter_pred *pred,
1529static int fold_pred_tree(struct event_filter *filter, 1542static int fold_pred_tree(struct event_filter *filter,
1530 struct filter_pred *root) 1543 struct filter_pred *root)
1531{ 1544{
1532 return walk_pred_tree(filter->preds, root, fold_pred_tree_cb, 1545 struct filter_pred *preds;
1533 filter->preds); 1546 struct filter_pred *pred;
1547 enum move_type move = MOVE_DOWN;
1548 int done = 0;
1549 int err;
1550
1551 preds = filter->preds;
1552 if (!preds)
1553 return -EINVAL;
1554 pred = root;
1555
1556 do {
1557 switch (move) {
1558 case MOVE_DOWN:
1559 if (pred->index & FILTER_PRED_FOLD) {
1560 err = fold_pred(preds, pred);
1561 if (err)
1562 return err;
1563 /* Folded nodes are like leafs */
1564 } else if (pred->left != FILTER_PRED_INVALID) {
1565 pred = &preds[pred->left];
1566 continue;
1567 }
1568
1569 /* A leaf at the root is just a leaf in the tree */
1570 if (pred == root)
1571 break;
1572 pred = get_pred_parent(pred, preds,
1573 pred->parent, &move);
1574 continue;
1575 case MOVE_UP_FROM_LEFT:
1576 pred = &preds[pred->right];
1577 move = MOVE_DOWN;
1578 continue;
1579 case MOVE_UP_FROM_RIGHT:
1580 if (pred == root)
1581 break;
1582 pred = get_pred_parent(pred, preds,
1583 pred->parent, &move);
1584 continue;
1585 }
1586 done = 1;
1587 } while (!done);
1588
1589 return 0;
1534} 1590}
1535 1591
1536static int replace_preds(struct ftrace_event_call *call, 1592static int replace_preds(struct ftrace_event_call *call,
@@ -1587,17 +1643,27 @@ static int replace_preds(struct ftrace_event_call *call,
1587 goto fail; 1643 goto fail;
1588 } 1644 }
1589 1645
1590 pred = create_pred(ps, call, elt->op, operand1, operand2); 1646 if (elt->op == OP_AND || elt->op == OP_OR) {
1591 if (!pred) { 1647 pred = create_logical_pred(elt->op);
1648 goto add_pred;
1649 }
1650
1651 if (!operand1 || !operand2) {
1652 parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
1592 err = -EINVAL; 1653 err = -EINVAL;
1593 goto fail; 1654 goto fail;
1594 } 1655 }
1595 1656
1596 if (!dry_run) { 1657 pred = create_pred(elt->op, operand1, operand2);
1597 err = filter_add_pred(ps, filter, pred, &stack); 1658add_pred:
1598 if (err) 1659 if (!pred) {
1599 goto fail; 1660 err = -ENOMEM;
1661 goto fail;
1600 } 1662 }
1663 err = filter_add_pred(ps, call, filter, pred, &stack, dry_run);
1664 filter_free_pred(pred);
1665 if (err)
1666 goto fail;
1601 1667
1602 operand1 = operand2 = NULL; 1668 operand1 = operand2 = NULL;
1603 } 1669 }
@@ -1663,9 +1729,7 @@ static int replace_system_preds(struct event_subsystem *system,
1663 */ 1729 */
1664 err = replace_preds(call, NULL, ps, filter_string, true); 1730 err = replace_preds(call, NULL, ps, filter_string, true);
1665 if (err) 1731 if (err)
1666 call->flags |= TRACE_EVENT_FL_NO_SET_FILTER; 1732 goto fail;
1667 else
1668 call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER;
1669 } 1733 }
1670 1734
1671 list_for_each_entry(call, &ftrace_events, list) { 1735 list_for_each_entry(call, &ftrace_events, list) {
@@ -1674,9 +1738,6 @@ static int replace_system_preds(struct event_subsystem *system,
1674 if (strcmp(call->class->system, system->name) != 0) 1738 if (strcmp(call->class->system, system->name) != 0)
1675 continue; 1739 continue;
1676 1740
1677 if (call->flags & TRACE_EVENT_FL_NO_SET_FILTER)
1678 continue;
1679
1680 filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL); 1741 filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
1681 if (!filter_item) 1742 if (!filter_item)
1682 goto fail_mem; 1743 goto fail_mem;
@@ -1746,121 +1807,11 @@ static int replace_system_preds(struct event_subsystem *system,
1746 return -ENOMEM; 1807 return -ENOMEM;
1747} 1808}
1748 1809
1749static int create_filter_start(char *filter_str, bool set_str,
1750 struct filter_parse_state **psp,
1751 struct event_filter **filterp)
1752{
1753 struct event_filter *filter;
1754 struct filter_parse_state *ps = NULL;
1755 int err = 0;
1756
1757 WARN_ON_ONCE(*psp || *filterp);
1758
1759 /* allocate everything, and if any fails, free all and fail */
1760 filter = __alloc_filter();
1761 if (filter && set_str)
1762 err = replace_filter_string(filter, filter_str);
1763
1764 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1765
1766 if (!filter || !ps || err) {
1767 kfree(ps);
1768 __free_filter(filter);
1769 return -ENOMEM;
1770 }
1771
1772 /* we're committed to creating a new filter */
1773 *filterp = filter;
1774 *psp = ps;
1775
1776 parse_init(ps, filter_ops, filter_str);
1777 err = filter_parse(ps);
1778 if (err && set_str)
1779 append_filter_err(ps, filter);
1780 return err;
1781}
1782
1783static void create_filter_finish(struct filter_parse_state *ps)
1784{
1785 if (ps) {
1786 filter_opstack_clear(ps);
1787 postfix_clear(ps);
1788 kfree(ps);
1789 }
1790}
1791
1792/**
1793 * create_filter - create a filter for a ftrace_event_call
1794 * @call: ftrace_event_call to create a filter for
1795 * @filter_str: filter string
1796 * @set_str: remember @filter_str and enable detailed error in filter
1797 * @filterp: out param for created filter (always updated on return)
1798 *
1799 * Creates a filter for @call with @filter_str. If @set_str is %true,
1800 * @filter_str is copied and recorded in the new filter.
1801 *
1802 * On success, returns 0 and *@filterp points to the new filter. On
1803 * failure, returns -errno and *@filterp may point to %NULL or to a new
1804 * filter. In the latter case, the returned filter contains error
1805 * information if @set_str is %true and the caller is responsible for
1806 * freeing it.
1807 */
1808static int create_filter(struct ftrace_event_call *call,
1809 char *filter_str, bool set_str,
1810 struct event_filter **filterp)
1811{
1812 struct event_filter *filter = NULL;
1813 struct filter_parse_state *ps = NULL;
1814 int err;
1815
1816 err = create_filter_start(filter_str, set_str, &ps, &filter);
1817 if (!err) {
1818 err = replace_preds(call, filter, ps, filter_str, false);
1819 if (err && set_str)
1820 append_filter_err(ps, filter);
1821 }
1822 create_filter_finish(ps);
1823
1824 *filterp = filter;
1825 return err;
1826}
1827
1828/**
1829 * create_system_filter - create a filter for an event_subsystem
1830 * @system: event_subsystem to create a filter for
1831 * @filter_str: filter string
1832 * @filterp: out param for created filter (always updated on return)
1833 *
1834 * Identical to create_filter() except that it creates a subsystem filter
1835 * and always remembers @filter_str.
1836 */
1837static int create_system_filter(struct event_subsystem *system,
1838 char *filter_str, struct event_filter **filterp)
1839{
1840 struct event_filter *filter = NULL;
1841 struct filter_parse_state *ps = NULL;
1842 int err;
1843
1844 err = create_filter_start(filter_str, true, &ps, &filter);
1845 if (!err) {
1846 err = replace_system_preds(system, ps, filter_str);
1847 if (!err) {
1848 /* System filters just show a default message */
1849 kfree(filter->filter_string);
1850 filter->filter_string = NULL;
1851 } else {
1852 append_filter_err(ps, filter);
1853 }
1854 }
1855 create_filter_finish(ps);
1856
1857 *filterp = filter;
1858 return err;
1859}
1860
1861int apply_event_filter(struct ftrace_event_call *call, char *filter_string) 1810int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1862{ 1811{
1812 struct filter_parse_state *ps;
1863 struct event_filter *filter; 1813 struct event_filter *filter;
1814 struct event_filter *tmp;
1864 int err = 0; 1815 int err = 0;
1865 1816
1866 mutex_lock(&event_mutex); 1817 mutex_lock(&event_mutex);
@@ -1877,30 +1828,49 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1877 goto out_unlock; 1828 goto out_unlock;
1878 } 1829 }
1879 1830
1880 err = create_filter(call, filter_string, true, &filter); 1831 err = -ENOMEM;
1832 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1833 if (!ps)
1834 goto out_unlock;
1835
1836 filter = __alloc_filter();
1837 if (!filter) {
1838 kfree(ps);
1839 goto out_unlock;
1840 }
1841
1842 replace_filter_string(filter, filter_string);
1881 1843
1844 parse_init(ps, filter_ops, filter_string);
1845 err = filter_parse(ps);
1846 if (err) {
1847 append_filter_err(ps, filter);
1848 goto out;
1849 }
1850
1851 err = replace_preds(call, filter, ps, filter_string, false);
1852 if (err) {
1853 filter_disable(call);
1854 append_filter_err(ps, filter);
1855 } else
1856 call->flags |= TRACE_EVENT_FL_FILTERED;
1857out:
1882 /* 1858 /*
1883 * Always swap the call filter with the new filter 1859 * Always swap the call filter with the new filter
1884 * even if there was an error. If there was an error 1860 * even if there was an error. If there was an error
1885 * in the filter, we disable the filter and show the error 1861 * in the filter, we disable the filter and show the error
1886 * string 1862 * string
1887 */ 1863 */
1888 if (filter) { 1864 tmp = call->filter;
1889 struct event_filter *tmp = call->filter; 1865 rcu_assign_pointer(call->filter, filter);
1890 1866 if (tmp) {
1891 if (!err) 1867 /* Make sure the call is done with the filter */
1892 call->flags |= TRACE_EVENT_FL_FILTERED; 1868 synchronize_sched();
1893 else 1869 __free_filter(tmp);
1894 filter_disable(call);
1895
1896 rcu_assign_pointer(call->filter, filter);
1897
1898 if (tmp) {
1899 /* Make sure the call is done with the filter */
1900 synchronize_sched();
1901 __free_filter(tmp);
1902 }
1903 } 1870 }
1871 filter_opstack_clear(ps);
1872 postfix_clear(ps);
1873 kfree(ps);
1904out_unlock: 1874out_unlock:
1905 mutex_unlock(&event_mutex); 1875 mutex_unlock(&event_mutex);
1906 1876
@@ -1910,6 +1880,7 @@ out_unlock:
1910int apply_subsystem_event_filter(struct event_subsystem *system, 1880int apply_subsystem_event_filter(struct event_subsystem *system,
1911 char *filter_string) 1881 char *filter_string)
1912{ 1882{
1883 struct filter_parse_state *ps;
1913 struct event_filter *filter; 1884 struct event_filter *filter;
1914 int err = 0; 1885 int err = 0;
1915 1886
@@ -1933,15 +1904,38 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1933 goto out_unlock; 1904 goto out_unlock;
1934 } 1905 }
1935 1906
1936 err = create_system_filter(system, filter_string, &filter); 1907 err = -ENOMEM;
1937 if (filter) { 1908 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1938 /* 1909 if (!ps)
1939 * No event actually uses the system filter 1910 goto out_unlock;
1940 * we can free it without synchronize_sched(). 1911
1941 */ 1912 filter = __alloc_filter();
1942 __free_filter(system->filter); 1913 if (!filter)
1943 system->filter = filter; 1914 goto out;
1915
1916 replace_filter_string(filter, filter_string);
1917 /*
1918 * No event actually uses the system filter
1919 * we can free it without synchronize_sched().
1920 */
1921 __free_filter(system->filter);
1922 system->filter = filter;
1923
1924 parse_init(ps, filter_ops, filter_string);
1925 err = filter_parse(ps);
1926 if (err) {
1927 append_filter_err(ps, system->filter);
1928 goto out;
1944 } 1929 }
1930
1931 err = replace_system_preds(system, ps, filter_string);
1932 if (err)
1933 append_filter_err(ps, system->filter);
1934
1935out:
1936 filter_opstack_clear(ps);
1937 postfix_clear(ps);
1938 kfree(ps);
1945out_unlock: 1939out_unlock:
1946 mutex_unlock(&event_mutex); 1940 mutex_unlock(&event_mutex);
1947 1941
@@ -1958,178 +1952,56 @@ void ftrace_profile_free_filter(struct perf_event *event)
1958 __free_filter(filter); 1952 __free_filter(filter);
1959} 1953}
1960 1954
1961struct function_filter_data {
1962 struct ftrace_ops *ops;
1963 int first_filter;
1964 int first_notrace;
1965};
1966
1967#ifdef CONFIG_FUNCTION_TRACER
1968static char **
1969ftrace_function_filter_re(char *buf, int len, int *count)
1970{
1971 char *str, *sep, **re;
1972
1973 str = kstrndup(buf, len, GFP_KERNEL);
1974 if (!str)
1975 return NULL;
1976
1977 /*
1978 * The argv_split function takes white space
1979 * as a separator, so convert ',' into spaces.
1980 */
1981 while ((sep = strchr(str, ',')))
1982 *sep = ' ';
1983
1984 re = argv_split(GFP_KERNEL, str, count);
1985 kfree(str);
1986 return re;
1987}
1988
1989static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter,
1990 int reset, char *re, int len)
1991{
1992 int ret;
1993
1994 if (filter)
1995 ret = ftrace_set_filter(ops, re, len, reset);
1996 else
1997 ret = ftrace_set_notrace(ops, re, len, reset);
1998
1999 return ret;
2000}
2001
2002static int __ftrace_function_set_filter(int filter, char *buf, int len,
2003 struct function_filter_data *data)
2004{
2005 int i, re_cnt, ret = -EINVAL;
2006 int *reset;
2007 char **re;
2008
2009 reset = filter ? &data->first_filter : &data->first_notrace;
2010
2011 /*
2012 * The 'ip' field could have multiple filters set, separated
2013 * either by space or comma. We first cut the filter and apply
2014 * all pieces separatelly.
2015 */
2016 re = ftrace_function_filter_re(buf, len, &re_cnt);
2017 if (!re)
2018 return -EINVAL;
2019
2020 for (i = 0; i < re_cnt; i++) {
2021 ret = ftrace_function_set_regexp(data->ops, filter, *reset,
2022 re[i], strlen(re[i]));
2023 if (ret)
2024 break;
2025
2026 if (*reset)
2027 *reset = 0;
2028 }
2029
2030 argv_free(re);
2031 return ret;
2032}
2033
2034static int ftrace_function_check_pred(struct filter_pred *pred, int leaf)
2035{
2036 struct ftrace_event_field *field = pred->field;
2037
2038 if (leaf) {
2039 /*
2040 * Check the leaf predicate for function trace, verify:
2041 * - only '==' and '!=' is used
2042 * - the 'ip' field is used
2043 */
2044 if ((pred->op != OP_EQ) && (pred->op != OP_NE))
2045 return -EINVAL;
2046
2047 if (strcmp(field->name, "ip"))
2048 return -EINVAL;
2049 } else {
2050 /*
2051 * Check the non leaf predicate for function trace, verify:
2052 * - only '||' is used
2053 */
2054 if (pred->op != OP_OR)
2055 return -EINVAL;
2056 }
2057
2058 return 0;
2059}
2060
2061static int ftrace_function_set_filter_cb(enum move_type move,
2062 struct filter_pred *pred,
2063 int *err, void *data)
2064{
2065 /* Checking the node is valid for function trace. */
2066 if ((move != MOVE_DOWN) ||
2067 (pred->left != FILTER_PRED_INVALID)) {
2068 *err = ftrace_function_check_pred(pred, 0);
2069 } else {
2070 *err = ftrace_function_check_pred(pred, 1);
2071 if (*err)
2072 return WALK_PRED_ABORT;
2073
2074 *err = __ftrace_function_set_filter(pred->op == OP_EQ,
2075 pred->regex.pattern,
2076 pred->regex.len,
2077 data);
2078 }
2079
2080 return (*err) ? WALK_PRED_ABORT : WALK_PRED_DEFAULT;
2081}
2082
2083static int ftrace_function_set_filter(struct perf_event *event,
2084 struct event_filter *filter)
2085{
2086 struct function_filter_data data = {
2087 .first_filter = 1,
2088 .first_notrace = 1,
2089 .ops = &event->ftrace_ops,
2090 };
2091
2092 return walk_pred_tree(filter->preds, filter->root,
2093 ftrace_function_set_filter_cb, &data);
2094}
2095#else
2096static int ftrace_function_set_filter(struct perf_event *event,
2097 struct event_filter *filter)
2098{
2099 return -ENODEV;
2100}
2101#endif /* CONFIG_FUNCTION_TRACER */
2102
2103int ftrace_profile_set_filter(struct perf_event *event, int event_id, 1955int ftrace_profile_set_filter(struct perf_event *event, int event_id,
2104 char *filter_str) 1956 char *filter_str)
2105{ 1957{
2106 int err; 1958 int err;
2107 struct event_filter *filter; 1959 struct event_filter *filter;
2108 struct ftrace_event_call *call; 1960 struct filter_parse_state *ps;
1961 struct ftrace_event_call *call = NULL;
2109 1962
2110 mutex_lock(&event_mutex); 1963 mutex_lock(&event_mutex);
2111 1964
2112 call = event->tp_event; 1965 list_for_each_entry(call, &ftrace_events, list) {
1966 if (call->event.type == event_id)
1967 break;
1968 }
2113 1969
2114 err = -EINVAL; 1970 err = -EINVAL;
2115 if (!call) 1971 if (&call->list == &ftrace_events)
2116 goto out_unlock; 1972 goto out_unlock;
2117 1973
2118 err = -EEXIST; 1974 err = -EEXIST;
2119 if (event->filter) 1975 if (event->filter)
2120 goto out_unlock; 1976 goto out_unlock;
2121 1977
2122 err = create_filter(call, filter_str, false, &filter); 1978 filter = __alloc_filter();
2123 if (err) 1979 if (!filter) {
1980 err = PTR_ERR(filter);
1981 goto out_unlock;
1982 }
1983
1984 err = -ENOMEM;
1985 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1986 if (!ps)
2124 goto free_filter; 1987 goto free_filter;
2125 1988
2126 if (ftrace_event_is_function(call)) 1989 parse_init(ps, filter_ops, filter_str);
2127 err = ftrace_function_set_filter(event, filter); 1990 err = filter_parse(ps);
2128 else 1991 if (err)
1992 goto free_ps;
1993
1994 err = replace_preds(call, filter, ps, filter_str, false);
1995 if (!err)
2129 event->filter = filter; 1996 event->filter = filter;
2130 1997
1998free_ps:
1999 filter_opstack_clear(ps);
2000 postfix_clear(ps);
2001 kfree(ps);
2002
2131free_filter: 2003free_filter:
2132 if (err || ftrace_event_is_function(call)) 2004 if (err)
2133 __free_filter(filter); 2005 __free_filter(filter);
2134 2006
2135out_unlock: 2007out_unlock:
@@ -2140,179 +2012,3 @@ out_unlock:
2140 2012
2141#endif /* CONFIG_PERF_EVENTS */ 2013#endif /* CONFIG_PERF_EVENTS */
2142 2014
2143#ifdef CONFIG_FTRACE_STARTUP_TEST
2144
2145#include <linux/types.h>
2146#include <linux/tracepoint.h>
2147
2148#define CREATE_TRACE_POINTS
2149#include "trace_events_filter_test.h"
2150
2151#define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \
2152{ \
2153 .filter = FILTER, \
2154 .rec = { .a = va, .b = vb, .c = vc, .d = vd, \
2155 .e = ve, .f = vf, .g = vg, .h = vh }, \
2156 .match = m, \
2157 .not_visited = nvisit, \
2158}
2159#define YES 1
2160#define NO 0
2161
2162static struct test_filter_data_t {
2163 char *filter;
2164 struct ftrace_raw_ftrace_test_filter rec;
2165 int match;
2166 char *not_visited;
2167} test_filter_data[] = {
2168#define FILTER "a == 1 && b == 1 && c == 1 && d == 1 && " \
2169 "e == 1 && f == 1 && g == 1 && h == 1"
2170 DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, ""),
2171 DATA_REC(NO, 0, 1, 1, 1, 1, 1, 1, 1, "bcdefgh"),
2172 DATA_REC(NO, 1, 1, 1, 1, 1, 1, 1, 0, ""),
2173#undef FILTER
2174#define FILTER "a == 1 || b == 1 || c == 1 || d == 1 || " \
2175 "e == 1 || f == 1 || g == 1 || h == 1"
2176 DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 0, ""),
2177 DATA_REC(YES, 0, 0, 0, 0, 0, 0, 0, 1, ""),
2178 DATA_REC(YES, 1, 0, 0, 0, 0, 0, 0, 0, "bcdefgh"),
2179#undef FILTER
2180#define FILTER "(a == 1 || b == 1) && (c == 1 || d == 1) && " \
2181 "(e == 1 || f == 1) && (g == 1 || h == 1)"
2182 DATA_REC(NO, 0, 0, 1, 1, 1, 1, 1, 1, "dfh"),
2183 DATA_REC(YES, 0, 1, 0, 1, 0, 1, 0, 1, ""),
2184 DATA_REC(YES, 1, 0, 1, 0, 0, 1, 0, 1, "bd"),
2185 DATA_REC(NO, 1, 0, 1, 0, 0, 1, 0, 0, "bd"),
2186#undef FILTER
2187#define FILTER "(a == 1 && b == 1) || (c == 1 && d == 1) || " \
2188 "(e == 1 && f == 1) || (g == 1 && h == 1)"
2189 DATA_REC(YES, 1, 0, 1, 1, 1, 1, 1, 1, "efgh"),
2190 DATA_REC(YES, 0, 0, 0, 0, 0, 0, 1, 1, ""),
2191 DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 1, ""),
2192#undef FILTER
2193#define FILTER "(a == 1 && b == 1) && (c == 1 && d == 1) && " \
2194 "(e == 1 && f == 1) || (g == 1 && h == 1)"
2195 DATA_REC(YES, 1, 1, 1, 1, 1, 1, 0, 0, "gh"),
2196 DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 1, ""),
2197 DATA_REC(YES, 1, 1, 1, 1, 1, 0, 1, 1, ""),
2198#undef FILTER
2199#define FILTER "((a == 1 || b == 1) || (c == 1 || d == 1) || " \
2200 "(e == 1 || f == 1)) && (g == 1 || h == 1)"
2201 DATA_REC(YES, 1, 1, 1, 1, 1, 1, 0, 1, "bcdef"),
2202 DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 0, ""),
2203 DATA_REC(YES, 1, 1, 1, 1, 1, 0, 1, 1, "h"),
2204#undef FILTER
2205#define FILTER "((((((((a == 1) && (b == 1)) || (c == 1)) && (d == 1)) || " \
2206 "(e == 1)) && (f == 1)) || (g == 1)) && (h == 1))"
2207 DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, "ceg"),
2208 DATA_REC(NO, 0, 1, 0, 1, 0, 1, 0, 1, ""),
2209 DATA_REC(NO, 1, 0, 1, 0, 1, 0, 1, 0, ""),
2210#undef FILTER
2211#define FILTER "((((((((a == 1) || (b == 1)) && (c == 1)) || (d == 1)) && " \
2212 "(e == 1)) || (f == 1)) && (g == 1)) || (h == 1))"
2213 DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, "bdfh"),
2214 DATA_REC(YES, 0, 1, 0, 1, 0, 1, 0, 1, ""),
2215 DATA_REC(YES, 1, 0, 1, 0, 1, 0, 1, 0, "bdfh"),
2216};
2217
2218#undef DATA_REC
2219#undef FILTER
2220#undef YES
2221#undef NO
2222
2223#define DATA_CNT (sizeof(test_filter_data)/sizeof(struct test_filter_data_t))
2224
2225static int test_pred_visited;
2226
2227static int test_pred_visited_fn(struct filter_pred *pred, void *event)
2228{
2229 struct ftrace_event_field *field = pred->field;
2230
2231 test_pred_visited = 1;
2232 printk(KERN_INFO "\npred visited %s\n", field->name);
2233 return 1;
2234}
2235
2236static int test_walk_pred_cb(enum move_type move, struct filter_pred *pred,
2237 int *err, void *data)
2238{
2239 char *fields = data;
2240
2241 if ((move == MOVE_DOWN) &&
2242 (pred->left == FILTER_PRED_INVALID)) {
2243 struct ftrace_event_field *field = pred->field;
2244
2245 if (!field) {
2246 WARN(1, "all leafs should have field defined");
2247 return WALK_PRED_DEFAULT;
2248 }
2249 if (!strchr(fields, *field->name))
2250 return WALK_PRED_DEFAULT;
2251
2252 WARN_ON(!pred->fn);
2253 pred->fn = test_pred_visited_fn;
2254 }
2255 return WALK_PRED_DEFAULT;
2256}
2257
2258static __init int ftrace_test_event_filter(void)
2259{
2260 int i;
2261
2262 printk(KERN_INFO "Testing ftrace filter: ");
2263
2264 for (i = 0; i < DATA_CNT; i++) {
2265 struct event_filter *filter = NULL;
2266 struct test_filter_data_t *d = &test_filter_data[i];
2267 int err;
2268
2269 err = create_filter(&event_ftrace_test_filter, d->filter,
2270 false, &filter);
2271 if (err) {
2272 printk(KERN_INFO
2273 "Failed to get filter for '%s', err %d\n",
2274 d->filter, err);
2275 __free_filter(filter);
2276 break;
2277 }
2278
2279 /*
2280 * The preemption disabling is not really needed for self
2281 * tests, but the rcu dereference will complain without it.
2282 */
2283 preempt_disable();
2284 if (*d->not_visited)
2285 walk_pred_tree(filter->preds, filter->root,
2286 test_walk_pred_cb,
2287 d->not_visited);
2288
2289 test_pred_visited = 0;
2290 err = filter_match_preds(filter, &d->rec);
2291 preempt_enable();
2292
2293 __free_filter(filter);
2294
2295 if (test_pred_visited) {
2296 printk(KERN_INFO
2297 "Failed, unwanted pred visited for filter %s\n",
2298 d->filter);
2299 break;
2300 }
2301
2302 if (err != d->match) {
2303 printk(KERN_INFO
2304 "Failed to match filter '%s', expected %d\n",
2305 d->filter, d->match);
2306 break;
2307 }
2308 }
2309
2310 if (i == DATA_CNT)
2311 printk(KERN_CONT "OK\n");
2312
2313 return 0;
2314}
2315
2316late_initcall(ftrace_test_event_filter);
2317
2318#endif /* CONFIG_FTRACE_STARTUP_TEST */
diff --git a/kernel/trace/trace_events_filter_test.h b/kernel/trace/trace_events_filter_test.h
deleted file mode 100644
index bfd4dba0d60..00000000000
--- a/kernel/trace/trace_events_filter_test.h
+++ /dev/null
@@ -1,50 +0,0 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM test
3
4#if !defined(_TRACE_TEST_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_TEST_H
6
7#include <linux/tracepoint.h>
8
9TRACE_EVENT(ftrace_test_filter,
10
11 TP_PROTO(int a, int b, int c, int d, int e, int f, int g, int h),
12
13 TP_ARGS(a, b, c, d, e, f, g, h),
14
15 TP_STRUCT__entry(
16 __field(int, a)
17 __field(int, b)
18 __field(int, c)
19 __field(int, d)
20 __field(int, e)
21 __field(int, f)
22 __field(int, g)
23 __field(int, h)
24 ),
25
26 TP_fast_assign(
27 __entry->a = a;
28 __entry->b = b;
29 __entry->c = c;
30 __entry->d = d;
31 __entry->e = e;
32 __entry->f = f;
33 __entry->g = g;
34 __entry->h = h;
35 ),
36
37 TP_printk("a %d, b %d, c %d, d %d, e %d, f %d, g %d, h %d",
38 __entry->a, __entry->b, __entry->c, __entry->d,
39 __entry->e, __entry->f, __entry->g, __entry->h)
40);
41
42#endif /* _TRACE_TEST_H || TRACE_HEADER_MULTI_READ */
43
44#undef TRACE_INCLUDE_PATH
45#undef TRACE_INCLUDE_FILE
46#define TRACE_INCLUDE_PATH .
47#define TRACE_INCLUDE_FILE trace_events_filter_test
48
49/* This part must be outside protection */
50#include <trace/define_trace.h>
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index e039906b037..bbeec31e0ae 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -18,16 +18,6 @@
18#undef TRACE_SYSTEM 18#undef TRACE_SYSTEM
19#define TRACE_SYSTEM ftrace 19#define TRACE_SYSTEM ftrace
20 20
21/*
22 * The FTRACE_ENTRY_REG macro allows ftrace entry to define register
23 * function and thus become accesible via perf.
24 */
25#undef FTRACE_ENTRY_REG
26#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \
27 filter, regfn) \
28 FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
29 filter)
30
31/* not needed for this file */ 21/* not needed for this file */
32#undef __field_struct 22#undef __field_struct
33#define __field_struct(type, item) 23#define __field_struct(type, item)
@@ -54,22 +44,21 @@
54#define F_printk(fmt, args...) fmt, args 44#define F_printk(fmt, args...) fmt, args
55 45
56#undef FTRACE_ENTRY 46#undef FTRACE_ENTRY
57#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ 47#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
58struct ____ftrace_##name { \ 48struct ____ftrace_##name { \
59 tstruct \ 49 tstruct \
60}; \ 50}; \
61static void __always_unused ____ftrace_check_##name(void) \ 51static void __always_unused ____ftrace_check_##name(void) \
62{ \ 52{ \
63 struct ____ftrace_##name *__entry = NULL; \ 53 struct ____ftrace_##name *__entry = NULL; \
64 \ 54 \
65 /* force compile-time check on F_printk() */ \ 55 /* force compile-time check on F_printk() */ \
66 printk(print); \ 56 printk(print); \
67} 57}
68 58
69#undef FTRACE_ENTRY_DUP 59#undef FTRACE_ENTRY_DUP
70#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print, filter) \ 60#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \
71 FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ 61 FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
72 filter)
73 62
74#include "trace_entries.h" 63#include "trace_entries.h"
75 64
@@ -78,7 +67,7 @@ static void __always_unused ____ftrace_check_##name(void) \
78 ret = trace_define_field(event_call, #type, #item, \ 67 ret = trace_define_field(event_call, #type, #item, \
79 offsetof(typeof(field), item), \ 68 offsetof(typeof(field), item), \
80 sizeof(field.item), \ 69 sizeof(field.item), \
81 is_signed_type(type), filter_type); \ 70 is_signed_type(type), FILTER_OTHER); \
82 if (ret) \ 71 if (ret) \
83 return ret; 72 return ret;
84 73
@@ -88,7 +77,7 @@ static void __always_unused ____ftrace_check_##name(void) \
88 offsetof(typeof(field), \ 77 offsetof(typeof(field), \
89 container.item), \ 78 container.item), \
90 sizeof(field.container.item), \ 79 sizeof(field.container.item), \
91 is_signed_type(type), filter_type); \ 80 is_signed_type(type), FILTER_OTHER); \
92 if (ret) \ 81 if (ret) \
93 return ret; 82 return ret;
94 83
@@ -102,7 +91,7 @@ static void __always_unused ____ftrace_check_##name(void) \
102 ret = trace_define_field(event_call, event_storage, #item, \ 91 ret = trace_define_field(event_call, event_storage, #item, \
103 offsetof(typeof(field), item), \ 92 offsetof(typeof(field), item), \
104 sizeof(field.item), \ 93 sizeof(field.item), \
105 is_signed_type(type), filter_type); \ 94 is_signed_type(type), FILTER_OTHER); \
106 mutex_unlock(&event_storage_mutex); \ 95 mutex_unlock(&event_storage_mutex); \
107 if (ret) \ 96 if (ret) \
108 return ret; \ 97 return ret; \
@@ -115,7 +104,7 @@ static void __always_unused ____ftrace_check_##name(void) \
115 offsetof(typeof(field), \ 104 offsetof(typeof(field), \
116 container.item), \ 105 container.item), \
117 sizeof(field.container.item), \ 106 sizeof(field.container.item), \
118 is_signed_type(type), filter_type); \ 107 is_signed_type(type), FILTER_OTHER); \
119 if (ret) \ 108 if (ret) \
120 return ret; 109 return ret;
121 110
@@ -123,18 +112,17 @@ static void __always_unused ____ftrace_check_##name(void) \
123#define __dynamic_array(type, item) \ 112#define __dynamic_array(type, item) \
124 ret = trace_define_field(event_call, #type, #item, \ 113 ret = trace_define_field(event_call, #type, #item, \
125 offsetof(typeof(field), item), \ 114 offsetof(typeof(field), item), \
126 0, is_signed_type(type), filter_type);\ 115 0, is_signed_type(type), FILTER_OTHER);\
127 if (ret) \ 116 if (ret) \
128 return ret; 117 return ret;
129 118
130#undef FTRACE_ENTRY 119#undef FTRACE_ENTRY
131#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ 120#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
132int \ 121int \
133ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ 122ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
134{ \ 123{ \
135 struct struct_name field; \ 124 struct struct_name field; \
136 int ret; \ 125 int ret; \
137 int filter_type = filter; \
138 \ 126 \
139 tstruct; \ 127 tstruct; \
140 \ 128 \
@@ -162,17 +150,15 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
162#define __dynamic_array(type, item) 150#define __dynamic_array(type, item)
163 151
164#undef F_printk 152#undef F_printk
165#define F_printk(fmt, args...) __stringify(fmt) ", " __stringify(args) 153#define F_printk(fmt, args...) #fmt ", " __stringify(args)
166 154
167#undef FTRACE_ENTRY_REG 155#undef FTRACE_ENTRY
168#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ 156#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \
169 regfn) \
170 \ 157 \
171struct ftrace_event_class event_class_ftrace_##call = { \ 158struct ftrace_event_class event_class_ftrace_##call = { \
172 .system = __stringify(TRACE_SYSTEM), \ 159 .system = __stringify(TRACE_SYSTEM), \
173 .define_fields = ftrace_define_fields_##call, \ 160 .define_fields = ftrace_define_fields_##call, \
174 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ 161 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
175 .reg = regfn, \
176}; \ 162}; \
177 \ 163 \
178struct ftrace_event_call __used event_##call = { \ 164struct ftrace_event_call __used event_##call = { \
@@ -180,19 +166,8 @@ struct ftrace_event_call __used event_##call = { \
180 .event.type = etype, \ 166 .event.type = etype, \
181 .class = &event_class_ftrace_##call, \ 167 .class = &event_class_ftrace_##call, \
182 .print_fmt = print, \ 168 .print_fmt = print, \
183 .flags = TRACE_EVENT_FL_IGNORE_ENABLE, \
184}; \ 169}; \
185struct ftrace_event_call __used \ 170struct ftrace_event_call __used \
186__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; 171__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
187 172
188#undef FTRACE_ENTRY
189#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print, filter) \
190 FTRACE_ENTRY_REG(call, struct_name, etype, \
191 PARAMS(tstruct), PARAMS(print), filter, NULL)
192
193int ftrace_event_is_function(struct ftrace_event_call *call)
194{
195 return call == &event_function;
196}
197
198#include "trace_entries.h" 173#include "trace_entries.h"
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8e3ad8082ab..c7b0c6a7db0 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -7,7 +7,7 @@
7 * Based on code from the latency_tracer, that is: 7 * Based on code from the latency_tracer, that is:
8 * 8 *
9 * Copyright (C) 2004-2006 Ingo Molnar 9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 Nadia Yvette Chambers 10 * Copyright (C) 2004 William Lee Irwin III
11 */ 11 */
12#include <linux/ring_buffer.h> 12#include <linux/ring_buffer.h>
13#include <linux/debugfs.h> 13#include <linux/debugfs.h>
@@ -48,8 +48,7 @@ static void function_trace_start(struct trace_array *tr)
48} 48}
49 49
50static void 50static void
51function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip, 51function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
52 struct ftrace_ops *op, struct pt_regs *pt_regs)
53{ 52{
54 struct trace_array *tr = func_trace; 53 struct trace_array *tr = func_trace;
55 struct trace_array_cpu *data; 54 struct trace_array_cpu *data;
@@ -75,17 +74,8 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip,
75 preempt_enable_notrace(); 74 preempt_enable_notrace();
76} 75}
77 76
78/* Our option */
79enum {
80 TRACE_FUNC_OPT_STACK = 0x1,
81};
82
83static struct tracer_flags func_flags;
84
85static void 77static void
86function_trace_call(unsigned long ip, unsigned long parent_ip, 78function_trace_call(unsigned long ip, unsigned long parent_ip)
87 struct ftrace_ops *op, struct pt_regs *pt_regs)
88
89{ 79{
90 struct trace_array *tr = func_trace; 80 struct trace_array *tr = func_trace;
91 struct trace_array_cpu *data; 81 struct trace_array_cpu *data;
@@ -116,8 +106,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
116} 106}
117 107
118static void 108static void
119function_stack_trace_call(unsigned long ip, unsigned long parent_ip, 109function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
120 struct ftrace_ops *op, struct pt_regs *pt_regs)
121{ 110{
122 struct trace_array *tr = func_trace; 111 struct trace_array *tr = func_trace;
123 struct trace_array_cpu *data; 112 struct trace_array_cpu *data;
@@ -160,13 +149,18 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
160static struct ftrace_ops trace_ops __read_mostly = 149static struct ftrace_ops trace_ops __read_mostly =
161{ 150{
162 .func = function_trace_call, 151 .func = function_trace_call,
163 .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, 152 .flags = FTRACE_OPS_FL_GLOBAL,
164}; 153};
165 154
166static struct ftrace_ops trace_stack_ops __read_mostly = 155static struct ftrace_ops trace_stack_ops __read_mostly =
167{ 156{
168 .func = function_stack_trace_call, 157 .func = function_stack_trace_call,
169 .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, 158 .flags = FTRACE_OPS_FL_GLOBAL,
159};
160
161/* Our two options */
162enum {
163 TRACE_FUNC_OPT_STACK = 0x1,
170}; 164};
171 165
172static struct tracer_opt func_opts[] = { 166static struct tracer_opt func_opts[] = {
@@ -210,11 +204,10 @@ static void tracing_stop_function_trace(void)
210 204
211static int func_set_flag(u32 old_flags, u32 bit, int set) 205static int func_set_flag(u32 old_flags, u32 bit, int set)
212{ 206{
213 switch (bit) { 207 if (bit == TRACE_FUNC_OPT_STACK) {
214 case TRACE_FUNC_OPT_STACK:
215 /* do nothing if already set */ 208 /* do nothing if already set */
216 if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK)) 209 if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
217 break; 210 return 0;
218 211
219 if (set) { 212 if (set) {
220 unregister_ftrace_function(&trace_ops); 213 unregister_ftrace_function(&trace_ops);
@@ -224,12 +217,10 @@ static int func_set_flag(u32 old_flags, u32 bit, int set)
224 register_ftrace_function(&trace_ops); 217 register_ftrace_function(&trace_ops);
225 } 218 }
226 219
227 break; 220 return 0;
228 default:
229 return -EINVAL;
230 } 221 }
231 222
232 return 0; 223 return -EINVAL;
233} 224}
234 225
235static struct tracer function_trace __read_mostly = 226static struct tracer function_trace __read_mostly =
@@ -366,7 +357,7 @@ ftrace_trace_onoff_callback(struct ftrace_hash *hash,
366 * We use the callback data field (which is a pointer) 357 * We use the callback data field (which is a pointer)
367 * as our counter. 358 * as our counter.
368 */ 359 */
369 ret = kstrtoul(number, 0, (unsigned long *)&count); 360 ret = strict_strtoul(number, 0, (unsigned long *)&count);
370 if (ret) 361 if (ret)
371 return ret; 362 return ret;
372 363
@@ -411,4 +402,5 @@ static __init int init_function_trace(void)
411 init_func_cmd_traceon(); 402 init_func_cmd_traceon();
412 return register_tracer(&function_trace); 403 return register_tracer(&function_trace);
413} 404}
414core_initcall(init_function_trace); 405device_initcall(init_function_trace);
406
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4edb4b74eb7..a7d2a4c653d 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -143,7 +143,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
143 return; 143 return;
144 } 144 }
145 145
146#if defined(CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST) && !defined(CC_USING_FENTRY) 146#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
147 /* 147 /*
148 * The arch may choose to record the frame pointer used 148 * The arch may choose to record the frame pointer used
149 * and check it here to make sure that it is what we expect it 149 * and check it here to make sure that it is what we expect it
@@ -154,9 +154,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
154 * 154 *
155 * Currently, x86_32 with optimize for size (-Os) makes the latest 155 * Currently, x86_32 with optimize for size (-Os) makes the latest
156 * gcc do the above. 156 * gcc do the above.
157 *
158 * Note, -mfentry does not use frame pointers, and this test
159 * is not needed if CC_USING_FENTRY is set.
160 */ 157 */
161 if (unlikely(current->ret_stack[index].fp != frame_pointer)) { 158 if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
162 ftrace_graph_stop(); 159 ftrace_graph_stop();
@@ -223,7 +220,7 @@ int __trace_graph_entry(struct trace_array *tr,
223 entry = ring_buffer_event_data(event); 220 entry = ring_buffer_event_data(event);
224 entry->graph_ent = *trace; 221 entry->graph_ent = *trace;
225 if (!filter_current_check_discard(buffer, call, entry, event)) 222 if (!filter_current_check_discard(buffer, call, entry, event))
226 __buffer_unlock_commit(buffer, event); 223 ring_buffer_unlock_commit(buffer, event);
227 224
228 return 1; 225 return 1;
229} 226}
@@ -327,7 +324,7 @@ void __trace_graph_return(struct trace_array *tr,
327 entry = ring_buffer_event_data(event); 324 entry = ring_buffer_event_data(event);
328 entry->ret = *trace; 325 entry->ret = *trace;
329 if (!filter_current_check_discard(buffer, call, entry, event)) 326 if (!filter_current_check_discard(buffer, call, entry, event))
330 __buffer_unlock_commit(buffer, event); 327 ring_buffer_unlock_commit(buffer, event);
331} 328}
332 329
333void trace_graph_return(struct ftrace_graph_ret *trace) 330void trace_graph_return(struct ftrace_graph_ret *trace)
@@ -541,7 +538,7 @@ get_return_for_leaf(struct trace_iterator *iter,
541 next = &data->ret; 538 next = &data->ret;
542 } else { 539 } else {
543 540
544 ring_iter = trace_buffer_iter(iter, iter->cpu); 541 ring_iter = iter->buffer_iter[iter->cpu];
545 542
546 /* First peek to compare current entry and the next one */ 543 /* First peek to compare current entry and the next one */
547 if (ring_iter) 544 if (ring_iter)
@@ -1474,4 +1471,4 @@ static __init int init_graph_trace(void)
1474 return register_tracer(&graph_trace); 1471 return register_tracer(&graph_trace);
1475} 1472}
1476 1473
1477core_initcall(init_graph_trace); 1474device_initcall(init_graph_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 713a2cac488..667aa8cc0cf 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -7,7 +7,7 @@
7 * From code in the latency_tracer, that is: 7 * From code in the latency_tracer, that is:
8 * 8 *
9 * Copyright (C) 2004-2006 Ingo Molnar 9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 Nadia Yvette Chambers 10 * Copyright (C) 2004 William Lee Irwin III
11 */ 11 */
12#include <linux/kallsyms.h> 12#include <linux/kallsyms.h>
13#include <linux/debugfs.h> 13#include <linux/debugfs.h>
@@ -23,7 +23,7 @@ static int tracer_enabled __read_mostly;
23 23
24static DEFINE_PER_CPU(int, tracing_cpu); 24static DEFINE_PER_CPU(int, tracing_cpu);
25 25
26static DEFINE_RAW_SPINLOCK(max_trace_lock); 26static DEFINE_SPINLOCK(max_trace_lock);
27 27
28enum { 28enum {
29 TRACER_IRQS_OFF = (1 << 1), 29 TRACER_IRQS_OFF = (1 << 1),
@@ -136,8 +136,7 @@ static int func_prolog_dec(struct trace_array *tr,
136 * irqsoff uses its own tracer function to keep the overhead down: 136 * irqsoff uses its own tracer function to keep the overhead down:
137 */ 137 */
138static void 138static void
139irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, 139irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
140 struct ftrace_ops *op, struct pt_regs *pt_regs)
141{ 140{
142 struct trace_array *tr = irqsoff_trace; 141 struct trace_array *tr = irqsoff_trace;
143 struct trace_array_cpu *data; 142 struct trace_array_cpu *data;
@@ -154,7 +153,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip,
154static struct ftrace_ops trace_ops __read_mostly = 153static struct ftrace_ops trace_ops __read_mostly =
155{ 154{
156 .func = irqsoff_tracer_call, 155 .func = irqsoff_tracer_call,
157 .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, 156 .flags = FTRACE_OPS_FL_GLOBAL,
158}; 157};
159#endif /* CONFIG_FUNCTION_TRACER */ 158#endif /* CONFIG_FUNCTION_TRACER */
160 159
@@ -281,20 +280,9 @@ static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
281} 280}
282 281
283static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { } 282static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { }
283static void irqsoff_print_header(struct seq_file *s) { }
284static void irqsoff_trace_open(struct trace_iterator *iter) { } 284static void irqsoff_trace_open(struct trace_iterator *iter) { }
285static void irqsoff_trace_close(struct trace_iterator *iter) { } 285static void irqsoff_trace_close(struct trace_iterator *iter) { }
286
287#ifdef CONFIG_FUNCTION_TRACER
288static void irqsoff_print_header(struct seq_file *s)
289{
290 trace_default_header(s);
291}
292#else
293static void irqsoff_print_header(struct seq_file *s)
294{
295 trace_latency_header(s);
296}
297#endif /* CONFIG_FUNCTION_TRACER */
298#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 286#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
299 287
300/* 288/*
@@ -333,7 +321,7 @@ check_critical_timing(struct trace_array *tr,
333 if (!report_latency(delta)) 321 if (!report_latency(delta))
334 goto out; 322 goto out;
335 323
336 raw_spin_lock_irqsave(&max_trace_lock, flags); 324 spin_lock_irqsave(&max_trace_lock, flags);
337 325
338 /* check if we are still the max latency */ 326 /* check if we are still the max latency */
339 if (!report_latency(delta)) 327 if (!report_latency(delta))
@@ -356,7 +344,7 @@ check_critical_timing(struct trace_array *tr,
356 max_sequence++; 344 max_sequence++;
357 345
358out_unlock: 346out_unlock:
359 raw_spin_unlock_irqrestore(&max_trace_lock, flags); 347 spin_unlock_irqrestore(&max_trace_lock, flags);
360 348
361out: 349out:
362 data->critical_sequence = max_sequence; 350 data->critical_sequence = max_sequence;
@@ -517,13 +505,13 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
517#ifdef CONFIG_PREEMPT_TRACER 505#ifdef CONFIG_PREEMPT_TRACER
518void trace_preempt_on(unsigned long a0, unsigned long a1) 506void trace_preempt_on(unsigned long a0, unsigned long a1)
519{ 507{
520 if (preempt_trace() && !irq_trace()) 508 if (preempt_trace())
521 stop_critical_timing(a0, a1); 509 stop_critical_timing(a0, a1);
522} 510}
523 511
524void trace_preempt_off(unsigned long a0, unsigned long a1) 512void trace_preempt_off(unsigned long a0, unsigned long a1)
525{ 513{
526 if (preempt_trace() && !irq_trace()) 514 if (preempt_trace())
527 start_critical_timing(a0, a1); 515 start_critical_timing(a0, a1);
528} 516}
529#endif /* CONFIG_PREEMPT_TRACER */ 517#endif /* CONFIG_PREEMPT_TRACER */
@@ -604,7 +592,7 @@ static struct tracer irqsoff_tracer __read_mostly =
604 .reset = irqsoff_tracer_reset, 592 .reset = irqsoff_tracer_reset,
605 .start = irqsoff_tracer_start, 593 .start = irqsoff_tracer_start,
606 .stop = irqsoff_tracer_stop, 594 .stop = irqsoff_tracer_stop,
607 .print_max = true, 595 .print_max = 1,
608 .print_header = irqsoff_print_header, 596 .print_header = irqsoff_print_header,
609 .print_line = irqsoff_print_line, 597 .print_line = irqsoff_print_line,
610 .flags = &tracer_flags, 598 .flags = &tracer_flags,
@@ -614,7 +602,7 @@ static struct tracer irqsoff_tracer __read_mostly =
614#endif 602#endif
615 .open = irqsoff_trace_open, 603 .open = irqsoff_trace_open,
616 .close = irqsoff_trace_close, 604 .close = irqsoff_trace_close,
617 .use_max_tr = true, 605 .use_max_tr = 1,
618}; 606};
619# define register_irqsoff(trace) register_tracer(&trace) 607# define register_irqsoff(trace) register_tracer(&trace)
620#else 608#else
@@ -637,7 +625,7 @@ static struct tracer preemptoff_tracer __read_mostly =
637 .reset = irqsoff_tracer_reset, 625 .reset = irqsoff_tracer_reset,
638 .start = irqsoff_tracer_start, 626 .start = irqsoff_tracer_start,
639 .stop = irqsoff_tracer_stop, 627 .stop = irqsoff_tracer_stop,
640 .print_max = true, 628 .print_max = 1,
641 .print_header = irqsoff_print_header, 629 .print_header = irqsoff_print_header,
642 .print_line = irqsoff_print_line, 630 .print_line = irqsoff_print_line,
643 .flags = &tracer_flags, 631 .flags = &tracer_flags,
@@ -647,7 +635,7 @@ static struct tracer preemptoff_tracer __read_mostly =
647#endif 635#endif
648 .open = irqsoff_trace_open, 636 .open = irqsoff_trace_open,
649 .close = irqsoff_trace_close, 637 .close = irqsoff_trace_close,
650 .use_max_tr = true, 638 .use_max_tr = 1,
651}; 639};
652# define register_preemptoff(trace) register_tracer(&trace) 640# define register_preemptoff(trace) register_tracer(&trace)
653#else 641#else
@@ -672,7 +660,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
672 .reset = irqsoff_tracer_reset, 660 .reset = irqsoff_tracer_reset,
673 .start = irqsoff_tracer_start, 661 .start = irqsoff_tracer_start,
674 .stop = irqsoff_tracer_stop, 662 .stop = irqsoff_tracer_stop,
675 .print_max = true, 663 .print_max = 1,
676 .print_header = irqsoff_print_header, 664 .print_header = irqsoff_print_header,
677 .print_line = irqsoff_print_line, 665 .print_line = irqsoff_print_line,
678 .flags = &tracer_flags, 666 .flags = &tracer_flags,
@@ -682,7 +670,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
682#endif 670#endif
683 .open = irqsoff_trace_open, 671 .open = irqsoff_trace_open,
684 .close = irqsoff_trace_close, 672 .close = irqsoff_trace_close,
685 .use_max_tr = true, 673 .use_max_tr = 1,
686}; 674};
687 675
688# define register_preemptirqsoff(trace) register_tracer(&trace) 676# define register_preemptirqsoff(trace) register_tracer(&trace)
@@ -698,4 +686,4 @@ __init static int init_irqsoff_tracer(void)
698 686
699 return 0; 687 return 0;
700} 688}
701core_initcall(init_irqsoff_tracer); 689device_initcall(init_irqsoff_tracer);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 1865d5f7653..00d527c945a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -19,15 +19,547 @@
19 19
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32#include <linux/stringify.h>
33#include <linux/limits.h>
34#include <asm/bitsperlong.h>
35
36#include "trace.h"
37#include "trace_output.h"
38
39#define MAX_TRACE_ARGS 128
40#define MAX_ARGSTR_LEN 63
41#define MAX_EVENT_NAME_LEN 64
42#define MAX_STRING_SIZE PATH_MAX
43#define KPROBE_EVENT_SYSTEM "kprobes"
22 44
23#include "trace_probe.h" 45/* Reserved field names */
46#define FIELD_STRING_IP "__probe_ip"
47#define FIELD_STRING_RETIP "__probe_ret_ip"
48#define FIELD_STRING_FUNC "__probe_func"
49
50const char *reserved_field_names[] = {
51 "common_type",
52 "common_flags",
53 "common_preempt_count",
54 "common_pid",
55 "common_tgid",
56 FIELD_STRING_IP,
57 FIELD_STRING_RETIP,
58 FIELD_STRING_FUNC,
59};
24 60
25#define KPROBE_EVENT_SYSTEM "kprobes" 61/* Printing function type */
62typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *,
63 void *);
64#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
65#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
66
67/* Printing in basic type function template */
68#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
69static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
70 const char *name, \
71 void *data, void *ent)\
72{ \
73 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
74} \
75static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
76
77DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
78DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
79DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
80DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
81DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
82DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
83DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
84DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
85
86/* data_rloc: data relative location, compatible with u32 */
87#define make_data_rloc(len, roffs) \
88 (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
89#define get_rloc_len(dl) ((u32)(dl) >> 16)
90#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
91
92static inline void *get_rloc_data(u32 *dl)
93{
94 return (u8 *)dl + get_rloc_offs(*dl);
95}
96
97/* For data_loc conversion */
98static inline void *get_loc_data(u32 *dl, void *ent)
99{
100 return (u8 *)ent + get_rloc_offs(*dl);
101}
102
103/*
104 * Convert data_rloc to data_loc:
105 * data_rloc stores the offset from data_rloc itself, but data_loc
106 * stores the offset from event entry.
107 */
108#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
109
110/* For defining macros, define string/string_size types */
111typedef u32 string;
112typedef u32 string_size;
113
114/* Print type function for string type */
115static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
116 const char *name,
117 void *data, void *ent)
118{
119 int len = *(u32 *)data >> 16;
120
121 if (!len)
122 return trace_seq_printf(s, " %s=(fault)", name);
123 else
124 return trace_seq_printf(s, " %s=\"%s\"", name,
125 (const char *)get_loc_data(data, ent));
126}
127static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
128
129/* Data fetch function type */
130typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
131
132struct fetch_param {
133 fetch_func_t fn;
134 void *data;
135};
136
137static __kprobes void call_fetch(struct fetch_param *fprm,
138 struct pt_regs *regs, void *dest)
139{
140 return fprm->fn(regs, fprm->data, dest);
141}
142
143#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
144/*
145 * Define macro for basic types - we don't need to define s* types, because
146 * we have to care only about bitwidth at recording time.
147 */
148#define DEFINE_BASIC_FETCH_FUNCS(method) \
149DEFINE_FETCH_##method(u8) \
150DEFINE_FETCH_##method(u16) \
151DEFINE_FETCH_##method(u32) \
152DEFINE_FETCH_##method(u64)
153
154#define CHECK_FETCH_FUNCS(method, fn) \
155 (((FETCH_FUNC_NAME(method, u8) == fn) || \
156 (FETCH_FUNC_NAME(method, u16) == fn) || \
157 (FETCH_FUNC_NAME(method, u32) == fn) || \
158 (FETCH_FUNC_NAME(method, u64) == fn) || \
159 (FETCH_FUNC_NAME(method, string) == fn) || \
160 (FETCH_FUNC_NAME(method, string_size) == fn)) \
161 && (fn != NULL))
162
163/* Data fetch function templates */
164#define DEFINE_FETCH_reg(type) \
165static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
166 void *offset, void *dest) \
167{ \
168 *(type *)dest = (type)regs_get_register(regs, \
169 (unsigned int)((unsigned long)offset)); \
170}
171DEFINE_BASIC_FETCH_FUNCS(reg)
172/* No string on the register */
173#define fetch_reg_string NULL
174#define fetch_reg_string_size NULL
175
176#define DEFINE_FETCH_stack(type) \
177static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
178 void *offset, void *dest) \
179{ \
180 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
181 (unsigned int)((unsigned long)offset)); \
182}
183DEFINE_BASIC_FETCH_FUNCS(stack)
184/* No string on the stack entry */
185#define fetch_stack_string NULL
186#define fetch_stack_string_size NULL
187
188#define DEFINE_FETCH_retval(type) \
189static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
190 void *dummy, void *dest) \
191{ \
192 *(type *)dest = (type)regs_return_value(regs); \
193}
194DEFINE_BASIC_FETCH_FUNCS(retval)
195/* No string on the retval */
196#define fetch_retval_string NULL
197#define fetch_retval_string_size NULL
198
199#define DEFINE_FETCH_memory(type) \
200static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
201 void *addr, void *dest) \
202{ \
203 type retval; \
204 if (probe_kernel_address(addr, retval)) \
205 *(type *)dest = 0; \
206 else \
207 *(type *)dest = retval; \
208}
209DEFINE_BASIC_FETCH_FUNCS(memory)
210/*
211 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
212 * length and relative data location.
213 */
214static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
215 void *addr, void *dest)
216{
217 long ret;
218 int maxlen = get_rloc_len(*(u32 *)dest);
219 u8 *dst = get_rloc_data(dest);
220 u8 *src = addr;
221 mm_segment_t old_fs = get_fs();
222 if (!maxlen)
223 return;
224 /*
225 * Try to get string again, since the string can be changed while
226 * probing.
227 */
228 set_fs(KERNEL_DS);
229 pagefault_disable();
230 do
231 ret = __copy_from_user_inatomic(dst++, src++, 1);
232 while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
233 dst[-1] = '\0';
234 pagefault_enable();
235 set_fs(old_fs);
236
237 if (ret < 0) { /* Failed to fetch string */
238 ((u8 *)get_rloc_data(dest))[0] = '\0';
239 *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
240 } else
241 *(u32 *)dest = make_data_rloc(src - (u8 *)addr,
242 get_rloc_offs(*(u32 *)dest));
243}
244/* Return the length of string -- including null terminal byte */
245static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
246 void *addr, void *dest)
247{
248 int ret, len = 0;
249 u8 c;
250 mm_segment_t old_fs = get_fs();
251
252 set_fs(KERNEL_DS);
253 pagefault_disable();
254 do {
255 ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
256 len++;
257 } while (c && ret == 0 && len < MAX_STRING_SIZE);
258 pagefault_enable();
259 set_fs(old_fs);
260
261 if (ret < 0) /* Failed to check the length */
262 *(u32 *)dest = 0;
263 else
264 *(u32 *)dest = len;
265}
266
267/* Memory fetching by symbol */
268struct symbol_cache {
269 char *symbol;
270 long offset;
271 unsigned long addr;
272};
273
274static unsigned long update_symbol_cache(struct symbol_cache *sc)
275{
276 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
277 if (sc->addr)
278 sc->addr += sc->offset;
279 return sc->addr;
280}
281
282static void free_symbol_cache(struct symbol_cache *sc)
283{
284 kfree(sc->symbol);
285 kfree(sc);
286}
287
288static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
289{
290 struct symbol_cache *sc;
291
292 if (!sym || strlen(sym) == 0)
293 return NULL;
294 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
295 if (!sc)
296 return NULL;
297
298 sc->symbol = kstrdup(sym, GFP_KERNEL);
299 if (!sc->symbol) {
300 kfree(sc);
301 return NULL;
302 }
303 sc->offset = offset;
304
305 update_symbol_cache(sc);
306 return sc;
307}
308
309#define DEFINE_FETCH_symbol(type) \
310static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
311 void *data, void *dest) \
312{ \
313 struct symbol_cache *sc = data; \
314 if (sc->addr) \
315 fetch_memory_##type(regs, (void *)sc->addr, dest); \
316 else \
317 *(type *)dest = 0; \
318}
319DEFINE_BASIC_FETCH_FUNCS(symbol)
320DEFINE_FETCH_symbol(string)
321DEFINE_FETCH_symbol(string_size)
322
323/* Dereference memory access function */
324struct deref_fetch_param {
325 struct fetch_param orig;
326 long offset;
327};
328
329#define DEFINE_FETCH_deref(type) \
330static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
331 void *data, void *dest) \
332{ \
333 struct deref_fetch_param *dprm = data; \
334 unsigned long addr; \
335 call_fetch(&dprm->orig, regs, &addr); \
336 if (addr) { \
337 addr += dprm->offset; \
338 fetch_memory_##type(regs, (void *)addr, dest); \
339 } else \
340 *(type *)dest = 0; \
341}
342DEFINE_BASIC_FETCH_FUNCS(deref)
343DEFINE_FETCH_deref(string)
344DEFINE_FETCH_deref(string_size)
345
346static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
347{
348 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
349 update_deref_fetch_param(data->orig.data);
350 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
351 update_symbol_cache(data->orig.data);
352}
353
354static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
355{
356 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
357 free_deref_fetch_param(data->orig.data);
358 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
359 free_symbol_cache(data->orig.data);
360 kfree(data);
361}
362
363/* Bitfield fetch function */
364struct bitfield_fetch_param {
365 struct fetch_param orig;
366 unsigned char hi_shift;
367 unsigned char low_shift;
368};
369
370#define DEFINE_FETCH_bitfield(type) \
371static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
372 void *data, void *dest) \
373{ \
374 struct bitfield_fetch_param *bprm = data; \
375 type buf = 0; \
376 call_fetch(&bprm->orig, regs, &buf); \
377 if (buf) { \
378 buf <<= bprm->hi_shift; \
379 buf >>= bprm->low_shift; \
380 } \
381 *(type *)dest = buf; \
382}
383DEFINE_BASIC_FETCH_FUNCS(bitfield)
384#define fetch_bitfield_string NULL
385#define fetch_bitfield_string_size NULL
386
387static __kprobes void
388update_bitfield_fetch_param(struct bitfield_fetch_param *data)
389{
390 /*
391 * Don't check the bitfield itself, because this must be the
392 * last fetch function.
393 */
394 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
395 update_deref_fetch_param(data->orig.data);
396 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
397 update_symbol_cache(data->orig.data);
398}
399
400static __kprobes void
401free_bitfield_fetch_param(struct bitfield_fetch_param *data)
402{
403 /*
404 * Don't check the bitfield itself, because this must be the
405 * last fetch function.
406 */
407 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
408 free_deref_fetch_param(data->orig.data);
409 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
410 free_symbol_cache(data->orig.data);
411 kfree(data);
412}
413
414/* Default (unsigned long) fetch type */
415#define __DEFAULT_FETCH_TYPE(t) u##t
416#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
417#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
418#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
419
420/* Fetch types */
421enum {
422 FETCH_MTD_reg = 0,
423 FETCH_MTD_stack,
424 FETCH_MTD_retval,
425 FETCH_MTD_memory,
426 FETCH_MTD_symbol,
427 FETCH_MTD_deref,
428 FETCH_MTD_bitfield,
429 FETCH_MTD_END,
430};
431
432#define ASSIGN_FETCH_FUNC(method, type) \
433 [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
434
435#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
436 {.name = _name, \
437 .size = _size, \
438 .is_signed = sign, \
439 .print = PRINT_TYPE_FUNC_NAME(ptype), \
440 .fmt = PRINT_TYPE_FMT_NAME(ptype), \
441 .fmttype = _fmttype, \
442 .fetch = { \
443ASSIGN_FETCH_FUNC(reg, ftype), \
444ASSIGN_FETCH_FUNC(stack, ftype), \
445ASSIGN_FETCH_FUNC(retval, ftype), \
446ASSIGN_FETCH_FUNC(memory, ftype), \
447ASSIGN_FETCH_FUNC(symbol, ftype), \
448ASSIGN_FETCH_FUNC(deref, ftype), \
449ASSIGN_FETCH_FUNC(bitfield, ftype), \
450 } \
451 }
452
453#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
454 __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
455
456#define FETCH_TYPE_STRING 0
457#define FETCH_TYPE_STRSIZE 1
458
459/* Fetch type information table */
460static const struct fetch_type {
461 const char *name; /* Name of type */
462 size_t size; /* Byte size of type */
463 int is_signed; /* Signed flag */
464 print_type_func_t print; /* Print functions */
465 const char *fmt; /* Fromat string */
466 const char *fmttype; /* Name in format file */
467 /* Fetch functions */
468 fetch_func_t fetch[FETCH_MTD_END];
469} fetch_type_table[] = {
470 /* Special types */
471 [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
472 sizeof(u32), 1, "__data_loc char[]"),
473 [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
474 string_size, sizeof(u32), 0, "u32"),
475 /* Basic types */
476 ASSIGN_FETCH_TYPE(u8, u8, 0),
477 ASSIGN_FETCH_TYPE(u16, u16, 0),
478 ASSIGN_FETCH_TYPE(u32, u32, 0),
479 ASSIGN_FETCH_TYPE(u64, u64, 0),
480 ASSIGN_FETCH_TYPE(s8, u8, 1),
481 ASSIGN_FETCH_TYPE(s16, u16, 1),
482 ASSIGN_FETCH_TYPE(s32, u32, 1),
483 ASSIGN_FETCH_TYPE(s64, u64, 1),
484};
485
486static const struct fetch_type *find_fetch_type(const char *type)
487{
488 int i;
489
490 if (!type)
491 type = DEFAULT_FETCH_TYPE_STR;
492
493 /* Special case: bitfield */
494 if (*type == 'b') {
495 unsigned long bs;
496 type = strchr(type, '/');
497 if (!type)
498 goto fail;
499 type++;
500 if (strict_strtoul(type, 0, &bs))
501 goto fail;
502 switch (bs) {
503 case 8:
504 return find_fetch_type("u8");
505 case 16:
506 return find_fetch_type("u16");
507 case 32:
508 return find_fetch_type("u32");
509 case 64:
510 return find_fetch_type("u64");
511 default:
512 goto fail;
513 }
514 }
515
516 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
517 if (strcmp(type, fetch_type_table[i].name) == 0)
518 return &fetch_type_table[i];
519fail:
520 return NULL;
521}
522
523/* Special function : only accept unsigned long */
524static __kprobes void fetch_stack_address(struct pt_regs *regs,
525 void *dummy, void *dest)
526{
527 *(unsigned long *)dest = kernel_stack_pointer(regs);
528}
529
530static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
531 fetch_func_t orig_fn)
532{
533 int i;
534
535 if (type != &fetch_type_table[FETCH_TYPE_STRING])
536 return NULL; /* Only string type needs size function */
537 for (i = 0; i < FETCH_MTD_END; i++)
538 if (type->fetch[i] == orig_fn)
539 return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
540
541 WARN_ON(1); /* This should not happen */
542 return NULL;
543}
26 544
27/** 545/**
28 * Kprobe event core functions 546 * Kprobe event core functions
29 */ 547 */
30 548
549struct probe_arg {
550 struct fetch_param fetch;
551 struct fetch_param fetch_size;
552 unsigned int offset; /* Offset from argument entry */
553 const char *name; /* Name of this argument */
554 const char *comm; /* Command of this argument */
555 const struct fetch_type *type; /* Type of this argument */
556};
557
558/* Flags for trace_probe */
559#define TP_FLAG_TRACE 1
560#define TP_FLAG_PROFILE 2
561#define TP_FLAG_REGISTERED 4
562
31struct trace_probe { 563struct trace_probe {
32 struct list_head list; 564 struct list_head list;
33 struct kretprobe rp; /* Use rp.kp for kprobe use */ 565 struct kretprobe rp; /* Use rp.kp for kprobe use */
@@ -99,6 +631,18 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
99static int kretprobe_dispatcher(struct kretprobe_instance *ri, 631static int kretprobe_dispatcher(struct kretprobe_instance *ri,
100 struct pt_regs *regs); 632 struct pt_regs *regs);
101 633
634/* Check the name is good for event/group/fields */
635static int is_good_name(const char *name)
636{
637 if (!isalpha(*name) && *name != '_')
638 return 0;
639 while (*++name != '\0') {
640 if (!isalpha(*name) && !isdigit(*name) && *name != '_')
641 return 0;
642 }
643 return 1;
644}
645
102/* 646/*
103 * Allocate new trace_probe and initialize it (including kprobes). 647 * Allocate new trace_probe and initialize it (including kprobes).
104 */ 648 */
@@ -107,7 +651,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
107 void *addr, 651 void *addr,
108 const char *symbol, 652 const char *symbol,
109 unsigned long offs, 653 unsigned long offs,
110 int nargs, bool is_return) 654 int nargs, int is_return)
111{ 655{
112 struct trace_probe *tp; 656 struct trace_probe *tp;
113 int ret = -ENOMEM; 657 int ret = -ENOMEM;
@@ -158,12 +702,34 @@ error:
158 return ERR_PTR(ret); 702 return ERR_PTR(ret);
159} 703}
160 704
705static void update_probe_arg(struct probe_arg *arg)
706{
707 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
708 update_bitfield_fetch_param(arg->fetch.data);
709 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
710 update_deref_fetch_param(arg->fetch.data);
711 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
712 update_symbol_cache(arg->fetch.data);
713}
714
715static void free_probe_arg(struct probe_arg *arg)
716{
717 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
718 free_bitfield_fetch_param(arg->fetch.data);
719 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
720 free_deref_fetch_param(arg->fetch.data);
721 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
722 free_symbol_cache(arg->fetch.data);
723 kfree(arg->name);
724 kfree(arg->comm);
725}
726
161static void free_trace_probe(struct trace_probe *tp) 727static void free_trace_probe(struct trace_probe *tp)
162{ 728{
163 int i; 729 int i;
164 730
165 for (i = 0; i < tp->nr_args; i++) 731 for (i = 0; i < tp->nr_args; i++)
166 traceprobe_free_probe_arg(&tp->args[i]); 732 free_probe_arg(&tp->args[i]);
167 733
168 kfree(tp->call.class->system); 734 kfree(tp->call.class->system);
169 kfree(tp->call.name); 735 kfree(tp->call.name);
@@ -221,7 +787,7 @@ static int __register_trace_probe(struct trace_probe *tp)
221 return -EINVAL; 787 return -EINVAL;
222 788
223 for (i = 0; i < tp->nr_args; i++) 789 for (i = 0; i < tp->nr_args; i++)
224 traceprobe_update_arg(&tp->args[i]); 790 update_probe_arg(&tp->args[i]);
225 791
226 /* Set/clear disabled flag according to tp->flag */ 792 /* Set/clear disabled flag according to tp->flag */
227 if (trace_probe_is_enabled(tp)) 793 if (trace_probe_is_enabled(tp))
@@ -353,6 +919,227 @@ static struct notifier_block trace_probe_module_nb = {
353 .priority = 1 /* Invoked after kprobe module callback */ 919 .priority = 1 /* Invoked after kprobe module callback */
354}; 920};
355 921
922/* Split symbol and offset. */
923static int split_symbol_offset(char *symbol, unsigned long *offset)
924{
925 char *tmp;
926 int ret;
927
928 if (!offset)
929 return -EINVAL;
930
931 tmp = strchr(symbol, '+');
932 if (tmp) {
933 /* skip sign because strict_strtol doesn't accept '+' */
934 ret = strict_strtoul(tmp + 1, 0, offset);
935 if (ret)
936 return ret;
937 *tmp = '\0';
938 } else
939 *offset = 0;
940 return 0;
941}
942
943#define PARAM_MAX_ARGS 16
944#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
945
946static int parse_probe_vars(char *arg, const struct fetch_type *t,
947 struct fetch_param *f, int is_return)
948{
949 int ret = 0;
950 unsigned long param;
951
952 if (strcmp(arg, "retval") == 0) {
953 if (is_return)
954 f->fn = t->fetch[FETCH_MTD_retval];
955 else
956 ret = -EINVAL;
957 } else if (strncmp(arg, "stack", 5) == 0) {
958 if (arg[5] == '\0') {
959 if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
960 f->fn = fetch_stack_address;
961 else
962 ret = -EINVAL;
963 } else if (isdigit(arg[5])) {
964 ret = strict_strtoul(arg + 5, 10, &param);
965 if (ret || param > PARAM_MAX_STACK)
966 ret = -EINVAL;
967 else {
968 f->fn = t->fetch[FETCH_MTD_stack];
969 f->data = (void *)param;
970 }
971 } else
972 ret = -EINVAL;
973 } else
974 ret = -EINVAL;
975 return ret;
976}
977
978/* Recursive argument parser */
979static int __parse_probe_arg(char *arg, const struct fetch_type *t,
980 struct fetch_param *f, int is_return)
981{
982 int ret = 0;
983 unsigned long param;
984 long offset;
985 char *tmp;
986
987 switch (arg[0]) {
988 case '$':
989 ret = parse_probe_vars(arg + 1, t, f, is_return);
990 break;
991 case '%': /* named register */
992 ret = regs_query_register_offset(arg + 1);
993 if (ret >= 0) {
994 f->fn = t->fetch[FETCH_MTD_reg];
995 f->data = (void *)(unsigned long)ret;
996 ret = 0;
997 }
998 break;
999 case '@': /* memory or symbol */
1000 if (isdigit(arg[1])) {
1001 ret = strict_strtoul(arg + 1, 0, &param);
1002 if (ret)
1003 break;
1004 f->fn = t->fetch[FETCH_MTD_memory];
1005 f->data = (void *)param;
1006 } else {
1007 ret = split_symbol_offset(arg + 1, &offset);
1008 if (ret)
1009 break;
1010 f->data = alloc_symbol_cache(arg + 1, offset);
1011 if (f->data)
1012 f->fn = t->fetch[FETCH_MTD_symbol];
1013 }
1014 break;
1015 case '+': /* deref memory */
1016 arg++; /* Skip '+', because strict_strtol() rejects it. */
1017 case '-':
1018 tmp = strchr(arg, '(');
1019 if (!tmp)
1020 break;
1021 *tmp = '\0';
1022 ret = strict_strtol(arg, 0, &offset);
1023 if (ret)
1024 break;
1025 arg = tmp + 1;
1026 tmp = strrchr(arg, ')');
1027 if (tmp) {
1028 struct deref_fetch_param *dprm;
1029 const struct fetch_type *t2 = find_fetch_type(NULL);
1030 *tmp = '\0';
1031 dprm = kzalloc(sizeof(struct deref_fetch_param),
1032 GFP_KERNEL);
1033 if (!dprm)
1034 return -ENOMEM;
1035 dprm->offset = offset;
1036 ret = __parse_probe_arg(arg, t2, &dprm->orig,
1037 is_return);
1038 if (ret)
1039 kfree(dprm);
1040 else {
1041 f->fn = t->fetch[FETCH_MTD_deref];
1042 f->data = (void *)dprm;
1043 }
1044 }
1045 break;
1046 }
1047 if (!ret && !f->fn) { /* Parsed, but do not find fetch method */
1048 pr_info("%s type has no corresponding fetch method.\n",
1049 t->name);
1050 ret = -EINVAL;
1051 }
1052 return ret;
1053}
1054
1055#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long))
1056
1057/* Bitfield type needs to be parsed into a fetch function */
1058static int __parse_bitfield_probe_arg(const char *bf,
1059 const struct fetch_type *t,
1060 struct fetch_param *f)
1061{
1062 struct bitfield_fetch_param *bprm;
1063 unsigned long bw, bo;
1064 char *tail;
1065
1066 if (*bf != 'b')
1067 return 0;
1068
1069 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1070 if (!bprm)
1071 return -ENOMEM;
1072 bprm->orig = *f;
1073 f->fn = t->fetch[FETCH_MTD_bitfield];
1074 f->data = (void *)bprm;
1075
1076 bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
1077 if (bw == 0 || *tail != '@')
1078 return -EINVAL;
1079
1080 bf = tail + 1;
1081 bo = simple_strtoul(bf, &tail, 0);
1082 if (tail == bf || *tail != '/')
1083 return -EINVAL;
1084
1085 bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
1086 bprm->low_shift = bprm->hi_shift + bo;
1087 return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
1088}
1089
1090/* String length checking wrapper */
1091static int parse_probe_arg(char *arg, struct trace_probe *tp,
1092 struct probe_arg *parg, int is_return)
1093{
1094 const char *t;
1095 int ret;
1096
1097 if (strlen(arg) > MAX_ARGSTR_LEN) {
1098 pr_info("Argument is too long.: %s\n", arg);
1099 return -ENOSPC;
1100 }
1101 parg->comm = kstrdup(arg, GFP_KERNEL);
1102 if (!parg->comm) {
1103 pr_info("Failed to allocate memory for command '%s'.\n", arg);
1104 return -ENOMEM;
1105 }
1106 t = strchr(parg->comm, ':');
1107 if (t) {
1108 arg[t - parg->comm] = '\0';
1109 t++;
1110 }
1111 parg->type = find_fetch_type(t);
1112 if (!parg->type) {
1113 pr_info("Unsupported type: %s\n", t);
1114 return -EINVAL;
1115 }
1116 parg->offset = tp->size;
1117 tp->size += parg->type->size;
1118 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
1119 if (ret >= 0 && t != NULL)
1120 ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
1121 if (ret >= 0) {
1122 parg->fetch_size.fn = get_fetch_size_function(parg->type,
1123 parg->fetch.fn);
1124 parg->fetch_size.data = parg->fetch.data;
1125 }
1126 return ret;
1127}
1128
1129/* Return 1 if name is reserved or already used by another argument */
1130static int conflict_field_name(const char *name,
1131 struct probe_arg *args, int narg)
1132{
1133 int i;
1134 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
1135 if (strcmp(reserved_field_names[i], name) == 0)
1136 return 1;
1137 for (i = 0; i < narg; i++)
1138 if (strcmp(args[i].name, name) == 0)
1139 return 1;
1140 return 0;
1141}
1142
356static int create_trace_probe(int argc, char **argv) 1143static int create_trace_probe(int argc, char **argv)
357{ 1144{
358 /* 1145 /*
@@ -375,7 +1162,7 @@ static int create_trace_probe(int argc, char **argv)
375 */ 1162 */
376 struct trace_probe *tp; 1163 struct trace_probe *tp;
377 int i, ret = 0; 1164 int i, ret = 0;
378 bool is_return = false, is_delete = false; 1165 int is_return = 0, is_delete = 0;
379 char *symbol = NULL, *event = NULL, *group = NULL; 1166 char *symbol = NULL, *event = NULL, *group = NULL;
380 char *arg; 1167 char *arg;
381 unsigned long offset = 0; 1168 unsigned long offset = 0;
@@ -384,11 +1171,11 @@ static int create_trace_probe(int argc, char **argv)
384 1171
385 /* argc must be >= 1 */ 1172 /* argc must be >= 1 */
386 if (argv[0][0] == 'p') 1173 if (argv[0][0] == 'p')
387 is_return = false; 1174 is_return = 0;
388 else if (argv[0][0] == 'r') 1175 else if (argv[0][0] == 'r')
389 is_return = true; 1176 is_return = 1;
390 else if (argv[0][0] == '-') 1177 else if (argv[0][0] == '-')
391 is_delete = true; 1178 is_delete = 1;
392 else { 1179 else {
393 pr_info("Probe definition must be started with 'p', 'r' or" 1180 pr_info("Probe definition must be started with 'p', 'r' or"
394 " '-'.\n"); 1181 " '-'.\n");
@@ -444,7 +1231,7 @@ static int create_trace_probe(int argc, char **argv)
444 return -EINVAL; 1231 return -EINVAL;
445 } 1232 }
446 /* an address specified */ 1233 /* an address specified */
447 ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr); 1234 ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
448 if (ret) { 1235 if (ret) {
449 pr_info("Failed to parse address.\n"); 1236 pr_info("Failed to parse address.\n");
450 return ret; 1237 return ret;
@@ -453,7 +1240,7 @@ static int create_trace_probe(int argc, char **argv)
453 /* a symbol specified */ 1240 /* a symbol specified */
454 symbol = argv[1]; 1241 symbol = argv[1];
455 /* TODO: support .init module functions */ 1242 /* TODO: support .init module functions */
456 ret = traceprobe_split_symbol_offset(symbol, &offset); 1243 ret = split_symbol_offset(symbol, &offset);
457 if (ret) { 1244 if (ret) {
458 pr_info("Failed to parse symbol.\n"); 1245 pr_info("Failed to parse symbol.\n");
459 return ret; 1246 return ret;
@@ -515,8 +1302,7 @@ static int create_trace_probe(int argc, char **argv)
515 goto error; 1302 goto error;
516 } 1303 }
517 1304
518 if (traceprobe_conflict_field_name(tp->args[i].name, 1305 if (conflict_field_name(tp->args[i].name, tp->args, i)) {
519 tp->args, i)) {
520 pr_info("Argument[%d] name '%s' conflicts with " 1306 pr_info("Argument[%d] name '%s' conflicts with "
521 "another field.\n", i, argv[i]); 1307 "another field.\n", i, argv[i]);
522 ret = -EINVAL; 1308 ret = -EINVAL;
@@ -524,8 +1310,7 @@ static int create_trace_probe(int argc, char **argv)
524 } 1310 }
525 1311
526 /* Parse fetch argument */ 1312 /* Parse fetch argument */
527 ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i], 1313 ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
528 is_return, true);
529 if (ret) { 1314 if (ret) {
530 pr_info("Parse error at argument[%d]. (%d)\n", i, ret); 1315 pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
531 goto error; 1316 goto error;
@@ -627,11 +1412,70 @@ static int probes_open(struct inode *inode, struct file *file)
627 return seq_open(file, &probes_seq_op); 1412 return seq_open(file, &probes_seq_op);
628} 1413}
629 1414
1415static int command_trace_probe(const char *buf)
1416{
1417 char **argv;
1418 int argc = 0, ret = 0;
1419
1420 argv = argv_split(GFP_KERNEL, buf, &argc);
1421 if (!argv)
1422 return -ENOMEM;
1423
1424 if (argc)
1425 ret = create_trace_probe(argc, argv);
1426
1427 argv_free(argv);
1428 return ret;
1429}
1430
1431#define WRITE_BUFSIZE 4096
1432
630static ssize_t probes_write(struct file *file, const char __user *buffer, 1433static ssize_t probes_write(struct file *file, const char __user *buffer,
631 size_t count, loff_t *ppos) 1434 size_t count, loff_t *ppos)
632{ 1435{
633 return traceprobe_probes_write(file, buffer, count, ppos, 1436 char *kbuf, *tmp;
634 create_trace_probe); 1437 int ret;
1438 size_t done;
1439 size_t size;
1440
1441 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
1442 if (!kbuf)
1443 return -ENOMEM;
1444
1445 ret = done = 0;
1446 while (done < count) {
1447 size = count - done;
1448 if (size >= WRITE_BUFSIZE)
1449 size = WRITE_BUFSIZE - 1;
1450 if (copy_from_user(kbuf, buffer + done, size)) {
1451 ret = -EFAULT;
1452 goto out;
1453 }
1454 kbuf[size] = '\0';
1455 tmp = strchr(kbuf, '\n');
1456 if (tmp) {
1457 *tmp = '\0';
1458 size = tmp - kbuf + 1;
1459 } else if (done + size < count) {
1460 pr_warning("Line length is too long: "
1461 "Should be less than %d.", WRITE_BUFSIZE);
1462 ret = -EINVAL;
1463 goto out;
1464 }
1465 done += size;
1466 /* Remove comments */
1467 tmp = strchr(kbuf, '#');
1468 if (tmp)
1469 *tmp = '\0';
1470
1471 ret = command_trace_probe(kbuf);
1472 if (ret)
1473 goto out;
1474 }
1475 ret = done;
1476out:
1477 kfree(kbuf);
1478 return ret;
635} 1479}
636 1480
637static const struct file_operations kprobe_events_ops = { 1481static const struct file_operations kprobe_events_ops = {
@@ -751,8 +1595,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
751 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1595 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
752 1596
753 if (!filter_current_check_discard(buffer, call, entry, event)) 1597 if (!filter_current_check_discard(buffer, call, entry, event))
754 trace_buffer_unlock_commit_regs(buffer, event, 1598 trace_nowake_buffer_unlock_commit_regs(buffer, event,
755 irq_flags, pc, regs); 1599 irq_flags, pc, regs);
756} 1600}
757 1601
758/* Kretprobe handler */ 1602/* Kretprobe handler */
@@ -784,8 +1628,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
784 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1628 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
785 1629
786 if (!filter_current_check_discard(buffer, call, entry, event)) 1630 if (!filter_current_check_discard(buffer, call, entry, event))
787 trace_buffer_unlock_commit_regs(buffer, event, 1631 trace_nowake_buffer_unlock_commit_regs(buffer, event,
788 irq_flags, pc, regs); 1632 irq_flags, pc, regs);
789} 1633}
790 1634
791/* Event entry printers */ 1635/* Event entry printers */
@@ -867,6 +1711,16 @@ partial:
867 return TRACE_TYPE_PARTIAL_LINE; 1711 return TRACE_TYPE_PARTIAL_LINE;
868} 1712}
869 1713
1714#undef DEFINE_FIELD
1715#define DEFINE_FIELD(type, item, name, is_signed) \
1716 do { \
1717 ret = trace_define_field(event_call, #type, name, \
1718 offsetof(typeof(field), item), \
1719 sizeof(field.item), is_signed, \
1720 FILTER_OTHER); \
1721 if (ret) \
1722 return ret; \
1723 } while (0)
870 1724
871static int kprobe_event_define_fields(struct ftrace_event_call *event_call) 1725static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
872{ 1726{
@@ -1002,8 +1856,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1002 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1856 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1003 1857
1004 head = this_cpu_ptr(call->perf_events); 1858 head = this_cpu_ptr(call->perf_events);
1005 perf_trace_buf_submit(entry, size, rctx, 1859 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
1006 entry->ip, 1, regs, head, NULL);
1007} 1860}
1008 1861
1009/* Kretprobe profile handler */ 1862/* Kretprobe profile handler */
@@ -1034,14 +1887,12 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1034 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1887 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1035 1888
1036 head = this_cpu_ptr(call->perf_events); 1889 head = this_cpu_ptr(call->perf_events);
1037 perf_trace_buf_submit(entry, size, rctx, 1890 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
1038 entry->ret_ip, 1, regs, head, NULL);
1039} 1891}
1040#endif /* CONFIG_PERF_EVENTS */ 1892#endif /* CONFIG_PERF_EVENTS */
1041 1893
1042static __kprobes 1894static __kprobes
1043int kprobe_register(struct ftrace_event_call *event, 1895int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
1044 enum trace_reg type, void *data)
1045{ 1896{
1046 struct trace_probe *tp = (struct trace_probe *)event->data; 1897 struct trace_probe *tp = (struct trace_probe *)event->data;
1047 1898
@@ -1058,11 +1909,6 @@ int kprobe_register(struct ftrace_event_call *event,
1058 case TRACE_REG_PERF_UNREGISTER: 1909 case TRACE_REG_PERF_UNREGISTER:
1059 disable_trace_probe(tp, TP_FLAG_PROFILE); 1910 disable_trace_probe(tp, TP_FLAG_PROFILE);
1060 return 0; 1911 return 0;
1061 case TRACE_REG_PERF_OPEN:
1062 case TRACE_REG_PERF_CLOSE:
1063 case TRACE_REG_PERF_ADD:
1064 case TRACE_REG_PERF_DEL:
1065 return 0;
1066#endif 1912#endif
1067 } 1913 }
1068 return 0; 1914 return 0;
@@ -1199,9 +2045,8 @@ static __init int kprobe_trace_self_tests_init(void)
1199 2045
1200 pr_info("Testing kprobe tracing: "); 2046 pr_info("Testing kprobe tracing: ");
1201 2047
1202 ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target " 2048 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1203 "$stack $stack0 +0($stack)", 2049 "$stack $stack0 +0($stack)");
1204 create_trace_probe);
1205 if (WARN_ON_ONCE(ret)) { 2050 if (WARN_ON_ONCE(ret)) {
1206 pr_warning("error on probing function entry.\n"); 2051 pr_warning("error on probing function entry.\n");
1207 warn++; 2052 warn++;
@@ -1215,8 +2060,8 @@ static __init int kprobe_trace_self_tests_init(void)
1215 enable_trace_probe(tp, TP_FLAG_TRACE); 2060 enable_trace_probe(tp, TP_FLAG_TRACE);
1216 } 2061 }
1217 2062
1218 ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target " 2063 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1219 "$retval", create_trace_probe); 2064 "$retval");
1220 if (WARN_ON_ONCE(ret)) { 2065 if (WARN_ON_ONCE(ret)) {
1221 pr_warning("error on probing function return.\n"); 2066 pr_warning("error on probing function return.\n");
1222 warn++; 2067 warn++;
@@ -1250,13 +2095,13 @@ static __init int kprobe_trace_self_tests_init(void)
1250 } else 2095 } else
1251 disable_trace_probe(tp, TP_FLAG_TRACE); 2096 disable_trace_probe(tp, TP_FLAG_TRACE);
1252 2097
1253 ret = traceprobe_command("-:testprobe", create_trace_probe); 2098 ret = command_trace_probe("-:testprobe");
1254 if (WARN_ON_ONCE(ret)) { 2099 if (WARN_ON_ONCE(ret)) {
1255 pr_warning("error on deleting a probe.\n"); 2100 pr_warning("error on deleting a probe.\n");
1256 warn++; 2101 warn++;
1257 } 2102 }
1258 2103
1259 ret = traceprobe_command("-:testprobe2", create_trace_probe); 2104 ret = command_trace_probe("-:testprobe2");
1260 if (WARN_ON_ONCE(ret)) { 2105 if (WARN_ON_ONCE(ret)) {
1261 pr_warning("error on deleting a probe.\n"); 2106 pr_warning("error on deleting a probe.\n");
1262 warn++; 2107 warn++;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 194d79602dc..51999309a6c 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -264,7 +264,7 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
264 return ret; 264 return ret;
265} 265}
266 266
267int trace_seq_path(struct trace_seq *s, const struct path *path) 267int trace_seq_path(struct trace_seq *s, struct path *path)
268{ 268{
269 unsigned char *p; 269 unsigned char *p;
270 270
@@ -300,7 +300,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
300 unsigned long mask; 300 unsigned long mask;
301 const char *str; 301 const char *str;
302 const char *ret = p->buffer + p->len; 302 const char *ret = p->buffer + p->len;
303 int i, first = 1; 303 int i;
304 304
305 for (i = 0; flag_array[i].name && flags; i++) { 305 for (i = 0; flag_array[i].name && flags; i++) {
306 306
@@ -310,16 +310,14 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
310 310
311 str = flag_array[i].name; 311 str = flag_array[i].name;
312 flags &= ~mask; 312 flags &= ~mask;
313 if (!first && delim) 313 if (p->len && delim)
314 trace_seq_puts(p, delim); 314 trace_seq_puts(p, delim);
315 else
316 first = 0;
317 trace_seq_puts(p, str); 315 trace_seq_puts(p, str);
318 } 316 }
319 317
320 /* check for left over flags */ 318 /* check for left over flags */
321 if (flags) { 319 if (flags) {
322 if (!first && delim) 320 if (p->len && delim)
323 trace_seq_puts(p, delim); 321 trace_seq_puts(p, delim);
324 trace_seq_printf(p, "0x%lx", flags); 322 trace_seq_printf(p, "0x%lx", flags);
325 } 323 }
@@ -346,7 +344,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
346 break; 344 break;
347 } 345 }
348 346
349 if (ret == (const char *)(p->buffer + p->len)) 347 if (!p->len)
350 trace_seq_printf(p, "0x%lx", val); 348 trace_seq_printf(p, "0x%lx", val);
351 349
352 trace_seq_putc(p, 0); 350 trace_seq_putc(p, 0);
@@ -372,7 +370,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
372 break; 370 break;
373 } 371 }
374 372
375 if (ret == (const char *)(p->buffer + p->len)) 373 if (!p->len)
376 trace_seq_printf(p, "0x%llx", val); 374 trace_seq_printf(p, "0x%llx", val);
377 375
378 trace_seq_putc(p, 0); 376 trace_seq_putc(p, 0);
@@ -610,113 +608,68 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
610 return trace_print_lat_fmt(s, entry); 608 return trace_print_lat_fmt(s, entry);
611} 609}
612 610
613static unsigned long preempt_mark_thresh_us = 100; 611static unsigned long preempt_mark_thresh = 100;
614 612
615static int 613static int
616lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) 614lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
615 unsigned long rel_usecs)
617{ 616{
618 unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE; 617 return trace_seq_printf(s, " %4lldus%c: ", abs_usecs,
619 unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS; 618 rel_usecs > preempt_mark_thresh ? '!' :
620 unsigned long long abs_ts = iter->ts - iter->tr->time_start; 619 rel_usecs > 1 ? '+' : ' ');
621 unsigned long long rel_ts = next_ts - iter->ts;
622 struct trace_seq *s = &iter->seq;
623
624 if (in_ns) {
625 abs_ts = ns2usecs(abs_ts);
626 rel_ts = ns2usecs(rel_ts);
627 }
628
629 if (verbose && in_ns) {
630 unsigned long abs_usec = do_div(abs_ts, USEC_PER_MSEC);
631 unsigned long abs_msec = (unsigned long)abs_ts;
632 unsigned long rel_usec = do_div(rel_ts, USEC_PER_MSEC);
633 unsigned long rel_msec = (unsigned long)rel_ts;
634
635 return trace_seq_printf(
636 s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ",
637 ns2usecs(iter->ts),
638 abs_msec, abs_usec,
639 rel_msec, rel_usec);
640 } else if (verbose && !in_ns) {
641 return trace_seq_printf(
642 s, "[%016llx] %lld (+%lld): ",
643 iter->ts, abs_ts, rel_ts);
644 } else if (!verbose && in_ns) {
645 return trace_seq_printf(
646 s, " %4lldus%c: ",
647 abs_ts,
648 rel_ts > preempt_mark_thresh_us ? '!' :
649 rel_ts > 1 ? '+' : ' ');
650 } else { /* !verbose && !in_ns */
651 return trace_seq_printf(s, " %4lld: ", abs_ts);
652 }
653} 620}
654 621
655int trace_print_context(struct trace_iterator *iter) 622int trace_print_context(struct trace_iterator *iter)
656{ 623{
657 struct trace_seq *s = &iter->seq; 624 struct trace_seq *s = &iter->seq;
658 struct trace_entry *entry = iter->ent; 625 struct trace_entry *entry = iter->ent;
659 unsigned long long t; 626 unsigned long long t = ns2usecs(iter->ts);
660 unsigned long secs, usec_rem; 627 unsigned long usec_rem = do_div(t, USEC_PER_SEC);
628 unsigned long secs = (unsigned long)t;
661 char comm[TASK_COMM_LEN]; 629 char comm[TASK_COMM_LEN];
662 int ret;
663 630
664 trace_find_cmdline(entry->pid, comm); 631 trace_find_cmdline(entry->pid, comm);
665 632
666 ret = trace_seq_printf(s, "%16s-%-5d [%03d] ", 633 return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ",
667 comm, entry->pid, iter->cpu); 634 comm, entry->pid, iter->cpu, secs, usec_rem);
668 if (!ret)
669 return 0;
670
671 if (trace_flags & TRACE_ITER_IRQ_INFO) {
672 ret = trace_print_lat_fmt(s, entry);
673 if (!ret)
674 return 0;
675 }
676
677 if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) {
678 t = ns2usecs(iter->ts);
679 usec_rem = do_div(t, USEC_PER_SEC);
680 secs = (unsigned long)t;
681 return trace_seq_printf(s, " %5lu.%06lu: ", secs, usec_rem);
682 } else
683 return trace_seq_printf(s, " %12llu: ", iter->ts);
684} 635}
685 636
686int trace_print_lat_context(struct trace_iterator *iter) 637int trace_print_lat_context(struct trace_iterator *iter)
687{ 638{
688 u64 next_ts; 639 u64 next_ts;
689 int ret; 640 int ret;
690 /* trace_find_next_entry will reset ent_size */
691 int ent_size = iter->ent_size;
692 struct trace_seq *s = &iter->seq; 641 struct trace_seq *s = &iter->seq;
693 struct trace_entry *entry = iter->ent, 642 struct trace_entry *entry = iter->ent,
694 *next_entry = trace_find_next_entry(iter, NULL, 643 *next_entry = trace_find_next_entry(iter, NULL,
695 &next_ts); 644 &next_ts);
696 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); 645 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
697 646 unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
698 /* Restore the original ent_size */ 647 unsigned long rel_usecs;
699 iter->ent_size = ent_size;
700 648
701 if (!next_entry) 649 if (!next_entry)
702 next_ts = iter->ts; 650 next_ts = iter->ts;
651 rel_usecs = ns2usecs(next_ts - iter->ts);
703 652
704 if (verbose) { 653 if (verbose) {
705 char comm[TASK_COMM_LEN]; 654 char comm[TASK_COMM_LEN];
706 655
707 trace_find_cmdline(entry->pid, comm); 656 trace_find_cmdline(entry->pid, comm);
708 657
709 ret = trace_seq_printf( 658 ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]"
710 s, "%16s %5d %3d %d %08x %08lx ", 659 " %ld.%03ldms (+%ld.%03ldms): ", comm,
711 comm, entry->pid, iter->cpu, entry->flags, 660 entry->pid, iter->cpu, entry->flags,
712 entry->preempt_count, iter->idx); 661 entry->preempt_count, iter->idx,
662 ns2usecs(iter->ts),
663 abs_usecs / USEC_PER_MSEC,
664 abs_usecs % USEC_PER_MSEC,
665 rel_usecs / USEC_PER_MSEC,
666 rel_usecs % USEC_PER_MSEC);
713 } else { 667 } else {
714 ret = lat_print_generic(s, entry, iter->cpu); 668 ret = lat_print_generic(s, entry, iter->cpu);
669 if (ret)
670 ret = lat_print_timestamp(s, abs_usecs, rel_usecs);
715 } 671 }
716 672
717 if (ret)
718 ret = lat_print_timestamp(iter, next_ts);
719
720 return ret; 673 return ret;
721} 674}
722 675
@@ -1353,4 +1306,4 @@ __init static int init_events(void)
1353 1306
1354 return 0; 1307 return 0;
1355} 1308}
1356early_initcall(init_events); 1309device_initcall(init_events);
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index a9077c1b4ad..1f06468a10d 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -51,10 +51,6 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
51 const char **iter; 51 const char **iter;
52 char *fmt; 52 char *fmt;
53 53
54 /* allocate the trace_printk per cpu buffers */
55 if (start != end)
56 trace_printk_init_buffers();
57
58 mutex_lock(&btrace_mutex); 54 mutex_lock(&btrace_mutex);
59 for (iter = start; iter < end; iter++) { 55 for (iter = start; iter < end; iter++) {
60 struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter); 56 struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
@@ -63,19 +59,18 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
63 continue; 59 continue;
64 } 60 }
65 61
66 fmt = NULL;
67 tb_fmt = kmalloc(sizeof(*tb_fmt), GFP_KERNEL); 62 tb_fmt = kmalloc(sizeof(*tb_fmt), GFP_KERNEL);
68 if (tb_fmt) { 63 if (tb_fmt)
69 fmt = kmalloc(strlen(*iter) + 1, GFP_KERNEL); 64 fmt = kmalloc(strlen(*iter) + 1, GFP_KERNEL);
70 if (fmt) { 65 if (tb_fmt && fmt) {
71 list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list); 66 list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
72 strcpy(fmt, *iter); 67 strcpy(fmt, *iter);
73 tb_fmt->fmt = fmt; 68 tb_fmt->fmt = fmt;
74 } else 69 *iter = tb_fmt->fmt;
75 kfree(tb_fmt); 70 } else {
71 kfree(tb_fmt);
72 *iter = NULL;
76 } 73 }
77 *iter = fmt;
78
79 } 74 }
80 mutex_unlock(&btrace_mutex); 75 mutex_unlock(&btrace_mutex);
81} 76}
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
deleted file mode 100644
index 412e959709b..00000000000
--- a/kernel/trace/trace_probe.c
+++ /dev/null
@@ -1,839 +0,0 @@
1/*
2 * Common code for probe-based Dynamic events.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 *
17 * This code was copied from kernel/trace/trace_kprobe.c written by
18 * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
19 *
20 * Updates to make this generic:
21 * Copyright (C) IBM Corporation, 2010-2011
22 * Author: Srikar Dronamraju
23 */
24
25#include "trace_probe.h"
26
27const char *reserved_field_names[] = {
28 "common_type",
29 "common_flags",
30 "common_preempt_count",
31 "common_pid",
32 "common_tgid",
33 FIELD_STRING_IP,
34 FIELD_STRING_RETIP,
35 FIELD_STRING_FUNC,
36};
37
38/* Printing function type */
39#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
40#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
41
42/* Printing in basic type function template */
43#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
44static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
45 const char *name, \
46 void *data, void *ent)\
47{ \
48 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
49} \
50static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
51
52DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
53DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
54DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
55DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
56DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
57DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
58DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
59DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
60
61static inline void *get_rloc_data(u32 *dl)
62{
63 return (u8 *)dl + get_rloc_offs(*dl);
64}
65
66/* For data_loc conversion */
67static inline void *get_loc_data(u32 *dl, void *ent)
68{
69 return (u8 *)ent + get_rloc_offs(*dl);
70}
71
72/* For defining macros, define string/string_size types */
73typedef u32 string;
74typedef u32 string_size;
75
76/* Print type function for string type */
77static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
78 const char *name,
79 void *data, void *ent)
80{
81 int len = *(u32 *)data >> 16;
82
83 if (!len)
84 return trace_seq_printf(s, " %s=(fault)", name);
85 else
86 return trace_seq_printf(s, " %s=\"%s\"", name,
87 (const char *)get_loc_data(data, ent));
88}
89
90static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
91
92#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
93/*
94 * Define macro for basic types - we don't need to define s* types, because
95 * we have to care only about bitwidth at recording time.
96 */
97#define DEFINE_BASIC_FETCH_FUNCS(method) \
98DEFINE_FETCH_##method(u8) \
99DEFINE_FETCH_##method(u16) \
100DEFINE_FETCH_##method(u32) \
101DEFINE_FETCH_##method(u64)
102
103#define CHECK_FETCH_FUNCS(method, fn) \
104 (((FETCH_FUNC_NAME(method, u8) == fn) || \
105 (FETCH_FUNC_NAME(method, u16) == fn) || \
106 (FETCH_FUNC_NAME(method, u32) == fn) || \
107 (FETCH_FUNC_NAME(method, u64) == fn) || \
108 (FETCH_FUNC_NAME(method, string) == fn) || \
109 (FETCH_FUNC_NAME(method, string_size) == fn)) \
110 && (fn != NULL))
111
112/* Data fetch function templates */
113#define DEFINE_FETCH_reg(type) \
114static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
115 void *offset, void *dest) \
116{ \
117 *(type *)dest = (type)regs_get_register(regs, \
118 (unsigned int)((unsigned long)offset)); \
119}
120DEFINE_BASIC_FETCH_FUNCS(reg)
121/* No string on the register */
122#define fetch_reg_string NULL
123#define fetch_reg_string_size NULL
124
125#define DEFINE_FETCH_stack(type) \
126static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
127 void *offset, void *dest) \
128{ \
129 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
130 (unsigned int)((unsigned long)offset)); \
131}
132DEFINE_BASIC_FETCH_FUNCS(stack)
133/* No string on the stack entry */
134#define fetch_stack_string NULL
135#define fetch_stack_string_size NULL
136
137#define DEFINE_FETCH_retval(type) \
138static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
139 void *dummy, void *dest) \
140{ \
141 *(type *)dest = (type)regs_return_value(regs); \
142}
143DEFINE_BASIC_FETCH_FUNCS(retval)
144/* No string on the retval */
145#define fetch_retval_string NULL
146#define fetch_retval_string_size NULL
147
148#define DEFINE_FETCH_memory(type) \
149static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
150 void *addr, void *dest) \
151{ \
152 type retval; \
153 if (probe_kernel_address(addr, retval)) \
154 *(type *)dest = 0; \
155 else \
156 *(type *)dest = retval; \
157}
158DEFINE_BASIC_FETCH_FUNCS(memory)
159/*
160 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
161 * length and relative data location.
162 */
163static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
164 void *addr, void *dest)
165{
166 long ret;
167 int maxlen = get_rloc_len(*(u32 *)dest);
168 u8 *dst = get_rloc_data(dest);
169 u8 *src = addr;
170 mm_segment_t old_fs = get_fs();
171
172 if (!maxlen)
173 return;
174
175 /*
176 * Try to get string again, since the string can be changed while
177 * probing.
178 */
179 set_fs(KERNEL_DS);
180 pagefault_disable();
181
182 do
183 ret = __copy_from_user_inatomic(dst++, src++, 1);
184 while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
185
186 dst[-1] = '\0';
187 pagefault_enable();
188 set_fs(old_fs);
189
190 if (ret < 0) { /* Failed to fetch string */
191 ((u8 *)get_rloc_data(dest))[0] = '\0';
192 *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
193 } else {
194 *(u32 *)dest = make_data_rloc(src - (u8 *)addr,
195 get_rloc_offs(*(u32 *)dest));
196 }
197}
198
199/* Return the length of string -- including null terminal byte */
200static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
201 void *addr, void *dest)
202{
203 mm_segment_t old_fs;
204 int ret, len = 0;
205 u8 c;
206
207 old_fs = get_fs();
208 set_fs(KERNEL_DS);
209 pagefault_disable();
210
211 do {
212 ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
213 len++;
214 } while (c && ret == 0 && len < MAX_STRING_SIZE);
215
216 pagefault_enable();
217 set_fs(old_fs);
218
219 if (ret < 0) /* Failed to check the length */
220 *(u32 *)dest = 0;
221 else
222 *(u32 *)dest = len;
223}
224
225/* Memory fetching by symbol */
226struct symbol_cache {
227 char *symbol;
228 long offset;
229 unsigned long addr;
230};
231
232static unsigned long update_symbol_cache(struct symbol_cache *sc)
233{
234 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
235
236 if (sc->addr)
237 sc->addr += sc->offset;
238
239 return sc->addr;
240}
241
242static void free_symbol_cache(struct symbol_cache *sc)
243{
244 kfree(sc->symbol);
245 kfree(sc);
246}
247
248static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
249{
250 struct symbol_cache *sc;
251
252 if (!sym || strlen(sym) == 0)
253 return NULL;
254
255 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
256 if (!sc)
257 return NULL;
258
259 sc->symbol = kstrdup(sym, GFP_KERNEL);
260 if (!sc->symbol) {
261 kfree(sc);
262 return NULL;
263 }
264 sc->offset = offset;
265 update_symbol_cache(sc);
266
267 return sc;
268}
269
270#define DEFINE_FETCH_symbol(type) \
271static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
272 void *data, void *dest) \
273{ \
274 struct symbol_cache *sc = data; \
275 if (sc->addr) \
276 fetch_memory_##type(regs, (void *)sc->addr, dest); \
277 else \
278 *(type *)dest = 0; \
279}
280DEFINE_BASIC_FETCH_FUNCS(symbol)
281DEFINE_FETCH_symbol(string)
282DEFINE_FETCH_symbol(string_size)
283
284/* Dereference memory access function */
285struct deref_fetch_param {
286 struct fetch_param orig;
287 long offset;
288};
289
290#define DEFINE_FETCH_deref(type) \
291static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
292 void *data, void *dest) \
293{ \
294 struct deref_fetch_param *dprm = data; \
295 unsigned long addr; \
296 call_fetch(&dprm->orig, regs, &addr); \
297 if (addr) { \
298 addr += dprm->offset; \
299 fetch_memory_##type(regs, (void *)addr, dest); \
300 } else \
301 *(type *)dest = 0; \
302}
303DEFINE_BASIC_FETCH_FUNCS(deref)
304DEFINE_FETCH_deref(string)
305DEFINE_FETCH_deref(string_size)
306
307static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
308{
309 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
310 update_deref_fetch_param(data->orig.data);
311 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
312 update_symbol_cache(data->orig.data);
313}
314
315static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
316{
317 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
318 free_deref_fetch_param(data->orig.data);
319 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
320 free_symbol_cache(data->orig.data);
321 kfree(data);
322}
323
324/* Bitfield fetch function */
325struct bitfield_fetch_param {
326 struct fetch_param orig;
327 unsigned char hi_shift;
328 unsigned char low_shift;
329};
330
331#define DEFINE_FETCH_bitfield(type) \
332static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
333 void *data, void *dest) \
334{ \
335 struct bitfield_fetch_param *bprm = data; \
336 type buf = 0; \
337 call_fetch(&bprm->orig, regs, &buf); \
338 if (buf) { \
339 buf <<= bprm->hi_shift; \
340 buf >>= bprm->low_shift; \
341 } \
342 *(type *)dest = buf; \
343}
344
345DEFINE_BASIC_FETCH_FUNCS(bitfield)
346#define fetch_bitfield_string NULL
347#define fetch_bitfield_string_size NULL
348
349static __kprobes void
350update_bitfield_fetch_param(struct bitfield_fetch_param *data)
351{
352 /*
353 * Don't check the bitfield itself, because this must be the
354 * last fetch function.
355 */
356 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
357 update_deref_fetch_param(data->orig.data);
358 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
359 update_symbol_cache(data->orig.data);
360}
361
362static __kprobes void
363free_bitfield_fetch_param(struct bitfield_fetch_param *data)
364{
365 /*
366 * Don't check the bitfield itself, because this must be the
367 * last fetch function.
368 */
369 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
370 free_deref_fetch_param(data->orig.data);
371 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
372 free_symbol_cache(data->orig.data);
373
374 kfree(data);
375}
376
377/* Default (unsigned long) fetch type */
378#define __DEFAULT_FETCH_TYPE(t) u##t
379#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
380#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
381#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
382
383#define ASSIGN_FETCH_FUNC(method, type) \
384 [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
385
386#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
387 {.name = _name, \
388 .size = _size, \
389 .is_signed = sign, \
390 .print = PRINT_TYPE_FUNC_NAME(ptype), \
391 .fmt = PRINT_TYPE_FMT_NAME(ptype), \
392 .fmttype = _fmttype, \
393 .fetch = { \
394ASSIGN_FETCH_FUNC(reg, ftype), \
395ASSIGN_FETCH_FUNC(stack, ftype), \
396ASSIGN_FETCH_FUNC(retval, ftype), \
397ASSIGN_FETCH_FUNC(memory, ftype), \
398ASSIGN_FETCH_FUNC(symbol, ftype), \
399ASSIGN_FETCH_FUNC(deref, ftype), \
400ASSIGN_FETCH_FUNC(bitfield, ftype), \
401 } \
402 }
403
404#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
405 __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
406
407#define FETCH_TYPE_STRING 0
408#define FETCH_TYPE_STRSIZE 1
409
410/* Fetch type information table */
411static const struct fetch_type fetch_type_table[] = {
412 /* Special types */
413 [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
414 sizeof(u32), 1, "__data_loc char[]"),
415 [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
416 string_size, sizeof(u32), 0, "u32"),
417 /* Basic types */
418 ASSIGN_FETCH_TYPE(u8, u8, 0),
419 ASSIGN_FETCH_TYPE(u16, u16, 0),
420 ASSIGN_FETCH_TYPE(u32, u32, 0),
421 ASSIGN_FETCH_TYPE(u64, u64, 0),
422 ASSIGN_FETCH_TYPE(s8, u8, 1),
423 ASSIGN_FETCH_TYPE(s16, u16, 1),
424 ASSIGN_FETCH_TYPE(s32, u32, 1),
425 ASSIGN_FETCH_TYPE(s64, u64, 1),
426};
427
428static const struct fetch_type *find_fetch_type(const char *type)
429{
430 int i;
431
432 if (!type)
433 type = DEFAULT_FETCH_TYPE_STR;
434
435 /* Special case: bitfield */
436 if (*type == 'b') {
437 unsigned long bs;
438
439 type = strchr(type, '/');
440 if (!type)
441 goto fail;
442
443 type++;
444 if (kstrtoul(type, 0, &bs))
445 goto fail;
446
447 switch (bs) {
448 case 8:
449 return find_fetch_type("u8");
450 case 16:
451 return find_fetch_type("u16");
452 case 32:
453 return find_fetch_type("u32");
454 case 64:
455 return find_fetch_type("u64");
456 default:
457 goto fail;
458 }
459 }
460
461 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
462 if (strcmp(type, fetch_type_table[i].name) == 0)
463 return &fetch_type_table[i];
464
465fail:
466 return NULL;
467}
468
469/* Special function : only accept unsigned long */
470static __kprobes void fetch_stack_address(struct pt_regs *regs,
471 void *dummy, void *dest)
472{
473 *(unsigned long *)dest = kernel_stack_pointer(regs);
474}
475
476static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
477 fetch_func_t orig_fn)
478{
479 int i;
480
481 if (type != &fetch_type_table[FETCH_TYPE_STRING])
482 return NULL; /* Only string type needs size function */
483
484 for (i = 0; i < FETCH_MTD_END; i++)
485 if (type->fetch[i] == orig_fn)
486 return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
487
488 WARN_ON(1); /* This should not happen */
489
490 return NULL;
491}
492
493/* Split symbol and offset. */
494int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset)
495{
496 char *tmp;
497 int ret;
498
499 if (!offset)
500 return -EINVAL;
501
502 tmp = strchr(symbol, '+');
503 if (tmp) {
504 /* skip sign because kstrtoul doesn't accept '+' */
505 ret = kstrtoul(tmp + 1, 0, offset);
506 if (ret)
507 return ret;
508
509 *tmp = '\0';
510 } else
511 *offset = 0;
512
513 return 0;
514}
515
/* Highest accepted stack index: one word per slot over the whole stack. */
#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))

/*
 * Parse a "$"-prefixed probe variable (the '$' is already stripped):
 *   retval  - function return value, valid only for return probes
 *   stack   - the stack address itself (default type only)
 *   stackN  - the N-th word on the stack (decimal N <= PARAM_MAX_STACK)
 * On success the fetch method/data are stored in @f; otherwise -EINVAL.
 */
static int parse_probe_vars(char *arg, const struct fetch_type *t,
			struct fetch_param *f, bool is_return)
{
	int ret = 0;
	unsigned long param;

	if (strcmp(arg, "retval") == 0) {
		if (is_return)
			f->fn = t->fetch[FETCH_MTD_retval];
		else
			ret = -EINVAL;	/* $retval needs a return probe */
	} else if (strncmp(arg, "stack", 5) == 0) {
		if (arg[5] == '\0') {
			/* Bare "stack": only the default (long-sized) type
			 * can represent a raw stack address. */
			if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
				f->fn = fetch_stack_address;
			else
				ret = -EINVAL;
		} else if (isdigit(arg[5])) {
			ret = kstrtoul(arg + 5, 10, &param);
			if (ret || param > PARAM_MAX_STACK)
				ret = -EINVAL;
			else {
				f->fn = t->fetch[FETCH_MTD_stack];
				f->data = (void *)param;	/* stack index */
			}
		} else
			ret = -EINVAL;
	} else
		ret = -EINVAL;

	return ret;
}
550
/*
 * Recursive argument parser.
 *
 * Dispatches on the first character of @arg:
 *   $var          probe variable (see parse_probe_vars())
 *   %reg          named CPU register
 *   @addr/@sym+off  absolute memory address or symbol (+offset)
 *   +off(...)/-off(...)  memory dereference at offset from a nested arg
 * Fills @f with the chosen fetch method and its private data.  Returns 0
 * on success or a negative errno.
 */
static int parse_probe_arg(char *arg, const struct fetch_type *t,
		struct fetch_param *f, bool is_return, bool is_kprobe)
{
	unsigned long param;
	long offset;
	char *tmp;
	int ret;

	ret = 0;

	/* Until uprobe_events supports only reg arguments */
	if (!is_kprobe && arg[0] != '%')
		return -EINVAL;

	switch (arg[0]) {
	case '$':
		ret = parse_probe_vars(arg + 1, t, f, is_return);
		break;

	case '%':	/* named register */
		ret = regs_query_register_offset(arg + 1);
		if (ret >= 0) {
			f->fn = t->fetch[FETCH_MTD_reg];
			/* register offset is smuggled through the data ptr */
			f->data = (void *)(unsigned long)ret;
			ret = 0;
		}
		break;

	case '@':	/* memory or symbol */
		if (isdigit(arg[1])) {
			ret = kstrtoul(arg + 1, 0, &param);
			if (ret)
				break;

			f->fn = t->fetch[FETCH_MTD_memory];
			f->data = (void *)param;
		} else {
			ret = traceprobe_split_symbol_offset(arg + 1, &offset);
			if (ret)
				break;

			/* f->fn stays NULL on allocation failure; caught
			 * by the "no fetch method" check below. */
			f->data = alloc_symbol_cache(arg + 1, offset);
			if (f->data)
				f->fn = t->fetch[FETCH_MTD_symbol];
		}
		break;

	case '+':	/* deref memory */
		arg++;	/* Skip '+', because kstrtol() rejects it. */
		/* fallthrough */
	case '-':
		tmp = strchr(arg, '(');
		if (!tmp)
			break;

		*tmp = '\0';
		ret = kstrtol(arg, 0, &offset);

		if (ret)
			break;

		arg = tmp + 1;
		tmp = strrchr(arg, ')');

		if (tmp) {
			struct deref_fetch_param *dprm;
			const struct fetch_type *t2;

			/* Inner fetch always uses the default type: it
			 * produces the pointer to dereference. */
			t2 = find_fetch_type(NULL);
			*tmp = '\0';
			dprm = kzalloc(sizeof(struct deref_fetch_param), GFP_KERNEL);

			if (!dprm)
				return -ENOMEM;

			dprm->offset = offset;
			ret = parse_probe_arg(arg, t2, &dprm->orig, is_return,
					is_kprobe);
			if (ret)
				kfree(dprm);
			else {
				f->fn = t->fetch[FETCH_MTD_deref];
				f->data = (void *)dprm;
			}
		}
		break;
	}
	if (!ret && !f->fn) {	/* Parsed, but do not find fetch method */
		pr_info("%s type has no corresponding fetch method.\n", t->name);
		ret = -EINVAL;
	}

	return ret;
}
645
/* Bytes -> bits: BITS_PER_LONG / sizeof(long) is the bits-per-byte count. */
#define BYTES_TO_BITS(nb)	((BITS_PER_LONG * (nb)) / sizeof(long))

/* Bitfield type needs to be parsed into a fetch function */
static int __parse_bitfield_probe_arg(const char *bf,
				const struct fetch_type *t,
				struct fetch_param *f)
{
	struct bitfield_fetch_param *bprm;
	unsigned long bw, bo;
	char *tail;

	if (*bf != 'b')
		return 0;	/* not a bitfield spec: nothing to do */

	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
	if (!bprm)
		return -ENOMEM;

	/*
	 * Wrap the already-parsed fetch in a bitfield extractor.  bprm is
	 * installed into f->data before validation on purpose: the caller's
	 * normal free path then reclaims it even when we error out below.
	 */
	bprm->orig = *f;
	f->fn = t->fetch[FETCH_MTD_bitfield];
	f->data = (void *)bprm;
	bw = simple_strtoul(bf + 1, &tail, 0);	/* Use simple one */

	if (bw == 0 || *tail != '@')
		return -EINVAL;

	bf = tail + 1;
	bo = simple_strtoul(bf, &tail, 0);

	if (tail == bf || *tail != '/')
		return -EINVAL;

	/* Shift pair that isolates the field: left by hi_shift to drop the
	 * high bits, then right by low_shift to drop the low bits. */
	bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
	bprm->low_shift = bprm->hi_shift + bo;

	/* width + offset must fit inside the container type */
	return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
}
683
/* String length checking wrapper */
int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
		struct probe_arg *parg, bool is_return, bool is_kprobe)
{
	const char *t;
	int ret;

	if (strlen(arg) > MAX_ARGSTR_LEN) {
		pr_info("Argument is too long.: %s\n", arg);
		return -ENOSPC;
	}
	/* Keep a copy of the raw spec for the format file; released by
	 * traceprobe_free_probe_arg() — including on the error paths below. */
	parg->comm = kstrdup(arg, GFP_KERNEL);
	if (!parg->comm) {
		pr_info("Failed to allocate memory for command '%s'.\n", arg);
		return -ENOMEM;
	}
	/* Split an optional ":type" suffix off @arg in place; @t points at
	 * the type name inside the comm copy (or stays NULL). */
	t = strchr(parg->comm, ':');
	if (t) {
		arg[t - parg->comm] = '\0';
		t++;
	}
	parg->type = find_fetch_type(t);	/* NULL t selects the default type */
	if (!parg->type) {
		pr_info("Unsupported type: %s\n", t);
		return -EINVAL;
	}
	/* Reserve this argument's slot in the trace entry payload. */
	parg->offset = *size;
	*size += parg->type->size;
	ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, is_kprobe);

	/* A "b..." type additionally wraps the fetch in a bitfield decoder. */
	if (ret >= 0 && t != NULL)
		ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);

	if (ret >= 0) {
		/* Non-NULL only for string types (see get_fetch_size_function). */
		parg->fetch_size.fn = get_fetch_size_function(parg->type,
							parg->fetch.fn);
		parg->fetch_size.data = parg->fetch.data;
	}

	return ret;
}
725
726/* Return 1 if name is reserved or already used by another argument */
727int traceprobe_conflict_field_name(const char *name,
728 struct probe_arg *args, int narg)
729{
730 int i;
731
732 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
733 if (strcmp(reserved_field_names[i], name) == 0)
734 return 1;
735
736 for (i = 0; i < narg; i++)
737 if (strcmp(args[i].name, name) == 0)
738 return 1;
739
740 return 0;
741}
742
/*
 * Refresh any cached state inside @arg's fetch chain — e.g. re-resolve a
 * cached symbol address.  Dispatches on the fetch function because only
 * these three methods carry mutable private data.
 */
void traceprobe_update_arg(struct probe_arg *arg)
{
	if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
		update_bitfield_fetch_param(arg->fetch.data);
	else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
		update_deref_fetch_param(arg->fetch.data);
	else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
		update_symbol_cache(arg->fetch.data);
}
752
/*
 * Release everything owned by @arg: the method-specific fetch data
 * (dispatched on the fetch function, mirroring traceprobe_update_arg())
 * plus the name/comm strings allocated at parse time.
 */
void traceprobe_free_probe_arg(struct probe_arg *arg)
{
	if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
		free_bitfield_fetch_param(arg->fetch.data);
	else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
		free_deref_fetch_param(arg->fetch.data);
	else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
		free_symbol_cache(arg->fetch.data);

	/* kfree(NULL) is a no-op, so partially-parsed args are fine. */
	kfree(arg->name);
	kfree(arg->comm);
}
765
766int traceprobe_command(const char *buf, int (*createfn)(int, char **))
767{
768 char **argv;
769 int argc, ret;
770
771 argc = 0;
772 ret = 0;
773 argv = argv_split(GFP_KERNEL, buf, &argc);
774 if (!argv)
775 return -ENOMEM;
776
777 if (argc)
778 ret = createfn(argc, argv);
779
780 argv_free(argv);
781
782 return ret;
783}
784
/* Upper bound on one definition line (including the trailing '\0'). */
#define WRITE_BUFSIZE 4096

/*
 * Generic write() handler for probe definition files.  Copies the user
 * buffer in WRITE_BUFSIZE chunks, splits it into lines, strips '#'
 * comments, and feeds each line to @createfn via traceprobe_command().
 * Returns the number of bytes consumed, or a negative errno from the
 * first failing line.
 */
ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer,
			size_t count, loff_t *ppos,
			int (*createfn)(int, char **))
{
	char *kbuf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;	/* leave room for '\0' */

		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		tmp = strchr(kbuf, '\n');

		if (tmp) {
			/* Process one line; resume after the newline. */
			*tmp = '\0';
			size = tmp - kbuf + 1;
		} else if (done + size < count) {
			/* A full chunk with no newline and more input
			 * pending: the line cannot fit in the buffer. */
			pr_warning("Line length is too long: "
				   "Should be less than %d.", WRITE_BUFSIZE);
			ret = -EINVAL;
			goto out;
		}
		done += size;
		/* Remove comments */
		tmp = strchr(kbuf, '#');

		if (tmp)
			*tmp = '\0';

		ret = traceprobe_command(kbuf, createfn);
		if (ret)
			goto out;
	}
	ret = done;

out:
	kfree(kbuf);

	return ret;
}
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
deleted file mode 100644
index 93370867781..00000000000
--- a/kernel/trace/trace_probe.h
+++ /dev/null
@@ -1,161 +0,0 @@
1/*
2 * Common header file for probe-based Dynamic events.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 *
17 * This code was copied from kernel/trace/trace_kprobe.h written by
18 * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
19 *
20 * Updates to make this generic:
21 * Copyright (C) IBM Corporation, 2010-2011
22 * Author: Srikar Dronamraju
23 */
24
25#include <linux/seq_file.h>
26#include <linux/slab.h>
27#include <linux/smp.h>
28#include <linux/debugfs.h>
29#include <linux/types.h>
30#include <linux/string.h>
31#include <linux/ctype.h>
32#include <linux/ptrace.h>
33#include <linux/perf_event.h>
34#include <linux/kprobes.h>
35#include <linux/stringify.h>
36#include <linux/limits.h>
37#include <linux/uaccess.h>
38#include <asm/bitsperlong.h>
39
40#include "trace.h"
41#include "trace_output.h"
42
43#define MAX_TRACE_ARGS 128
44#define MAX_ARGSTR_LEN 63
45#define MAX_EVENT_NAME_LEN 64
46#define MAX_STRING_SIZE PATH_MAX
47
48/* Reserved field names */
49#define FIELD_STRING_IP "__probe_ip"
50#define FIELD_STRING_RETIP "__probe_ret_ip"
51#define FIELD_STRING_FUNC "__probe_func"
52
53#undef DEFINE_FIELD
54#define DEFINE_FIELD(type, item, name, is_signed) \
55 do { \
56 ret = trace_define_field(event_call, #type, name, \
57 offsetof(typeof(field), item), \
58 sizeof(field.item), is_signed, \
59 FILTER_OTHER); \
60 if (ret) \
61 return ret; \
62 } while (0)
63
64
65/* Flags for trace_probe */
66#define TP_FLAG_TRACE 1
67#define TP_FLAG_PROFILE 2
68#define TP_FLAG_REGISTERED 4
69#define TP_FLAG_UPROBE 8
70
71
72/* data_rloc: data relative location, compatible with u32 */
73#define make_data_rloc(len, roffs) \
74 (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
75#define get_rloc_len(dl) ((u32)(dl) >> 16)
76#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
77
78/*
79 * Convert data_rloc to data_loc:
80 * data_rloc stores the offset from data_rloc itself, but data_loc
81 * stores the offset from event entry.
82 */
83#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
84
85/* Data fetch function type */
86typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
87/* Printing function type */
88typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, void *);
89
90/* Fetch types */
91enum {
92 FETCH_MTD_reg = 0,
93 FETCH_MTD_stack,
94 FETCH_MTD_retval,
95 FETCH_MTD_memory,
96 FETCH_MTD_symbol,
97 FETCH_MTD_deref,
98 FETCH_MTD_bitfield,
99 FETCH_MTD_END,
100};
101
/* Fetch type information table */
struct fetch_type {
	const char	*name;		/* Name of type */
	size_t		size;		/* Byte size of type */
	int		is_signed;	/* Signed flag */
	print_type_func_t	print;	/* Print functions */
	const char	*fmt;		/* Format string */
	const char	*fmttype;	/* Name in format file */
	/* Fetch functions, one per FETCH_MTD_* method */
	fetch_func_t	fetch[FETCH_MTD_END];
};
113
114struct fetch_param {
115 fetch_func_t fn;
116 void *data;
117};
118
/* One parsed probe argument: how to fetch it and how to present it. */
struct probe_arg {
	struct fetch_param	fetch;		/* value fetch method + data */
	struct fetch_param	fetch_size;	/* size fetch (string types only) */
	unsigned int		offset;		/* Offset from argument entry */
	const char		*name;		/* Name of this argument */
	const char		*comm;		/* Command of this argument */
	const struct fetch_type	*type;		/* Type of this argument */
};
127
128static inline __kprobes void call_fetch(struct fetch_param *fprm,
129 struct pt_regs *regs, void *dest)
130{
131 return fprm->fn(regs, fprm->data, dest);
132}
133
/*
 * Check the name is usable as an event/group/field identifier: it must
 * match [A-Za-z_][A-Za-z0-9_]*.  Returns 1 when valid, 0 otherwise.
 */
static inline int is_good_name(const char *name)
{
	const char *p = name;

	if (*p != '_' && !isalpha(*p))
		return 0;

	for (p++; *p != '\0'; p++) {
		if (*p != '_' && !isalpha(*p) && !isdigit(*p))
			return 0;
	}

	return 1;
}
145
146extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
147 struct probe_arg *parg, bool is_return, bool is_kprobe);
148
149extern int traceprobe_conflict_field_name(const char *name,
150 struct probe_arg *args, int narg);
151
152extern void traceprobe_update_arg(struct probe_arg *arg);
153extern void traceprobe_free_probe_arg(struct probe_arg *arg);
154
155extern int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset);
156
157extern ssize_t traceprobe_probes_write(struct file *file,
158 const char __user *buffer, size_t count, loff_t *ppos,
159 int (*createfn)(int, char**));
160
161extern int traceprobe_command(const char *buf, int (*createfn)(int, char**));
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 3374c792ccd..7e62c0a1845 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -102,7 +102,9 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
102 entry->next_cpu = task_cpu(wakee); 102 entry->next_cpu = task_cpu(wakee);
103 103
104 if (!filter_check_discard(call, entry, buffer, event)) 104 if (!filter_check_discard(call, entry, buffer, event))
105 trace_buffer_unlock_commit(buffer, event, flags, pc); 105 ring_buffer_unlock_commit(buffer, event);
106 ftrace_trace_stack(tr->buffer, flags, 6, pc);
107 ftrace_trace_userstack(tr->buffer, flags, pc);
106} 108}
107 109
108static void 110static void
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 9fe45fcefca..e4a70c0c71b 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -7,7 +7,7 @@
7 * Based on code from the latency_tracer, that is: 7 * Based on code from the latency_tracer, that is:
8 * 8 *
9 * Copyright (C) 2004-2006 Ingo Molnar 9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 Nadia Yvette Chambers 10 * Copyright (C) 2004 William Lee Irwin III
11 */ 11 */
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/fs.h> 13#include <linux/fs.h>
@@ -108,8 +108,7 @@ out_enable:
108 * wakeup uses its own tracer function to keep the overhead down: 108 * wakeup uses its own tracer function to keep the overhead down:
109 */ 109 */
110static void 110static void
111wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, 111wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
112 struct ftrace_ops *op, struct pt_regs *pt_regs)
113{ 112{
114 struct trace_array *tr = wakeup_trace; 113 struct trace_array *tr = wakeup_trace;
115 struct trace_array_cpu *data; 114 struct trace_array_cpu *data;
@@ -130,7 +129,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
130static struct ftrace_ops trace_ops __read_mostly = 129static struct ftrace_ops trace_ops __read_mostly =
131{ 130{
132 .func = wakeup_tracer_call, 131 .func = wakeup_tracer_call,
133 .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, 132 .flags = FTRACE_OPS_FL_GLOBAL,
134}; 133};
135#endif /* CONFIG_FUNCTION_TRACER */ 134#endif /* CONFIG_FUNCTION_TRACER */
136 135
@@ -281,20 +280,9 @@ static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
281} 280}
282 281
283static void wakeup_graph_return(struct ftrace_graph_ret *trace) { } 282static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
283static void wakeup_print_header(struct seq_file *s) { }
284static void wakeup_trace_open(struct trace_iterator *iter) { } 284static void wakeup_trace_open(struct trace_iterator *iter) { }
285static void wakeup_trace_close(struct trace_iterator *iter) { } 285static void wakeup_trace_close(struct trace_iterator *iter) { }
286
287#ifdef CONFIG_FUNCTION_TRACER
288static void wakeup_print_header(struct seq_file *s)
289{
290 trace_default_header(s);
291}
292#else
293static void wakeup_print_header(struct seq_file *s)
294{
295 trace_latency_header(s);
296}
297#endif /* CONFIG_FUNCTION_TRACER */
298#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 286#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
299 287
300/* 288/*
@@ -589,7 +577,7 @@ static struct tracer wakeup_tracer __read_mostly =
589 .reset = wakeup_tracer_reset, 577 .reset = wakeup_tracer_reset,
590 .start = wakeup_tracer_start, 578 .start = wakeup_tracer_start,
591 .stop = wakeup_tracer_stop, 579 .stop = wakeup_tracer_stop,
592 .print_max = true, 580 .print_max = 1,
593 .print_header = wakeup_print_header, 581 .print_header = wakeup_print_header,
594 .print_line = wakeup_print_line, 582 .print_line = wakeup_print_line,
595 .flags = &tracer_flags, 583 .flags = &tracer_flags,
@@ -599,7 +587,7 @@ static struct tracer wakeup_tracer __read_mostly =
599#endif 587#endif
600 .open = wakeup_trace_open, 588 .open = wakeup_trace_open,
601 .close = wakeup_trace_close, 589 .close = wakeup_trace_close,
602 .use_max_tr = true, 590 .use_max_tr = 1,
603}; 591};
604 592
605static struct tracer wakeup_rt_tracer __read_mostly = 593static struct tracer wakeup_rt_tracer __read_mostly =
@@ -610,7 +598,7 @@ static struct tracer wakeup_rt_tracer __read_mostly =
610 .start = wakeup_tracer_start, 598 .start = wakeup_tracer_start,
611 .stop = wakeup_tracer_stop, 599 .stop = wakeup_tracer_stop,
612 .wait_pipe = poll_wait_pipe, 600 .wait_pipe = poll_wait_pipe,
613 .print_max = true, 601 .print_max = 1,
614 .print_header = wakeup_print_header, 602 .print_header = wakeup_print_header,
615 .print_line = wakeup_print_line, 603 .print_line = wakeup_print_line,
616 .flags = &tracer_flags, 604 .flags = &tracer_flags,
@@ -620,7 +608,7 @@ static struct tracer wakeup_rt_tracer __read_mostly =
620#endif 608#endif
621 .open = wakeup_trace_open, 609 .open = wakeup_trace_open,
622 .close = wakeup_trace_close, 610 .close = wakeup_trace_close,
623 .use_max_tr = true, 611 .use_max_tr = 1,
624}; 612};
625 613
626__init static int init_wakeup_tracer(void) 614__init static int init_wakeup_tracer(void)
@@ -637,4 +625,4 @@ __init static int init_wakeup_tracer(void)
637 625
638 return 0; 626 return 0;
639} 627}
640core_initcall(init_wakeup_tracer); 628device_initcall(init_wakeup_tracer);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 47623169a81..288541f977f 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -103,67 +103,54 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
103 103
104static int trace_selftest_test_probe1_cnt; 104static int trace_selftest_test_probe1_cnt;
105static void trace_selftest_test_probe1_func(unsigned long ip, 105static void trace_selftest_test_probe1_func(unsigned long ip,
106 unsigned long pip, 106 unsigned long pip)
107 struct ftrace_ops *op,
108 struct pt_regs *pt_regs)
109{ 107{
110 trace_selftest_test_probe1_cnt++; 108 trace_selftest_test_probe1_cnt++;
111} 109}
112 110
113static int trace_selftest_test_probe2_cnt; 111static int trace_selftest_test_probe2_cnt;
114static void trace_selftest_test_probe2_func(unsigned long ip, 112static void trace_selftest_test_probe2_func(unsigned long ip,
115 unsigned long pip, 113 unsigned long pip)
116 struct ftrace_ops *op,
117 struct pt_regs *pt_regs)
118{ 114{
119 trace_selftest_test_probe2_cnt++; 115 trace_selftest_test_probe2_cnt++;
120} 116}
121 117
122static int trace_selftest_test_probe3_cnt; 118static int trace_selftest_test_probe3_cnt;
123static void trace_selftest_test_probe3_func(unsigned long ip, 119static void trace_selftest_test_probe3_func(unsigned long ip,
124 unsigned long pip, 120 unsigned long pip)
125 struct ftrace_ops *op,
126 struct pt_regs *pt_regs)
127{ 121{
128 trace_selftest_test_probe3_cnt++; 122 trace_selftest_test_probe3_cnt++;
129} 123}
130 124
131static int trace_selftest_test_global_cnt; 125static int trace_selftest_test_global_cnt;
132static void trace_selftest_test_global_func(unsigned long ip, 126static void trace_selftest_test_global_func(unsigned long ip,
133 unsigned long pip, 127 unsigned long pip)
134 struct ftrace_ops *op,
135 struct pt_regs *pt_regs)
136{ 128{
137 trace_selftest_test_global_cnt++; 129 trace_selftest_test_global_cnt++;
138} 130}
139 131
140static int trace_selftest_test_dyn_cnt; 132static int trace_selftest_test_dyn_cnt;
141static void trace_selftest_test_dyn_func(unsigned long ip, 133static void trace_selftest_test_dyn_func(unsigned long ip,
142 unsigned long pip, 134 unsigned long pip)
143 struct ftrace_ops *op,
144 struct pt_regs *pt_regs)
145{ 135{
146 trace_selftest_test_dyn_cnt++; 136 trace_selftest_test_dyn_cnt++;
147} 137}
148 138
149static struct ftrace_ops test_probe1 = { 139static struct ftrace_ops test_probe1 = {
150 .func = trace_selftest_test_probe1_func, 140 .func = trace_selftest_test_probe1_func,
151 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
152}; 141};
153 142
154static struct ftrace_ops test_probe2 = { 143static struct ftrace_ops test_probe2 = {
155 .func = trace_selftest_test_probe2_func, 144 .func = trace_selftest_test_probe2_func,
156 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
157}; 145};
158 146
159static struct ftrace_ops test_probe3 = { 147static struct ftrace_ops test_probe3 = {
160 .func = trace_selftest_test_probe3_func, 148 .func = trace_selftest_test_probe3_func,
161 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
162}; 149};
163 150
164static struct ftrace_ops test_global = { 151static struct ftrace_ops test_global = {
165 .func = trace_selftest_test_global_func, 152 .func = trace_selftest_test_global_func,
166 .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, 153 .flags = FTRACE_OPS_FL_GLOBAL,
167}; 154};
168 155
169static void print_counts(void) 156static void print_counts(void)
@@ -320,6 +307,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
320 int (*func)(void)) 307 int (*func)(void))
321{ 308{
322 int save_ftrace_enabled = ftrace_enabled; 309 int save_ftrace_enabled = ftrace_enabled;
310 int save_tracer_enabled = tracer_enabled;
323 unsigned long count; 311 unsigned long count;
324 char *func_name; 312 char *func_name;
325 int ret; 313 int ret;
@@ -330,6 +318,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
330 318
331 /* enable tracing, and record the filter function */ 319 /* enable tracing, and record the filter function */
332 ftrace_enabled = 1; 320 ftrace_enabled = 1;
321 tracer_enabled = 1;
333 322
334 /* passed in by parameter to fool gcc from optimizing */ 323 /* passed in by parameter to fool gcc from optimizing */
335 func(); 324 func();
@@ -393,6 +382,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
393 382
394 out: 383 out:
395 ftrace_enabled = save_ftrace_enabled; 384 ftrace_enabled = save_ftrace_enabled;
385 tracer_enabled = save_tracer_enabled;
396 386
397 /* Enable tracing on all functions again */ 387 /* Enable tracing on all functions again */
398 ftrace_set_global_filter(NULL, 0, 1); 388 ftrace_set_global_filter(NULL, 0, 1);
@@ -403,247 +393,10 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
403 393
404 return ret; 394 return ret;
405} 395}
406
407static int trace_selftest_recursion_cnt;
408static void trace_selftest_test_recursion_func(unsigned long ip,
409 unsigned long pip,
410 struct ftrace_ops *op,
411 struct pt_regs *pt_regs)
412{
413 /*
414 * This function is registered without the recursion safe flag.
415 * The ftrace infrastructure should provide the recursion
416 * protection. If not, this will crash the kernel!
417 */
418 trace_selftest_recursion_cnt++;
419 DYN_FTRACE_TEST_NAME();
420}
421
422static void trace_selftest_test_recursion_safe_func(unsigned long ip,
423 unsigned long pip,
424 struct ftrace_ops *op,
425 struct pt_regs *pt_regs)
426{
427 /*
428 * We said we would provide our own recursion. By calling
429 * this function again, we should recurse back into this function
430 * and count again. But this only happens if the arch supports
431 * all of ftrace features and nothing else is using the function
432 * tracing utility.
433 */
434 if (trace_selftest_recursion_cnt++)
435 return;
436 DYN_FTRACE_TEST_NAME();
437}
438
439static struct ftrace_ops test_rec_probe = {
440 .func = trace_selftest_test_recursion_func,
441};
442
443static struct ftrace_ops test_recsafe_probe = {
444 .func = trace_selftest_test_recursion_safe_func,
445 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
446};
447
448static int
449trace_selftest_function_recursion(void)
450{
451 int save_ftrace_enabled = ftrace_enabled;
452 char *func_name;
453 int len;
454 int ret;
455 int cnt;
456
457 /* The previous test PASSED */
458 pr_cont("PASSED\n");
459 pr_info("Testing ftrace recursion: ");
460
461
462 /* enable tracing, and record the filter function */
463 ftrace_enabled = 1;
464
465 /* Handle PPC64 '.' name */
466 func_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
467 len = strlen(func_name);
468
469 ret = ftrace_set_filter(&test_rec_probe, func_name, len, 1);
470 if (ret) {
471 pr_cont("*Could not set filter* ");
472 goto out;
473 }
474
475 ret = register_ftrace_function(&test_rec_probe);
476 if (ret) {
477 pr_cont("*could not register callback* ");
478 goto out;
479 }
480
481 DYN_FTRACE_TEST_NAME();
482
483 unregister_ftrace_function(&test_rec_probe);
484
485 ret = -1;
486 if (trace_selftest_recursion_cnt != 1) {
487 pr_cont("*callback not called once (%d)* ",
488 trace_selftest_recursion_cnt);
489 goto out;
490 }
491
492 trace_selftest_recursion_cnt = 1;
493
494 pr_cont("PASSED\n");
495 pr_info("Testing ftrace recursion safe: ");
496
497 ret = ftrace_set_filter(&test_recsafe_probe, func_name, len, 1);
498 if (ret) {
499 pr_cont("*Could not set filter* ");
500 goto out;
501 }
502
503 ret = register_ftrace_function(&test_recsafe_probe);
504 if (ret) {
505 pr_cont("*could not register callback* ");
506 goto out;
507 }
508
509 DYN_FTRACE_TEST_NAME();
510
511 unregister_ftrace_function(&test_recsafe_probe);
512
513 /*
514 * If arch supports all ftrace features, and no other task
515 * was on the list, we should be fine.
516 */
517 if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC)
518 cnt = 2; /* Should have recursed */
519 else
520 cnt = 1;
521
522 ret = -1;
523 if (trace_selftest_recursion_cnt != cnt) {
524 pr_cont("*callback not called expected %d times (%d)* ",
525 cnt, trace_selftest_recursion_cnt);
526 goto out;
527 }
528
529 ret = 0;
530out:
531 ftrace_enabled = save_ftrace_enabled;
532
533 return ret;
534}
535#else 396#else
536# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; }) 397# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
537# define trace_selftest_function_recursion() ({ 0; })
538#endif /* CONFIG_DYNAMIC_FTRACE */ 398#endif /* CONFIG_DYNAMIC_FTRACE */
539 399
540static enum {
541 TRACE_SELFTEST_REGS_START,
542 TRACE_SELFTEST_REGS_FOUND,
543 TRACE_SELFTEST_REGS_NOT_FOUND,
544} trace_selftest_regs_stat;
545
546static void trace_selftest_test_regs_func(unsigned long ip,
547 unsigned long pip,
548 struct ftrace_ops *op,
549 struct pt_regs *pt_regs)
550{
551 if (pt_regs)
552 trace_selftest_regs_stat = TRACE_SELFTEST_REGS_FOUND;
553 else
554 trace_selftest_regs_stat = TRACE_SELFTEST_REGS_NOT_FOUND;
555}
556
557static struct ftrace_ops test_regs_probe = {
558 .func = trace_selftest_test_regs_func,
559 .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_SAVE_REGS,
560};
561
562static int
563trace_selftest_function_regs(void)
564{
565 int save_ftrace_enabled = ftrace_enabled;
566 char *func_name;
567 int len;
568 int ret;
569 int supported = 0;
570
571#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
572 supported = 1;
573#endif
574
575 /* The previous test PASSED */
576 pr_cont("PASSED\n");
577 pr_info("Testing ftrace regs%s: ",
578 !supported ? "(no arch support)" : "");
579
580 /* enable tracing, and record the filter function */
581 ftrace_enabled = 1;
582
583 /* Handle PPC64 '.' name */
584 func_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
585 len = strlen(func_name);
586
587 ret = ftrace_set_filter(&test_regs_probe, func_name, len, 1);
588 /*
589 * If DYNAMIC_FTRACE is not set, then we just trace all functions.
590 * This test really doesn't care.
591 */
592 if (ret && ret != -ENODEV) {
593 pr_cont("*Could not set filter* ");
594 goto out;
595 }
596
597 ret = register_ftrace_function(&test_regs_probe);
598 /*
599 * Now if the arch does not support passing regs, then this should
600 * have failed.
601 */
602 if (!supported) {
603 if (!ret) {
604 pr_cont("*registered save-regs without arch support* ");
605 goto out;
606 }
607 test_regs_probe.flags |= FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED;
608 ret = register_ftrace_function(&test_regs_probe);
609 }
610 if (ret) {
611 pr_cont("*could not register callback* ");
612 goto out;
613 }
614
615
616 DYN_FTRACE_TEST_NAME();
617
618 unregister_ftrace_function(&test_regs_probe);
619
620 ret = -1;
621
622 switch (trace_selftest_regs_stat) {
623 case TRACE_SELFTEST_REGS_START:
624 pr_cont("*callback never called* ");
625 goto out;
626
627 case TRACE_SELFTEST_REGS_FOUND:
628 if (supported)
629 break;
630 pr_cont("*callback received regs without arch support* ");
631 goto out;
632
633 case TRACE_SELFTEST_REGS_NOT_FOUND:
634 if (!supported)
635 break;
636 pr_cont("*callback received NULL regs* ");
637 goto out;
638 }
639
640 ret = 0;
641out:
642 ftrace_enabled = save_ftrace_enabled;
643
644 return ret;
645}
646
647/* 400/*
648 * Simple verification test of ftrace function tracer. 401 * Simple verification test of ftrace function tracer.
649 * Enable ftrace, sleep 1/10 second, and then read the trace 402 * Enable ftrace, sleep 1/10 second, and then read the trace
@@ -653,6 +406,7 @@ int
653trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) 406trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
654{ 407{
655 int save_ftrace_enabled = ftrace_enabled; 408 int save_ftrace_enabled = ftrace_enabled;
409 int save_tracer_enabled = tracer_enabled;
656 unsigned long count; 410 unsigned long count;
657 int ret; 411 int ret;
658 412
@@ -661,6 +415,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
661 415
662 /* start the tracing */ 416 /* start the tracing */
663 ftrace_enabled = 1; 417 ftrace_enabled = 1;
418 tracer_enabled = 1;
664 419
665 ret = tracer_init(trace, tr); 420 ret = tracer_init(trace, tr);
666 if (ret) { 421 if (ret) {
@@ -687,16 +442,10 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
687 442
688 ret = trace_selftest_startup_dynamic_tracing(trace, tr, 443 ret = trace_selftest_startup_dynamic_tracing(trace, tr,
689 DYN_FTRACE_TEST_NAME); 444 DYN_FTRACE_TEST_NAME);
690 if (ret)
691 goto out;
692
693 ret = trace_selftest_function_recursion();
694 if (ret)
695 goto out;
696 445
697 ret = trace_selftest_function_regs();
698 out: 446 out:
699 ftrace_enabled = save_ftrace_enabled; 447 ftrace_enabled = save_ftrace_enabled;
448 tracer_enabled = save_tracer_enabled;
700 449
701 /* kill ftrace totally if we failed */ 450 /* kill ftrace totally if we failed */
702 if (ret) 451 if (ret)
@@ -1029,8 +778,6 @@ static int trace_wakeup_test_thread(void *data)
1029 set_current_state(TASK_INTERRUPTIBLE); 778 set_current_state(TASK_INTERRUPTIBLE);
1030 schedule(); 779 schedule();
1031 780
1032 complete(x);
1033
1034 /* we are awake, now wait to disappear */ 781 /* we are awake, now wait to disappear */
1035 while (!kthread_should_stop()) { 782 while (!kthread_should_stop()) {
1036 /* 783 /*
@@ -1074,27 +821,29 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
1074 /* reset the max latency */ 821 /* reset the max latency */
1075 tracing_max_latency = 0; 822 tracing_max_latency = 0;
1076 823
1077 while (p->on_rq) { 824 /* sleep to let the RT thread sleep too */
1078 /* 825 msleep(100);
1079 * Sleep to make sure the RT thread is asleep too.
1080 * On virtual machines we can't rely on timings,
1081 * but we want to make sure this test still works.
1082 */
1083 msleep(100);
1084 }
1085 826
1086 init_completion(&isrt); 827 /*
828 * Yes this is slightly racy. It is possible that for some
829 * strange reason that the RT thread we created, did not
830 * call schedule for 100ms after doing the completion,
831 * and we do a wakeup on a task that already is awake.
832 * But that is extremely unlikely, and the worst thing that
833 * happens in such a case, is that we disable tracing.
834 * Honestly, if this race does happen something is horrible
835 * wrong with the system.
836 */
1087 837
1088 wake_up_process(p); 838 wake_up_process(p);
1089 839
1090 /* Wait for the task to wake up */ 840 /* give a little time to let the thread wake up */
1091 wait_for_completion(&isrt); 841 msleep(100);
1092 842
1093 /* stop the tracing. */ 843 /* stop the tracing. */
1094 tracing_stop(); 844 tracing_stop();
1095 /* check both trace buffers */ 845 /* check both trace buffers */
1096 ret = trace_test_buffer(tr, NULL); 846 ret = trace_test_buffer(tr, NULL);
1097 printk("ret = %d\n", ret);
1098 if (!ret) 847 if (!ret)
1099 ret = trace_test_buffer(&max_tr, &count); 848 ret = trace_test_buffer(&max_tr, &count);
1100 849
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 42ca822fc70..77575b386d9 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -13,9 +13,6 @@
13#include <linux/sysctl.h> 13#include <linux/sysctl.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/fs.h> 15#include <linux/fs.h>
16
17#include <asm/setup.h>
18
19#include "trace.h" 16#include "trace.h"
20 17
21#define STACK_TRACE_ENTRIES 500 18#define STACK_TRACE_ENTRIES 500
@@ -33,6 +30,7 @@ static unsigned long max_stack_size;
33static arch_spinlock_t max_stack_lock = 30static arch_spinlock_t max_stack_lock =
34 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 31 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
35 32
33static int stack_trace_disabled __read_mostly;
36static DEFINE_PER_CPU(int, trace_active); 34static DEFINE_PER_CPU(int, trace_active);
37static DEFINE_MUTEX(stack_sysctl_mutex); 35static DEFINE_MUTEX(stack_sysctl_mutex);
38 36
@@ -110,11 +108,13 @@ static inline void check_stack(void)
110} 108}
111 109
112static void 110static void
113stack_trace_call(unsigned long ip, unsigned long parent_ip, 111stack_trace_call(unsigned long ip, unsigned long parent_ip)
114 struct ftrace_ops *op, struct pt_regs *pt_regs)
115{ 112{
116 int cpu; 113 int cpu;
117 114
115 if (unlikely(!ftrace_enabled || stack_trace_disabled))
116 return;
117
118 preempt_disable_notrace(); 118 preempt_disable_notrace();
119 119
120 cpu = raw_smp_processor_id(); 120 cpu = raw_smp_processor_id();
@@ -133,7 +133,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
133static struct ftrace_ops trace_ops __read_mostly = 133static struct ftrace_ops trace_ops __read_mostly =
134{ 134{
135 .func = stack_trace_call, 135 .func = stack_trace_call,
136 .flags = FTRACE_OPS_FL_RECURSION_SAFE, 136 .flags = FTRACE_OPS_FL_GLOBAL,
137}; 137};
138 138
139static ssize_t 139static ssize_t
@@ -311,21 +311,6 @@ static const struct file_operations stack_trace_fops = {
311 .release = seq_release, 311 .release = seq_release,
312}; 312};
313 313
314static int
315stack_trace_filter_open(struct inode *inode, struct file *file)
316{
317 return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
318 inode, file);
319}
320
321static const struct file_operations stack_trace_filter_fops = {
322 .open = stack_trace_filter_open,
323 .read = seq_read,
324 .write = ftrace_filter_write,
325 .llseek = ftrace_regex_lseek,
326 .release = ftrace_regex_release,
327};
328
329int 314int
330stack_trace_sysctl(struct ctl_table *table, int write, 315stack_trace_sysctl(struct ctl_table *table, int write,
331 void __user *buffer, size_t *lenp, 316 void __user *buffer, size_t *lenp,
@@ -353,13 +338,8 @@ stack_trace_sysctl(struct ctl_table *table, int write,
353 return ret; 338 return ret;
354} 339}
355 340
356static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;
357
358static __init int enable_stacktrace(char *str) 341static __init int enable_stacktrace(char *str)
359{ 342{
360 if (strncmp(str, "_filter=", 8) == 0)
361 strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);
362
363 stack_tracer_enabled = 1; 343 stack_tracer_enabled = 1;
364 last_stack_tracer_enabled = 1; 344 last_stack_tracer_enabled = 1;
365 return 1; 345 return 1;
@@ -378,12 +358,6 @@ static __init int stack_trace_init(void)
378 trace_create_file("stack_trace", 0444, d_tracer, 358 trace_create_file("stack_trace", 0444, d_tracer,
379 NULL, &stack_trace_fops); 359 NULL, &stack_trace_fops);
380 360
381 trace_create_file("stack_trace_filter", 0444, d_tracer,
382 NULL, &stack_trace_filter_fops);
383
384 if (stack_trace_filter_buf[0])
385 ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
386
387 if (stack_tracer_enabled) 361 if (stack_tracer_enabled)
388 register_ftrace_function(&trace_ops); 362 register_ftrace_function(&trace_ops);
389 363
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 7609dd6714c..ee7b5a0bb9f 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -2,7 +2,6 @@
2#include <trace/events/syscalls.h> 2#include <trace/events/syscalls.h>
3#include <linux/slab.h> 3#include <linux/slab.h>
4#include <linux/kernel.h> 4#include <linux/kernel.h>
5#include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
6#include <linux/ftrace.h> 5#include <linux/ftrace.h>
7#include <linux/perf_event.h> 6#include <linux/perf_event.h>
8#include <asm/syscall.h> 7#include <asm/syscall.h>
@@ -17,9 +16,12 @@ static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
17static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 16static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
18 17
19static int syscall_enter_register(struct ftrace_event_call *event, 18static int syscall_enter_register(struct ftrace_event_call *event,
20 enum trace_reg type, void *data); 19 enum trace_reg type);
21static int syscall_exit_register(struct ftrace_event_call *event, 20static int syscall_exit_register(struct ftrace_event_call *event,
22 enum trace_reg type, void *data); 21 enum trace_reg type);
22
23static int syscall_enter_define_fields(struct ftrace_event_call *call);
24static int syscall_exit_define_fields(struct ftrace_event_call *call);
23 25
24static struct list_head * 26static struct list_head *
25syscall_get_enter_fields(struct ftrace_event_call *call) 27syscall_get_enter_fields(struct ftrace_event_call *call)
@@ -29,6 +31,30 @@ syscall_get_enter_fields(struct ftrace_event_call *call)
29 return &entry->enter_fields; 31 return &entry->enter_fields;
30} 32}
31 33
34struct trace_event_functions enter_syscall_print_funcs = {
35 .trace = print_syscall_enter,
36};
37
38struct trace_event_functions exit_syscall_print_funcs = {
39 .trace = print_syscall_exit,
40};
41
42struct ftrace_event_class event_class_syscall_enter = {
43 .system = "syscalls",
44 .reg = syscall_enter_register,
45 .define_fields = syscall_enter_define_fields,
46 .get_fields = syscall_get_enter_fields,
47 .raw_init = init_syscall_trace,
48};
49
50struct ftrace_event_class event_class_syscall_exit = {
51 .system = "syscalls",
52 .reg = syscall_exit_register,
53 .define_fields = syscall_exit_define_fields,
54 .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
55 .raw_init = init_syscall_trace,
56};
57
32extern struct syscall_metadata *__start_syscalls_metadata[]; 58extern struct syscall_metadata *__start_syscalls_metadata[];
33extern struct syscall_metadata *__stop_syscalls_metadata[]; 59extern struct syscall_metadata *__stop_syscalls_metadata[];
34 60
@@ -405,7 +431,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
405 mutex_unlock(&syscall_trace_lock); 431 mutex_unlock(&syscall_trace_lock);
406} 432}
407 433
408static int init_syscall_trace(struct ftrace_event_call *call) 434int init_syscall_trace(struct ftrace_event_call *call)
409{ 435{
410 int id; 436 int id;
411 int num; 437 int num;
@@ -430,30 +456,6 @@ static int init_syscall_trace(struct ftrace_event_call *call)
430 return id; 456 return id;
431} 457}
432 458
433struct trace_event_functions enter_syscall_print_funcs = {
434 .trace = print_syscall_enter,
435};
436
437struct trace_event_functions exit_syscall_print_funcs = {
438 .trace = print_syscall_exit,
439};
440
441struct ftrace_event_class event_class_syscall_enter = {
442 .system = "syscalls",
443 .reg = syscall_enter_register,
444 .define_fields = syscall_enter_define_fields,
445 .get_fields = syscall_get_enter_fields,
446 .raw_init = init_syscall_trace,
447};
448
449struct ftrace_event_class event_class_syscall_exit = {
450 .system = "syscalls",
451 .reg = syscall_exit_register,
452 .define_fields = syscall_exit_define_fields,
453 .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
454 .raw_init = init_syscall_trace,
455};
456
457unsigned long __init __weak arch_syscall_addr(int nr) 459unsigned long __init __weak arch_syscall_addr(int nr)
458{ 460{
459 return (unsigned long)sys_call_table[nr]; 461 return (unsigned long)sys_call_table[nr];
@@ -465,8 +467,8 @@ int __init init_ftrace_syscalls(void)
465 unsigned long addr; 467 unsigned long addr;
466 int i; 468 int i;
467 469
468 syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata), 470 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
469 GFP_KERNEL); 471 NR_syscalls, GFP_KERNEL);
470 if (!syscalls_metadata) { 472 if (!syscalls_metadata) {
471 WARN_ON(1); 473 WARN_ON(1);
472 return -ENOMEM; 474 return -ENOMEM;
@@ -484,7 +486,7 @@ int __init init_ftrace_syscalls(void)
484 486
485 return 0; 487 return 0;
486} 488}
487early_initcall(init_ftrace_syscalls); 489core_initcall(init_ftrace_syscalls);
488 490
489#ifdef CONFIG_PERF_EVENTS 491#ifdef CONFIG_PERF_EVENTS
490 492
@@ -503,8 +505,6 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
503 int size; 505 int size;
504 506
505 syscall_nr = syscall_get_nr(current, regs); 507 syscall_nr = syscall_get_nr(current, regs);
506 if (syscall_nr < 0)
507 return;
508 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) 508 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
509 return; 509 return;
510 510
@@ -531,10 +531,10 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
531 (unsigned long *)&rec->args); 531 (unsigned long *)&rec->args);
532 532
533 head = this_cpu_ptr(sys_data->enter_event->perf_events); 533 head = this_cpu_ptr(sys_data->enter_event->perf_events);
534 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); 534 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
535} 535}
536 536
537static int perf_sysenter_enable(struct ftrace_event_call *call) 537int perf_sysenter_enable(struct ftrace_event_call *call)
538{ 538{
539 int ret = 0; 539 int ret = 0;
540 int num; 540 int num;
@@ -555,7 +555,7 @@ static int perf_sysenter_enable(struct ftrace_event_call *call)
555 return ret; 555 return ret;
556} 556}
557 557
558static void perf_sysenter_disable(struct ftrace_event_call *call) 558void perf_sysenter_disable(struct ftrace_event_call *call)
559{ 559{
560 int num; 560 int num;
561 561
@@ -579,8 +579,6 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
579 int size; 579 int size;
580 580
581 syscall_nr = syscall_get_nr(current, regs); 581 syscall_nr = syscall_get_nr(current, regs);
582 if (syscall_nr < 0)
583 return;
584 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) 582 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
585 return; 583 return;
586 584
@@ -609,10 +607,10 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
609 rec->ret = syscall_get_return_value(current, regs); 607 rec->ret = syscall_get_return_value(current, regs);
610 608
611 head = this_cpu_ptr(sys_data->exit_event->perf_events); 609 head = this_cpu_ptr(sys_data->exit_event->perf_events);
612 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); 610 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
613} 611}
614 612
615static int perf_sysexit_enable(struct ftrace_event_call *call) 613int perf_sysexit_enable(struct ftrace_event_call *call)
616{ 614{
617 int ret = 0; 615 int ret = 0;
618 int num; 616 int num;
@@ -633,7 +631,7 @@ static int perf_sysexit_enable(struct ftrace_event_call *call)
633 return ret; 631 return ret;
634} 632}
635 633
636static void perf_sysexit_disable(struct ftrace_event_call *call) 634void perf_sysexit_disable(struct ftrace_event_call *call)
637{ 635{
638 int num; 636 int num;
639 637
@@ -650,7 +648,7 @@ static void perf_sysexit_disable(struct ftrace_event_call *call)
650#endif /* CONFIG_PERF_EVENTS */ 648#endif /* CONFIG_PERF_EVENTS */
651 649
652static int syscall_enter_register(struct ftrace_event_call *event, 650static int syscall_enter_register(struct ftrace_event_call *event,
653 enum trace_reg type, void *data) 651 enum trace_reg type)
654{ 652{
655 switch (type) { 653 switch (type) {
656 case TRACE_REG_REGISTER: 654 case TRACE_REG_REGISTER:
@@ -665,18 +663,13 @@ static int syscall_enter_register(struct ftrace_event_call *event,
665 case TRACE_REG_PERF_UNREGISTER: 663 case TRACE_REG_PERF_UNREGISTER:
666 perf_sysenter_disable(event); 664 perf_sysenter_disable(event);
667 return 0; 665 return 0;
668 case TRACE_REG_PERF_OPEN:
669 case TRACE_REG_PERF_CLOSE:
670 case TRACE_REG_PERF_ADD:
671 case TRACE_REG_PERF_DEL:
672 return 0;
673#endif 666#endif
674 } 667 }
675 return 0; 668 return 0;
676} 669}
677 670
678static int syscall_exit_register(struct ftrace_event_call *event, 671static int syscall_exit_register(struct ftrace_event_call *event,
679 enum trace_reg type, void *data) 672 enum trace_reg type)
680{ 673{
681 switch (type) { 674 switch (type) {
682 case TRACE_REG_REGISTER: 675 case TRACE_REG_REGISTER:
@@ -691,11 +684,6 @@ static int syscall_exit_register(struct ftrace_event_call *event,
691 case TRACE_REG_PERF_UNREGISTER: 684 case TRACE_REG_PERF_UNREGISTER:
692 perf_sysexit_disable(event); 685 perf_sysexit_disable(event);
693 return 0; 686 return 0;
694 case TRACE_REG_PERF_OPEN:
695 case TRACE_REG_PERF_CLOSE:
696 case TRACE_REG_PERF_ADD:
697 case TRACE_REG_PERF_DEL:
698 return 0;
699#endif 687#endif
700 } 688 }
701 return 0; 689 return 0;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
deleted file mode 100644
index c86e6d4f67f..00000000000
--- a/kernel/trace/trace_uprobe.c
+++ /dev/null
@@ -1,788 +0,0 @@
1/*
2 * uprobes-based tracing events
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 *
17 * Copyright (C) IBM Corporation, 2010-2012
18 * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
19 */
20
21#include <linux/module.h>
22#include <linux/uaccess.h>
23#include <linux/uprobes.h>
24#include <linux/namei.h>
25#include <linux/string.h>
26
27#include "trace_probe.h"
28
29#define UPROBE_EVENT_SYSTEM "uprobes"
30
31/*
32 * uprobe event core functions
33 */
34struct trace_uprobe;
35struct uprobe_trace_consumer {
36 struct uprobe_consumer cons;
37 struct trace_uprobe *tu;
38};
39
40struct trace_uprobe {
41 struct list_head list;
42 struct ftrace_event_class class;
43 struct ftrace_event_call call;
44 struct uprobe_trace_consumer *consumer;
45 struct inode *inode;
46 char *filename;
47 unsigned long offset;
48 unsigned long nhit;
49 unsigned int flags; /* For TP_FLAG_* */
50 ssize_t size; /* trace entry size */
51 unsigned int nr_args;
52 struct probe_arg args[];
53};
54
55#define SIZEOF_TRACE_UPROBE(n) \
56 (offsetof(struct trace_uprobe, args) + \
57 (sizeof(struct probe_arg) * (n)))
58
59static int register_uprobe_event(struct trace_uprobe *tu);
60static void unregister_uprobe_event(struct trace_uprobe *tu);
61
62static DEFINE_MUTEX(uprobe_lock);
63static LIST_HEAD(uprobe_list);
64
65static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
66
67/*
68 * Allocate new trace_uprobe and initialize it (including uprobes).
69 */
70static struct trace_uprobe *
71alloc_trace_uprobe(const char *group, const char *event, int nargs)
72{
73 struct trace_uprobe *tu;
74
75 if (!event || !is_good_name(event))
76 return ERR_PTR(-EINVAL);
77
78 if (!group || !is_good_name(group))
79 return ERR_PTR(-EINVAL);
80
81 tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL);
82 if (!tu)
83 return ERR_PTR(-ENOMEM);
84
85 tu->call.class = &tu->class;
86 tu->call.name = kstrdup(event, GFP_KERNEL);
87 if (!tu->call.name)
88 goto error;
89
90 tu->class.system = kstrdup(group, GFP_KERNEL);
91 if (!tu->class.system)
92 goto error;
93
94 INIT_LIST_HEAD(&tu->list);
95 return tu;
96
97error:
98 kfree(tu->call.name);
99 kfree(tu);
100
101 return ERR_PTR(-ENOMEM);
102}
103
104static void free_trace_uprobe(struct trace_uprobe *tu)
105{
106 int i;
107
108 for (i = 0; i < tu->nr_args; i++)
109 traceprobe_free_probe_arg(&tu->args[i]);
110
111 iput(tu->inode);
112 kfree(tu->call.class->system);
113 kfree(tu->call.name);
114 kfree(tu->filename);
115 kfree(tu);
116}
117
118static struct trace_uprobe *find_probe_event(const char *event, const char *group)
119{
120 struct trace_uprobe *tu;
121
122 list_for_each_entry(tu, &uprobe_list, list)
123 if (strcmp(tu->call.name, event) == 0 &&
124 strcmp(tu->call.class->system, group) == 0)
125 return tu;
126
127 return NULL;
128}
129
130/* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */
131static void unregister_trace_uprobe(struct trace_uprobe *tu)
132{
133 list_del(&tu->list);
134 unregister_uprobe_event(tu);
135 free_trace_uprobe(tu);
136}
137
138/* Register a trace_uprobe and probe_event */
139static int register_trace_uprobe(struct trace_uprobe *tu)
140{
141 struct trace_uprobe *old_tp;
142 int ret;
143
144 mutex_lock(&uprobe_lock);
145
146 /* register as an event */
147 old_tp = find_probe_event(tu->call.name, tu->call.class->system);
148 if (old_tp)
149 /* delete old event */
150 unregister_trace_uprobe(old_tp);
151
152 ret = register_uprobe_event(tu);
153 if (ret) {
154 pr_warning("Failed to register probe event(%d)\n", ret);
155 goto end;
156 }
157
158 list_add_tail(&tu->list, &uprobe_list);
159
160end:
161 mutex_unlock(&uprobe_lock);
162
163 return ret;
164}
165
166/*
167 * Argument syntax:
168 * - Add uprobe: p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS]
169 *
170 * - Remove uprobe: -:[GRP/]EVENT
171 */
172static int create_trace_uprobe(int argc, char **argv)
173{
174 struct trace_uprobe *tu;
175 struct inode *inode;
176 char *arg, *event, *group, *filename;
177 char buf[MAX_EVENT_NAME_LEN];
178 struct path path;
179 unsigned long offset;
180 bool is_delete;
181 int i, ret;
182
183 inode = NULL;
184 ret = 0;
185 is_delete = false;
186 event = NULL;
187 group = NULL;
188
189 /* argc must be >= 1 */
190 if (argv[0][0] == '-')
191 is_delete = true;
192 else if (argv[0][0] != 'p') {
193 pr_info("Probe definition must be started with 'p' or '-'.\n");
194 return -EINVAL;
195 }
196
197 if (argv[0][1] == ':') {
198 event = &argv[0][2];
199 arg = strchr(event, '/');
200
201 if (arg) {
202 group = event;
203 event = arg + 1;
204 event[-1] = '\0';
205
206 if (strlen(group) == 0) {
207 pr_info("Group name is not specified\n");
208 return -EINVAL;
209 }
210 }
211 if (strlen(event) == 0) {
212 pr_info("Event name is not specified\n");
213 return -EINVAL;
214 }
215 }
216 if (!group)
217 group = UPROBE_EVENT_SYSTEM;
218
219 if (is_delete) {
220 if (!event) {
221 pr_info("Delete command needs an event name.\n");
222 return -EINVAL;
223 }
224 mutex_lock(&uprobe_lock);
225 tu = find_probe_event(event, group);
226
227 if (!tu) {
228 mutex_unlock(&uprobe_lock);
229 pr_info("Event %s/%s doesn't exist.\n", group, event);
230 return -ENOENT;
231 }
232 /* delete an event */
233 unregister_trace_uprobe(tu);
234 mutex_unlock(&uprobe_lock);
235 return 0;
236 }
237
238 if (argc < 2) {
239 pr_info("Probe point is not specified.\n");
240 return -EINVAL;
241 }
242 if (isdigit(argv[1][0])) {
243 pr_info("probe point must be have a filename.\n");
244 return -EINVAL;
245 }
246 arg = strchr(argv[1], ':');
247 if (!arg)
248 goto fail_address_parse;
249
250 *arg++ = '\0';
251 filename = argv[1];
252 ret = kern_path(filename, LOOKUP_FOLLOW, &path);
253 if (ret)
254 goto fail_address_parse;
255
256 ret = kstrtoul(arg, 0, &offset);
257 if (ret)
258 goto fail_address_parse;
259
260 inode = igrab(path.dentry->d_inode);
261
262 argc -= 2;
263 argv += 2;
264
265 /* setup a probe */
266 if (!event) {
267 char *tail;
268 char *ptr;
269
270 tail = kstrdup(kbasename(filename), GFP_KERNEL);
271 if (!tail) {
272 ret = -ENOMEM;
273 goto fail_address_parse;
274 }
275
276 ptr = strpbrk(tail, ".-_");
277 if (ptr)
278 *ptr = '\0';
279
280 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset);
281 event = buf;
282 kfree(tail);
283 }
284
285 tu = alloc_trace_uprobe(group, event, argc);
286 if (IS_ERR(tu)) {
287 pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
288 ret = PTR_ERR(tu);
289 goto fail_address_parse;
290 }
291 tu->offset = offset;
292 tu->inode = inode;
293 tu->filename = kstrdup(filename, GFP_KERNEL);
294
295 if (!tu->filename) {
296 pr_info("Failed to allocate filename.\n");
297 ret = -ENOMEM;
298 goto error;
299 }
300
301 /* parse arguments */
302 ret = 0;
303 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
304 /* Increment count for freeing args in error case */
305 tu->nr_args++;
306
307 /* Parse argument name */
308 arg = strchr(argv[i], '=');
309 if (arg) {
310 *arg++ = '\0';
311 tu->args[i].name = kstrdup(argv[i], GFP_KERNEL);
312 } else {
313 arg = argv[i];
314 /* If argument name is omitted, set "argN" */
315 snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
316 tu->args[i].name = kstrdup(buf, GFP_KERNEL);
317 }
318
319 if (!tu->args[i].name) {
320 pr_info("Failed to allocate argument[%d] name.\n", i);
321 ret = -ENOMEM;
322 goto error;
323 }
324
325 if (!is_good_name(tu->args[i].name)) {
326 pr_info("Invalid argument[%d] name: %s\n", i, tu->args[i].name);
327 ret = -EINVAL;
328 goto error;
329 }
330
331 if (traceprobe_conflict_field_name(tu->args[i].name, tu->args, i)) {
332 pr_info("Argument[%d] name '%s' conflicts with "
333 "another field.\n", i, argv[i]);
334 ret = -EINVAL;
335 goto error;
336 }
337
338 /* Parse fetch argument */
339 ret = traceprobe_parse_probe_arg(arg, &tu->size, &tu->args[i], false, false);
340 if (ret) {
341 pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
342 goto error;
343 }
344 }
345
346 ret = register_trace_uprobe(tu);
347 if (ret)
348 goto error;
349 return 0;
350
351error:
352 free_trace_uprobe(tu);
353 return ret;
354
355fail_address_parse:
356 if (inode)
357 iput(inode);
358
359 pr_info("Failed to parse address.\n");
360
361 return ret;
362}
363
364static void cleanup_all_probes(void)
365{
366 struct trace_uprobe *tu;
367
368 mutex_lock(&uprobe_lock);
369 while (!list_empty(&uprobe_list)) {
370 tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
371 unregister_trace_uprobe(tu);
372 }
373 mutex_unlock(&uprobe_lock);
374}
375
376/* Probes listing interfaces */
377static void *probes_seq_start(struct seq_file *m, loff_t *pos)
378{
379 mutex_lock(&uprobe_lock);
380 return seq_list_start(&uprobe_list, *pos);
381}
382
383static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
384{
385 return seq_list_next(v, &uprobe_list, pos);
386}
387
388static void probes_seq_stop(struct seq_file *m, void *v)
389{
390 mutex_unlock(&uprobe_lock);
391}
392
393static int probes_seq_show(struct seq_file *m, void *v)
394{
395 struct trace_uprobe *tu = v;
396 int i;
397
398 seq_printf(m, "p:%s/%s", tu->call.class->system, tu->call.name);
399 seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset);
400
401 for (i = 0; i < tu->nr_args; i++)
402 seq_printf(m, " %s=%s", tu->args[i].name, tu->args[i].comm);
403
404 seq_printf(m, "\n");
405 return 0;
406}
407
408static const struct seq_operations probes_seq_op = {
409 .start = probes_seq_start,
410 .next = probes_seq_next,
411 .stop = probes_seq_stop,
412 .show = probes_seq_show
413};
414
415static int probes_open(struct inode *inode, struct file *file)
416{
417 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
418 cleanup_all_probes();
419
420 return seq_open(file, &probes_seq_op);
421}
422
423static ssize_t probes_write(struct file *file, const char __user *buffer,
424 size_t count, loff_t *ppos)
425{
426 return traceprobe_probes_write(file, buffer, count, ppos, create_trace_uprobe);
427}
428
429static const struct file_operations uprobe_events_ops = {
430 .owner = THIS_MODULE,
431 .open = probes_open,
432 .read = seq_read,
433 .llseek = seq_lseek,
434 .release = seq_release,
435 .write = probes_write,
436};
437
438/* Probes profiling interfaces */
439static int probes_profile_seq_show(struct seq_file *m, void *v)
440{
441 struct trace_uprobe *tu = v;
442
443 seq_printf(m, " %s %-44s %15lu\n", tu->filename, tu->call.name, tu->nhit);
444 return 0;
445}
446
447static const struct seq_operations profile_seq_op = {
448 .start = probes_seq_start,
449 .next = probes_seq_next,
450 .stop = probes_seq_stop,
451 .show = probes_profile_seq_show
452};
453
454static int profile_open(struct inode *inode, struct file *file)
455{
456 return seq_open(file, &profile_seq_op);
457}
458
459static const struct file_operations uprobe_profile_ops = {
460 .owner = THIS_MODULE,
461 .open = profile_open,
462 .read = seq_read,
463 .llseek = seq_lseek,
464 .release = seq_release,
465};
466
467/* uprobe handler */
468static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
469{
470 struct uprobe_trace_entry_head *entry;
471 struct ring_buffer_event *event;
472 struct ring_buffer *buffer;
473 u8 *data;
474 int size, i, pc;
475 unsigned long irq_flags;
476 struct ftrace_event_call *call = &tu->call;
477
478 tu->nhit++;
479
480 local_save_flags(irq_flags);
481 pc = preempt_count();
482
483 size = sizeof(*entry) + tu->size;
484
485 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
486 size, irq_flags, pc);
487 if (!event)
488 return;
489
490 entry = ring_buffer_event_data(event);
491 entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
492 data = (u8 *)&entry[1];
493 for (i = 0; i < tu->nr_args; i++)
494 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
495
496 if (!filter_current_check_discard(buffer, call, entry, event))
497 trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
498}
499
500/* Event entry printers */
501static enum print_line_t
502print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
503{
504 struct uprobe_trace_entry_head *field;
505 struct trace_seq *s = &iter->seq;
506 struct trace_uprobe *tu;
507 u8 *data;
508 int i;
509
510 field = (struct uprobe_trace_entry_head *)iter->ent;
511 tu = container_of(event, struct trace_uprobe, call.event);
512
513 if (!trace_seq_printf(s, "%s: (", tu->call.name))
514 goto partial;
515
516 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
517 goto partial;
518
519 if (!trace_seq_puts(s, ")"))
520 goto partial;
521
522 data = (u8 *)&field[1];
523 for (i = 0; i < tu->nr_args; i++) {
524 if (!tu->args[i].type->print(s, tu->args[i].name,
525 data + tu->args[i].offset, field))
526 goto partial;
527 }
528
529 if (trace_seq_puts(s, "\n"))
530 return TRACE_TYPE_HANDLED;
531
532partial:
533 return TRACE_TYPE_PARTIAL_LINE;
534}
535
536static int probe_event_enable(struct trace_uprobe *tu, int flag)
537{
538 struct uprobe_trace_consumer *utc;
539 int ret = 0;
540
541 if (!tu->inode || tu->consumer)
542 return -EINTR;
543
544 utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL);
545 if (!utc)
546 return -EINTR;
547
548 utc->cons.handler = uprobe_dispatcher;
549 utc->cons.filter = NULL;
550 ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
551 if (ret) {
552 kfree(utc);
553 return ret;
554 }
555
556 tu->flags |= flag;
557 utc->tu = tu;
558 tu->consumer = utc;
559
560 return 0;
561}
562
563static void probe_event_disable(struct trace_uprobe *tu, int flag)
564{
565 if (!tu->inode || !tu->consumer)
566 return;
567
568 uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons);
569 tu->flags &= ~flag;
570 kfree(tu->consumer);
571 tu->consumer = NULL;
572}
573
574static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
575{
576 int ret, i;
577 struct uprobe_trace_entry_head field;
578 struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data;
579
580 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
581 /* Set argument names as fields */
582 for (i = 0; i < tu->nr_args; i++) {
583 ret = trace_define_field(event_call, tu->args[i].type->fmttype,
584 tu->args[i].name,
585 sizeof(field) + tu->args[i].offset,
586 tu->args[i].type->size,
587 tu->args[i].type->is_signed,
588 FILTER_OTHER);
589
590 if (ret)
591 return ret;
592 }
593 return 0;
594}
595
596#define LEN_OR_ZERO (len ? len - pos : 0)
597static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len)
598{
599 const char *fmt, *arg;
600 int i;
601 int pos = 0;
602
603 fmt = "(%lx)";
604 arg = "REC->" FIELD_STRING_IP;
605
606 /* When len=0, we just calculate the needed length */
607
608 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
609
610 for (i = 0; i < tu->nr_args; i++) {
611 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
612 tu->args[i].name, tu->args[i].type->fmt);
613 }
614
615 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
616
617 for (i = 0; i < tu->nr_args; i++) {
618 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
619 tu->args[i].name);
620 }
621
622 return pos; /* return the length of print_fmt */
623}
624#undef LEN_OR_ZERO
625
626static int set_print_fmt(struct trace_uprobe *tu)
627{
628 char *print_fmt;
629 int len;
630
631 /* First: called with 0 length to calculate the needed length */
632 len = __set_print_fmt(tu, NULL, 0);
633 print_fmt = kmalloc(len + 1, GFP_KERNEL);
634 if (!print_fmt)
635 return -ENOMEM;
636
637 /* Second: actually write the @print_fmt */
638 __set_print_fmt(tu, print_fmt, len + 1);
639 tu->call.print_fmt = print_fmt;
640
641 return 0;
642}
643
#ifdef CONFIG_PERF_EVENTS
/*
 * uprobe profile handler: record one hit of @tu into the per-CPU perf
 * trace buffer for every perf event attached to this probe.
 *
 * Runs from the uprobe breakpoint handler; preemption is disabled
 * explicitly around the per-CPU buffer usage.
 */
static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
{
	struct ftrace_event_call *call = &tu->call;
	struct uprobe_trace_entry_head *entry;
	struct hlist_head *head;
	u8 *data;
	int size, __size, i;
	int rctx;

	/*
	 * Entry header plus the fetched argument payload, rounded up so
	 * the record (with its u32 size word) stays u64-aligned.
	 */
	__size = sizeof(*entry) + tu->size;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
		return;

	preempt_disable();

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		goto out;

	/* Breakpoint address, taken from the task's saved registers. */
	entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
	data = (u8 *)&entry[1];
	/* Fetch each probe argument into its slot after the header. */
	for (i = 0; i < tu->nr_args; i++)
		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);

	head = this_cpu_ptr(call->perf_events);
	perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL);

 out:
	preempt_enable();
}
#endif	/* CONFIG_PERF_EVENTS */
679
680static
681int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
682{
683 struct trace_uprobe *tu = (struct trace_uprobe *)event->data;
684
685 switch (type) {
686 case TRACE_REG_REGISTER:
687 return probe_event_enable(tu, TP_FLAG_TRACE);
688
689 case TRACE_REG_UNREGISTER:
690 probe_event_disable(tu, TP_FLAG_TRACE);
691 return 0;
692
693#ifdef CONFIG_PERF_EVENTS
694 case TRACE_REG_PERF_REGISTER:
695 return probe_event_enable(tu, TP_FLAG_PROFILE);
696
697 case TRACE_REG_PERF_UNREGISTER:
698 probe_event_disable(tu, TP_FLAG_PROFILE);
699 return 0;
700#endif
701 default:
702 return 0;
703 }
704 return 0;
705}
706
707static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
708{
709 struct uprobe_trace_consumer *utc;
710 struct trace_uprobe *tu;
711
712 utc = container_of(con, struct uprobe_trace_consumer, cons);
713 tu = utc->tu;
714 if (!tu || tu->consumer != utc)
715 return 0;
716
717 if (tu->flags & TP_FLAG_TRACE)
718 uprobe_trace_func(tu, regs);
719
720#ifdef CONFIG_PERF_EVENTS
721 if (tu->flags & TP_FLAG_PROFILE)
722 uprobe_perf_func(tu, regs);
723#endif
724 return 0;
725}
726
/* Output callbacks for uprobe events; only textual trace output is set. */
static struct trace_event_functions uprobe_funcs = {
	.trace = print_uprobe_event
};
730
731static int register_uprobe_event(struct trace_uprobe *tu)
732{
733 struct ftrace_event_call *call = &tu->call;
734 int ret;
735
736 /* Initialize ftrace_event_call */
737 INIT_LIST_HEAD(&call->class->fields);
738 call->event.funcs = &uprobe_funcs;
739 call->class->define_fields = uprobe_event_define_fields;
740
741 if (set_print_fmt(tu) < 0)
742 return -ENOMEM;
743
744 ret = register_ftrace_event(&call->event);
745 if (!ret) {
746 kfree(call->print_fmt);
747 return -ENODEV;
748 }
749 call->flags = 0;
750 call->class->reg = trace_uprobe_register;
751 call->data = tu;
752 ret = trace_add_event_call(call);
753
754 if (ret) {
755 pr_info("Failed to register uprobe event: %s\n", call->name);
756 kfree(call->print_fmt);
757 unregister_ftrace_event(&call->event);
758 }
759
760 return ret;
761}
762
763static void unregister_uprobe_event(struct trace_uprobe *tu)
764{
765 /* tu->event is unregistered in trace_remove_event_call() */
766 trace_remove_event_call(&tu->call);
767 kfree(tu->call.print_fmt);
768 tu->call.print_fmt = NULL;
769}
770
771/* Make a trace interface for controling probe points */
772static __init int init_uprobe_trace(void)
773{
774 struct dentry *d_tracer;
775
776 d_tracer = tracing_init_dentry();
777 if (!d_tracer)
778 return 0;
779
780 trace_create_file("uprobe_events", 0644, d_tracer,
781 NULL, &uprobe_events_ops);
782 /* Profile interface */
783 trace_create_file("uprobe_profile", 0444, d_tracer,
784 NULL, &uprobe_profile_ops);
785 return 0;
786}
787
788fs_initcall(init_uprobe_trace);