Diffstat (limited to 'kernel')
-rw-r--r--  kernel/extable.c                      |    4
-rw-r--r--  kernel/irq/handle.c                   |    6
-rw-r--r--  kernel/module.c                       |    2
-rw-r--r--  kernel/relay.c                        |    4
-rw-r--r--  kernel/sched.c                        |    8
-rw-r--r--  kernel/sched_clock.c                  |   53
-rw-r--r--  kernel/softirq.c                      |   13
-rw-r--r--  kernel/trace/Kconfig                  |   81
-rw-r--r--  kernel/trace/Makefile                 |    8
-rw-r--r--  kernel/trace/blktrace.c               | 1538
-rw-r--r--  kernel/trace/events.c                 |   17
-rw-r--r--  kernel/trace/ftrace.c                 | 1016
-rw-r--r--  kernel/trace/kmemtrace.c              |  339
-rw-r--r--  kernel/trace/ring_buffer.c            |  173
-rw-r--r--  kernel/trace/trace.c                  | 2181
-rw-r--r--  kernel/trace/trace.h                  |  175
-rw-r--r--  kernel/trace/trace_boot.c             |   36
-rw-r--r--  kernel/trace/trace_branch.c           |  279
-rw-r--r--  kernel/trace/trace_clock.c            |  101
-rw-r--r--  kernel/trace/trace_events.c           |  731
-rw-r--r--  kernel/trace/trace_events_stage_1.h   |   36
-rw-r--r--  kernel/trace/trace_events_stage_2.h   |  130
-rw-r--r--  kernel/trace/trace_events_stage_3.h   |  235
-rw-r--r--  kernel/trace/trace_functions.c        |  369
-rw-r--r--  kernel/trace/trace_functions_graph.c  |  474
-rw-r--r--  kernel/trace/trace_hw_branches.c      |  185
-rw-r--r--  kernel/trace/trace_irqsoff.c          |   46
-rw-r--r--  kernel/trace/trace_mmiotrace.c        |   41
-rw-r--r--  kernel/trace/trace_nop.c              |    5
-rw-r--r--  kernel/trace/trace_output.c           |  919
-rw-r--r--  kernel/trace/trace_output.h           |   62
-rw-r--r--  kernel/trace/trace_power.c            |  188
-rw-r--r--  kernel/trace/trace_sched_switch.c     |   15
-rw-r--r--  kernel/trace/trace_sched_wakeup.c     |   88
-rw-r--r--  kernel/trace/trace_selftest.c         |   85
-rw-r--r--  kernel/trace/trace_stat.c             |  319
-rw-r--r--  kernel/trace/trace_stat.h             |   31
-rw-r--r--  kernel/trace/trace_sysprof.c          |   21
-rw-r--r--  kernel/trace/trace_workqueue.c        |  281
-rw-r--r--  kernel/workqueue.c                    |   16
40 files changed, 8240 insertions(+), 2071 deletions(-)
diff --git a/kernel/extable.c b/kernel/extable.c
index e136ed8d82ba..0df6253730be 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -41,7 +41,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
 	return e;
 }
 
-__notrace_funcgraph int core_kernel_text(unsigned long addr)
+int core_kernel_text(unsigned long addr)
 {
 	if (addr >= (unsigned long)_stext &&
 	    addr <= (unsigned long)_etext)
@@ -54,7 +54,7 @@ __notrace_funcgraph int core_kernel_text(unsigned long addr)
 	return 0;
 }
 
-__notrace_funcgraph int __kernel_text_address(unsigned long addr)
+int __kernel_text_address(unsigned long addr)
 {
 	if (core_kernel_text(addr))
 		return 1;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 3aba8d12f328..4709a7c870d7 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -17,6 +17,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/rculist.h>
 #include <linux/hash.h>
+#include <trace/irq.h>
 
 #include "internals.h"
 
@@ -316,6 +317,9 @@ irqreturn_t no_action(int cpl, void *dev_id)
 	return IRQ_NONE;
 }
 
+DEFINE_TRACE(irq_handler_entry);
+DEFINE_TRACE(irq_handler_exit);
+
 /**
  * handle_IRQ_event - irq action chain handler
  * @irq: the interrupt number
@@ -332,7 +336,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 	local_irq_enable_in_hardirq();
 
 	do {
+		trace_irq_handler_entry(irq, action);
 		ret = action->handler(irq, action->dev_id);
+		trace_irq_handler_exit(irq, action, ret);
 		if (ret == IRQ_HANDLED)
 			status |= action->flags;
 		retval |= ret;
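The hunk above only defines the irq_handler_entry/exit tracepoints and fires them around action->handler(); a tracer still has to attach probes before anything is collected. Below is a minimal consumer sketch, assuming the register_trace_irq_handler_entry()/register_trace_irq_handler_exit() helpers that DECLARE_TRACE() in <trace/irq.h> generates for these prototypes; it is an illustration, not part of the patch.

/* Illustrative probe module, not part of this patch. Assumes <trace/irq.h>
 * declares irq_handler_entry/exit with the prototypes used at the call
 * sites above. */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <trace/irq.h>

static void probe_irq_entry(int irq, struct irqaction *action)
{
	pr_debug("irq %d: entering handler %s\n", irq, action->name);
}

static void probe_irq_exit(int irq, struct irqaction *action, int ret)
{
	pr_debug("irq %d: handler %s returned %d\n", irq, action->name, ret);
}

static int __init irq_probe_init(void)
{
	int err;

	err = register_trace_irq_handler_entry(probe_irq_entry);
	if (err)
		return err;
	err = register_trace_irq_handler_exit(probe_irq_exit);
	if (err)
		unregister_trace_irq_handler_entry(probe_irq_entry);
	return err;
}

static void __exit irq_probe_cleanup(void)
{
	unregister_trace_irq_handler_exit(probe_irq_exit);
	unregister_trace_irq_handler_entry(probe_irq_entry);
	/* wait for in-flight probes, as blktrace.c below does on unregister */
	tracepoint_synchronize_unregister();
}

module_init(irq_probe_init);
module_exit(irq_probe_cleanup);
MODULE_LICENSE("GPL");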
diff --git a/kernel/module.c b/kernel/module.c
index ba22484a987e..22d7379709da 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2735,7 +2735,7 @@ int is_module_address(unsigned long addr)
 
 
 /* Is this a valid kernel address? */
-__notrace_funcgraph struct module *__module_text_address(unsigned long addr)
+struct module *__module_text_address(unsigned long addr)
 {
 	struct module *mod;
 
diff --git a/kernel/relay.c b/kernel/relay.c
index 9d79b7854fa6..edc0ba6d8160 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -677,9 +677,7 @@ int relay_late_setup_files(struct rchan *chan,
 	 */
 	for_each_online_cpu(i) {
 		if (unlikely(!chan->buf[i])) {
-			printk(KERN_ERR "relay_late_setup_files: CPU %u "
-			       "has no buffer, it must have!\n", i);
-			BUG();
+			WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
 			err = -EINVAL;
 			break;
 		}
diff --git a/kernel/sched.c b/kernel/sched.c
index 8e2558c2ba67..328f9c7448a5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4404,10 +4404,7 @@ void scheduler_tick(void)
 #endif
 }
 
-#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
-		defined(CONFIG_PREEMPT_TRACER))
-
-static inline unsigned long get_parent_ip(unsigned long addr)
+unsigned long get_parent_ip(unsigned long addr)
 {
 	if (in_lock_functions(addr)) {
 		addr = CALLER_ADDR2;
@@ -4417,6 +4414,9 @@ static inline unsigned long get_parent_ip(unsigned long addr)
 	return addr;
 }
 
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+		defined(CONFIG_PREEMPT_TRACER))
+
 void __kprobes add_preempt_count(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index a0b0852414cc..7ec82c1c61c5 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -24,11 +24,12 @@
  * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
  * consistent between cpus (never more than 2 jiffies difference).
  */
-#include <linux/sched.h>
-#include <linux/percpu.h>
 #include <linux/spinlock.h>
-#include <linux/ktime.h>
+#include <linux/hardirq.h>
 #include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/ktime.h>
+#include <linux/sched.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -43,6 +44,10 @@ unsigned long long __attribute__((weak)) sched_clock(void)
 static __read_mostly int sched_clock_running;
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+__read_mostly int sched_clock_stable;
+#else
+static const int sched_clock_stable = 1;
+#endif
 
 struct sched_clock_data {
 	/*
@@ -87,7 +92,7 @@ void sched_clock_init(void)
 }
 
 /*
- * min,max except they take wrapping into account
+ * min, max except they take wrapping into account
  */
 
 static inline u64 wrap_min(u64 x, u64 y)
@@ -116,10 +121,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
 	if (unlikely(delta < 0))
 		delta = 0;
 
+	if (unlikely(!sched_clock_running))
+		return 0ull;
+
 	/*
 	 * scd->clock = clamp(scd->tick_gtod + delta,
 	 *		      max(scd->tick_gtod, scd->clock),
 	 *		      scd->tick_gtod + TICK_NSEC);
 	 */
 
 	clock = scd->tick_gtod + delta;
@@ -148,8 +156,20 @@ static void lock_double_clock(struct sched_clock_data *data1,
 
 u64 sched_clock_cpu(int cpu)
 {
-	struct sched_clock_data *scd = cpu_sdc(cpu);
 	u64 now, clock, this_clock, remote_clock;
+	struct sched_clock_data *scd;
+
+	if (sched_clock_stable)
+		return sched_clock();
+
+	scd = cpu_sdc(cpu);
+
+	/*
+	 * Normally this is not called in NMI context - but if it is,
+	 * trying to do any locking here is totally lethal.
+	 */
+	if (unlikely(in_nmi()))
+		return scd->clock;
 
 	if (unlikely(!sched_clock_running))
 		return 0ull;
@@ -193,6 +213,8 @@ u64 sched_clock_cpu(int cpu)
 	return clock;
 }
 
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+
 void sched_clock_tick(void)
 {
 	struct sched_clock_data *scd = this_scd();
@@ -235,22 +257,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
-#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
-
-void sched_clock_init(void)
-{
-	sched_clock_running = 1;
-}
-
-u64 sched_clock_cpu(int cpu)
-{
-	if (unlikely(!sched_clock_running))
-		return 0;
-
-	return sched_clock();
-}
-
-#endif
+#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 unsigned long long cpu_clock(int cpu)
 {
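The clamp that the comment inside __update_sched_clock() describes can also be read as a tiny standalone helper. The sketch below restates it; the wrap-aware comparisons are written in the style of this file's wrap_min()/wrap_max() helpers, and the function name is made up for illustration only.

/* Standalone restatement of the clamp documented in __update_sched_clock():
 * never go backwards relative to the last value handed out, and never run
 * more than one tick ahead of the GTOD-based tick timestamp. */
#include <linux/types.h>
#include <linux/jiffies.h>	/* TICK_NSEC */

/* wrap-aware comparisons: treat the u64 difference as signed so that
 * wrap-around still orders correctly (style of wrap_min()/wrap_max()) */
static inline u64 wrap_min(u64 x, u64 y) { return (s64)(x - y) < 0 ? x : y; }
static inline u64 wrap_max(u64 x, u64 y) { return (s64)(x - y) > 0 ? x : y; }

/* clamp(tick_gtod + delta, max(tick_gtod, prev_clock), tick_gtod + TICK_NSEC) */
static u64 clamp_sched_clock(u64 tick_gtod, u64 prev_clock, s64 delta)
{
	u64 clock = tick_gtod + delta;

	clock = wrap_max(clock, wrap_max(tick_gtod, prev_clock));	/* floor   */
	clock = wrap_min(clock, tick_gtod + TICK_NSEC);			/* ceiling */
	return clock;
}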
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bdbe9de9cd8d..6edfc2c11d99 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -21,6 +21,7 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/rcupdate.h>
+#include <linux/ftrace.h>
 #include <linux/smp.h>
 #include <linux/tick.h>
 
@@ -79,13 +80,23 @@ static void __local_bh_disable(unsigned long ip)
 	WARN_ON_ONCE(in_irq());
 
 	raw_local_irq_save(flags);
-	add_preempt_count(SOFTIRQ_OFFSET);
+	/*
+	 * The preempt tracer hooks into add_preempt_count and will break
+	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
+	 * is set and before current->softirq_enabled is cleared.
+	 * We must manually increment preempt_count here and manually
+	 * call the trace_preempt_off later.
+	 */
+	preempt_count() += SOFTIRQ_OFFSET;
 	/*
 	 * Were softirqs turned off above:
 	 */
 	if (softirq_count() == SOFTIRQ_OFFSET)
 		trace_softirqs_off(ip);
 	raw_local_irq_restore(flags);
+
+	if (preempt_count() == SOFTIRQ_OFFSET)
+		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
 }
 #else /* !CONFIG_TRACE_IRQFLAGS */
 static inline void __local_bh_disable(unsigned long ip)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 34e707e5ab87..999c6a2485df 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -9,6 +9,9 @@ config USER_STACKTRACE_SUPPORT
 config NOP_TRACER
 	bool
 
+config HAVE_FTRACE_NMI_ENTER
+	bool
+
 config HAVE_FUNCTION_TRACER
 	bool
 
@@ -37,6 +40,11 @@ config TRACER_MAX_TRACE
 config RING_BUFFER
 	bool
 
+config FTRACE_NMI_ENTER
+	bool
+	depends on HAVE_FTRACE_NMI_ENTER
+	default y
+
 config TRACING
 	bool
 	select DEBUG_FS
@@ -127,6 +135,7 @@ config SYSPROF_TRACER
 	bool "Sysprof Tracer"
 	depends on X86
 	select TRACING
+	select CONTEXT_SWITCH_TRACER
 	help
 	  This tracer provides the trace needed by the 'Sysprof' userspace
 	  tool.
@@ -150,6 +159,15 @@ config CONTEXT_SWITCH_TRACER
 	  This tracer gets called from the context switch and records
 	  all switching of tasks.
 
+config EVENT_TRACER
+	bool "Trace various events in the kernel"
+	depends on DEBUG_KERNEL
+	select TRACING
+	help
+	  This tracer hooks to various trace points in the kernel
+	  allowing the user to pick and choose which trace point they
+	  want to trace.
+
 config BOOT_TRACER
 	bool "Trace boot initcalls"
 	depends on DEBUG_KERNEL
@@ -165,9 +183,8 @@ config BOOT_TRACER
 	  representation of the delays during initcalls - but the raw
 	  /debug/tracing/trace text output is readable too.
 
-	  ( Note that tracing self tests can't be enabled if this tracer is
-	  selected, because the self-tests are an initcall as well and that
-	  would invalidate the boot trace. )
+	  You must pass in ftrace=initcall to the kernel command line
+	  to enable this on bootup.
 
 config TRACE_BRANCH_PROFILING
 	bool "Trace likely/unlikely profiler"
@@ -266,6 +283,62 @@ config HW_BRANCH_TRACER
 	  This tracer records all branches on the system in a circular
 	  buffer giving access to the last N branches for each cpu.
 
+config KMEMTRACE
+	bool "Trace SLAB allocations"
+	select TRACING
+	help
+	  kmemtrace provides tracing for slab allocator functions, such as
+	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
+	  data is then fed to the userspace application in order to analyse
+	  allocation hotspots, internal fragmentation and so on, making it
+	  possible to see how well an allocator performs, as well as debug
+	  and profile kernel code.
+
+	  This requires a userspace application to use. See
+	  Documentation/vm/kmemtrace.txt for more information.
+
+	  Saying Y will make the kernel somewhat larger and slower. However,
+	  if you disable kmemtrace at run-time or boot-time, the performance
+	  impact is minimal (depending on the arch the kernel is built for).
+
+	  If unsure, say N.
+
+config WORKQUEUE_TRACER
+	bool "Trace workqueues"
+	select TRACING
+	help
+	  The workqueue tracer provides some statistical information
+	  about each cpu workqueue thread, such as the number of works
+	  inserted and executed since its creation. It can help
+	  to evaluate the amount of work each of them has to perform.
+	  For example it can help a developer to decide whether to
+	  choose a per-cpu workqueue instead of a single-threaded one.
+
+config BLK_DEV_IO_TRACE
+	bool "Support for tracing block io actions"
+	depends on SYSFS
+	depends on BLOCK
+	select RELAY
+	select DEBUG_FS
+	select TRACEPOINTS
+	select TRACING
+	select STACKTRACE
+	help
+	  Say Y here if you want to be able to trace the block layer actions
+	  on a given queue. Tracing allows you to see any traffic happening
+	  on a block device queue. For more information (and the userspace
+	  support tools needed), fetch the blktrace tools from:
+
+	  git://git.kernel.dk/blktrace.git
+
+	  Tracing is also possible using the ftrace interface, e.g.:
+
+	    echo 1 > /sys/block/sda/sda1/trace/enable
+	    echo blk > /sys/kernel/debug/tracing/current_tracer
+	    cat /sys/kernel/debug/tracing/trace_pipe
+
+	  If unsure, say N.
+
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
 	depends on FUNCTION_TRACER
269config DYNAMIC_FTRACE 342config DYNAMIC_FTRACE
270 bool "enable/disable ftrace tracepoints dynamically" 343 bool "enable/disable ftrace tracepoints dynamically"
271 depends on FUNCTION_TRACER 344 depends on FUNCTION_TRACER
@@ -296,7 +369,7 @@ config FTRACE_SELFTEST
 
 config FTRACE_STARTUP_TEST
 	bool "Perform a startup test on ftrace"
-	depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER
+	depends on TRACING && DEBUG_KERNEL
 	select FTRACE_SELFTEST
 	help
 	  This option performs a series of startup tests on ftrace. On bootup
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 349d5a93653f..c931fe0560cb 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,9 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_TRACING) += trace_clock.o
+obj-$(CONFIG_TRACING) += trace_output.o
+obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
 obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
@@ -33,5 +36,10 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
 obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
 obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
 obj-$(CONFIG_POWER_TRACER) += trace_power.o
+obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
+obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
+obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+obj-$(CONFIG_EVENT_TRACER) += trace_events.o
+obj-$(CONFIG_EVENT_TRACER) += events.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
new file mode 100644
index 000000000000..e82cb9e930cc
--- /dev/null
+++ b/kernel/trace/blktrace.c
@@ -0,0 +1,1538 @@
1/*
2 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 *
17 */
18#include <linux/kernel.h>
19#include <linux/blkdev.h>
20#include <linux/blktrace_api.h>
21#include <linux/percpu.h>
22#include <linux/init.h>
23#include <linux/mutex.h>
24#include <linux/debugfs.h>
25#include <linux/time.h>
26#include <trace/block.h>
27#include <linux/uaccess.h>
28#include "trace_output.h"
29
30static unsigned int blktrace_seq __read_mostly = 1;
31
32static struct trace_array *blk_tr;
33static int __read_mostly blk_tracer_enabled;
34
35/* Select an alternative, minimalistic output than the original one */
36#define TRACE_BLK_OPT_CLASSIC 0x1
37
38static struct tracer_opt blk_tracer_opts[] = {
39 /* Default disable the minimalistic output */
40 { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) },
41 { }
42};
43
44static struct tracer_flags blk_tracer_flags = {
45 .val = 0,
46 .opts = blk_tracer_opts,
47};
48
49/* Global reference count of probes */
50static DEFINE_MUTEX(blk_probe_mutex);
51static atomic_t blk_probes_ref = ATOMIC_INIT(0);
52
53static int blk_register_tracepoints(void);
54static void blk_unregister_tracepoints(void);
55
56/*
57 * Send out a notify message.
58 */
59static void trace_note(struct blk_trace *bt, pid_t pid, int action,
60 const void *data, size_t len)
61{
62 struct blk_io_trace *t;
63
64 if (!bt->rchan)
65 return;
66
67 t = relay_reserve(bt->rchan, sizeof(*t) + len);
68 if (t) {
69 const int cpu = smp_processor_id();
70
71 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
72 t->time = ktime_to_ns(ktime_get());
73 t->device = bt->dev;
74 t->action = action;
75 t->pid = pid;
76 t->cpu = cpu;
77 t->pdu_len = len;
78 memcpy((void *) t + sizeof(*t), data, len);
79 }
80}
81
82/*
83 * Send out a notify for this process, if we haven't done so since a trace
84 * started
85 */
86static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk)
87{
88 tsk->btrace_seq = blktrace_seq;
89 trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm));
90}
91
92static void trace_note_time(struct blk_trace *bt)
93{
94 struct timespec now;
95 unsigned long flags;
96 u32 words[2];
97
98 getnstimeofday(&now);
99 words[0] = now.tv_sec;
100 words[1] = now.tv_nsec;
101
102 local_irq_save(flags);
103 trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words));
104 local_irq_restore(flags);
105}
106
107void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
108{
109 int n;
110 va_list args;
111 unsigned long flags;
112 char *buf;
113
114 if (blk_tr) {
115 va_start(args, fmt);
116 ftrace_vprintk(fmt, args);
117 va_end(args);
118 return;
119 }
120
121 if (!bt->msg_data)
122 return;
123
124 local_irq_save(flags);
125 buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
126 va_start(args, fmt);
127 n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
128 va_end(args);
129
130 trace_note(bt, 0, BLK_TN_MESSAGE, buf, n);
131 local_irq_restore(flags);
132}
133EXPORT_SYMBOL_GPL(__trace_note_message);
134
135static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
136 pid_t pid)
137{
138 if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
139 return 1;
140 if (sector < bt->start_lba || sector > bt->end_lba)
141 return 1;
142 if (bt->pid && pid != bt->pid)
143 return 1;
144
145 return 0;
146}
147
148/*
149 * Data direction bit lookup
150 */
151static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ),
152 BLK_TC_ACT(BLK_TC_WRITE) };
153
154/* The ilog2() calls fall out because they're constant */
155#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
156 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name))
157
158/*
159 * The worker for the various blk_add_trace*() types. Fills out a
160 * blk_io_trace structure and places it in a per-cpu subbuffer.
161 */
162static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
163 int rw, u32 what, int error, int pdu_len, void *pdu_data)
164{
165 struct task_struct *tsk = current;
166 struct ring_buffer_event *event = NULL;
167 struct blk_io_trace *t;
168 unsigned long flags = 0;
169 unsigned long *sequence;
170 pid_t pid;
171 int cpu, pc = 0;
172
173 if (unlikely(bt->trace_state != Blktrace_running ||
174 !blk_tracer_enabled))
175 return;
176
177 what |= ddir_act[rw & WRITE];
178 what |= MASK_TC_BIT(rw, BARRIER);
179 what |= MASK_TC_BIT(rw, SYNCIO);
180 what |= MASK_TC_BIT(rw, AHEAD);
181 what |= MASK_TC_BIT(rw, META);
182 what |= MASK_TC_BIT(rw, DISCARD);
183
184 pid = tsk->pid;
185 if (unlikely(act_log_check(bt, what, sector, pid)))
186 return;
187 cpu = raw_smp_processor_id();
188
189 if (blk_tr) {
190 tracing_record_cmdline(current);
191
192 pc = preempt_count();
193 event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK,
194 sizeof(*t) + pdu_len,
195 0, pc);
196 if (!event)
197 return;
198 t = ring_buffer_event_data(event);
199 goto record_it;
200 }
201
202 /*
203 * A word about the locking here - we disable interrupts to reserve
204 * some space in the relay per-cpu buffer, to prevent an irq
205 * from coming in and stepping on our toes.
206 */
207 local_irq_save(flags);
208
209 if (unlikely(tsk->btrace_seq != blktrace_seq))
210 trace_note_tsk(bt, tsk);
211
212 t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
213 if (t) {
214 sequence = per_cpu_ptr(bt->sequence, cpu);
215
216 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
217 t->sequence = ++(*sequence);
218 t->time = ktime_to_ns(ktime_get());
219record_it:
220 /*
221 * These two are not needed in ftrace as they are in the
222 * generic trace_entry, filled by tracing_generic_entry_update,
223 * but for the trace_event->bin() synthesizer benefit we do it
224 * here too.
225 */
226 t->cpu = cpu;
227 t->pid = pid;
228
229 t->sector = sector;
230 t->bytes = bytes;
231 t->action = what;
232 t->device = bt->dev;
233 t->error = error;
234 t->pdu_len = pdu_len;
235
236 if (pdu_len)
237 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
238
239 if (blk_tr) {
240 trace_buffer_unlock_commit(blk_tr, event, 0, pc);
241 return;
242 }
243 }
244
245 local_irq_restore(flags);
246}
247
248static struct dentry *blk_tree_root;
249static DEFINE_MUTEX(blk_tree_mutex);
250
251static void blk_trace_cleanup(struct blk_trace *bt)
252{
253 debugfs_remove(bt->msg_file);
254 debugfs_remove(bt->dropped_file);
255 relay_close(bt->rchan);
256 free_percpu(bt->sequence);
257 free_percpu(bt->msg_data);
258 kfree(bt);
259 mutex_lock(&blk_probe_mutex);
260 if (atomic_dec_and_test(&blk_probes_ref))
261 blk_unregister_tracepoints();
262 mutex_unlock(&blk_probe_mutex);
263}
264
265int blk_trace_remove(struct request_queue *q)
266{
267 struct blk_trace *bt;
268
269 bt = xchg(&q->blk_trace, NULL);
270 if (!bt)
271 return -EINVAL;
272
273 if (bt->trace_state == Blktrace_setup ||
274 bt->trace_state == Blktrace_stopped)
275 blk_trace_cleanup(bt);
276
277 return 0;
278}
279EXPORT_SYMBOL_GPL(blk_trace_remove);
280
281static int blk_dropped_open(struct inode *inode, struct file *filp)
282{
283 filp->private_data = inode->i_private;
284
285 return 0;
286}
287
288static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
289 size_t count, loff_t *ppos)
290{
291 struct blk_trace *bt = filp->private_data;
292 char buf[16];
293
294 snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));
295
296 return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
297}
298
299static const struct file_operations blk_dropped_fops = {
300 .owner = THIS_MODULE,
301 .open = blk_dropped_open,
302 .read = blk_dropped_read,
303};
304
305static int blk_msg_open(struct inode *inode, struct file *filp)
306{
307 filp->private_data = inode->i_private;
308
309 return 0;
310}
311
312static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
313 size_t count, loff_t *ppos)
314{
315 char *msg;
316 struct blk_trace *bt;
317
318 if (count > BLK_TN_MAX_MSG)
319 return -EINVAL;
320
321 msg = kmalloc(count, GFP_KERNEL);
322 if (msg == NULL)
323 return -ENOMEM;
324
325 if (copy_from_user(msg, buffer, count)) {
326 kfree(msg);
327 return -EFAULT;
328 }
329
330 bt = filp->private_data;
331 __trace_note_message(bt, "%s", msg);
332 kfree(msg);
333
334 return count;
335}
336
337static const struct file_operations blk_msg_fops = {
338 .owner = THIS_MODULE,
339 .open = blk_msg_open,
340 .write = blk_msg_write,
341};
342
343/*
344 * Keep track of how many times we encountered a full subbuffer, to aid
345 * the user space app in telling how many lost events there were.
346 */
347static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
348 void *prev_subbuf, size_t prev_padding)
349{
350 struct blk_trace *bt;
351
352 if (!relay_buf_full(buf))
353 return 1;
354
355 bt = buf->chan->private_data;
356 atomic_inc(&bt->dropped);
357 return 0;
358}
359
360static int blk_remove_buf_file_callback(struct dentry *dentry)
361{
362 struct dentry *parent = dentry->d_parent;
363 debugfs_remove(dentry);
364
365 /*
366 * this will fail for all but the last file, but that is ok. what we
367 * care about is the top level buts->name directory going away, when
368 * the last trace file is gone. Then we don't have to rmdir() that
369 * manually on trace stop, so it nicely solves the issue with
370 * force killing of running traces.
371 */
372
373 debugfs_remove(parent);
374 return 0;
375}
376
377static struct dentry *blk_create_buf_file_callback(const char *filename,
378 struct dentry *parent,
379 int mode,
380 struct rchan_buf *buf,
381 int *is_global)
382{
383 return debugfs_create_file(filename, mode, parent, buf,
384 &relay_file_operations);
385}
386
387static struct rchan_callbacks blk_relay_callbacks = {
388 .subbuf_start = blk_subbuf_start_callback,
389 .create_buf_file = blk_create_buf_file_callback,
390 .remove_buf_file = blk_remove_buf_file_callback,
391};
392
393/*
394 * Setup everything required to start tracing
395 */
396int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
397 struct blk_user_trace_setup *buts)
398{
399 struct blk_trace *old_bt, *bt = NULL;
400 struct dentry *dir = NULL;
401 int ret, i;
402
403 if (!buts->buf_size || !buts->buf_nr)
404 return -EINVAL;
405
406 strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
407 buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
408
409 /*
410 * some device names have larger paths - convert the slashes
411 * to underscores for this to work as expected
412 */
413 for (i = 0; i < strlen(buts->name); i++)
414 if (buts->name[i] == '/')
415 buts->name[i] = '_';
416
417 ret = -ENOMEM;
418 bt = kzalloc(sizeof(*bt), GFP_KERNEL);
419 if (!bt)
420 goto err;
421
422 bt->sequence = alloc_percpu(unsigned long);
423 if (!bt->sequence)
424 goto err;
425
426 bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG);
427 if (!bt->msg_data)
428 goto err;
429
430 ret = -ENOENT;
431
432 if (!blk_tree_root) {
433 blk_tree_root = debugfs_create_dir("block", NULL);
434 if (!blk_tree_root)
435 return -ENOMEM;
436 }
437
438 dir = debugfs_create_dir(buts->name, blk_tree_root);
439
440 if (!dir)
441 goto err;
442
443 bt->dir = dir;
444 bt->dev = dev;
445 atomic_set(&bt->dropped, 0);
446
447 ret = -EIO;
448 bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
449 &blk_dropped_fops);
450 if (!bt->dropped_file)
451 goto err;
452
453 bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
454 if (!bt->msg_file)
455 goto err;
456
457 bt->rchan = relay_open("trace", dir, buts->buf_size,
458 buts->buf_nr, &blk_relay_callbacks, bt);
459 if (!bt->rchan)
460 goto err;
461
462 bt->act_mask = buts->act_mask;
463 if (!bt->act_mask)
464 bt->act_mask = (u16) -1;
465
466 bt->start_lba = buts->start_lba;
467 bt->end_lba = buts->end_lba;
468 if (!bt->end_lba)
469 bt->end_lba = -1ULL;
470
471 bt->pid = buts->pid;
472 bt->trace_state = Blktrace_setup;
473
474 mutex_lock(&blk_probe_mutex);
475 if (atomic_add_return(1, &blk_probes_ref) == 1) {
476 ret = blk_register_tracepoints();
477 if (ret)
478 goto probe_err;
479 }
480 mutex_unlock(&blk_probe_mutex);
481
482 ret = -EBUSY;
483 old_bt = xchg(&q->blk_trace, bt);
484 if (old_bt) {
485 (void) xchg(&q->blk_trace, old_bt);
486 goto err;
487 }
488
489 return 0;
490probe_err:
491 atomic_dec(&blk_probes_ref);
492 mutex_unlock(&blk_probe_mutex);
493err:
494 if (bt) {
495 if (bt->msg_file)
496 debugfs_remove(bt->msg_file);
497 if (bt->dropped_file)
498 debugfs_remove(bt->dropped_file);
499 free_percpu(bt->sequence);
500 free_percpu(bt->msg_data);
501 if (bt->rchan)
502 relay_close(bt->rchan);
503 kfree(bt);
504 }
505 return ret;
506}
507
508int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
509 char __user *arg)
510{
511 struct blk_user_trace_setup buts;
512 int ret;
513
514 ret = copy_from_user(&buts, arg, sizeof(buts));
515 if (ret)
516 return -EFAULT;
517
518 ret = do_blk_trace_setup(q, name, dev, &buts);
519 if (ret)
520 return ret;
521
522 if (copy_to_user(arg, &buts, sizeof(buts)))
523 return -EFAULT;
524
525 return 0;
526}
527EXPORT_SYMBOL_GPL(blk_trace_setup);
528
529int blk_trace_startstop(struct request_queue *q, int start)
530{
531 int ret;
532 struct blk_trace *bt = q->blk_trace;
533
534 if (bt == NULL)
535 return -EINVAL;
536
537 /*
538 * For starting a trace, we can transition from a setup or stopped
539 * trace. For stopping a trace, the state must be running
540 */
541 ret = -EINVAL;
542 if (start) {
543 if (bt->trace_state == Blktrace_setup ||
544 bt->trace_state == Blktrace_stopped) {
545 blktrace_seq++;
546 smp_mb();
547 bt->trace_state = Blktrace_running;
548
549 trace_note_time(bt);
550 ret = 0;
551 }
552 } else {
553 if (bt->trace_state == Blktrace_running) {
554 bt->trace_state = Blktrace_stopped;
555 relay_flush(bt->rchan);
556 ret = 0;
557 }
558 }
559
560 return ret;
561}
562EXPORT_SYMBOL_GPL(blk_trace_startstop);
563
564/**
565 * blk_trace_ioctl: - handle the ioctls associated with tracing
566 * @bdev: the block device
567 * @cmd: the ioctl cmd
568 * @arg: the argument data, if any
569 *
570 **/
571int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
572{
573 struct request_queue *q;
574 int ret, start = 0;
575 char b[BDEVNAME_SIZE];
576
577 q = bdev_get_queue(bdev);
578 if (!q)
579 return -ENXIO;
580
581 mutex_lock(&bdev->bd_mutex);
582
583 switch (cmd) {
584 case BLKTRACESETUP:
585 bdevname(bdev, b);
586 ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
587 break;
588 case BLKTRACESTART:
589 start = 1;
590 case BLKTRACESTOP:
591 ret = blk_trace_startstop(q, start);
592 break;
593 case BLKTRACETEARDOWN:
594 ret = blk_trace_remove(q);
595 break;
596 default:
597 ret = -ENOTTY;
598 break;
599 }
600
601 mutex_unlock(&bdev->bd_mutex);
602 return ret;
603}
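The ioctl dispatch above is what the blktrace userspace tooling drives. The following userspace sketch shows that sequence, assuming the BLKTRACESETUP/BLKTRACESTART/BLKTRACESTOP/BLKTRACETEARDOWN ioctls from <linux/fs.h> and struct blk_user_trace_setup from <linux/blktrace_api.h>; the device path and buffer geometry are arbitrary example values, and the real blktrace tool remains the reference for production use.

/* Illustrative userspace sequence, not part of this patch. */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/blktrace_api.h>

int main(void)
{
	struct blk_user_trace_setup buts;
	int fd = open("/dev/sda", O_RDONLY | O_NONBLOCK);	/* example device */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&buts, 0, sizeof(buts));
	buts.buf_size = 512 * 1024;	/* relay sub-buffer size */
	buts.buf_nr   = 4;		/* sub-buffers per cpu */
	buts.act_mask = 0;		/* 0: the kernel widens this to a full mask */

	if (ioctl(fd, BLKTRACESETUP, &buts) < 0 ||
	    ioctl(fd, BLKTRACESTART) < 0) {
		perror("blktrace ioctl");
		close(fd);
		return 1;
	}

	/* Events are now readable from the per-cpu relay files created by
	 * do_blk_trace_setup(), e.g. <debugfs>/block/<name>/trace0. */
	sleep(5);

	ioctl(fd, BLKTRACESTOP);
	ioctl(fd, BLKTRACETEARDOWN);
	close(fd);
	return 0;
}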
604
605/**
606 * blk_trace_shutdown: - stop and cleanup trace structures
607 * @q: the request queue associated with the device
608 *
609 **/
610void blk_trace_shutdown(struct request_queue *q)
611{
612 if (q->blk_trace) {
613 blk_trace_startstop(q, 0);
614 blk_trace_remove(q);
615 }
616}
617
618/*
619 * blktrace probes
620 */
621
622/**
623 * blk_add_trace_rq - Add a trace for a request oriented action
624 * @q: queue the io is for
625 * @rq: the source request
626 * @what: the action
627 *
628 * Description:
629 * Records an action against a request. Will log the bio offset + size.
630 *
631 **/
632static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
633 u32 what)
634{
635 struct blk_trace *bt = q->blk_trace;
636 int rw = rq->cmd_flags & 0x03;
637
638 if (likely(!bt))
639 return;
640
641 if (blk_discard_rq(rq))
642 rw |= (1 << BIO_RW_DISCARD);
643
644 if (blk_pc_request(rq)) {
645 what |= BLK_TC_ACT(BLK_TC_PC);
646 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
647 sizeof(rq->cmd), rq->cmd);
648 } else {
649 what |= BLK_TC_ACT(BLK_TC_FS);
650 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
651 rw, what, rq->errors, 0, NULL);
652 }
653}
654
655static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq)
656{
657 blk_add_trace_rq(q, rq, BLK_TA_ABORT);
658}
659
660static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq)
661{
662 blk_add_trace_rq(q, rq, BLK_TA_INSERT);
663}
664
665static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
666{
667 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
668}
669
670static void blk_add_trace_rq_requeue(struct request_queue *q,
671 struct request *rq)
672{
673 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
674}
675
676static void blk_add_trace_rq_complete(struct request_queue *q,
677 struct request *rq)
678{
679 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
680}
681
682/**
683 * blk_add_trace_bio - Add a trace for a bio oriented action
684 * @q: queue the io is for
685 * @bio: the source bio
686 * @what: the action
687 *
688 * Description:
689 * Records an action against a bio. Will log the bio offset + size.
690 *
691 **/
692static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
693 u32 what)
694{
695 struct blk_trace *bt = q->blk_trace;
696
697 if (likely(!bt))
698 return;
699
700 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
701 !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
702}
703
704static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio)
705{
706 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
707}
708
709static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
710{
711 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
712}
713
714static void blk_add_trace_bio_backmerge(struct request_queue *q,
715 struct bio *bio)
716{
717 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
718}
719
720static void blk_add_trace_bio_frontmerge(struct request_queue *q,
721 struct bio *bio)
722{
723 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
724}
725
726static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
727{
728 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
729}
730
731static void blk_add_trace_getrq(struct request_queue *q,
732 struct bio *bio, int rw)
733{
734 if (bio)
735 blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
736 else {
737 struct blk_trace *bt = q->blk_trace;
738
739 if (bt)
740 __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
741 }
742}
743
744
745static void blk_add_trace_sleeprq(struct request_queue *q,
746 struct bio *bio, int rw)
747{
748 if (bio)
749 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
750 else {
751 struct blk_trace *bt = q->blk_trace;
752
753 if (bt)
754 __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ,
755 0, 0, NULL);
756 }
757}
758
759static void blk_add_trace_plug(struct request_queue *q)
760{
761 struct blk_trace *bt = q->blk_trace;
762
763 if (bt)
764 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
765}
766
767static void blk_add_trace_unplug_io(struct request_queue *q)
768{
769 struct blk_trace *bt = q->blk_trace;
770
771 if (bt) {
772 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
773 __be64 rpdu = cpu_to_be64(pdu);
774
775 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
776 sizeof(rpdu), &rpdu);
777 }
778}
779
780static void blk_add_trace_unplug_timer(struct request_queue *q)
781{
782 struct blk_trace *bt = q->blk_trace;
783
784 if (bt) {
785 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
786 __be64 rpdu = cpu_to_be64(pdu);
787
788 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
789 sizeof(rpdu), &rpdu);
790 }
791}
792
793static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
794 unsigned int pdu)
795{
796 struct blk_trace *bt = q->blk_trace;
797
798 if (bt) {
799 __be64 rpdu = cpu_to_be64(pdu);
800
801 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
802 BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
803 sizeof(rpdu), &rpdu);
804 }
805}
806
807/**
808 * blk_add_trace_remap - Add a trace for a remap operation
809 * @q: queue the io is for
810 * @bio: the source bio
811 * @dev: target device
812 * @from: source sector
813 * @to: target sector
814 *
815 * Description:
816 * Device mapper or raid target sometimes need to split a bio because
817 * it spans a stripe (or similar). Add a trace for that action.
818 *
819 **/
820static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
821 dev_t dev, sector_t from, sector_t to)
822{
823 struct blk_trace *bt = q->blk_trace;
824 struct blk_io_trace_remap r;
825
826 if (likely(!bt))
827 return;
828
829 r.device = cpu_to_be32(dev);
830 r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
831 r.sector = cpu_to_be64(to);
832
833 __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
834 !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
835}
836
837/**
838 * blk_add_driver_data - Add binary message with driver-specific data
839 * @q: queue the io is for
840 * @rq: io request
841 * @data: driver-specific data
842 * @len: length of driver-specific data
843 *
844 * Description:
845 * Some drivers might want to write driver-specific data per request.
846 *
847 **/
848void blk_add_driver_data(struct request_queue *q,
849 struct request *rq,
850 void *data, size_t len)
851{
852 struct blk_trace *bt = q->blk_trace;
853
854 if (likely(!bt))
855 return;
856
857 if (blk_pc_request(rq))
858 __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
859 rq->errors, len, data);
860 else
861 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
862 0, BLK_TA_DRV_DATA, rq->errors, len, data);
863}
864EXPORT_SYMBOL_GPL(blk_add_driver_data);
865
866static int blk_register_tracepoints(void)
867{
868 int ret;
869
870 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort);
871 WARN_ON(ret);
872 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert);
873 WARN_ON(ret);
874 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue);
875 WARN_ON(ret);
876 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue);
877 WARN_ON(ret);
878 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete);
879 WARN_ON(ret);
880 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce);
881 WARN_ON(ret);
882 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete);
883 WARN_ON(ret);
884 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
885 WARN_ON(ret);
886 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
887 WARN_ON(ret);
888 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue);
889 WARN_ON(ret);
890 ret = register_trace_block_getrq(blk_add_trace_getrq);
891 WARN_ON(ret);
892 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq);
893 WARN_ON(ret);
894 ret = register_trace_block_plug(blk_add_trace_plug);
895 WARN_ON(ret);
896 ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer);
897 WARN_ON(ret);
898 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io);
899 WARN_ON(ret);
900 ret = register_trace_block_split(blk_add_trace_split);
901 WARN_ON(ret);
902 ret = register_trace_block_remap(blk_add_trace_remap);
903 WARN_ON(ret);
904 return 0;
905}
906
907static void blk_unregister_tracepoints(void)
908{
909 unregister_trace_block_remap(blk_add_trace_remap);
910 unregister_trace_block_split(blk_add_trace_split);
911 unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
912 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
913 unregister_trace_block_plug(blk_add_trace_plug);
914 unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
915 unregister_trace_block_getrq(blk_add_trace_getrq);
916 unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
917 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
918 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
919 unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
920 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
921 unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
922 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
923 unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
924 unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
925 unregister_trace_block_rq_abort(blk_add_trace_rq_abort);
926
927 tracepoint_synchronize_unregister();
928}
929
930/*
931 * struct blk_io_tracer formatting routines
932 */
933
934static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
935{
936 int i = 0;
937
938 if (t->action & BLK_TC_DISCARD)
939 rwbs[i++] = 'D';
940 else if (t->action & BLK_TC_WRITE)
941 rwbs[i++] = 'W';
942 else if (t->bytes)
943 rwbs[i++] = 'R';
944 else
945 rwbs[i++] = 'N';
946
947 if (t->action & BLK_TC_AHEAD)
948 rwbs[i++] = 'A';
949 if (t->action & BLK_TC_BARRIER)
950 rwbs[i++] = 'B';
951 if (t->action & BLK_TC_SYNC)
952 rwbs[i++] = 'S';
953 if (t->action & BLK_TC_META)
954 rwbs[i++] = 'M';
955
956 rwbs[i] = '\0';
957}
958
959static inline
960const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent)
961{
962 return (const struct blk_io_trace *)ent;
963}
964
965static inline const void *pdu_start(const struct trace_entry *ent)
966{
967 return te_blk_io_trace(ent) + 1;
968}
969
970static inline u32 t_sec(const struct trace_entry *ent)
971{
972 return te_blk_io_trace(ent)->bytes >> 9;
973}
974
975static inline unsigned long long t_sector(const struct trace_entry *ent)
976{
977 return te_blk_io_trace(ent)->sector;
978}
979
980static inline __u16 t_error(const struct trace_entry *ent)
981{
982 return te_blk_io_trace(ent)->sector;
983}
984
985static __u64 get_pdu_int(const struct trace_entry *ent)
986{
987 const __u64 *val = pdu_start(ent);
988 return be64_to_cpu(*val);
989}
990
991static void get_pdu_remap(const struct trace_entry *ent,
992 struct blk_io_trace_remap *r)
993{
994 const struct blk_io_trace_remap *__r = pdu_start(ent);
995 __u64 sector = __r->sector;
996
997 r->device = be32_to_cpu(__r->device);
998 r->device_from = be32_to_cpu(__r->device_from);
999 r->sector = be64_to_cpu(sector);
1000}
1001
1002static int blk_log_action_iter(struct trace_iterator *iter, const char *act)
1003{
1004 char rwbs[6];
1005 unsigned long long ts = ns2usecs(iter->ts);
1006 unsigned long usec_rem = do_div(ts, USEC_PER_SEC);
1007 unsigned secs = (unsigned long)ts;
1008 const struct trace_entry *ent = iter->ent;
1009 const struct blk_io_trace *t = (const struct blk_io_trace *)ent;
1010
1011 fill_rwbs(rwbs, t);
1012
1013 return trace_seq_printf(&iter->seq,
1014 "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ",
1015 MAJOR(t->device), MINOR(t->device), iter->cpu,
1016 secs, usec_rem, ent->pid, act, rwbs);
1017}
1018
1019static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t,
1020 const char *act)
1021{
1022 char rwbs[6];
1023 fill_rwbs(rwbs, t);
1024 return trace_seq_printf(s, "%3d,%-3d %2s %3s ",
1025 MAJOR(t->device), MINOR(t->device), act, rwbs);
1026}
1027
1028static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
1029{
1030 const char *cmd = trace_find_cmdline(ent->pid);
1031
1032 if (t_sec(ent))
1033 return trace_seq_printf(s, "%llu + %u [%s]\n",
1034 t_sector(ent), t_sec(ent), cmd);
1035 return trace_seq_printf(s, "[%s]\n", cmd);
1036}
1037
1038static int blk_log_with_error(struct trace_seq *s,
1039 const struct trace_entry *ent)
1040{
1041 if (t_sec(ent))
1042 return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent),
1043 t_sec(ent), t_error(ent));
1044 return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent));
1045}
1046
1047static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
1048{
1049 struct blk_io_trace_remap r = { .device = 0, };
1050
1051 get_pdu_remap(ent, &r);
1052 return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
1053 t_sector(ent),
1054 t_sec(ent), MAJOR(r.device), MINOR(r.device),
1055 (unsigned long long)r.sector);
1056}
1057
1058static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
1059{
1060 return trace_seq_printf(s, "[%s]\n", trace_find_cmdline(ent->pid));
1061}
1062
1063static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent)
1064{
1065 return trace_seq_printf(s, "[%s] %llu\n", trace_find_cmdline(ent->pid),
1066 get_pdu_int(ent));
1067}
1068
1069static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent)
1070{
1071 return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent),
1072 get_pdu_int(ent), trace_find_cmdline(ent->pid));
1073}
1074
1075/*
1076 * struct tracer operations
1077 */
1078
1079static void blk_tracer_print_header(struct seq_file *m)
1080{
1081 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
1082 return;
1083 seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG\n"
1084 "# | | | | | |\n");
1085}
1086
1087static void blk_tracer_start(struct trace_array *tr)
1088{
1089 mutex_lock(&blk_probe_mutex);
1090 if (atomic_add_return(1, &blk_probes_ref) == 1)
1091 if (blk_register_tracepoints())
1092 atomic_dec(&blk_probes_ref);
1093 mutex_unlock(&blk_probe_mutex);
1094 trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
1095}
1096
1097static int blk_tracer_init(struct trace_array *tr)
1098{
1099 blk_tr = tr;
1100 blk_tracer_start(tr);
1101 mutex_lock(&blk_probe_mutex);
1102 blk_tracer_enabled++;
1103 mutex_unlock(&blk_probe_mutex);
1104 return 0;
1105}
1106
1107static void blk_tracer_stop(struct trace_array *tr)
1108{
1109 trace_flags |= TRACE_ITER_CONTEXT_INFO;
1110 mutex_lock(&blk_probe_mutex);
1111 if (atomic_dec_and_test(&blk_probes_ref))
1112 blk_unregister_tracepoints();
1113 mutex_unlock(&blk_probe_mutex);
1114}
1115
1116static void blk_tracer_reset(struct trace_array *tr)
1117{
1118 if (!atomic_read(&blk_probes_ref))
1119 return;
1120
1121 mutex_lock(&blk_probe_mutex);
1122 blk_tracer_enabled--;
1123 WARN_ON(blk_tracer_enabled < 0);
1124 mutex_unlock(&blk_probe_mutex);
1125
1126 blk_tracer_stop(tr);
1127}
1128
1129static struct {
1130 const char *act[2];
1131 int (*print)(struct trace_seq *s, const struct trace_entry *ent);
1132} what2act[] __read_mostly = {
1133 [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic },
1134 [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic },
1135 [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic },
1136 [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic },
1137 [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic },
1138 [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error },
1139 [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic },
1140 [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error },
1141 [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug },
1142 [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug },
1143 [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug },
1144 [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic },
1145 [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split },
1146 [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic },
1147 [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap },
1148};
1149
1150static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
1151 int flags)
1152{
1153 struct trace_seq *s = &iter->seq;
1154 const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
1155 const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1156 int ret;
1157
1158 if (!trace_print_context(iter))
1159 return TRACE_TYPE_PARTIAL_LINE;
1160
1161 if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
1162 ret = trace_seq_printf(s, "Bad pc action %x\n", what);
1163 else {
1164 const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1165 ret = blk_log_action_seq(s, t, what2act[what].act[long_act]);
1166 if (ret)
1167 ret = what2act[what].print(s, iter->ent);
1168 }
1169
1170 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1171}
1172
1173static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
1174{
1175 struct trace_seq *s = &iter->seq;
1176 struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
1177 const int offset = offsetof(struct blk_io_trace, sector);
1178 struct blk_io_trace old = {
1179 .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
1180 .time = ns2usecs(iter->ts),
1181 };
1182
1183 if (!trace_seq_putmem(s, &old, offset))
1184 return 0;
1185 return trace_seq_putmem(s, &t->sector,
1186 sizeof(old) - offset + t->pdu_len);
1187}
1188
1189static enum print_line_t
1190blk_trace_event_print_binary(struct trace_iterator *iter, int flags)
1191{
1192 return blk_trace_synthesize_old_trace(iter) ?
1193 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1194}
1195
1196static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
1197{
1198 const struct blk_io_trace *t;
1199 u16 what;
1200 int ret;
1201
1202 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
1203 return TRACE_TYPE_UNHANDLED;
1204
1205 t = (const struct blk_io_trace *)iter->ent;
1206 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1207
1208 if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
1209 ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what);
1210 else {
1211 const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1212 ret = blk_log_action_iter(iter, what2act[what].act[long_act]);
1213 if (ret)
1214 ret = what2act[what].print(&iter->seq, iter->ent);
1215 }
1216
1217 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1218}
1219
1220static struct tracer blk_tracer __read_mostly = {
1221 .name = "blk",
1222 .init = blk_tracer_init,
1223 .reset = blk_tracer_reset,
1224 .start = blk_tracer_start,
1225 .stop = blk_tracer_stop,
1226 .print_header = blk_tracer_print_header,
1227 .print_line = blk_tracer_print_line,
1228 .flags = &blk_tracer_flags,
1229};
1230
1231static struct trace_event trace_blk_event = {
1232 .type = TRACE_BLK,
1233 .trace = blk_trace_event_print,
1234 .latency_trace = blk_trace_event_print,
1235 .binary = blk_trace_event_print_binary,
1236};
1237
1238static int __init init_blk_tracer(void)
1239{
1240 if (!register_ftrace_event(&trace_blk_event)) {
1241 pr_warning("Warning: could not register block events\n");
1242 return 1;
1243 }
1244
1245 if (register_tracer(&blk_tracer) != 0) {
1246 pr_warning("Warning: could not register the block tracer\n");
1247 unregister_ftrace_event(&trace_blk_event);
1248 return 1;
1249 }
1250
1251 return 0;
1252}
1253
1254device_initcall(init_blk_tracer);
1255
1256static int blk_trace_remove_queue(struct request_queue *q)
1257{
1258 struct blk_trace *bt;
1259
1260 bt = xchg(&q->blk_trace, NULL);
1261 if (bt == NULL)
1262 return -EINVAL;
1263
1264 kfree(bt);
1265 return 0;
1266}
1267
1268/*
1269 * Setup everything required to start tracing
1270 */
1271static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
1272{
1273 struct blk_trace *old_bt, *bt = NULL;
1274 int ret;
1275
1276 ret = -ENOMEM;
1277 bt = kzalloc(sizeof(*bt), GFP_KERNEL);
1278 if (!bt)
1279 goto err;
1280
1281 bt->dev = dev;
1282 bt->act_mask = (u16)-1;
1283 bt->end_lba = -1ULL;
1284 bt->trace_state = Blktrace_running;
1285
1286 old_bt = xchg(&q->blk_trace, bt);
1287 if (old_bt != NULL) {
1288 (void)xchg(&q->blk_trace, old_bt);
1289 kfree(bt);
1290 ret = -EBUSY;
1291 }
1292 return 0;
1293err:
1294 return ret;
1295}
1296
1297/*
1298 * sysfs interface to enable and configure tracing
1299 */
1300
1301static ssize_t sysfs_blk_trace_enable_show(struct device *dev,
1302 struct device_attribute *attr,
1303 char *buf)
1304{
1305 struct hd_struct *p = dev_to_part(dev);
1306 struct block_device *bdev;
1307 ssize_t ret = -ENXIO;
1308
1309 lock_kernel();
1310 bdev = bdget(part_devt(p));
1311 if (bdev != NULL) {
1312 struct request_queue *q = bdev_get_queue(bdev);
1313
1314 if (q != NULL) {
1315 mutex_lock(&bdev->bd_mutex);
1316 ret = sprintf(buf, "%u\n", !!q->blk_trace);
1317 mutex_unlock(&bdev->bd_mutex);
1318 }
1319
1320 bdput(bdev);
1321 }
1322
1323 unlock_kernel();
1324 return ret;
1325}
1326
1327static ssize_t sysfs_blk_trace_enable_store(struct device *dev,
1328 struct device_attribute *attr,
1329 const char *buf, size_t count)
1330{
1331 struct block_device *bdev;
1332 struct request_queue *q;
1333 struct hd_struct *p;
1334 int value;
1335 ssize_t ret = -ENXIO;
1336
1337 if (count == 0 || sscanf(buf, "%d", &value) != 1)
1338 goto out;
1339
1340 lock_kernel();
1341 p = dev_to_part(dev);
1342 bdev = bdget(part_devt(p));
1343 if (bdev == NULL)
1344 goto out_unlock_kernel;
1345
1346 q = bdev_get_queue(bdev);
1347 if (q == NULL)
1348 goto out_bdput;
1349
1350 mutex_lock(&bdev->bd_mutex);
1351 if (value)
1352 ret = blk_trace_setup_queue(q, bdev->bd_dev);
1353 else
1354 ret = blk_trace_remove_queue(q);
1355 mutex_unlock(&bdev->bd_mutex);
1356
1357 if (ret == 0)
1358 ret = count;
1359out_bdput:
1360 bdput(bdev);
1361out_unlock_kernel:
1362 unlock_kernel();
1363out:
1364 return ret;
1365}
1366
1367static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1368 struct device_attribute *attr,
1369 char *buf);
1370static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1371 struct device_attribute *attr,
1372 const char *buf, size_t count);
1373#define BLK_TRACE_DEVICE_ATTR(_name) \
1374 DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \
1375 sysfs_blk_trace_attr_show, \
1376 sysfs_blk_trace_attr_store)
1377
1378static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR,
1379 sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store);
1380static BLK_TRACE_DEVICE_ATTR(act_mask);
1381static BLK_TRACE_DEVICE_ATTR(pid);
1382static BLK_TRACE_DEVICE_ATTR(start_lba);
1383static BLK_TRACE_DEVICE_ATTR(end_lba);
1384
1385static struct attribute *blk_trace_attrs[] = {
1386 &dev_attr_enable.attr,
1387 &dev_attr_act_mask.attr,
1388 &dev_attr_pid.attr,
1389 &dev_attr_start_lba.attr,
1390 &dev_attr_end_lba.attr,
1391 NULL
1392};
1393
1394struct attribute_group blk_trace_attr_group = {
1395 .name = "trace",
1396 .attrs = blk_trace_attrs,
1397};
1398
1399static int blk_str2act_mask(const char *str)
1400{
1401 int mask = 0;
1402 char *copy = kstrdup(str, GFP_KERNEL), *s;
1403
1404 if (copy == NULL)
1405 return -ENOMEM;
1406
1407 s = strstrip(copy);
1408
1409 while (1) {
1410 char *sep = strchr(s, ',');
1411
1412 if (sep != NULL)
1413 *sep = '\0';
1414
1415 if (strcasecmp(s, "barrier") == 0)
1416 mask |= BLK_TC_BARRIER;
1417 else if (strcasecmp(s, "complete") == 0)
1418 mask |= BLK_TC_COMPLETE;
1419 else if (strcasecmp(s, "fs") == 0)
1420 mask |= BLK_TC_FS;
1421 else if (strcasecmp(s, "issue") == 0)
1422 mask |= BLK_TC_ISSUE;
1423 else if (strcasecmp(s, "pc") == 0)
1424 mask |= BLK_TC_PC;
1425 else if (strcasecmp(s, "queue") == 0)
1426 mask |= BLK_TC_QUEUE;
1427 else if (strcasecmp(s, "read") == 0)
1428 mask |= BLK_TC_READ;
1429 else if (strcasecmp(s, "requeue") == 0)
1430 mask |= BLK_TC_REQUEUE;
1431 else if (strcasecmp(s, "sync") == 0)
1432 mask |= BLK_TC_SYNC;
1433 else if (strcasecmp(s, "write") == 0)
1434 mask |= BLK_TC_WRITE;
1435
1436 if (sep == NULL)
1437 break;
1438
1439 s = sep + 1;
1440 }
1441 kfree(copy);
1442
1443 return mask;
1444}
1445
1446static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1447 struct device_attribute *attr,
1448 char *buf)
1449{
1450 struct hd_struct *p = dev_to_part(dev);
1451 struct request_queue *q;
1452 struct block_device *bdev;
1453 ssize_t ret = -ENXIO;
1454
1455 lock_kernel();
1456 bdev = bdget(part_devt(p));
1457 if (bdev == NULL)
1458 goto out_unlock_kernel;
1459
1460 q = bdev_get_queue(bdev);
1461 if (q == NULL)
1462 goto out_bdput;
1463 mutex_lock(&bdev->bd_mutex);
1464 if (q->blk_trace == NULL)
1465 ret = sprintf(buf, "disabled\n");
1466 else if (attr == &dev_attr_act_mask)
1467 ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask);
1468 else if (attr == &dev_attr_pid)
1469 ret = sprintf(buf, "%u\n", q->blk_trace->pid);
1470 else if (attr == &dev_attr_start_lba)
1471 ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
1472 else if (attr == &dev_attr_end_lba)
1473 ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
1474 mutex_unlock(&bdev->bd_mutex);
1475out_bdput:
1476 bdput(bdev);
1477out_unlock_kernel:
1478 unlock_kernel();
1479 return ret;
1480}
1481
1482static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1483 struct device_attribute *attr,
1484 const char *buf, size_t count)
1485{
1486 struct block_device *bdev;
1487 struct request_queue *q;
1488 struct hd_struct *p;
1489 u64 value;
1490 ssize_t ret = -ENXIO;
1491
1492 if (count == 0)
1493 goto out;
1494
1495 if (attr == &dev_attr_act_mask) {
1496 if (sscanf(buf, "%llx", &value) != 1) {
1497 /* Assume it is a list of trace category names */
1498 value = blk_str2act_mask(buf);
1499 if (value < 0)
1500 goto out;
1501 }
1502 } else if (sscanf(buf, "%llu", &value) != 1)
1503 goto out;
1504
1505 lock_kernel();
1506 p = dev_to_part(dev);
1507 bdev = bdget(part_devt(p));
1508 if (bdev == NULL)
1509 goto out_unlock_kernel;
1510
1511 q = bdev_get_queue(bdev);
1512 if (q == NULL)
1513 goto out_bdput;
1514
1515 mutex_lock(&bdev->bd_mutex);
1516 ret = 0;
1517 if (q->blk_trace == NULL)
1518 ret = blk_trace_setup_queue(q, bdev->bd_dev);
1519
1520 if (ret == 0) {
1521 if (attr == &dev_attr_act_mask)
1522 q->blk_trace->act_mask = value;
1523 else if (attr == &dev_attr_pid)
1524 q->blk_trace->pid = value;
1525 else if (attr == &dev_attr_start_lba)
1526 q->blk_trace->start_lba = value;
1527 else if (attr == &dev_attr_end_lba)
1528 q->blk_trace->end_lba = value;
1529 ret = count;
1530 }
1531 mutex_unlock(&bdev->bd_mutex);
1532out_bdput:
1533 bdput(bdev);
1534out_unlock_kernel:
1535 unlock_kernel();
1536out:
1537 return ret;
1538}
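
The sysfs handlers above let a partition's blktrace be enabled and tuned through the "trace" attribute group (enable, act_mask, pid, start_lba, end_lba). A minimal userspace sketch, assuming the group shows up under the usual per-partition sysfs directory; the /sys/block/sda/sda1 path below is illustrative, not taken from this patch:

/*
 * Userspace sketch: drive the interface above. The sysfs paths are an
 * assumption based on the "trace" group and attribute names; substitute
 * the real disk and partition.
 */
#include <stdio.h>

static int write_attr(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%s\n", val);
	return fclose(f);
}

int main(void)
{
	/* start tracing the partition, then narrow the action mask */
	write_attr("/sys/block/sda/sda1/trace/enable", "1");
	write_attr("/sys/block/sda/sda1/trace/act_mask", "read,write,complete");
	return 0;
}

Per sysfs_blk_trace_attr_store() above, a non-hex act_mask value falls back to blk_str2act_mask(), so the comma-separated category names work as well as a numeric mask.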
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
new file mode 100644
index 000000000000..f2509cbaacea
--- /dev/null
+++ b/kernel/trace/events.c
@@ -0,0 +1,17 @@
1/*
2 * This is the place to register all trace points as events.
3 */
4
5/* someday this needs to go in a generic header */
6#define __STR(x) #x
7#define STR(x) __STR(x)
8
9#include <trace/trace_events.h>
10
11#include "trace_output.h"
12
13#include "trace_events_stage_1.h"
14#include "trace_events_stage_2.h"
15#include "trace_events_stage_3.h"
16
17#include <trace/trace_event_types.h>
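
events.c leans on a multi-pass preprocessor pattern: the stage headers redefine the event-generating macros so that the single list of tracepoint definitions in trace_event_types.h can be expanded several times, once per stage. A generic, self-contained sketch of that pattern, not the actual contents of the stage headers:

#include <stdio.h>

/* One central list of "events"; each entry expands through EVENT(). */
#define EVENT_LIST		\
	EVENT(sched_switch)	\
	EVENT(irq_entry)

/* Pass 1: emit a structure per event. */
#define EVENT(name) struct name##_event { int id; };
EVENT_LIST
#undef EVENT

static void register_one(const char *name)
{
	printf("registering %s\n", name);
}

/* Pass 2: emit a registration call per event from the same list. */
static void register_all(void)
{
#define EVENT(name) register_one(#name);
	EVENT_LIST
#undef EVENT
}

int main(void)
{
	register_all();
	return 0;
}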
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fdf913dfc7e8..5a3a06b21eee 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -27,6 +27,7 @@
27#include <linux/sysctl.h> 27#include <linux/sysctl.h>
28#include <linux/ctype.h> 28#include <linux/ctype.h>
29#include <linux/list.h> 29#include <linux/list.h>
30#include <linux/hash.h>
30 31
31#include <asm/ftrace.h> 32#include <asm/ftrace.h>
32 33
@@ -44,14 +45,14 @@
44 ftrace_kill(); \ 45 ftrace_kill(); \
45 } while (0) 46 } while (0)
46 47
48/* hash bits for specific function selection */
49#define FTRACE_HASH_BITS 7
50#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS)
51
47/* ftrace_enabled is a method to turn ftrace on or off */ 52/* ftrace_enabled is a method to turn ftrace on or off */
48int ftrace_enabled __read_mostly; 53int ftrace_enabled __read_mostly;
49static int last_ftrace_enabled; 54static int last_ftrace_enabled;
50 55
51/* set when tracing only a pid */
52struct pid *ftrace_pid_trace;
53static struct pid * const ftrace_swapper_pid = &init_struct_pid;
54
55/* Quick disabling of function tracer. */ 56/* Quick disabling of function tracer. */
56int function_trace_stop; 57int function_trace_stop;
57 58
@@ -61,9 +62,7 @@ int function_trace_stop;
61 */ 62 */
62static int ftrace_disabled __read_mostly; 63static int ftrace_disabled __read_mostly;
63 64
64static DEFINE_SPINLOCK(ftrace_lock); 65static DEFINE_MUTEX(ftrace_lock);
65static DEFINE_MUTEX(ftrace_sysctl_lock);
66static DEFINE_MUTEX(ftrace_start_lock);
67 66
68static struct ftrace_ops ftrace_list_end __read_mostly = 67static struct ftrace_ops ftrace_list_end __read_mostly =
69{ 68{
@@ -134,9 +133,6 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
134 133
135static int __register_ftrace_function(struct ftrace_ops *ops) 134static int __register_ftrace_function(struct ftrace_ops *ops)
136{ 135{
137 /* should not be called from interrupt context */
138 spin_lock(&ftrace_lock);
139
140 ops->next = ftrace_list; 136 ops->next = ftrace_list;
141 /* 137 /*
142 * We are entering ops into the ftrace_list but another 138 * We are entering ops into the ftrace_list but another
@@ -172,18 +168,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
172#endif 168#endif
173 } 169 }
174 170
175 spin_unlock(&ftrace_lock);
176
177 return 0; 171 return 0;
178} 172}
179 173
180static int __unregister_ftrace_function(struct ftrace_ops *ops) 174static int __unregister_ftrace_function(struct ftrace_ops *ops)
181{ 175{
182 struct ftrace_ops **p; 176 struct ftrace_ops **p;
183 int ret = 0;
184
185 /* should not be called from interrupt context */
186 spin_lock(&ftrace_lock);
187 177
188 /* 178 /*
189 * If we are removing the last function, then simply point 179 * If we are removing the last function, then simply point
@@ -192,17 +182,15 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
192 if (ftrace_list == ops && ops->next == &ftrace_list_end) { 182 if (ftrace_list == ops && ops->next == &ftrace_list_end) {
193 ftrace_trace_function = ftrace_stub; 183 ftrace_trace_function = ftrace_stub;
194 ftrace_list = &ftrace_list_end; 184 ftrace_list = &ftrace_list_end;
195 goto out; 185 return 0;
196 } 186 }
197 187
198 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next) 188 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
199 if (*p == ops) 189 if (*p == ops)
200 break; 190 break;
201 191
202 if (*p != ops) { 192 if (*p != ops)
203 ret = -1; 193 return -1;
204 goto out;
205 }
206 194
207 *p = (*p)->next; 195 *p = (*p)->next;
208 196
@@ -223,18 +211,14 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
223 } 211 }
224 } 212 }
225 213
226 out: 214 return 0;
227 spin_unlock(&ftrace_lock);
228
229 return ret;
230} 215}
231 216
232static void ftrace_update_pid_func(void) 217static void ftrace_update_pid_func(void)
233{ 218{
234 ftrace_func_t func; 219 ftrace_func_t func;
235 220
236 /* should not be called from interrupt context */ 221 mutex_lock(&ftrace_lock);
237 spin_lock(&ftrace_lock);
238 222
239 if (ftrace_trace_function == ftrace_stub) 223 if (ftrace_trace_function == ftrace_stub)
240 goto out; 224 goto out;
@@ -256,21 +240,30 @@ static void ftrace_update_pid_func(void)
256#endif 240#endif
257 241
258 out: 242 out:
259 spin_unlock(&ftrace_lock); 243 mutex_unlock(&ftrace_lock);
260} 244}
261 245
246/* set when tracing only a pid */
247struct pid *ftrace_pid_trace;
248static struct pid * const ftrace_swapper_pid = &init_struct_pid;
249
262#ifdef CONFIG_DYNAMIC_FTRACE 250#ifdef CONFIG_DYNAMIC_FTRACE
251
263#ifndef CONFIG_FTRACE_MCOUNT_RECORD 252#ifndef CONFIG_FTRACE_MCOUNT_RECORD
264# error Dynamic ftrace depends on MCOUNT_RECORD 253# error Dynamic ftrace depends on MCOUNT_RECORD
265#endif 254#endif
266 255
267/* 256static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;
268 * Since MCOUNT_ADDR may point to mcount itself, we do not want 257
269 * to get it confused by reading a reference in the code as we 258struct ftrace_func_probe {
270 * are parsing on objcopy output of text. Use a variable for 259 struct hlist_node node;
271 * it instead. 260 struct ftrace_probe_ops *ops;
272 */ 261 unsigned long flags;
273static unsigned long mcount_addr = MCOUNT_ADDR; 262 unsigned long ip;
263 void *data;
264 struct rcu_head rcu;
265};
266
274 267
275enum { 268enum {
276 FTRACE_ENABLE_CALLS = (1 << 0), 269 FTRACE_ENABLE_CALLS = (1 << 0),
@@ -290,7 +283,7 @@ static DEFINE_MUTEX(ftrace_regex_lock);
290 283
291struct ftrace_page { 284struct ftrace_page {
292 struct ftrace_page *next; 285 struct ftrace_page *next;
293 unsigned long index; 286 int index;
294 struct dyn_ftrace records[]; 287 struct dyn_ftrace records[];
295}; 288};
296 289
@@ -305,6 +298,19 @@ static struct ftrace_page *ftrace_pages;
305 298
306static struct dyn_ftrace *ftrace_free_records; 299static struct dyn_ftrace *ftrace_free_records;
307 300
301/*
302 * This expands to two nested for loops. Do not use 'break' to break out;
303 * you must use a goto.
304 */
305#define do_for_each_ftrace_rec(pg, rec) \
306 for (pg = ftrace_pages_start; pg; pg = pg->next) { \
307 int _____i; \
308 for (_____i = 0; _____i < pg->index; _____i++) { \
309 rec = &pg->records[_____i];
310
311#define while_for_each_ftrace_rec() \
312 } \
313 }
308 314
309#ifdef CONFIG_KPROBES 315#ifdef CONFIG_KPROBES
310 316
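
The do_for_each_ftrace_rec()/while_for_each_ftrace_rec() pair added above hides two nested for loops, which is why the comment forbids break for an early exit. A small self-contained analogue with simplified record and page types, showing the goto-style exit the comment asks for:

#include <stdio.h>

struct rec      { int ip; };
struct rec_page { struct rec_page *next; int index; struct rec records[4]; };

#define do_for_each_rec(pg, rec, start)					\
	for (pg = (start); pg; pg = pg->next) {				\
		int _____i;						\
		for (_____i = 0; _____i < pg->index; _____i++) {	\
			rec = &pg->records[_____i];

#define while_for_each_rec()						\
		}							\
	}

int main(void)
{
	struct rec_page p2 = { NULL, 2, { { 30 }, { 40 } } };
	struct rec_page p1 = { &p2,  2, { { 10 }, { 20 } } };
	struct rec_page *pg;
	struct rec *rec;

	do_for_each_rec(pg, rec, &p1) {
		if (rec->ip == 30)
			goto found;	/* a break here would only leave the inner loop */
		printf("visited %d\n", rec->ip);
	} while_for_each_rec();
	printf("not found\n");
	return 1;
found:
	printf("found %d\n", rec->ip);
	return 0;
}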
@@ -349,23 +355,16 @@ void ftrace_release(void *start, unsigned long size)
349 struct ftrace_page *pg; 355 struct ftrace_page *pg;
350 unsigned long s = (unsigned long)start; 356 unsigned long s = (unsigned long)start;
351 unsigned long e = s + size; 357 unsigned long e = s + size;
352 int i;
353 358
354 if (ftrace_disabled || !start) 359 if (ftrace_disabled || !start)
355 return; 360 return;
356 361
357 /* should not be called from interrupt context */ 362 mutex_lock(&ftrace_lock);
358 spin_lock(&ftrace_lock); 363 do_for_each_ftrace_rec(pg, rec) {
359 364 if ((rec->ip >= s) && (rec->ip < e))
360 for (pg = ftrace_pages_start; pg; pg = pg->next) { 365 ftrace_free_rec(rec);
361 for (i = 0; i < pg->index; i++) { 366 } while_for_each_ftrace_rec();
362 rec = &pg->records[i]; 367 mutex_unlock(&ftrace_lock);
363
364 if ((rec->ip >= s) && (rec->ip < e))
365 ftrace_free_rec(rec);
366 }
367 }
368 spin_unlock(&ftrace_lock);
369} 368}
370 369
371static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 370static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -461,10 +460,10 @@ static void ftrace_bug(int failed, unsigned long ip)
461static int 460static int
462__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 461__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
463{ 462{
464 unsigned long ip, fl;
465 unsigned long ftrace_addr; 463 unsigned long ftrace_addr;
464 unsigned long ip, fl;
466 465
467 ftrace_addr = (unsigned long)ftrace_caller; 466 ftrace_addr = (unsigned long)FTRACE_ADDR;
468 467
469 ip = rec->ip; 468 ip = rec->ip;
470 469
@@ -473,7 +472,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
473 * it is not enabled then do nothing. 472 * it is not enabled then do nothing.
474 * 473 *
475 * If this record is not to be traced and 474 * If this record is not to be traced and
476 * it is enabled then disabled it. 475 * it is enabled then disable it.
477 * 476 *
478 */ 477 */
479 if (rec->flags & FTRACE_FL_NOTRACE) { 478 if (rec->flags & FTRACE_FL_NOTRACE) {
@@ -493,7 +492,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
493 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) 492 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
494 return 0; 493 return 0;
495 494
496 /* Record is not filtered and is not enabled do nothing */ 495 /* Record is not filtered or enabled, do nothing */
497 if (!fl) 496 if (!fl)
498 return 0; 497 return 0;
499 498
@@ -515,7 +514,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
515 514
516 } else { 515 } else {
517 516
518 /* if record is not enabled do nothing */ 517 /* if record is not enabled, do nothing */
519 if (!(rec->flags & FTRACE_FL_ENABLED)) 518 if (!(rec->flags & FTRACE_FL_ENABLED))
520 return 0; 519 return 0;
521 520
@@ -531,41 +530,40 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
531 530
532static void ftrace_replace_code(int enable) 531static void ftrace_replace_code(int enable)
533{ 532{
534 int i, failed;
535 struct dyn_ftrace *rec; 533 struct dyn_ftrace *rec;
536 struct ftrace_page *pg; 534 struct ftrace_page *pg;
535 int failed;
537 536
538 for (pg = ftrace_pages_start; pg; pg = pg->next) { 537 do_for_each_ftrace_rec(pg, rec) {
539 for (i = 0; i < pg->index; i++) { 538 /*
540 rec = &pg->records[i]; 539 * Skip over free records and records that have
541 540 * failed.
542 /* 541 */
543 * Skip over free records and records that have 542 if (rec->flags & FTRACE_FL_FREE ||
544 * failed. 543 rec->flags & FTRACE_FL_FAILED)
545 */ 544 continue;
546 if (rec->flags & FTRACE_FL_FREE ||
547 rec->flags & FTRACE_FL_FAILED)
548 continue;
549 545
550 /* ignore updates to this record's mcount site */ 546 /* ignore updates to this record's mcount site */
551 if (get_kprobe((void *)rec->ip)) { 547 if (get_kprobe((void *)rec->ip)) {
552 freeze_record(rec); 548 freeze_record(rec);
553 continue; 549 continue;
554 } else { 550 } else {
555 unfreeze_record(rec); 551 unfreeze_record(rec);
556 } 552 }
557 553
558 failed = __ftrace_replace_code(rec, enable); 554 failed = __ftrace_replace_code(rec, enable);
559 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { 555 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
560 rec->flags |= FTRACE_FL_FAILED; 556 rec->flags |= FTRACE_FL_FAILED;
561 if ((system_state == SYSTEM_BOOTING) || 557 if ((system_state == SYSTEM_BOOTING) ||
562 !core_kernel_text(rec->ip)) { 558 !core_kernel_text(rec->ip)) {
563 ftrace_free_rec(rec); 559 ftrace_free_rec(rec);
564 } else 560 } else {
565 ftrace_bug(failed, rec->ip); 561 ftrace_bug(failed, rec->ip);
566 } 562 /* Stop processing */
563 return;
564 }
567 } 565 }
568 } 566 } while_for_each_ftrace_rec();
569} 567}
570 568
571static int 569static int
@@ -576,7 +574,7 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
576 574
577 ip = rec->ip; 575 ip = rec->ip;
578 576
579 ret = ftrace_make_nop(mod, rec, mcount_addr); 577 ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
580 if (ret) { 578 if (ret) {
581 ftrace_bug(ret, ip); 579 ftrace_bug(ret, ip);
582 rec->flags |= FTRACE_FL_FAILED; 580 rec->flags |= FTRACE_FL_FAILED;
@@ -585,6 +583,24 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
585 return 1; 583 return 1;
586} 584}
587 585
586/*
587 * archs can override this function if they must do something
588 * before the modifying code is performed.
589 */
590int __weak ftrace_arch_code_modify_prepare(void)
591{
592 return 0;
593}
594
595/*
596 * archs can override this function if they must do something
597 * after the modifying code is performed.
598 */
599int __weak ftrace_arch_code_modify_post_process(void)
600{
601 return 0;
602}
603
588static int __ftrace_modify_code(void *data) 604static int __ftrace_modify_code(void *data)
589{ 605{
590 int *command = data; 606 int *command = data;
@@ -607,7 +623,17 @@ static int __ftrace_modify_code(void *data)
607 623
608static void ftrace_run_update_code(int command) 624static void ftrace_run_update_code(int command)
609{ 625{
626 int ret;
627
628 ret = ftrace_arch_code_modify_prepare();
629 FTRACE_WARN_ON(ret);
630 if (ret)
631 return;
632
610 stop_machine(__ftrace_modify_code, &command, NULL); 633 stop_machine(__ftrace_modify_code, &command, NULL);
634
635 ret = ftrace_arch_code_modify_post_process();
636 FTRACE_WARN_ON(ret);
611} 637}
612 638
613static ftrace_func_t saved_ftrace_func; 639static ftrace_func_t saved_ftrace_func;
@@ -631,13 +657,10 @@ static void ftrace_startup(int command)
631 if (unlikely(ftrace_disabled)) 657 if (unlikely(ftrace_disabled))
632 return; 658 return;
633 659
634 mutex_lock(&ftrace_start_lock);
635 ftrace_start_up++; 660 ftrace_start_up++;
636 command |= FTRACE_ENABLE_CALLS; 661 command |= FTRACE_ENABLE_CALLS;
637 662
638 ftrace_startup_enable(command); 663 ftrace_startup_enable(command);
639
640 mutex_unlock(&ftrace_start_lock);
641} 664}
642 665
643static void ftrace_shutdown(int command) 666static void ftrace_shutdown(int command)
@@ -645,7 +668,6 @@ static void ftrace_shutdown(int command)
645 if (unlikely(ftrace_disabled)) 668 if (unlikely(ftrace_disabled))
646 return; 669 return;
647 670
648 mutex_lock(&ftrace_start_lock);
649 ftrace_start_up--; 671 ftrace_start_up--;
650 if (!ftrace_start_up) 672 if (!ftrace_start_up)
651 command |= FTRACE_DISABLE_CALLS; 673 command |= FTRACE_DISABLE_CALLS;
@@ -656,11 +678,9 @@ static void ftrace_shutdown(int command)
656 } 678 }
657 679
658 if (!command || !ftrace_enabled) 680 if (!command || !ftrace_enabled)
659 goto out; 681 return;
660 682
661 ftrace_run_update_code(command); 683 ftrace_run_update_code(command);
662 out:
663 mutex_unlock(&ftrace_start_lock);
664} 684}
665 685
666static void ftrace_startup_sysctl(void) 686static void ftrace_startup_sysctl(void)
@@ -670,7 +690,6 @@ static void ftrace_startup_sysctl(void)
670 if (unlikely(ftrace_disabled)) 690 if (unlikely(ftrace_disabled))
671 return; 691 return;
672 692
673 mutex_lock(&ftrace_start_lock);
674 /* Force update next time */ 693 /* Force update next time */
675 saved_ftrace_func = NULL; 694 saved_ftrace_func = NULL;
676 /* ftrace_start_up is true if we want ftrace running */ 695 /* ftrace_start_up is true if we want ftrace running */
@@ -678,7 +697,6 @@ static void ftrace_startup_sysctl(void)
678 command |= FTRACE_ENABLE_CALLS; 697 command |= FTRACE_ENABLE_CALLS;
679 698
680 ftrace_run_update_code(command); 699 ftrace_run_update_code(command);
681 mutex_unlock(&ftrace_start_lock);
682} 700}
683 701
684static void ftrace_shutdown_sysctl(void) 702static void ftrace_shutdown_sysctl(void)
@@ -688,13 +706,11 @@ static void ftrace_shutdown_sysctl(void)
688 if (unlikely(ftrace_disabled)) 706 if (unlikely(ftrace_disabled))
689 return; 707 return;
690 708
691 mutex_lock(&ftrace_start_lock);
692 /* ftrace_start_up is true if ftrace is running */ 709 /* ftrace_start_up is true if ftrace is running */
693 if (ftrace_start_up) 710 if (ftrace_start_up)
694 command |= FTRACE_DISABLE_CALLS; 711 command |= FTRACE_DISABLE_CALLS;
695 712
696 ftrace_run_update_code(command); 713 ftrace_run_update_code(command);
697 mutex_unlock(&ftrace_start_lock);
698} 714}
699 715
700static cycle_t ftrace_update_time; 716static cycle_t ftrace_update_time;
@@ -781,13 +797,16 @@ enum {
781 FTRACE_ITER_CONT = (1 << 1), 797 FTRACE_ITER_CONT = (1 << 1),
782 FTRACE_ITER_NOTRACE = (1 << 2), 798 FTRACE_ITER_NOTRACE = (1 << 2),
783 FTRACE_ITER_FAILURES = (1 << 3), 799 FTRACE_ITER_FAILURES = (1 << 3),
800 FTRACE_ITER_PRINTALL = (1 << 4),
801 FTRACE_ITER_HASH = (1 << 5),
784}; 802};
785 803
786#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 804#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
787 805
788struct ftrace_iterator { 806struct ftrace_iterator {
789 struct ftrace_page *pg; 807 struct ftrace_page *pg;
790 unsigned idx; 808 int hidx;
809 int idx;
791 unsigned flags; 810 unsigned flags;
792 unsigned char buffer[FTRACE_BUFF_MAX+1]; 811 unsigned char buffer[FTRACE_BUFF_MAX+1];
793 unsigned buffer_idx; 812 unsigned buffer_idx;
@@ -795,15 +814,89 @@ struct ftrace_iterator {
795}; 814};
796 815
797static void * 816static void *
817t_hash_next(struct seq_file *m, void *v, loff_t *pos)
818{
819 struct ftrace_iterator *iter = m->private;
820 struct hlist_node *hnd = v;
821 struct hlist_head *hhd;
822
823 WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
824
825 (*pos)++;
826
827 retry:
828 if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
829 return NULL;
830
831 hhd = &ftrace_func_hash[iter->hidx];
832
833 if (hlist_empty(hhd)) {
834 iter->hidx++;
835 hnd = NULL;
836 goto retry;
837 }
838
839 if (!hnd)
840 hnd = hhd->first;
841 else {
842 hnd = hnd->next;
843 if (!hnd) {
844 iter->hidx++;
845 goto retry;
846 }
847 }
848
849 return hnd;
850}
851
852static void *t_hash_start(struct seq_file *m, loff_t *pos)
853{
854 struct ftrace_iterator *iter = m->private;
855 void *p = NULL;
856
857 iter->flags |= FTRACE_ITER_HASH;
858
859 return t_hash_next(m, p, pos);
860}
861
862static int t_hash_show(struct seq_file *m, void *v)
863{
864 struct ftrace_func_probe *rec;
865 struct hlist_node *hnd = v;
866 char str[KSYM_SYMBOL_LEN];
867
868 rec = hlist_entry(hnd, struct ftrace_func_probe, node);
869
870 if (rec->ops->print)
871 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
872
873 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
874 seq_printf(m, "%s:", str);
875
876 kallsyms_lookup((unsigned long)rec->ops->func, NULL, NULL, NULL, str);
877 seq_printf(m, "%s", str);
878
879 if (rec->data)
880 seq_printf(m, ":%p", rec->data);
881 seq_putc(m, '\n');
882
883 return 0;
884}
885
886static void *
798t_next(struct seq_file *m, void *v, loff_t *pos) 887t_next(struct seq_file *m, void *v, loff_t *pos)
799{ 888{
800 struct ftrace_iterator *iter = m->private; 889 struct ftrace_iterator *iter = m->private;
801 struct dyn_ftrace *rec = NULL; 890 struct dyn_ftrace *rec = NULL;
802 891
892 if (iter->flags & FTRACE_ITER_HASH)
893 return t_hash_next(m, v, pos);
894
803 (*pos)++; 895 (*pos)++;
804 896
805 /* should not be called from interrupt context */ 897 if (iter->flags & FTRACE_ITER_PRINTALL)
806 spin_lock(&ftrace_lock); 898 return NULL;
899
807 retry: 900 retry:
808 if (iter->idx >= iter->pg->index) { 901 if (iter->idx >= iter->pg->index) {
809 if (iter->pg->next) { 902 if (iter->pg->next) {
@@ -832,7 +925,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
832 goto retry; 925 goto retry;
833 } 926 }
834 } 927 }
835 spin_unlock(&ftrace_lock);
836 928
837 return rec; 929 return rec;
838} 930}
@@ -842,6 +934,23 @@ static void *t_start(struct seq_file *m, loff_t *pos)
842 struct ftrace_iterator *iter = m->private; 934 struct ftrace_iterator *iter = m->private;
843 void *p = NULL; 935 void *p = NULL;
844 936
937 mutex_lock(&ftrace_lock);
938 /*
939 * For set_ftrace_filter reading, if we have the filter
940 * off, we can short cut and just print out that all
941 * functions are enabled.
942 */
943 if (iter->flags & FTRACE_ITER_FILTER && !ftrace_filtered) {
944 if (*pos > 0)
945 return t_hash_start(m, pos);
946 iter->flags |= FTRACE_ITER_PRINTALL;
947 (*pos)++;
948 return iter;
949 }
950
951 if (iter->flags & FTRACE_ITER_HASH)
952 return t_hash_start(m, pos);
953
845 if (*pos > 0) { 954 if (*pos > 0) {
846 if (iter->idx < 0) 955 if (iter->idx < 0)
847 return p; 956 return p;
@@ -851,18 +960,31 @@ static void *t_start(struct seq_file *m, loff_t *pos)
851 960
852 p = t_next(m, p, pos); 961 p = t_next(m, p, pos);
853 962
963 if (!p)
964 return t_hash_start(m, pos);
965
854 return p; 966 return p;
855} 967}
856 968
857static void t_stop(struct seq_file *m, void *p) 969static void t_stop(struct seq_file *m, void *p)
858{ 970{
971 mutex_unlock(&ftrace_lock);
859} 972}
860 973
861static int t_show(struct seq_file *m, void *v) 974static int t_show(struct seq_file *m, void *v)
862{ 975{
976 struct ftrace_iterator *iter = m->private;
863 struct dyn_ftrace *rec = v; 977 struct dyn_ftrace *rec = v;
864 char str[KSYM_SYMBOL_LEN]; 978 char str[KSYM_SYMBOL_LEN];
865 979
980 if (iter->flags & FTRACE_ITER_HASH)
981 return t_hash_show(m, v);
982
983 if (iter->flags & FTRACE_ITER_PRINTALL) {
984 seq_printf(m, "#### all functions enabled ####\n");
985 return 0;
986 }
987
866 if (!rec) 988 if (!rec)
867 return 0; 989 return 0;
868 990
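
t_start() now takes ftrace_lock and t_stop() drops it, relying on the seq_file contract that ->start() and ->stop() bracket every run of ->next()/->show() calls. For reference, a minimal generic seq_file user of that contract could look like the sketch below; it is an illustration of the API, not part of this patch, and the proc file name and data are made up:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_vals[] = { 1, 2, 3 };

/* ->start(): take any needed lock, return the element at *pos or NULL */
static void *demo_start(struct seq_file *m, loff_t *pos)
{
	return *pos < ARRAY_SIZE(demo_vals) ? &demo_vals[*pos] : NULL;
}

/* ->next(): advance *pos, return the next element or NULL at the end */
static void *demo_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	return *pos < ARRAY_SIZE(demo_vals) ? &demo_vals[*pos] : NULL;
}

/* ->stop(): release whatever ->start() took; called after every batch */
static void demo_stop(struct seq_file *m, void *v)
{
}

/* ->show(): format one element */
static int demo_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%d\n", *(int *)v);
	return 0;
}

static const struct seq_operations demo_seq_ops = {
	.start	= demo_start,
	.next	= demo_next,
	.stop	= demo_stop,
	.show	= demo_show,
};

static int demo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &demo_seq_ops);
}

static const struct file_operations demo_fops = {
	.owner		= THIS_MODULE,
	.open		= demo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init demo_init(void)
{
	if (!proc_create("seqfile_demo", 0444, NULL, &demo_fops))
		return -ENOMEM;
	return 0;
}

static void __exit demo_exit(void)
{
	remove_proc_entry("seqfile_demo", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");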
@@ -941,23 +1063,16 @@ static void ftrace_filter_reset(int enable)
941 struct ftrace_page *pg; 1063 struct ftrace_page *pg;
942 struct dyn_ftrace *rec; 1064 struct dyn_ftrace *rec;
943 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1065 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
944 unsigned i;
945 1066
946 /* should not be called from interrupt context */ 1067 mutex_lock(&ftrace_lock);
947 spin_lock(&ftrace_lock);
948 if (enable) 1068 if (enable)
949 ftrace_filtered = 0; 1069 ftrace_filtered = 0;
950 pg = ftrace_pages_start; 1070 do_for_each_ftrace_rec(pg, rec) {
951 while (pg) { 1071 if (rec->flags & FTRACE_FL_FAILED)
952 for (i = 0; i < pg->index; i++) { 1072 continue;
953 rec = &pg->records[i]; 1073 rec->flags &= ~type;
954 if (rec->flags & FTRACE_FL_FAILED) 1074 } while_for_each_ftrace_rec();
955 continue; 1075 mutex_unlock(&ftrace_lock);
956 rec->flags &= ~type;
957 }
958 pg = pg->next;
959 }
960 spin_unlock(&ftrace_lock);
961} 1076}
962 1077
963static int 1078static int
@@ -1038,86 +1153,536 @@ enum {
1038 MATCH_END_ONLY, 1153 MATCH_END_ONLY,
1039}; 1154};
1040 1155
1041static void 1156/*
1042ftrace_match(unsigned char *buff, int len, int enable) 1157 * (static function - no need for kernel doc)
1158 *
1159 * Pass in a buffer containing a glob and this function will
1160 * set search to point to the search part of the buffer and
1161 * return the type of search it is (see enum above).
1162 * This does modify buff.
1163 *
1164 * Returns enum type.
1165 * search returns the pointer to use for comparison.
1166 * not returns 1 if buff started with a '!'
1167 * 0 otherwise.
1168 */
1169static int
1170ftrace_setup_glob(char *buff, int len, char **search, int *not)
1043{ 1171{
1044 char str[KSYM_SYMBOL_LEN];
1045 char *search = NULL;
1046 struct ftrace_page *pg;
1047 struct dyn_ftrace *rec;
1048 int type = MATCH_FULL; 1172 int type = MATCH_FULL;
1049 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1173 int i;
1050 unsigned i, match = 0, search_len = 0;
1051 int not = 0;
1052 1174
1053 if (buff[0] == '!') { 1175 if (buff[0] == '!') {
1054 not = 1; 1176 *not = 1;
1055 buff++; 1177 buff++;
1056 len--; 1178 len--;
1057 } 1179 } else
1180 *not = 0;
1181
1182 *search = buff;
1058 1183
1059 for (i = 0; i < len; i++) { 1184 for (i = 0; i < len; i++) {
1060 if (buff[i] == '*') { 1185 if (buff[i] == '*') {
1061 if (!i) { 1186 if (!i) {
1062 search = buff + i + 1; 1187 *search = buff + 1;
1063 type = MATCH_END_ONLY; 1188 type = MATCH_END_ONLY;
1064 search_len = len - (i + 1);
1065 } else { 1189 } else {
1066 if (type == MATCH_END_ONLY) { 1190 if (type == MATCH_END_ONLY)
1067 type = MATCH_MIDDLE_ONLY; 1191 type = MATCH_MIDDLE_ONLY;
1068 } else { 1192 else
1069 match = i;
1070 type = MATCH_FRONT_ONLY; 1193 type = MATCH_FRONT_ONLY;
1071 }
1072 buff[i] = 0; 1194 buff[i] = 0;
1073 break; 1195 break;
1074 } 1196 }
1075 } 1197 }
1076 } 1198 }
1077 1199
1078 /* should not be called from interrupt context */ 1200 return type;
1079 spin_lock(&ftrace_lock); 1201}
1080 if (enable) 1202
1081 ftrace_filtered = 1; 1203static int ftrace_match(char *str, char *regex, int len, int type)
1082 pg = ftrace_pages_start; 1204{
1083 while (pg) { 1205 int matched = 0;
1084 for (i = 0; i < pg->index; i++) { 1206 char *ptr;
1085 int matched = 0; 1207
1086 char *ptr; 1208 switch (type) {
1087 1209 case MATCH_FULL:
1088 rec = &pg->records[i]; 1210 if (strcmp(str, regex) == 0)
1089 if (rec->flags & FTRACE_FL_FAILED) 1211 matched = 1;
1212 break;
1213 case MATCH_FRONT_ONLY:
1214 if (strncmp(str, regex, len) == 0)
1215 matched = 1;
1216 break;
1217 case MATCH_MIDDLE_ONLY:
1218 if (strstr(str, regex))
1219 matched = 1;
1220 break;
1221 case MATCH_END_ONLY:
1222 ptr = strstr(str, regex);
1223 if (ptr && (ptr[len] == 0))
1224 matched = 1;
1225 break;
1226 }
1227
1228 return matched;
1229}
1230
1231static int
1232ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
1233{
1234 char str[KSYM_SYMBOL_LEN];
1235
1236 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1237 return ftrace_match(str, regex, len, type);
1238}
1239
1240static void ftrace_match_records(char *buff, int len, int enable)
1241{
1242 unsigned int search_len;
1243 struct ftrace_page *pg;
1244 struct dyn_ftrace *rec;
1245 unsigned long flag;
1246 char *search;
1247 int type;
1248 int not;
1249
1250 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1251 type = ftrace_setup_glob(buff, len, &search, &not);
1252
1253 search_len = strlen(search);
1254
1255 mutex_lock(&ftrace_lock);
1256 do_for_each_ftrace_rec(pg, rec) {
1257
1258 if (rec->flags & FTRACE_FL_FAILED)
1259 continue;
1260
1261 if (ftrace_match_record(rec, search, search_len, type)) {
1262 if (not)
1263 rec->flags &= ~flag;
1264 else
1265 rec->flags |= flag;
1266 }
1267 /*
1268 * Only enable filtering if we have a function that
1269 * is filtered on.
1270 */
1271 if (enable && (rec->flags & FTRACE_FL_FILTER))
1272 ftrace_filtered = 1;
1273 } while_for_each_ftrace_rec();
1274 mutex_unlock(&ftrace_lock);
1275}
1276
1277static int
1278ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
1279 char *regex, int len, int type)
1280{
1281 char str[KSYM_SYMBOL_LEN];
1282 char *modname;
1283
1284 kallsyms_lookup(rec->ip, NULL, NULL, &modname, str);
1285
1286 if (!modname || strcmp(modname, mod))
1287 return 0;
1288
1289 /* blank search means to match all funcs in the mod */
1290 if (len)
1291 return ftrace_match(str, regex, len, type);
1292 else
1293 return 1;
1294}
1295
1296static void ftrace_match_module_records(char *buff, char *mod, int enable)
1297{
1298 unsigned search_len = 0;
1299 struct ftrace_page *pg;
1300 struct dyn_ftrace *rec;
1301 int type = MATCH_FULL;
1302 char *search = buff;
1303 unsigned long flag;
1304 int not = 0;
1305
1306 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1307
1308 /* blank or '*' mean the same */
1309 if (strcmp(buff, "*") == 0)
1310 buff[0] = 0;
1311
1312 /* handle the case of 'don't filter this module' */
1313 if (strcmp(buff, "!") == 0 || strcmp(buff, "!*") == 0) {
1314 buff[0] = 0;
1315 not = 1;
1316 }
1317
1318 if (strlen(buff)) {
1319 type = ftrace_setup_glob(buff, strlen(buff), &search, &not);
1320 search_len = strlen(search);
1321 }
1322
1323 mutex_lock(&ftrace_lock);
1324 do_for_each_ftrace_rec(pg, rec) {
1325
1326 if (rec->flags & FTRACE_FL_FAILED)
1327 continue;
1328
1329 if (ftrace_match_module_record(rec, mod,
1330 search, search_len, type)) {
1331 if (not)
1332 rec->flags &= ~flag;
1333 else
1334 rec->flags |= flag;
1335 }
1336 if (enable && (rec->flags & FTRACE_FL_FILTER))
1337 ftrace_filtered = 1;
1338
1339 } while_for_each_ftrace_rec();
1340 mutex_unlock(&ftrace_lock);
1341}
1342
1343/*
1344 * We register the module command as a template to show others how
1345 * to register the a command as well.
1346 */
1347
1348static int
1349ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
1350{
1351 char *mod;
1352
1353 /*
1354 * cmd == 'mod' because we only registered this func
1355 * for the 'mod' ftrace_func_command.
1356 * But if you register one func with multiple commands,
1357 * you can tell which command was used by the cmd
1358 * parameter.
1359 */
1360
1361 /* we must have a module name */
1362 if (!param)
1363 return -EINVAL;
1364
1365 mod = strsep(&param, ":");
1366 if (!strlen(mod))
1367 return -EINVAL;
1368
1369 ftrace_match_module_records(func, mod, enable);
1370 return 0;
1371}
1372
1373static struct ftrace_func_command ftrace_mod_cmd = {
1374 .name = "mod",
1375 .func = ftrace_mod_callback,
1376};
1377
1378static int __init ftrace_mod_cmd_init(void)
1379{
1380 return register_ftrace_command(&ftrace_mod_cmd);
1381}
1382device_initcall(ftrace_mod_cmd_init);
1383
1384static void
1385function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1386{
1387 struct ftrace_func_probe *entry;
1388 struct hlist_head *hhd;
1389 struct hlist_node *n;
1390 unsigned long key;
1391 int resched;
1392
1393 key = hash_long(ip, FTRACE_HASH_BITS);
1394
1395 hhd = &ftrace_func_hash[key];
1396
1397 if (hlist_empty(hhd))
1398 return;
1399
1400 /*
1401 * Disable preemption for these calls to prevent an RCU grace
1402 * period. This syncs the hash iteration and freeing of items
1403 * on the hash. rcu_read_lock is too dangerous here.
1404 */
1405 resched = ftrace_preempt_disable();
1406 hlist_for_each_entry_rcu(entry, n, hhd, node) {
1407 if (entry->ip == ip)
1408 entry->ops->func(ip, parent_ip, &entry->data);
1409 }
1410 ftrace_preempt_enable(resched);
1411}
1412
1413static struct ftrace_ops trace_probe_ops __read_mostly =
1414{
1415 .func = function_trace_probe_call,
1416};
1417
1418static int ftrace_probe_registered;
1419
1420static void __enable_ftrace_function_probe(void)
1421{
1422 int i;
1423
1424 if (ftrace_probe_registered)
1425 return;
1426
1427 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
1428 struct hlist_head *hhd = &ftrace_func_hash[i];
1429 if (hhd->first)
1430 break;
1431 }
1432 /* Nothing registered? */
1433 if (i == FTRACE_FUNC_HASHSIZE)
1434 return;
1435
1436 __register_ftrace_function(&trace_probe_ops);
1437 ftrace_startup(0);
1438 ftrace_probe_registered = 1;
1439}
1440
1441static void __disable_ftrace_function_probe(void)
1442{
1443 int i;
1444
1445 if (!ftrace_probe_registered)
1446 return;
1447
1448 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
1449 struct hlist_head *hhd = &ftrace_func_hash[i];
1450 if (hhd->first)
1451 return;
1452 }
1453
1454 /* no more funcs left */
1455 __unregister_ftrace_function(&trace_probe_ops);
1456 ftrace_shutdown(0);
1457 ftrace_probe_registered = 0;
1458}
1459
1460
1461static void ftrace_free_entry_rcu(struct rcu_head *rhp)
1462{
1463 struct ftrace_func_probe *entry =
1464 container_of(rhp, struct ftrace_func_probe, rcu);
1465
1466 if (entry->ops->free)
1467 entry->ops->free(&entry->data);
1468 kfree(entry);
1469}
1470
1471
1472int
1473register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
1474 void *data)
1475{
1476 struct ftrace_func_probe *entry;
1477 struct ftrace_page *pg;
1478 struct dyn_ftrace *rec;
1479 int type, len, not;
1480 unsigned long key;
1481 int count = 0;
1482 char *search;
1483
1484 type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
1485 len = strlen(search);
1486
1487 /* we do not support '!' for function probes */
1488 if (WARN_ON(not))
1489 return -EINVAL;
1490
1491 mutex_lock(&ftrace_lock);
1492 do_for_each_ftrace_rec(pg, rec) {
1493
1494 if (rec->flags & FTRACE_FL_FAILED)
1495 continue;
1496
1497 if (!ftrace_match_record(rec, search, len, type))
1498 continue;
1499
1500 entry = kmalloc(sizeof(*entry), GFP_KERNEL);
1501 if (!entry) {
1502 /* If we did not process any, then return error */
1503 if (!count)
1504 count = -ENOMEM;
1505 goto out_unlock;
1506 }
1507
1508 count++;
1509
1510 entry->data = data;
1511
1512 /*
1513 * The caller might want to do something special
1514 * for each function we find. We call the callback
1515 * to give the caller an opportunity to do so.
1516 */
1517 if (ops->callback) {
1518 if (ops->callback(rec->ip, &entry->data) < 0) {
1519 /* caller does not like this func */
1520 kfree(entry);
1090 continue; 1521 continue;
1091 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1092 switch (type) {
1093 case MATCH_FULL:
1094 if (strcmp(str, buff) == 0)
1095 matched = 1;
1096 break;
1097 case MATCH_FRONT_ONLY:
1098 if (memcmp(str, buff, match) == 0)
1099 matched = 1;
1100 break;
1101 case MATCH_MIDDLE_ONLY:
1102 if (strstr(str, search))
1103 matched = 1;
1104 break;
1105 case MATCH_END_ONLY:
1106 ptr = strstr(str, search);
1107 if (ptr && (ptr[search_len] == 0))
1108 matched = 1;
1109 break;
1110 } 1522 }
1111 if (matched) { 1523 }
1112 if (not) 1524
1113 rec->flags &= ~flag; 1525 entry->ops = ops;
1114 else 1526 entry->ip = rec->ip;
1115 rec->flags |= flag; 1527
1528 key = hash_long(entry->ip, FTRACE_HASH_BITS);
1529 hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]);
1530
1531 } while_for_each_ftrace_rec();
1532 __enable_ftrace_function_probe();
1533
1534 out_unlock:
1535 mutex_unlock(&ftrace_lock);
1536
1537 return count;
1538}
1539
1540enum {
1541 PROBE_TEST_FUNC = 1,
1542 PROBE_TEST_DATA = 2
1543};
1544
1545static void
1546__unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
1547 void *data, int flags)
1548{
1549 struct ftrace_func_probe *entry;
1550 struct hlist_node *n, *tmp;
1551 char str[KSYM_SYMBOL_LEN];
1552 int type = MATCH_FULL;
1553 int i, len = 0;
1554 char *search;
1555
1556 if (glob && (strcmp(glob, "*") || !strlen(glob)))
1557 glob = NULL;
1558 else {
1559 int not;
1560
1561 type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
1562 len = strlen(search);
1563
1564 /* we do not support '!' for function probes */
1565 if (WARN_ON(not))
1566 return;
1567 }
1568
1569 mutex_lock(&ftrace_lock);
1570 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
1571 struct hlist_head *hhd = &ftrace_func_hash[i];
1572
1573 hlist_for_each_entry_safe(entry, n, tmp, hhd, node) {
1574
1575 /* break up if statements for readability */
1576 if ((flags & PROBE_TEST_FUNC) && entry->ops != ops)
1577 continue;
1578
1579 if ((flags & PROBE_TEST_DATA) && entry->data != data)
1580 continue;
1581
1582 /* do this last, since it is the most expensive */
1583 if (glob) {
1584 kallsyms_lookup(entry->ip, NULL, NULL,
1585 NULL, str);
1586 if (!ftrace_match(str, glob, len, type))
1587 continue;
1116 } 1588 }
1589
1590 hlist_del(&entry->node);
1591 call_rcu(&entry->rcu, ftrace_free_entry_rcu);
1117 } 1592 }
1118 pg = pg->next;
1119 } 1593 }
1120 spin_unlock(&ftrace_lock); 1594 __disable_ftrace_function_probe();
1595 mutex_unlock(&ftrace_lock);
1596}
1597
1598void
1599unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
1600 void *data)
1601{
1602 __unregister_ftrace_function_probe(glob, ops, data,
1603 PROBE_TEST_FUNC | PROBE_TEST_DATA);
1604}
1605
1606void
1607unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
1608{
1609 __unregister_ftrace_function_probe(glob, ops, NULL, PROBE_TEST_FUNC);
1610}
1611
1612void unregister_ftrace_function_probe_all(char *glob)
1613{
1614 __unregister_ftrace_function_probe(glob, NULL, NULL, 0);
1615}
1616
1617static LIST_HEAD(ftrace_commands);
1618static DEFINE_MUTEX(ftrace_cmd_mutex);
1619
1620int register_ftrace_command(struct ftrace_func_command *cmd)
1621{
1622 struct ftrace_func_command *p;
1623 int ret = 0;
1624
1625 mutex_lock(&ftrace_cmd_mutex);
1626 list_for_each_entry(p, &ftrace_commands, list) {
1627 if (strcmp(cmd->name, p->name) == 0) {
1628 ret = -EBUSY;
1629 goto out_unlock;
1630 }
1631 }
1632 list_add(&cmd->list, &ftrace_commands);
1633 out_unlock:
1634 mutex_unlock(&ftrace_cmd_mutex);
1635
1636 return ret;
1637}
1638
1639int unregister_ftrace_command(struct ftrace_func_command *cmd)
1640{
1641 struct ftrace_func_command *p, *n;
1642 int ret = -ENODEV;
1643
1644 mutex_lock(&ftrace_cmd_mutex);
1645 list_for_each_entry_safe(p, n, &ftrace_commands, list) {
1646 if (strcmp(cmd->name, p->name) == 0) {
1647 ret = 0;
1648 list_del_init(&p->list);
1649 goto out_unlock;
1650 }
1651 }
1652 out_unlock:
1653 mutex_unlock(&ftrace_cmd_mutex);
1654
1655 return ret;
1656}
1657
1658static int ftrace_process_regex(char *buff, int len, int enable)
1659{
1660 char *func, *command, *next = buff;
1661 struct ftrace_func_command *p;
1662 int ret = -EINVAL;
1663
1664 func = strsep(&next, ":");
1665
1666 if (!next) {
1667 ftrace_match_records(func, len, enable);
1668 return 0;
1669 }
1670
1671 /* command found */
1672
1673 command = strsep(&next, ":");
1674
1675 mutex_lock(&ftrace_cmd_mutex);
1676 list_for_each_entry(p, &ftrace_commands, list) {
1677 if (strcmp(p->name, command) == 0) {
1678 ret = p->func(func, command, next, enable);
1679 goto out_unlock;
1680 }
1681 }
1682 out_unlock:
1683 mutex_unlock(&ftrace_cmd_mutex);
1684
1685 return ret;
1121} 1686}
1122 1687
1123static ssize_t 1688static ssize_t
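
ftrace_process_regex() above splits filter input as "<glob>:<command>:<param>" and dispatches to whichever registered ftrace_func_command matches, with "mod" registered earlier as the template. A hedged sketch of wiring up another command the same way; the command name and callback are made up, and only the fields this patch actually uses are filled in:

/*
 * Hypothetical "dump" command for set_ftrace_filter, following the "mod"
 * template above. Input of the form "<glob>:dump:<param>" reaches
 * dump_callback() through ftrace_process_regex().
 */
static int dump_callback(char *func, char *cmd, char *param, int enable)
{
	/* func   - the glob before the first ':'                           */
	/* cmd    - "dump", the command name that matched                   */
	/* param  - anything after the second ':', may be NULL              */
	/* enable - nonzero for set_ftrace_filter, 0 for set_ftrace_notrace */
	return 0;
}

static struct ftrace_func_command dump_cmd = {
	.name	= "dump",
	.func	= dump_callback,
};

static int __init dump_cmd_init(void)
{
	return register_ftrace_command(&dump_cmd);
}
device_initcall(dump_cmd_init);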
@@ -1187,7 +1752,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
1187 if (isspace(ch)) { 1752 if (isspace(ch)) {
1188 iter->filtered++; 1753 iter->filtered++;
1189 iter->buffer[iter->buffer_idx] = 0; 1754 iter->buffer[iter->buffer_idx] = 0;
1190 ftrace_match(iter->buffer, iter->buffer_idx, enable); 1755 ret = ftrace_process_regex(iter->buffer,
1756 iter->buffer_idx, enable);
1757 if (ret)
1758 goto out;
1191 iter->buffer_idx = 0; 1759 iter->buffer_idx = 0;
1192 } else 1760 } else
1193 iter->flags |= FTRACE_ITER_CONT; 1761 iter->flags |= FTRACE_ITER_CONT;
@@ -1226,7 +1794,7 @@ ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
1226 if (reset) 1794 if (reset)
1227 ftrace_filter_reset(enable); 1795 ftrace_filter_reset(enable);
1228 if (buf) 1796 if (buf)
1229 ftrace_match(buf, len, enable); 1797 ftrace_match_records(buf, len, enable);
1230 mutex_unlock(&ftrace_regex_lock); 1798 mutex_unlock(&ftrace_regex_lock);
1231} 1799}
1232 1800
@@ -1276,15 +1844,13 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1276 if (iter->buffer_idx) { 1844 if (iter->buffer_idx) {
1277 iter->filtered++; 1845 iter->filtered++;
1278 iter->buffer[iter->buffer_idx] = 0; 1846 iter->buffer[iter->buffer_idx] = 0;
1279 ftrace_match(iter->buffer, iter->buffer_idx, enable); 1847 ftrace_match_records(iter->buffer, iter->buffer_idx, enable);
1280 } 1848 }
1281 1849
1282 mutex_lock(&ftrace_sysctl_lock); 1850 mutex_lock(&ftrace_lock);
1283 mutex_lock(&ftrace_start_lock);
1284 if (ftrace_start_up && ftrace_enabled) 1851 if (ftrace_start_up && ftrace_enabled)
1285 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1852 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1286 mutex_unlock(&ftrace_start_lock); 1853 mutex_unlock(&ftrace_lock);
1287 mutex_unlock(&ftrace_sysctl_lock);
1288 1854
1289 kfree(iter); 1855 kfree(iter);
1290 mutex_unlock(&ftrace_regex_lock); 1856 mutex_unlock(&ftrace_regex_lock);
@@ -1360,6 +1926,10 @@ static void *g_start(struct seq_file *m, loff_t *pos)
1360 1926
1361 mutex_lock(&graph_lock); 1927 mutex_lock(&graph_lock);
1362 1928
1929 /* Nothing set; tell g_show to print that all functions are enabled */
1930 if (!ftrace_graph_count && !*pos)
1931 return (void *)1;
1932
1363 p = g_next(m, p, pos); 1933 p = g_next(m, p, pos);
1364 1934
1365 return p; 1935 return p;
@@ -1378,6 +1948,11 @@ static int g_show(struct seq_file *m, void *v)
1378 if (!ptr) 1948 if (!ptr)
1379 return 0; 1949 return 0;
1380 1950
1951 if (ptr == (unsigned long *)1) {
1952 seq_printf(m, "#### all functions enabled ####\n");
1953 return 0;
1954 }
1955
1381 kallsyms_lookup(*ptr, NULL, NULL, NULL, str); 1956 kallsyms_lookup(*ptr, NULL, NULL, NULL, str);
1382 1957
1383 seq_printf(m, "%s\n", str); 1958 seq_printf(m, "%s\n", str);
@@ -1431,42 +2006,52 @@ ftrace_graph_read(struct file *file, char __user *ubuf,
1431} 2006}
1432 2007
1433static int 2008static int
1434ftrace_set_func(unsigned long *array, int idx, char *buffer) 2009ftrace_set_func(unsigned long *array, int *idx, char *buffer)
1435{ 2010{
1436 char str[KSYM_SYMBOL_LEN];
1437 struct dyn_ftrace *rec; 2011 struct dyn_ftrace *rec;
1438 struct ftrace_page *pg; 2012 struct ftrace_page *pg;
2013 int search_len;
1439 int found = 0; 2014 int found = 0;
1440 int i, j; 2015 int type, not;
2016 char *search;
2017 bool exists;
2018 int i;
1441 2019
1442 if (ftrace_disabled) 2020 if (ftrace_disabled)
1443 return -ENODEV; 2021 return -ENODEV;
1444 2022
1445 /* should not be called from interrupt context */ 2023 /* decode regex */
1446 spin_lock(&ftrace_lock); 2024 type = ftrace_setup_glob(buffer, strlen(buffer), &search, &not);
2025 if (not)
2026 return -EINVAL;
1447 2027
1448 for (pg = ftrace_pages_start; pg; pg = pg->next) { 2028 search_len = strlen(search);
1449 for (i = 0; i < pg->index; i++) {
1450 rec = &pg->records[i];
1451 2029
1452 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) 2030 mutex_lock(&ftrace_lock);
1453 continue; 2031 do_for_each_ftrace_rec(pg, rec) {
2032
2033 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2034 break;
2035
2036 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
2037 continue;
1454 2038
1455 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 2039 if (ftrace_match_record(rec, search, search_len, type)) {
1456 if (strcmp(str, buffer) == 0) { 2040 /* ensure it is not already in the array */
2041 exists = false;
2042 for (i = 0; i < *idx; i++)
2043 if (array[i] == rec->ip) {
2044 exists = true;
2045 break;
2046 }
2047 if (!exists) {
2048 array[(*idx)++] = rec->ip;
1457 found = 1; 2049 found = 1;
1458 for (j = 0; j < idx; j++)
1459 if (array[j] == rec->ip) {
1460 found = 0;
1461 break;
1462 }
1463 if (found)
1464 array[idx] = rec->ip;
1465 break;
1466 } 2050 }
1467 } 2051 }
1468 } 2052 } while_for_each_ftrace_rec();
1469 spin_unlock(&ftrace_lock); 2053
2054 mutex_unlock(&ftrace_lock);
1470 2055
1471 return found ? 0 : -EINVAL; 2056 return found ? 0 : -EINVAL;
1472} 2057}
@@ -1534,13 +2119,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
1534 } 2119 }
1535 buffer[index] = 0; 2120 buffer[index] = 0;
1536 2121
1537 /* we allow only one at a time */ 2122 /* we allow only one expression at a time */
1538 ret = ftrace_set_func(array, ftrace_graph_count, buffer); 2123 ret = ftrace_set_func(array, &ftrace_graph_count, buffer);
1539 if (ret) 2124 if (ret)
1540 goto out; 2125 goto out;
1541 2126
1542 ftrace_graph_count++;
1543
1544 file->f_pos += read; 2127 file->f_pos += read;
1545 2128
1546 ret = read; 2129 ret = read;
@@ -1604,7 +2187,7 @@ static int ftrace_convert_nops(struct module *mod,
1604 unsigned long addr; 2187 unsigned long addr;
1605 unsigned long flags; 2188 unsigned long flags;
1606 2189
1607 mutex_lock(&ftrace_start_lock); 2190 mutex_lock(&ftrace_lock);
1608 p = start; 2191 p = start;
1609 while (p < end) { 2192 while (p < end) {
1610 addr = ftrace_call_adjust(*p++); 2193 addr = ftrace_call_adjust(*p++);
@@ -1623,7 +2206,7 @@ static int ftrace_convert_nops(struct module *mod,
1623 local_irq_save(flags); 2206 local_irq_save(flags);
1624 ftrace_update_code(mod); 2207 ftrace_update_code(mod);
1625 local_irq_restore(flags); 2208 local_irq_restore(flags);
1626 mutex_unlock(&ftrace_start_lock); 2209 mutex_unlock(&ftrace_lock);
1627 2210
1628 return 0; 2211 return 0;
1629} 2212}
@@ -1796,7 +2379,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
1796 if (ret < 0) 2379 if (ret < 0)
1797 return ret; 2380 return ret;
1798 2381
1799 mutex_lock(&ftrace_start_lock); 2382 mutex_lock(&ftrace_lock);
1800 if (val < 0) { 2383 if (val < 0) {
1801 /* disable pid tracing */ 2384 /* disable pid tracing */
1802 if (!ftrace_pid_trace) 2385 if (!ftrace_pid_trace)
@@ -1835,7 +2418,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
1835 ftrace_startup_enable(0); 2418 ftrace_startup_enable(0);
1836 2419
1837 out: 2420 out:
1838 mutex_unlock(&ftrace_start_lock); 2421 mutex_unlock(&ftrace_lock);
1839 2422
1840 return cnt; 2423 return cnt;
1841} 2424}
@@ -1863,7 +2446,6 @@ static __init int ftrace_init_debugfs(void)
1863 "'set_ftrace_pid' entry\n"); 2446 "'set_ftrace_pid' entry\n");
1864 return 0; 2447 return 0;
1865} 2448}
1866
1867fs_initcall(ftrace_init_debugfs); 2449fs_initcall(ftrace_init_debugfs);
1868 2450
1869/** 2451/**
@@ -1898,17 +2480,17 @@ int register_ftrace_function(struct ftrace_ops *ops)
1898 if (unlikely(ftrace_disabled)) 2480 if (unlikely(ftrace_disabled))
1899 return -1; 2481 return -1;
1900 2482
1901 mutex_lock(&ftrace_sysctl_lock); 2483 mutex_lock(&ftrace_lock);
1902 2484
1903 ret = __register_ftrace_function(ops); 2485 ret = __register_ftrace_function(ops);
1904 ftrace_startup(0); 2486 ftrace_startup(0);
1905 2487
1906 mutex_unlock(&ftrace_sysctl_lock); 2488 mutex_unlock(&ftrace_lock);
1907 return ret; 2489 return ret;
1908} 2490}
1909 2491
1910/** 2492/**
1911 * unregister_ftrace_function - unresgister a function for profiling. 2493 * unregister_ftrace_function - unregister a function for profiling.
1912 * @ops - ops structure that holds the function to unregister 2494 * @ops - ops structure that holds the function to unregister
1913 * 2495 *
1914 * Unregister a function that was added to be called by ftrace profiling. 2496 * Unregister a function that was added to be called by ftrace profiling.
@@ -1917,10 +2499,10 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
1917{ 2499{
1918 int ret; 2500 int ret;
1919 2501
1920 mutex_lock(&ftrace_sysctl_lock); 2502 mutex_lock(&ftrace_lock);
1921 ret = __unregister_ftrace_function(ops); 2503 ret = __unregister_ftrace_function(ops);
1922 ftrace_shutdown(0); 2504 ftrace_shutdown(0);
1923 mutex_unlock(&ftrace_sysctl_lock); 2505 mutex_unlock(&ftrace_lock);
1924 2506
1925 return ret; 2507 return ret;
1926} 2508}
@@ -1935,7 +2517,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1935 if (unlikely(ftrace_disabled)) 2517 if (unlikely(ftrace_disabled))
1936 return -ENODEV; 2518 return -ENODEV;
1937 2519
1938 mutex_lock(&ftrace_sysctl_lock); 2520 mutex_lock(&ftrace_lock);
1939 2521
1940 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 2522 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
1941 2523
@@ -1964,7 +2546,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1964 } 2546 }
1965 2547
1966 out: 2548 out:
1967 mutex_unlock(&ftrace_sysctl_lock); 2549 mutex_unlock(&ftrace_lock);
1968 return ret; 2550 return ret;
1969} 2551}
1970 2552
@@ -2080,7 +2662,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2080{ 2662{
2081 int ret = 0; 2663 int ret = 0;
2082 2664
2083 mutex_lock(&ftrace_sysctl_lock); 2665 mutex_lock(&ftrace_lock);
2084 2666
2085 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; 2667 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
2086 register_pm_notifier(&ftrace_suspend_notifier); 2668 register_pm_notifier(&ftrace_suspend_notifier);
@@ -2098,13 +2680,13 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2098 ftrace_startup(FTRACE_START_FUNC_RET); 2680 ftrace_startup(FTRACE_START_FUNC_RET);
2099 2681
2100out: 2682out:
2101 mutex_unlock(&ftrace_sysctl_lock); 2683 mutex_unlock(&ftrace_lock);
2102 return ret; 2684 return ret;
2103} 2685}
2104 2686
2105void unregister_ftrace_graph(void) 2687void unregister_ftrace_graph(void)
2106{ 2688{
2107 mutex_lock(&ftrace_sysctl_lock); 2689 mutex_lock(&ftrace_lock);
2108 2690
2109 atomic_dec(&ftrace_graph_active); 2691 atomic_dec(&ftrace_graph_active);
2110 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 2692 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
@@ -2112,7 +2694,7 @@ void unregister_ftrace_graph(void)
2112 ftrace_shutdown(FTRACE_STOP_FUNC_RET); 2694 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
2113 unregister_pm_notifier(&ftrace_suspend_notifier); 2695 unregister_pm_notifier(&ftrace_suspend_notifier);
2114 2696
2115 mutex_unlock(&ftrace_sysctl_lock); 2697 mutex_unlock(&ftrace_lock);
2116} 2698}
2117 2699
2118/* Allocate a return stack for newly created task */ 2700/* Allocate a return stack for newly created task */
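
The function-probe API added to ftrace.c above (register_ftrace_function_probe() and friends) hashes each matched function's address and calls the probe handler from function_trace_probe_call() with preemption disabled. A hedged sketch of a caller follows; the handler signature is inferred from how ->func() is invoked above, the full struct ftrace_probe_ops definition lives in the ftrace header rather than in this patch, and whether these symbols are exported to modules is not shown here:

#include <linux/module.h>
#include <linux/ftrace.h>

/* invoked, preemption disabled, for every hit on a matched function */
static void my_probe_handler(unsigned long ip, unsigned long parent_ip,
			     void **data)
{
	/* per-probe private data registered below, if any */
}

static struct ftrace_probe_ops my_probe_ops = {
	.func	= my_probe_handler,
};

static int __init my_probe_init(void)
{
	/* writable buffer: ftrace_setup_glob() edits the glob in place */
	char glob[] = "sched_*";
	int ret;

	ret = register_ftrace_function_probe(glob, &my_probe_ops, NULL);
	return ret < 0 ? ret : 0;
}

static void __exit my_probe_exit(void)
{
	/* fresh writable copy, since the buffer used at register time was edited */
	char glob[] = "sched_*";

	unregister_ftrace_function_probe(glob, &my_probe_ops, NULL);
}

module_init(my_probe_init);
module_exit(my_probe_exit);
MODULE_LICENSE("GPL");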
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
new file mode 100644
index 000000000000..ae201b3eda89
--- /dev/null
+++ b/kernel/trace/kmemtrace.c
@@ -0,0 +1,339 @@
1/*
2 * Memory allocator tracing
3 *
4 * Copyright (C) 2008 Eduard - Gabriel Munteanu
5 * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
6 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
7 */
8
9#include <linux/dcache.h>
10#include <linux/debugfs.h>
11#include <linux/fs.h>
12#include <linux/seq_file.h>
13#include <trace/kmemtrace.h>
14
15#include "trace.h"
16#include "trace_output.h"
17
18/* Select an alternative, minimalistic output instead of the original one */
19#define TRACE_KMEM_OPT_MINIMAL 0x1
20
21static struct tracer_opt kmem_opts[] = {
22 /* Default disable the minimalistic output */
23 { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
24 { }
25};
26
27static struct tracer_flags kmem_tracer_flags = {
28 .val = 0,
29 .opts = kmem_opts
30};
31
32
33static bool kmem_tracing_enabled __read_mostly;
34static struct trace_array *kmemtrace_array;
35
36static int kmem_trace_init(struct trace_array *tr)
37{
38 int cpu;
39 kmemtrace_array = tr;
40
41 for_each_cpu_mask(cpu, cpu_possible_map)
42 tracing_reset(tr, cpu);
43
44 kmem_tracing_enabled = true;
45
46 return 0;
47}
48
49static void kmem_trace_reset(struct trace_array *tr)
50{
51 kmem_tracing_enabled = false;
52}
53
54static void kmemtrace_headers(struct seq_file *s)
55{
56 /* Don't need headers for the original kmemtrace output */
57 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
58 return;
59
60 seq_printf(s, "#\n");
61 seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
62 " POINTER NODE CALLER\n");
63 seq_printf(s, "# FREE | | | | "
64 " | | | |\n");
65 seq_printf(s, "# |\n\n");
66}
67
68/*
69 * The following two functions give the original output from kmemtrace,
70 * or something close to it; they may still be missing a few details.
71 */
72static enum print_line_t
73kmemtrace_print_alloc_original(struct trace_iterator *iter,
74 struct kmemtrace_alloc_entry *entry)
75{
76 struct trace_seq *s = &iter->seq;
77 int ret;
78
79 /* Taken from the old linux/kmemtrace.h */
80 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu "
81 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
82 entry->type_id, entry->call_site, (unsigned long) entry->ptr,
83 (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc,
84 (unsigned long) entry->gfp_flags, entry->node);
85
86 if (!ret)
87 return TRACE_TYPE_PARTIAL_LINE;
88
89 return TRACE_TYPE_HANDLED;
90}
91
92static enum print_line_t
93kmemtrace_print_free_original(struct trace_iterator *iter,
94 struct kmemtrace_free_entry *entry)
95{
96 struct trace_seq *s = &iter->seq;
97 int ret;
98
99 /* Taken from the old linux/kmemtrace.h */
100 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n",
101 entry->type_id, entry->call_site, (unsigned long) entry->ptr);
102
103 if (!ret)
104 return TRACE_TYPE_PARTIAL_LINE;
105
106 return TRACE_TYPE_HANDLED;
107}
108
109
110/* The next two functions provide a more minimalistic output */
111static enum print_line_t
112kmemtrace_print_alloc_compress(struct trace_iterator *iter,
113 struct kmemtrace_alloc_entry *entry)
114{
115 struct trace_seq *s = &iter->seq;
116 int ret;
117
118 /* Alloc entry */
119 ret = trace_seq_printf(s, " + ");
120 if (!ret)
121 return TRACE_TYPE_PARTIAL_LINE;
122
123 /* Type */
124 switch (entry->type_id) {
125 case KMEMTRACE_TYPE_KMALLOC:
126 ret = trace_seq_printf(s, "K ");
127 break;
128 case KMEMTRACE_TYPE_CACHE:
129 ret = trace_seq_printf(s, "C ");
130 break;
131 case KMEMTRACE_TYPE_PAGES:
132 ret = trace_seq_printf(s, "P ");
133 break;
134 default:
135 ret = trace_seq_printf(s, "? ");
136 }
137
138 if (!ret)
139 return TRACE_TYPE_PARTIAL_LINE;
140
141 /* Requested */
142 ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
143 if (!ret)
144 return TRACE_TYPE_PARTIAL_LINE;
145
146 /* Allocated */
147 ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
148 if (!ret)
149 return TRACE_TYPE_PARTIAL_LINE;
150
151 /* Flags
152 * TODO: would be better to print the names of the GFP flags
153 */
154 ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
155 if (!ret)
156 return TRACE_TYPE_PARTIAL_LINE;
157
158 /* Pointer to allocated */
159 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
160 if (!ret)
161 return TRACE_TYPE_PARTIAL_LINE;
162
163 /* Node */
164 ret = trace_seq_printf(s, "%4d ", entry->node);
165 if (!ret)
166 return TRACE_TYPE_PARTIAL_LINE;
167
168 /* Call site */
169 ret = seq_print_ip_sym(s, entry->call_site, 0);
170 if (!ret)
171 return TRACE_TYPE_PARTIAL_LINE;
172
173 if (!trace_seq_printf(s, "\n"))
174 return TRACE_TYPE_PARTIAL_LINE;
175
176 return TRACE_TYPE_HANDLED;
177}
178
179static enum print_line_t
180kmemtrace_print_free_compress(struct trace_iterator *iter,
181 struct kmemtrace_free_entry *entry)
182{
183 struct trace_seq *s = &iter->seq;
184 int ret;
185
186 /* Free entry */
187 ret = trace_seq_printf(s, " - ");
188 if (!ret)
189 return TRACE_TYPE_PARTIAL_LINE;
190
191 /* Type */
192 switch (entry->type_id) {
193 case KMEMTRACE_TYPE_KMALLOC:
194 ret = trace_seq_printf(s, "K ");
195 break;
196 case KMEMTRACE_TYPE_CACHE:
197 ret = trace_seq_printf(s, "C ");
198 break;
199 case KMEMTRACE_TYPE_PAGES:
200 ret = trace_seq_printf(s, "P ");
201 break;
202 default:
203 ret = trace_seq_printf(s, "? ");
204 }
205
206 if (!ret)
207 return TRACE_TYPE_PARTIAL_LINE;
208
209 /* Skip requested/allocated/flags */
210 ret = trace_seq_printf(s, " ");
211 if (!ret)
212 return TRACE_TYPE_PARTIAL_LINE;
213
214 /* Pointer to allocated */
215 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
216 if (!ret)
217 return TRACE_TYPE_PARTIAL_LINE;
218
219 /* Skip node */
220 ret = trace_seq_printf(s, " ");
221 if (!ret)
222 return TRACE_TYPE_PARTIAL_LINE;
223
224 /* Call site */
225 ret = seq_print_ip_sym(s, entry->call_site, 0);
226 if (!ret)
227 return TRACE_TYPE_PARTIAL_LINE;
228
229 if (!trace_seq_printf(s, "\n"))
230 return TRACE_TYPE_PARTIAL_LINE;
231
232 return TRACE_TYPE_HANDLED;
233}
234
235static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
236{
237 struct trace_entry *entry = iter->ent;
238
239 switch (entry->type) {
240 case TRACE_KMEM_ALLOC: {
241 struct kmemtrace_alloc_entry *field;
242 trace_assign_type(field, entry);
243 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
244 return kmemtrace_print_alloc_compress(iter, field);
245 else
246 return kmemtrace_print_alloc_original(iter, field);
247 }
248
249 case TRACE_KMEM_FREE: {
250 struct kmemtrace_free_entry *field;
251 trace_assign_type(field, entry);
252 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
253 return kmemtrace_print_free_compress(iter, field);
254 else
255 return kmemtrace_print_free_original(iter, field);
256 }
257
258 default:
259 return TRACE_TYPE_UNHANDLED;
260 }
261}
262
263/* Trace allocations */
264void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
265 unsigned long call_site,
266 const void *ptr,
267 size_t bytes_req,
268 size_t bytes_alloc,
269 gfp_t gfp_flags,
270 int node)
271{
272 struct ring_buffer_event *event;
273 struct kmemtrace_alloc_entry *entry;
274 struct trace_array *tr = kmemtrace_array;
275
276 if (!kmem_tracing_enabled)
277 return;
278
279 event = trace_buffer_lock_reserve(tr, TRACE_KMEM_ALLOC,
280 sizeof(*entry), 0, 0);
281 if (!event)
282 return;
283 entry = ring_buffer_event_data(event);
284
285 entry->call_site = call_site;
286 entry->ptr = ptr;
287 entry->bytes_req = bytes_req;
288 entry->bytes_alloc = bytes_alloc;
289 entry->gfp_flags = gfp_flags;
290 entry->node = node;
291
292 trace_buffer_unlock_commit(tr, event, 0, 0);
293}
294EXPORT_SYMBOL(kmemtrace_mark_alloc_node);
295
296void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
297 unsigned long call_site,
298 const void *ptr)
299{
300 struct ring_buffer_event *event;
301 struct kmemtrace_free_entry *entry;
302 struct trace_array *tr = kmemtrace_array;
303
304 if (!kmem_tracing_enabled)
305 return;
306
307 event = trace_buffer_lock_reserve(tr, TRACE_KMEM_FREE,
308 sizeof(*entry), 0, 0);
309 if (!event)
310 return;
311 entry = ring_buffer_event_data(event);
312 entry->type_id = type_id;
313 entry->call_site = call_site;
314 entry->ptr = ptr;
315
316 trace_buffer_unlock_commit(tr, event, 0, 0);
317}
318EXPORT_SYMBOL(kmemtrace_mark_free);
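The two exported markers above are meant to be called from allocator paths with the details of each request. As a rough illustration of the calling convention only (the wrappers below are hypothetical and not part of this patch; they assume <linux/slab.h> for kmalloc()/ksize() and the usual _RET_IP_ and numa_node_id() helpers):

static void *kmalloc_traced(size_t size, gfp_t flags)
{
	void *ptr = kmalloc(size, flags);

	/* record requested vs. actually allocated bytes, plus flags and node */
	if (ptr)
		kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_,
					  ptr, size, ksize(ptr), flags,
					  numa_node_id());
	return ptr;
}

static void kfree_traced(const void *ptr)
{
	kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ptr);
	kfree(ptr);
}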
319
320static struct tracer kmem_tracer __read_mostly = {
321 .name = "kmemtrace",
322 .init = kmem_trace_init,
323 .reset = kmem_trace_reset,
324 .print_line = kmemtrace_print_line,
325 .print_header = kmemtrace_headers,
326 .flags = &kmem_tracer_flags
327};
328
329void kmemtrace_init(void)
330{
331 /* earliest opportunity to start kmem tracing */
332}
333
334static int __init init_kmem_tracer(void)
335{
336 return register_tracer(&kmem_tracer);
337}
338
339device_initcall(init_kmem_tracer);
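Once register_tracer() has run, the plugin behaves like any other ftrace tracer: with debugfs mounted (commonly at /sys/kernel/debug), writing "kmemtrace" to tracing/current_tracer selects it, which invokes kmem_trace_init() and later kmem_trace_reset() from the struct above. The "ftrace=" boot parameter handling added to kernel/trace/trace.c further down in this patch can also select it at boot time.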
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bd38c5cfd8ad..a8c275c01e83 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,13 +4,15 @@
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> 4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */ 5 */
6#include <linux/ring_buffer.h> 6#include <linux/ring_buffer.h>
7#include <linux/trace_clock.h>
8#include <linux/ftrace_irq.h>
7#include <linux/spinlock.h> 9#include <linux/spinlock.h>
8#include <linux/debugfs.h> 10#include <linux/debugfs.h>
9#include <linux/uaccess.h> 11#include <linux/uaccess.h>
12#include <linux/hardirq.h>
10#include <linux/module.h> 13#include <linux/module.h>
11#include <linux/percpu.h> 14#include <linux/percpu.h>
12#include <linux/mutex.h> 15#include <linux/mutex.h>
13#include <linux/sched.h> /* used for sched_clock() (for now) */
14#include <linux/init.h> 16#include <linux/init.h>
15#include <linux/hash.h> 17#include <linux/hash.h>
16#include <linux/list.h> 18#include <linux/list.h>
@@ -57,7 +59,7 @@ enum {
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, 59 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58}; 60};
59 61
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; 62static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
61 63
62/** 64/**
63 * tracing_on - enable all tracing buffers 65 * tracing_on - enable all tracing buffers
@@ -89,26 +91,34 @@ EXPORT_SYMBOL_GPL(tracing_off);
89 * tracing_off_permanent - permanently disable ring buffers 91 * tracing_off_permanent - permanently disable ring buffers
90 * 92 *
91 * This function, once called, will disable all ring buffers 93 * This function, once called, will disable all ring buffers
92 * permanenty. 94 * permanently.
93 */ 95 */
94void tracing_off_permanent(void) 96void tracing_off_permanent(void)
95{ 97{
96 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); 98 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
97} 99}
98 100
101/**
102 * tracing_is_on - show state of ring buffers enabled
103 */
104int tracing_is_on(void)
105{
106 return ring_buffer_flags == RB_BUFFERS_ON;
107}
108EXPORT_SYMBOL_GPL(tracing_is_on);
109
99#include "trace.h" 110#include "trace.h"
100 111
101/* Up this if you want to test the TIME_EXTENTS and normalization */ 112/* Up this if you want to test the TIME_EXTENTS and normalization */
102#define DEBUG_SHIFT 0 113#define DEBUG_SHIFT 0
103 114
104/* FIXME!!! */
105u64 ring_buffer_time_stamp(int cpu) 115u64 ring_buffer_time_stamp(int cpu)
106{ 116{
107 u64 time; 117 u64 time;
108 118
109 preempt_disable_notrace(); 119 preempt_disable_notrace();
110 /* shift to debug/test normalization and TIME_EXTENTS */ 120 /* shift to debug/test normalization and TIME_EXTENTS */
111 time = sched_clock() << DEBUG_SHIFT; 121 time = trace_clock_local() << DEBUG_SHIFT;
112 preempt_enable_no_resched_notrace(); 122 preempt_enable_no_resched_notrace();
113 123
114 return time; 124 return time;
@@ -123,8 +133,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
123EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); 133EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
124 134
125#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) 135#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
126#define RB_ALIGNMENT_SHIFT 2 136#define RB_ALIGNMENT 4U
127#define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT)
128#define RB_MAX_SMALL_DATA 28 137#define RB_MAX_SMALL_DATA 28
129 138
130enum { 139enum {
@@ -133,7 +142,7 @@ enum {
133}; 142};
134 143
135/* inline for ring buffer fast paths */ 144/* inline for ring buffer fast paths */
136static inline unsigned 145static unsigned
137rb_event_length(struct ring_buffer_event *event) 146rb_event_length(struct ring_buffer_event *event)
138{ 147{
139 unsigned length; 148 unsigned length;
@@ -151,7 +160,7 @@ rb_event_length(struct ring_buffer_event *event)
151 160
152 case RINGBUF_TYPE_DATA: 161 case RINGBUF_TYPE_DATA:
153 if (event->len) 162 if (event->len)
154 length = event->len << RB_ALIGNMENT_SHIFT; 163 length = event->len * RB_ALIGNMENT;
155 else 164 else
156 length = event->array[0]; 165 length = event->array[0];
157 return length + RB_EVNT_HDR_SIZE; 166 return length + RB_EVNT_HDR_SIZE;
@@ -179,7 +188,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
179EXPORT_SYMBOL_GPL(ring_buffer_event_length); 188EXPORT_SYMBOL_GPL(ring_buffer_event_length);
180 189
181/* inline for ring buffer fast paths */ 190/* inline for ring buffer fast paths */
182static inline void * 191static void *
183rb_event_data(struct ring_buffer_event *event) 192rb_event_data(struct ring_buffer_event *event)
184{ 193{
185 BUG_ON(event->type != RINGBUF_TYPE_DATA); 194 BUG_ON(event->type != RINGBUF_TYPE_DATA);
@@ -209,7 +218,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
209 218
210struct buffer_data_page { 219struct buffer_data_page {
211 u64 time_stamp; /* page time stamp */ 220 u64 time_stamp; /* page time stamp */
212 local_t commit; /* write commited index */ 221 local_t commit; /* write committed index */
213 unsigned char data[]; /* data of buffer page */ 222 unsigned char data[]; /* data of buffer page */
214}; 223};
215 224
@@ -229,10 +238,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
229 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 238 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
230 * this issue out. 239 * this issue out.
231 */ 240 */
232static inline void free_buffer_page(struct buffer_page *bpage) 241static void free_buffer_page(struct buffer_page *bpage)
233{ 242{
234 if (bpage->page) 243 free_page((unsigned long)bpage->page);
235 free_page((unsigned long)bpage->page);
236 kfree(bpage); 244 kfree(bpage);
237} 245}
238 246
@@ -260,7 +268,7 @@ struct ring_buffer_per_cpu {
260 struct list_head pages; 268 struct list_head pages;
261 struct buffer_page *head_page; /* read from head */ 269 struct buffer_page *head_page; /* read from head */
262 struct buffer_page *tail_page; /* write to tail */ 270 struct buffer_page *tail_page; /* write to tail */
263 struct buffer_page *commit_page; /* commited pages */ 271 struct buffer_page *commit_page; /* committed pages */
264 struct buffer_page *reader_page; 272 struct buffer_page *reader_page;
265 unsigned long overrun; 273 unsigned long overrun;
266 unsigned long entries; 274 unsigned long entries;
@@ -273,8 +281,8 @@ struct ring_buffer {
273 unsigned pages; 281 unsigned pages;
274 unsigned flags; 282 unsigned flags;
275 int cpus; 283 int cpus;
276 cpumask_var_t cpumask;
277 atomic_t record_disabled; 284 atomic_t record_disabled;
285 cpumask_var_t cpumask;
278 286
279 struct mutex mutex; 287 struct mutex mutex;
280 288
@@ -303,7 +311,7 @@ struct ring_buffer_iter {
303 * check_pages - integrity check of buffer pages 311 * check_pages - integrity check of buffer pages
304 * @cpu_buffer: CPU buffer with pages to test 312 * @cpu_buffer: CPU buffer with pages to test
305 * 313 *
306 * As a safty measure we check to make sure the data pages have not 314 * As a safety measure we check to make sure the data pages have not
307 * been corrupted. 315 * been corrupted.
308 */ 316 */
309static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 317static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
@@ -811,7 +819,7 @@ rb_event_index(struct ring_buffer_event *event)
811 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); 819 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
812} 820}
813 821
814static inline int 822static int
815rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, 823rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
816 struct ring_buffer_event *event) 824 struct ring_buffer_event *event)
817{ 825{
@@ -825,7 +833,7 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
825 rb_commit_index(cpu_buffer) == index; 833 rb_commit_index(cpu_buffer) == index;
826} 834}
827 835
828static inline void 836static void
829rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, 837rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
830 struct ring_buffer_event *event) 838 struct ring_buffer_event *event)
831{ 839{
@@ -850,7 +858,7 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
850 local_set(&cpu_buffer->commit_page->page->commit, index); 858 local_set(&cpu_buffer->commit_page->page->commit, index);
851} 859}
852 860
853static inline void 861static void
854rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) 862rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
855{ 863{
856 /* 864 /*
@@ -896,7 +904,7 @@ static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
896 cpu_buffer->reader_page->read = 0; 904 cpu_buffer->reader_page->read = 0;
897} 905}
898 906
899static inline void rb_inc_iter(struct ring_buffer_iter *iter) 907static void rb_inc_iter(struct ring_buffer_iter *iter)
900{ 908{
901 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 909 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
902 910
@@ -926,7 +934,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
926 * and with this, we can determine what to place into the 934 * and with this, we can determine what to place into the
927 * data field. 935 * data field.
928 */ 936 */
929static inline void 937static void
930rb_update_event(struct ring_buffer_event *event, 938rb_update_event(struct ring_buffer_event *event,
931 unsigned type, unsigned length) 939 unsigned type, unsigned length)
932{ 940{
@@ -938,15 +946,11 @@ rb_update_event(struct ring_buffer_event *event,
938 break; 946 break;
939 947
940 case RINGBUF_TYPE_TIME_EXTEND: 948 case RINGBUF_TYPE_TIME_EXTEND:
941 event->len = 949 event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
942 (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
943 >> RB_ALIGNMENT_SHIFT;
944 break; 950 break;
945 951
946 case RINGBUF_TYPE_TIME_STAMP: 952 case RINGBUF_TYPE_TIME_STAMP:
947 event->len = 953 event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
948 (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
949 >> RB_ALIGNMENT_SHIFT;
950 break; 954 break;
951 955
952 case RINGBUF_TYPE_DATA: 956 case RINGBUF_TYPE_DATA:
@@ -955,16 +959,14 @@ rb_update_event(struct ring_buffer_event *event,
955 event->len = 0; 959 event->len = 0;
956 event->array[0] = length; 960 event->array[0] = length;
957 } else 961 } else
958 event->len = 962 event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
959 (length + (RB_ALIGNMENT-1))
960 >> RB_ALIGNMENT_SHIFT;
961 break; 963 break;
962 default: 964 default:
963 BUG(); 965 BUG();
964 } 966 }
965} 967}
966 968
967static inline unsigned rb_calculate_event_length(unsigned length) 969static unsigned rb_calculate_event_length(unsigned length)
968{ 970{
969 struct ring_buffer_event event; /* Used only for sizeof array */ 971 struct ring_buffer_event event; /* Used only for sizeof array */
970 972
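To make the new arithmetic concrete, here is an illustrative helper (not part of the patch) showing how a payload length round-trips through the DIV_ROUND_UP() encoding above and the "event->len * RB_ALIGNMENT" decoding now used in rb_event_length():

/* Illustration only: total bytes consumed by a small DATA event.
 * With RB_ALIGNMENT == 4, a 29-byte payload encodes len = DIV_ROUND_UP(29, 4) = 8,
 * which decodes back to 8 * 4 = 32 data bytes, plus the event header. */
static unsigned rb_example_event_size(unsigned length)
{
	unsigned len_field = DIV_ROUND_UP(length, RB_ALIGNMENT);

	return len_field * RB_ALIGNMENT + RB_EVNT_HDR_SIZE;
}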
@@ -990,6 +992,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
990 struct ring_buffer *buffer = cpu_buffer->buffer; 992 struct ring_buffer *buffer = cpu_buffer->buffer;
991 struct ring_buffer_event *event; 993 struct ring_buffer_event *event;
992 unsigned long flags; 994 unsigned long flags;
995 bool lock_taken = false;
993 996
994 commit_page = cpu_buffer->commit_page; 997 commit_page = cpu_buffer->commit_page;
995 /* we just need to protect against interrupts */ 998 /* we just need to protect against interrupts */
@@ -1003,7 +1006,30 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1003 struct buffer_page *next_page = tail_page; 1006 struct buffer_page *next_page = tail_page;
1004 1007
1005 local_irq_save(flags); 1008 local_irq_save(flags);
1006 __raw_spin_lock(&cpu_buffer->lock); 1009 /*
1010 * Since the write to the buffer is still not
1011 * fully lockless, we must be careful with NMIs.
1012 * The locks in the writers are taken when a write
1013 * crosses to a new page. The locks protect against
1014 * races with the readers (this will soon be fixed
1015 * with a lockless solution).
1016 *
1017 * Because we can not protect against NMIs, and we
1018 * want to keep traces reentrant, we need to manage
1019 * what happens when we are in an NMI.
1020 *
1021 * NMIs can happen after we take the lock.
1022 * If we are in an NMI, only take the lock
1023 * if it is not already taken. Otherwise
1024 * simply fail.
1025 */
1026 if (unlikely(in_nmi())) {
1027 if (!__raw_spin_trylock(&cpu_buffer->lock))
1028 goto out_reset;
1029 } else
1030 __raw_spin_lock(&cpu_buffer->lock);
1031
1032 lock_taken = true;
1007 1033
1008 rb_inc_page(cpu_buffer, &next_page); 1034 rb_inc_page(cpu_buffer, &next_page);
1009 1035
@@ -1012,7 +1038,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1012 1038
1013 /* we grabbed the lock before incrementing */ 1039 /* we grabbed the lock before incrementing */
1014 if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) 1040 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
1015 goto out_unlock; 1041 goto out_reset;
1016 1042
1017 /* 1043 /*
1018 * If for some reason, we had an interrupt storm that made 1044 * If for some reason, we had an interrupt storm that made
@@ -1021,12 +1047,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1021 */ 1047 */
1022 if (unlikely(next_page == commit_page)) { 1048 if (unlikely(next_page == commit_page)) {
1023 WARN_ON_ONCE(1); 1049 WARN_ON_ONCE(1);
1024 goto out_unlock; 1050 goto out_reset;
1025 } 1051 }
1026 1052
1027 if (next_page == head_page) { 1053 if (next_page == head_page) {
1028 if (!(buffer->flags & RB_FL_OVERWRITE)) 1054 if (!(buffer->flags & RB_FL_OVERWRITE))
1029 goto out_unlock; 1055 goto out_reset;
1030 1056
1031 /* tail_page has not moved yet? */ 1057 /* tail_page has not moved yet? */
1032 if (tail_page == cpu_buffer->tail_page) { 1058 if (tail_page == cpu_buffer->tail_page) {
@@ -1100,12 +1126,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1100 1126
1101 return event; 1127 return event;
1102 1128
1103 out_unlock: 1129 out_reset:
1104 /* reset write */ 1130 /* reset write */
1105 if (tail <= BUF_PAGE_SIZE) 1131 if (tail <= BUF_PAGE_SIZE)
1106 local_set(&tail_page->write, tail); 1132 local_set(&tail_page->write, tail);
1107 1133
1108 __raw_spin_unlock(&cpu_buffer->lock); 1134 if (likely(lock_taken))
1135 __raw_spin_unlock(&cpu_buffer->lock);
1109 local_irq_restore(flags); 1136 local_irq_restore(flags);
1110 return NULL; 1137 return NULL;
1111} 1138}
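The NMI handling added above boils down to a small pattern: never spin on the per-cpu lock from NMI context, try it once and bail out through out_reset on failure. Sketched in isolation (illustrative only, reusing the in_nmi() and __raw_spin_trylock() calls from the hunk above):

/* Returns non-zero if the lock was taken. From NMI context we refuse to
 * spin, because the interrupted code may already hold cpu_buffer->lock. */
static int rb_try_lock(struct ring_buffer_per_cpu *cpu_buffer)
{
	if (unlikely(in_nmi()))
		return __raw_spin_trylock(&cpu_buffer->lock);

	__raw_spin_lock(&cpu_buffer->lock);
	return 1;
}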
@@ -1265,7 +1292,6 @@ static DEFINE_PER_CPU(int, rb_need_resched);
1265 * ring_buffer_lock_reserve - reserve a part of the buffer 1292 * ring_buffer_lock_reserve - reserve a part of the buffer
1266 * @buffer: the ring buffer to reserve from 1293 * @buffer: the ring buffer to reserve from
1267 * @length: the length of the data to reserve (excluding event header) 1294 * @length: the length of the data to reserve (excluding event header)
1268 * @flags: a pointer to save the interrupt flags
1269 * 1295 *
 1270 * Returns a reserved event on the ring buffer to copy directly to. 1296 * Returns a reserved event on the ring buffer to copy directly to.
1271 * The user of this interface will need to get the body to write into 1297 * The user of this interface will need to get the body to write into
@@ -1278,9 +1304,7 @@ static DEFINE_PER_CPU(int, rb_need_resched);
1278 * If NULL is returned, then nothing has been allocated or locked. 1304 * If NULL is returned, then nothing has been allocated or locked.
1279 */ 1305 */
1280struct ring_buffer_event * 1306struct ring_buffer_event *
1281ring_buffer_lock_reserve(struct ring_buffer *buffer, 1307ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1282 unsigned long length,
1283 unsigned long *flags)
1284{ 1308{
1285 struct ring_buffer_per_cpu *cpu_buffer; 1309 struct ring_buffer_per_cpu *cpu_buffer;
1286 struct ring_buffer_event *event; 1310 struct ring_buffer_event *event;
@@ -1347,15 +1371,13 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1347 * ring_buffer_unlock_commit - commit a reserved 1371 * ring_buffer_unlock_commit - commit a reserved
1348 * @buffer: The buffer to commit to 1372 * @buffer: The buffer to commit to
1349 * @event: The event pointer to commit. 1373 * @event: The event pointer to commit.
1350 * @flags: the interrupt flags received from ring_buffer_lock_reserve.
1351 * 1374 *
1352 * This commits the data to the ring buffer, and releases any locks held. 1375 * This commits the data to the ring buffer, and releases any locks held.
1353 * 1376 *
1354 * Must be paired with ring_buffer_lock_reserve. 1377 * Must be paired with ring_buffer_lock_reserve.
1355 */ 1378 */
1356int ring_buffer_unlock_commit(struct ring_buffer *buffer, 1379int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1357 struct ring_buffer_event *event, 1380 struct ring_buffer_event *event)
1358 unsigned long flags)
1359{ 1381{
1360 struct ring_buffer_per_cpu *cpu_buffer; 1382 struct ring_buffer_per_cpu *cpu_buffer;
1361 int cpu = raw_smp_processor_id(); 1383 int cpu = raw_smp_processor_id();
@@ -1438,7 +1460,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1438} 1460}
1439EXPORT_SYMBOL_GPL(ring_buffer_write); 1461EXPORT_SYMBOL_GPL(ring_buffer_write);
1440 1462
1441static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) 1463static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1442{ 1464{
1443 struct buffer_page *reader = cpu_buffer->reader_page; 1465 struct buffer_page *reader = cpu_buffer->reader_page;
1444 struct buffer_page *head = cpu_buffer->head_page; 1466 struct buffer_page *head = cpu_buffer->head_page;
@@ -2277,9 +2299,24 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2277 if (buffer_a->pages != buffer_b->pages) 2299 if (buffer_a->pages != buffer_b->pages)
2278 return -EINVAL; 2300 return -EINVAL;
2279 2301
2302 if (ring_buffer_flags != RB_BUFFERS_ON)
2303 return -EAGAIN;
2304
2305 if (atomic_read(&buffer_a->record_disabled))
2306 return -EAGAIN;
2307
2308 if (atomic_read(&buffer_b->record_disabled))
2309 return -EAGAIN;
2310
2280 cpu_buffer_a = buffer_a->buffers[cpu]; 2311 cpu_buffer_a = buffer_a->buffers[cpu];
2281 cpu_buffer_b = buffer_b->buffers[cpu]; 2312 cpu_buffer_b = buffer_b->buffers[cpu];
2282 2313
2314 if (atomic_read(&cpu_buffer_a->record_disabled))
2315 return -EAGAIN;
2316
2317 if (atomic_read(&cpu_buffer_b->record_disabled))
2318 return -EAGAIN;
2319
2283 /* 2320 /*
2284 * We can't do a synchronize_sched here because this 2321 * We can't do a synchronize_sched here because this
2285 * function can be called in atomic context. 2322 * function can be called in atomic context.
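With these checks, ring_buffer_swap_cpu() can now legitimately return -EAGAIN whenever the buffers are globally off or recording is disabled, so callers must treat that value as "nothing to do right now" rather than a bug; this is exactly what the WARN_ON_ONCE(ret && ret != -EAGAIN) change in kernel/trace/trace.c below does. A hypothetical caller might look like:

static void snapshot_cpu(struct ring_buffer *max_buf,
			 struct ring_buffer *live_buf, int cpu)
{
	int ret = ring_buffer_swap_cpu(max_buf, live_buf, cpu);

	/* -EAGAIN: recording was disabled, nothing to snapshot this time */
	WARN_ON_ONCE(ret && ret != -EAGAIN);
}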
@@ -2303,13 +2340,14 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2303EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); 2340EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
2304 2341
2305static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, 2342static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2306 struct buffer_data_page *bpage) 2343 struct buffer_data_page *bpage,
2344 unsigned int offset)
2307{ 2345{
2308 struct ring_buffer_event *event; 2346 struct ring_buffer_event *event;
2309 unsigned long head; 2347 unsigned long head;
2310 2348
2311 __raw_spin_lock(&cpu_buffer->lock); 2349 __raw_spin_lock(&cpu_buffer->lock);
2312 for (head = 0; head < local_read(&bpage->commit); 2350 for (head = offset; head < local_read(&bpage->commit);
2313 head += rb_event_length(event)) { 2351 head += rb_event_length(event)) {
2314 2352
2315 event = __rb_data_page_index(bpage, head); 2353 event = __rb_data_page_index(bpage, head);
@@ -2377,12 +2415,12 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2377 * to swap with a page in the ring buffer. 2415 * to swap with a page in the ring buffer.
2378 * 2416 *
2379 * for example: 2417 * for example:
2380 * rpage = ring_buffer_alloc_page(buffer); 2418 * rpage = ring_buffer_alloc_read_page(buffer);
2381 * if (!rpage) 2419 * if (!rpage)
2382 * return error; 2420 * return error;
2383 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); 2421 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
2384 * if (ret) 2422 * if (ret >= 0)
2385 * process_page(rpage); 2423 * process_page(rpage, ret);
2386 * 2424 *
2387 * When @full is set, the function will not return true unless 2425 * When @full is set, the function will not return true unless
2388 * the writer is off the reader page. 2426 * the writer is off the reader page.
@@ -2393,8 +2431,8 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2393 * responsible for that. 2431 * responsible for that.
2394 * 2432 *
2395 * Returns: 2433 * Returns:
2396 * 1 if data has been transferred 2434 * >=0 if data has been transferred, returns the offset of consumed data.
2397 * 0 if no data has been transferred. 2435 * <0 if no data has been transferred.
2398 */ 2436 */
2399int ring_buffer_read_page(struct ring_buffer *buffer, 2437int ring_buffer_read_page(struct ring_buffer *buffer,
2400 void **data_page, int cpu, int full) 2438 void **data_page, int cpu, int full)
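Matching the updated comment above, a consumer of the new interface checks for a non-negative return and passes the offset along, since the copied-out page may begin with already-consumed data. An illustrative sketch (process_page() is the same placeholder the comment uses):

static void example_consume_cpu(struct ring_buffer *buffer, int cpu)
{
	void *page = ring_buffer_alloc_read_page(buffer);
	int offset;

	if (!page)
		return;

	offset = ring_buffer_read_page(buffer, &page, cpu, 0);
	if (offset >= 0)
		process_page(page, offset);	/* valid data begins at 'offset' */

	ring_buffer_free_read_page(buffer, page);
}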
@@ -2403,7 +2441,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
2403 struct ring_buffer_event *event; 2441 struct ring_buffer_event *event;
2404 struct buffer_data_page *bpage; 2442 struct buffer_data_page *bpage;
2405 unsigned long flags; 2443 unsigned long flags;
2406 int ret = 0; 2444 unsigned int read;
2445 int ret = -1;
2407 2446
2408 if (!data_page) 2447 if (!data_page)
2409 return 0; 2448 return 0;
@@ -2425,25 +2464,29 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
2425 /* check for data */ 2464 /* check for data */
2426 if (!local_read(&cpu_buffer->reader_page->page->commit)) 2465 if (!local_read(&cpu_buffer->reader_page->page->commit))
2427 goto out; 2466 goto out;
2467
2468 read = cpu_buffer->reader_page->read;
2428 /* 2469 /*
2429 * If the writer is already off of the read page, then simply 2470 * If the writer is already off of the read page, then simply
2430 * switch the read page with the given page. Otherwise 2471 * switch the read page with the given page. Otherwise
2431 * we need to copy the data from the reader to the writer. 2472 * we need to copy the data from the reader to the writer.
2432 */ 2473 */
2433 if (cpu_buffer->reader_page == cpu_buffer->commit_page) { 2474 if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
2434 unsigned int read = cpu_buffer->reader_page->read; 2475 unsigned int commit = rb_page_commit(cpu_buffer->reader_page);
2476 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
2435 2477
2436 if (full) 2478 if (full)
2437 goto out; 2479 goto out;
2438 /* The writer is still on the reader page, we must copy */ 2480 /* The writer is still on the reader page, we must copy */
2439 bpage = cpu_buffer->reader_page->page; 2481 memcpy(bpage->data + read, rpage->data + read, commit - read);
2440 memcpy(bpage->data,
2441 cpu_buffer->reader_page->page->data + read,
2442 local_read(&bpage->commit) - read);
2443 2482
2444 /* consume what was read */ 2483 /* consume what was read */
2445 cpu_buffer->reader_page += read; 2484 cpu_buffer->reader_page->read = commit;
2446 2485
2486 /* update bpage */
2487 local_set(&bpage->commit, commit);
2488 if (!read)
2489 bpage->time_stamp = rpage->time_stamp;
2447 } else { 2490 } else {
2448 /* swap the pages */ 2491 /* swap the pages */
2449 rb_init_page(bpage); 2492 rb_init_page(bpage);
@@ -2452,10 +2495,10 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
2452 cpu_buffer->reader_page->read = 0; 2495 cpu_buffer->reader_page->read = 0;
2453 *data_page = bpage; 2496 *data_page = bpage;
2454 } 2497 }
2455 ret = 1; 2498 ret = read;
2456 2499
2457 /* update the entry counter */ 2500 /* update the entry counter */
2458 rb_remove_entries(cpu_buffer, bpage); 2501 rb_remove_entries(cpu_buffer, bpage, read);
2459 out: 2502 out:
2460 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2503 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2461 2504
@@ -2466,7 +2509,7 @@ static ssize_t
2466rb_simple_read(struct file *filp, char __user *ubuf, 2509rb_simple_read(struct file *filp, char __user *ubuf,
2467 size_t cnt, loff_t *ppos) 2510 size_t cnt, loff_t *ppos)
2468{ 2511{
2469 long *p = filp->private_data; 2512 unsigned long *p = filp->private_data;
2470 char buf[64]; 2513 char buf[64];
2471 int r; 2514 int r;
2472 2515
@@ -2482,9 +2525,9 @@ static ssize_t
2482rb_simple_write(struct file *filp, const char __user *ubuf, 2525rb_simple_write(struct file *filp, const char __user *ubuf,
2483 size_t cnt, loff_t *ppos) 2526 size_t cnt, loff_t *ppos)
2484{ 2527{
2485 long *p = filp->private_data; 2528 unsigned long *p = filp->private_data;
2486 char buf[64]; 2529 char buf[64];
2487 long val; 2530 unsigned long val;
2488 int ret; 2531 int ret;
2489 2532
2490 if (cnt >= sizeof(buf)) 2533 if (cnt >= sizeof(buf))
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 17bb88d86ac2..ea055aa21cd9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -31,12 +31,14 @@
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/kprobes.h> 32#include <linux/kprobes.h>
33#include <linux/writeback.h> 33#include <linux/writeback.h>
34#include <linux/splice.h>
34 35
35#include <linux/stacktrace.h> 36#include <linux/stacktrace.h>
36#include <linux/ring_buffer.h> 37#include <linux/ring_buffer.h>
37#include <linux/irqflags.h> 38#include <linux/irqflags.h>
38 39
39#include "trace.h" 40#include "trace.h"
41#include "trace_output.h"
40 42
41#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) 43#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
42 44
@@ -52,6 +54,11 @@ unsigned long __read_mostly tracing_thresh;
52 */ 54 */
53static bool __read_mostly tracing_selftest_running; 55static bool __read_mostly tracing_selftest_running;
54 56
57/*
58 * If a tracer is running, we do not want to run SELFTEST.
59 */
60static bool __read_mostly tracing_selftest_disabled;
61
55/* For tracers that don't implement custom flags */ 62/* For tracers that don't implement custom flags */
56static struct tracer_opt dummy_tracer_opt[] = { 63static struct tracer_opt dummy_tracer_opt[] = {
57 { } 64 { }
@@ -73,7 +80,7 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
73 * of the tracer is successful. But that is the only place that sets 80 * of the tracer is successful. But that is the only place that sets
74 * this back to zero. 81 * this back to zero.
75 */ 82 */
76int tracing_disabled = 1; 83static int tracing_disabled = 1;
77 84
78static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 85static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
79 86
@@ -91,6 +98,9 @@ static inline void ftrace_enable_cpu(void)
91 98
92static cpumask_var_t __read_mostly tracing_buffer_mask; 99static cpumask_var_t __read_mostly tracing_buffer_mask;
93 100
101/* Define which cpu buffers are currently read in trace_pipe */
102static cpumask_var_t tracing_reader_cpumask;
103
94#define for_each_tracing_cpu(cpu) \ 104#define for_each_tracing_cpu(cpu) \
95 for_each_cpu(cpu, tracing_buffer_mask) 105 for_each_cpu(cpu, tracing_buffer_mask)
96 106
@@ -109,14 +119,19 @@ static cpumask_var_t __read_mostly tracing_buffer_mask;
109 */ 119 */
110int ftrace_dump_on_oops; 120int ftrace_dump_on_oops;
111 121
112static int tracing_set_tracer(char *buf); 122static int tracing_set_tracer(const char *buf);
123
124#define BOOTUP_TRACER_SIZE 100
125static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata;
126static char *default_bootup_tracer;
113 127
114static int __init set_ftrace(char *str) 128static int __init set_ftrace(char *str)
115{ 129{
116 tracing_set_tracer(str); 130 strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
131 default_bootup_tracer = bootup_tracer_buf;
117 return 1; 132 return 1;
118} 133}
119__setup("ftrace", set_ftrace); 134__setup("ftrace=", set_ftrace);
120 135
121static int __init set_ftrace_dump_on_oops(char *str) 136static int __init set_ftrace_dump_on_oops(char *str)
122{ 137{
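The hunk above renames the boot parameter to "ftrace=" and defers the actual switch: the tracer name is given on the kernel command line as, for example, ftrace=sched_switch, only copied into bootup_tracer_buf here, and the matching tracer is started later from register_tracer() once it becomes available (see the bootup handling added further down in this file).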
@@ -186,9 +201,6 @@ int tracing_is_enabled(void)
186 return tracer_enabled; 201 return tracer_enabled;
187} 202}
188 203
189/* function tracing enabled */
190int ftrace_function_enabled;
191
192/* 204/*
193 * trace_buf_size is the size in bytes that is allocated 205 * trace_buf_size is the size in bytes that is allocated
194 * for a buffer. Note, the number of bytes is always rounded 206 * for a buffer. Note, the number of bytes is always rounded
@@ -229,7 +241,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
229 241
230/* trace_flags holds trace_options default values */ 242/* trace_flags holds trace_options default values */
231unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 243unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
232 TRACE_ITER_ANNOTATE; 244 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO;
233 245
234/** 246/**
235 * trace_wake_up - wake up tasks waiting for trace input 247 * trace_wake_up - wake up tasks waiting for trace input
@@ -287,6 +299,7 @@ static const char *trace_options[] = {
287 "userstacktrace", 299 "userstacktrace",
288 "sym-userobj", 300 "sym-userobj",
289 "printk-msg-only", 301 "printk-msg-only",
302 "context-info",
290 NULL 303 NULL
291}; 304};
292 305
@@ -326,146 +339,32 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
326 data->rt_priority = tsk->rt_priority; 339 data->rt_priority = tsk->rt_priority;
327 340
328 /* record this tasks comm */ 341 /* record this tasks comm */
329 tracing_record_cmdline(current); 342 tracing_record_cmdline(tsk);
330} 343}
331 344
332/** 345ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
333 * trace_seq_printf - sequence printing of trace information
334 * @s: trace sequence descriptor
335 * @fmt: printf format string
336 *
337 * The tracer may use either sequence operations or its own
338 * copy to user routines. To simplify formating of a trace
339 * trace_seq_printf is used to store strings into a special
340 * buffer (@s). Then the output may be either used by
341 * the sequencer or pulled into another buffer.
342 */
343int
344trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
345{ 346{
346 int len = (PAGE_SIZE - 1) - s->len; 347 int len;
347 va_list ap;
348 int ret; 348 int ret;
349 349
350 if (!len) 350 if (s->len <= s->readpos)
351 return 0; 351 return -EBUSY;
352
353 va_start(ap, fmt);
354 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
355 va_end(ap);
356
357 /* If we can't write it all, don't bother writing anything */
358 if (ret >= len)
359 return 0;
360
361 s->len += ret;
362
363 return len;
364}
365
366/**
367 * trace_seq_puts - trace sequence printing of simple string
368 * @s: trace sequence descriptor
369 * @str: simple string to record
370 *
371 * The tracer may use either the sequence operations or its own
372 * copy to user routines. This function records a simple string
373 * into a special buffer (@s) for later retrieval by a sequencer
374 * or other mechanism.
375 */
376static int
377trace_seq_puts(struct trace_seq *s, const char *str)
378{
379 int len = strlen(str);
380
381 if (len > ((PAGE_SIZE - 1) - s->len))
382 return 0;
383
384 memcpy(s->buffer + s->len, str, len);
385 s->len += len;
386
387 return len;
388}
389
390static int
391trace_seq_putc(struct trace_seq *s, unsigned char c)
392{
393 if (s->len >= (PAGE_SIZE - 1))
394 return 0;
395
396 s->buffer[s->len++] = c;
397
398 return 1;
399}
400
401static int
402trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
403{
404 if (len > ((PAGE_SIZE - 1) - s->len))
405 return 0;
406
407 memcpy(s->buffer + s->len, mem, len);
408 s->len += len;
409
410 return len;
411}
412
413#define MAX_MEMHEX_BYTES 8
414#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
415
416static int
417trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
418{
419 unsigned char hex[HEX_CHARS];
420 unsigned char *data = mem;
421 int i, j;
422
423#ifdef __BIG_ENDIAN
424 for (i = 0, j = 0; i < len; i++) {
425#else
426 for (i = len-1, j = 0; i >= 0; i--) {
427#endif
428 hex[j++] = hex_asc_hi(data[i]);
429 hex[j++] = hex_asc_lo(data[i]);
430 }
431 hex[j++] = ' ';
432
433 return trace_seq_putmem(s, hex, j);
434}
435
436static int
437trace_seq_path(struct trace_seq *s, struct path *path)
438{
439 unsigned char *p;
440
441 if (s->len >= (PAGE_SIZE - 1))
442 return 0;
443 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
444 if (!IS_ERR(p)) {
445 p = mangle_path(s->buffer + s->len, p, "\n");
446 if (p) {
447 s->len = p - s->buffer;
448 return 1;
449 }
450 } else {
451 s->buffer[s->len++] = '?';
452 return 1;
453 }
454 352
455 return 0; 353 len = s->len - s->readpos;
456} 354 if (cnt > len)
355 cnt = len;
356 ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
357 if (ret)
358 return -EFAULT;
457 359
458static void 360 s->readpos += len;
459trace_seq_reset(struct trace_seq *s) 361 return cnt;
460{
461 s->len = 0;
462 s->readpos = 0;
463} 362}
464 363
465ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) 364ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
466{ 365{
467 int len; 366 int len;
468 int ret; 367 void *ret;
469 368
470 if (s->len <= s->readpos) 369 if (s->len <= s->readpos)
471 return -EBUSY; 370 return -EBUSY;
@@ -473,8 +372,8 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
473 len = s->len - s->readpos; 372 len = s->len - s->readpos;
474 if (cnt > len) 373 if (cnt > len)
475 cnt = len; 374 cnt = len;
476 ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); 375 ret = memcpy(buf, s->buffer + s->readpos, cnt);
477 if (ret) 376 if (!ret)
478 return -EFAULT; 377 return -EFAULT;
479 378
480 s->readpos += len; 379 s->readpos += len;
@@ -489,7 +388,7 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s)
489 s->buffer[len] = 0; 388 s->buffer[len] = 0;
490 seq_puts(m, s->buffer); 389 seq_puts(m, s->buffer);
491 390
492 trace_seq_reset(s); 391 trace_seq_init(s);
493} 392}
494 393
495/** 394/**
@@ -543,7 +442,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
543 442
544 ftrace_enable_cpu(); 443 ftrace_enable_cpu();
545 444
546 WARN_ON_ONCE(ret); 445 WARN_ON_ONCE(ret && ret != -EAGAIN);
547 446
548 __update_max_tr(tr, tsk, cpu); 447 __update_max_tr(tr, tsk, cpu);
549 __raw_spin_unlock(&ftrace_max_lock); 448 __raw_spin_unlock(&ftrace_max_lock);
@@ -556,6 +455,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
556 * Register a new plugin tracer. 455 * Register a new plugin tracer.
557 */ 456 */
558int register_tracer(struct tracer *type) 457int register_tracer(struct tracer *type)
458__releases(kernel_lock)
459__acquires(kernel_lock)
559{ 460{
560 struct tracer *t; 461 struct tracer *t;
561 int len; 462 int len;
@@ -594,9 +495,12 @@ int register_tracer(struct tracer *type)
594 else 495 else
595 if (!type->flags->opts) 496 if (!type->flags->opts)
596 type->flags->opts = dummy_tracer_opt; 497 type->flags->opts = dummy_tracer_opt;
498 if (!type->wait_pipe)
499 type->wait_pipe = default_wait_pipe;
500
597 501
598#ifdef CONFIG_FTRACE_STARTUP_TEST 502#ifdef CONFIG_FTRACE_STARTUP_TEST
599 if (type->selftest) { 503 if (type->selftest && !tracing_selftest_disabled) {
600 struct tracer *saved_tracer = current_trace; 504 struct tracer *saved_tracer = current_trace;
601 struct trace_array *tr = &global_trace; 505 struct trace_array *tr = &global_trace;
602 int i; 506 int i;
@@ -638,8 +542,26 @@ int register_tracer(struct tracer *type)
638 out: 542 out:
639 tracing_selftest_running = false; 543 tracing_selftest_running = false;
640 mutex_unlock(&trace_types_lock); 544 mutex_unlock(&trace_types_lock);
641 lock_kernel();
642 545
546 if (ret || !default_bootup_tracer)
547 goto out_unlock;
548
549 if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE))
550 goto out_unlock;
551
552 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
553 /* Do we want this tracer to start on bootup? */
554 tracing_set_tracer(type->name);
555 default_bootup_tracer = NULL;
 556 /* Disable other selftests, since this tracer would break them. */
557 tracing_selftest_disabled = 1;

558#ifdef CONFIG_FTRACE_STARTUP_TEST
559 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
560 type->name);
561#endif
562
563 out_unlock:
564 lock_kernel();
643 return ret; 565 return ret;
644} 566}
645 567
@@ -658,6 +580,15 @@ void unregister_tracer(struct tracer *type)
658 580
659 found: 581 found:
660 *t = (*t)->next; 582 *t = (*t)->next;
583
584 if (type == current_trace && tracer_enabled) {
585 tracer_enabled = 0;
586 tracing_stop();
587 if (current_trace->stop)
588 current_trace->stop(&global_trace);
589 current_trace = &nop_trace;
590 }
591
661 if (strlen(type->name) != max_tracer_type_len) 592 if (strlen(type->name) != max_tracer_type_len)
662 goto out; 593 goto out;
663 594
@@ -696,7 +627,7 @@ static int cmdline_idx;
696static DEFINE_SPINLOCK(trace_cmdline_lock); 627static DEFINE_SPINLOCK(trace_cmdline_lock);
697 628
698/* temporary disable recording */ 629/* temporary disable recording */
699atomic_t trace_record_cmdline_disabled __read_mostly; 630static atomic_t trace_record_cmdline_disabled __read_mostly;
700 631
701static void trace_init_cmdlines(void) 632static void trace_init_cmdlines(void)
702{ 633{
@@ -738,13 +669,12 @@ void tracing_start(void)
738 return; 669 return;
739 670
740 spin_lock_irqsave(&tracing_start_lock, flags); 671 spin_lock_irqsave(&tracing_start_lock, flags);
741 if (--trace_stop_count) 672 if (--trace_stop_count) {
742 goto out; 673 if (trace_stop_count < 0) {
743 674 /* Someone screwed up their debugging */
744 if (trace_stop_count < 0) { 675 WARN_ON_ONCE(1);
745 /* Someone screwed up their debugging */ 676 trace_stop_count = 0;
746 WARN_ON_ONCE(1); 677 }
747 trace_stop_count = 0;
748 goto out; 678 goto out;
749 } 679 }
750 680
@@ -876,78 +806,114 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
876 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 806 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
877} 807}
878 808
809struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
810 unsigned char type,
811 unsigned long len,
812 unsigned long flags, int pc)
813{
814 struct ring_buffer_event *event;
815
816 event = ring_buffer_lock_reserve(tr->buffer, len);
817 if (event != NULL) {
818 struct trace_entry *ent = ring_buffer_event_data(event);
819
820 tracing_generic_entry_update(ent, flags, pc);
821 ent->type = type;
822 }
823
824 return event;
825}
826static void ftrace_trace_stack(struct trace_array *tr,
827 unsigned long flags, int skip, int pc);
828static void ftrace_trace_userstack(struct trace_array *tr,
829 unsigned long flags, int pc);
830
831void trace_buffer_unlock_commit(struct trace_array *tr,
832 struct ring_buffer_event *event,
833 unsigned long flags, int pc)
834{
835 ring_buffer_unlock_commit(tr->buffer, event);
836
837 ftrace_trace_stack(tr, flags, 6, pc);
838 ftrace_trace_userstack(tr, flags, pc);
839 trace_wake_up();
840}
841
842struct ring_buffer_event *
843trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
844 unsigned long flags, int pc)
845{
846 return trace_buffer_lock_reserve(&global_trace,
847 type, len, flags, pc);
848}
849
850void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
851 unsigned long flags, int pc)
852{
853 return trace_buffer_unlock_commit(&global_trace, event, flags, pc);
854}
855
879void 856void
880trace_function(struct trace_array *tr, struct trace_array_cpu *data, 857trace_function(struct trace_array *tr,
881 unsigned long ip, unsigned long parent_ip, unsigned long flags, 858 unsigned long ip, unsigned long parent_ip, unsigned long flags,
882 int pc) 859 int pc)
883{ 860{
884 struct ring_buffer_event *event; 861 struct ring_buffer_event *event;
885 struct ftrace_entry *entry; 862 struct ftrace_entry *entry;
886 unsigned long irq_flags;
887 863
888 /* If we are reading the ring buffer, don't trace */ 864 /* If we are reading the ring buffer, don't trace */
889 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 865 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
890 return; 866 return;
891 867
892 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 868 event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry),
893 &irq_flags); 869 flags, pc);
894 if (!event) 870 if (!event)
895 return; 871 return;
896 entry = ring_buffer_event_data(event); 872 entry = ring_buffer_event_data(event);
897 tracing_generic_entry_update(&entry->ent, flags, pc);
898 entry->ent.type = TRACE_FN;
899 entry->ip = ip; 873 entry->ip = ip;
900 entry->parent_ip = parent_ip; 874 entry->parent_ip = parent_ip;
901 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 875 ring_buffer_unlock_commit(tr->buffer, event);
902} 876}
903 877
904#ifdef CONFIG_FUNCTION_GRAPH_TRACER 878#ifdef CONFIG_FUNCTION_GRAPH_TRACER
905static void __trace_graph_entry(struct trace_array *tr, 879static void __trace_graph_entry(struct trace_array *tr,
906 struct trace_array_cpu *data,
907 struct ftrace_graph_ent *trace, 880 struct ftrace_graph_ent *trace,
908 unsigned long flags, 881 unsigned long flags,
909 int pc) 882 int pc)
910{ 883{
911 struct ring_buffer_event *event; 884 struct ring_buffer_event *event;
912 struct ftrace_graph_ent_entry *entry; 885 struct ftrace_graph_ent_entry *entry;
913 unsigned long irq_flags;
914 886
915 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 887 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
916 return; 888 return;
917 889
918 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), 890 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
919 &irq_flags); 891 sizeof(*entry), flags, pc);
920 if (!event) 892 if (!event)
921 return; 893 return;
922 entry = ring_buffer_event_data(event); 894 entry = ring_buffer_event_data(event);
923 tracing_generic_entry_update(&entry->ent, flags, pc);
924 entry->ent.type = TRACE_GRAPH_ENT;
925 entry->graph_ent = *trace; 895 entry->graph_ent = *trace;
926 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); 896 ring_buffer_unlock_commit(global_trace.buffer, event);
927} 897}
928 898
929static void __trace_graph_return(struct trace_array *tr, 899static void __trace_graph_return(struct trace_array *tr,
930 struct trace_array_cpu *data,
931 struct ftrace_graph_ret *trace, 900 struct ftrace_graph_ret *trace,
932 unsigned long flags, 901 unsigned long flags,
933 int pc) 902 int pc)
934{ 903{
935 struct ring_buffer_event *event; 904 struct ring_buffer_event *event;
936 struct ftrace_graph_ret_entry *entry; 905 struct ftrace_graph_ret_entry *entry;
937 unsigned long irq_flags;
938 906
939 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 907 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
940 return; 908 return;
941 909
942 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), 910 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET,
943 &irq_flags); 911 sizeof(*entry), flags, pc);
944 if (!event) 912 if (!event)
945 return; 913 return;
946 entry = ring_buffer_event_data(event); 914 entry = ring_buffer_event_data(event);
947 tracing_generic_entry_update(&entry->ent, flags, pc);
948 entry->ent.type = TRACE_GRAPH_RET;
949 entry->ret = *trace; 915 entry->ret = *trace;
950 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); 916 ring_buffer_unlock_commit(global_trace.buffer, event);
951} 917}
952#endif 918#endif
953 919
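The helpers introduced at the top of this hunk centralize the reserve/fill/commit sequence that every event writer used to open-code. For a writer targeting the global trace buffer, the pattern reduces to something like the following sketch (illustrative only; TRACE_SPECIAL and struct special_entry are the existing types used elsewhere in this file):

static void example_trace_special(unsigned long a1, unsigned long a2,
				  unsigned long a3)
{
	struct ring_buffer_event *event;
	struct special_entry *entry;
	int pc = preempt_count();

	event = trace_current_buffer_lock_reserve(TRACE_SPECIAL,
						  sizeof(*entry), 0, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->arg1 = a1;
	entry->arg2 = a2;
	entry->arg3 = a3;

	/* commits the event, records stack traces if enabled, wakes readers */
	trace_current_buffer_unlock_commit(event, 0, pc);
}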
@@ -957,31 +923,23 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
957 int pc) 923 int pc)
958{ 924{
959 if (likely(!atomic_read(&data->disabled))) 925 if (likely(!atomic_read(&data->disabled)))
960 trace_function(tr, data, ip, parent_ip, flags, pc); 926 trace_function(tr, ip, parent_ip, flags, pc);
961} 927}
962 928
963static void ftrace_trace_stack(struct trace_array *tr, 929static void __ftrace_trace_stack(struct trace_array *tr,
964 struct trace_array_cpu *data, 930 unsigned long flags,
965 unsigned long flags, 931 int skip, int pc)
966 int skip, int pc)
967{ 932{
968#ifdef CONFIG_STACKTRACE 933#ifdef CONFIG_STACKTRACE
969 struct ring_buffer_event *event; 934 struct ring_buffer_event *event;
970 struct stack_entry *entry; 935 struct stack_entry *entry;
971 struct stack_trace trace; 936 struct stack_trace trace;
972 unsigned long irq_flags;
973 937
974 if (!(trace_flags & TRACE_ITER_STACKTRACE)) 938 event = trace_buffer_lock_reserve(tr, TRACE_STACK,
975 return; 939 sizeof(*entry), flags, pc);
976
977 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
978 &irq_flags);
979 if (!event) 940 if (!event)
980 return; 941 return;
981 entry = ring_buffer_event_data(event); 942 entry = ring_buffer_event_data(event);
982 tracing_generic_entry_update(&entry->ent, flags, pc);
983 entry->ent.type = TRACE_STACK;
984
985 memset(&entry->caller, 0, sizeof(entry->caller)); 943 memset(&entry->caller, 0, sizeof(entry->caller));
986 944
987 trace.nr_entries = 0; 945 trace.nr_entries = 0;
@@ -990,38 +948,43 @@ static void ftrace_trace_stack(struct trace_array *tr,
990 trace.entries = entry->caller; 948 trace.entries = entry->caller;
991 949
992 save_stack_trace(&trace); 950 save_stack_trace(&trace);
993 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 951 ring_buffer_unlock_commit(tr->buffer, event);
994#endif 952#endif
995} 953}
996 954
955static void ftrace_trace_stack(struct trace_array *tr,
956 unsigned long flags,
957 int skip, int pc)
958{
959 if (!(trace_flags & TRACE_ITER_STACKTRACE))
960 return;
961
962 __ftrace_trace_stack(tr, flags, skip, pc);
963}
964
997void __trace_stack(struct trace_array *tr, 965void __trace_stack(struct trace_array *tr,
998 struct trace_array_cpu *data,
999 unsigned long flags, 966 unsigned long flags,
1000 int skip) 967 int skip, int pc)
1001{ 968{
1002 ftrace_trace_stack(tr, data, flags, skip, preempt_count()); 969 __ftrace_trace_stack(tr, flags, skip, pc);
1003} 970}
1004 971
1005static void ftrace_trace_userstack(struct trace_array *tr, 972static void ftrace_trace_userstack(struct trace_array *tr,
1006 struct trace_array_cpu *data, 973 unsigned long flags, int pc)
1007 unsigned long flags, int pc)
1008{ 974{
1009#ifdef CONFIG_STACKTRACE 975#ifdef CONFIG_STACKTRACE
1010 struct ring_buffer_event *event; 976 struct ring_buffer_event *event;
1011 struct userstack_entry *entry; 977 struct userstack_entry *entry;
1012 struct stack_trace trace; 978 struct stack_trace trace;
1013 unsigned long irq_flags;
1014 979
1015 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 980 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1016 return; 981 return;
1017 982
1018 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 983 event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK,
1019 &irq_flags); 984 sizeof(*entry), flags, pc);
1020 if (!event) 985 if (!event)
1021 return; 986 return;
1022 entry = ring_buffer_event_data(event); 987 entry = ring_buffer_event_data(event);
1023 tracing_generic_entry_update(&entry->ent, flags, pc);
1024 entry->ent.type = TRACE_USER_STACK;
1025 988
1026 memset(&entry->caller, 0, sizeof(entry->caller)); 989 memset(&entry->caller, 0, sizeof(entry->caller));
1027 990
@@ -1031,70 +994,58 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1031 trace.entries = entry->caller; 994 trace.entries = entry->caller;
1032 995
1033 save_stack_trace_user(&trace); 996 save_stack_trace_user(&trace);
1034 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 997 ring_buffer_unlock_commit(tr->buffer, event);
1035#endif 998#endif
1036} 999}
1037 1000
1038void __trace_userstack(struct trace_array *tr, 1001#ifdef UNUSED
1039 struct trace_array_cpu *data, 1002static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1040 unsigned long flags)
1041{ 1003{
1042 ftrace_trace_userstack(tr, data, flags, preempt_count()); 1004 ftrace_trace_userstack(tr, flags, preempt_count());
1043} 1005}
1006#endif /* UNUSED */
1044 1007
1045static void 1008static void
1046ftrace_trace_special(void *__tr, void *__data, 1009ftrace_trace_special(void *__tr,
1047 unsigned long arg1, unsigned long arg2, unsigned long arg3, 1010 unsigned long arg1, unsigned long arg2, unsigned long arg3,
1048 int pc) 1011 int pc)
1049{ 1012{
1050 struct ring_buffer_event *event; 1013 struct ring_buffer_event *event;
1051 struct trace_array_cpu *data = __data;
1052 struct trace_array *tr = __tr; 1014 struct trace_array *tr = __tr;
1053 struct special_entry *entry; 1015 struct special_entry *entry;
1054 unsigned long irq_flags;
1055 1016
1056 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 1017 event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL,
1057 &irq_flags); 1018 sizeof(*entry), 0, pc);
1058 if (!event) 1019 if (!event)
1059 return; 1020 return;
1060 entry = ring_buffer_event_data(event); 1021 entry = ring_buffer_event_data(event);
1061 tracing_generic_entry_update(&entry->ent, 0, pc);
1062 entry->ent.type = TRACE_SPECIAL;
1063 entry->arg1 = arg1; 1022 entry->arg1 = arg1;
1064 entry->arg2 = arg2; 1023 entry->arg2 = arg2;
1065 entry->arg3 = arg3; 1024 entry->arg3 = arg3;
1066 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1025 trace_buffer_unlock_commit(tr, event, 0, pc);
1067 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
1068 ftrace_trace_userstack(tr, data, irq_flags, pc);
1069
1070 trace_wake_up();
1071} 1026}
1072 1027
1073void 1028void
1074__trace_special(void *__tr, void *__data, 1029__trace_special(void *__tr, void *__data,
1075 unsigned long arg1, unsigned long arg2, unsigned long arg3) 1030 unsigned long arg1, unsigned long arg2, unsigned long arg3)
1076{ 1031{
1077 ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count()); 1032 ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
1078} 1033}
1079 1034
1080void 1035void
1081tracing_sched_switch_trace(struct trace_array *tr, 1036tracing_sched_switch_trace(struct trace_array *tr,
1082 struct trace_array_cpu *data,
1083 struct task_struct *prev, 1037 struct task_struct *prev,
1084 struct task_struct *next, 1038 struct task_struct *next,
1085 unsigned long flags, int pc) 1039 unsigned long flags, int pc)
1086{ 1040{
1087 struct ring_buffer_event *event; 1041 struct ring_buffer_event *event;
1088 struct ctx_switch_entry *entry; 1042 struct ctx_switch_entry *entry;
1089 unsigned long irq_flags;
1090 1043
1091 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 1044 event = trace_buffer_lock_reserve(tr, TRACE_CTX,
1092 &irq_flags); 1045 sizeof(*entry), flags, pc);
1093 if (!event) 1046 if (!event)
1094 return; 1047 return;
1095 entry = ring_buffer_event_data(event); 1048 entry = ring_buffer_event_data(event);
1096 tracing_generic_entry_update(&entry->ent, flags, pc);
1097 entry->ent.type = TRACE_CTX;
1098 entry->prev_pid = prev->pid; 1049 entry->prev_pid = prev->pid;
1099 entry->prev_prio = prev->prio; 1050 entry->prev_prio = prev->prio;
1100 entry->prev_state = prev->state; 1051 entry->prev_state = prev->state;
@@ -1102,29 +1053,23 @@ tracing_sched_switch_trace(struct trace_array *tr,
1102 entry->next_prio = next->prio; 1053 entry->next_prio = next->prio;
1103 entry->next_state = next->state; 1054 entry->next_state = next->state;
1104 entry->next_cpu = task_cpu(next); 1055 entry->next_cpu = task_cpu(next);
1105 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1056 trace_buffer_unlock_commit(tr, event, flags, pc);
1106 ftrace_trace_stack(tr, data, flags, 5, pc);
1107 ftrace_trace_userstack(tr, data, flags, pc);
1108} 1057}
1109 1058
1110void 1059void
1111tracing_sched_wakeup_trace(struct trace_array *tr, 1060tracing_sched_wakeup_trace(struct trace_array *tr,
1112 struct trace_array_cpu *data,
1113 struct task_struct *wakee, 1061 struct task_struct *wakee,
1114 struct task_struct *curr, 1062 struct task_struct *curr,
1115 unsigned long flags, int pc) 1063 unsigned long flags, int pc)
1116{ 1064{
1117 struct ring_buffer_event *event; 1065 struct ring_buffer_event *event;
1118 struct ctx_switch_entry *entry; 1066 struct ctx_switch_entry *entry;
1119 unsigned long irq_flags;
1120 1067
1121 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 1068 event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
1122 &irq_flags); 1069 sizeof(*entry), flags, pc);
1123 if (!event) 1070 if (!event)
1124 return; 1071 return;
1125 entry = ring_buffer_event_data(event); 1072 entry = ring_buffer_event_data(event);
1126 tracing_generic_entry_update(&entry->ent, flags, pc);
1127 entry->ent.type = TRACE_WAKE;
1128 entry->prev_pid = curr->pid; 1073 entry->prev_pid = curr->pid;
1129 entry->prev_prio = curr->prio; 1074 entry->prev_prio = curr->prio;
1130 entry->prev_state = curr->state; 1075 entry->prev_state = curr->state;
@@ -1132,11 +1077,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
1132 entry->next_prio = wakee->prio; 1077 entry->next_prio = wakee->prio;
1133 entry->next_state = wakee->state; 1078 entry->next_state = wakee->state;
1134 entry->next_cpu = task_cpu(wakee); 1079 entry->next_cpu = task_cpu(wakee);
1135 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
1136 ftrace_trace_stack(tr, data, flags, 6, pc);
1137 ftrace_trace_userstack(tr, data, flags, pc);
1138 1080
1139 trace_wake_up(); 1081 ring_buffer_unlock_commit(tr->buffer, event);
1082 ftrace_trace_stack(tr, flags, 6, pc);
1083 ftrace_trace_userstack(tr, flags, pc);
1140} 1084}
1141 1085
1142void 1086void
@@ -1157,66 +1101,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1157 data = tr->data[cpu]; 1101 data = tr->data[cpu];
1158 1102
1159 if (likely(atomic_inc_return(&data->disabled) == 1)) 1103 if (likely(atomic_inc_return(&data->disabled) == 1))
1160 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); 1104 ftrace_trace_special(tr, arg1, arg2, arg3, pc);
1161
1162 atomic_dec(&data->disabled);
1163 local_irq_restore(flags);
1164}
1165
1166#ifdef CONFIG_FUNCTION_TRACER
1167static void
1168function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
1169{
1170 struct trace_array *tr = &global_trace;
1171 struct trace_array_cpu *data;
1172 unsigned long flags;
1173 long disabled;
1174 int cpu, resched;
1175 int pc;
1176
1177 if (unlikely(!ftrace_function_enabled))
1178 return;
1179
1180 pc = preempt_count();
1181 resched = ftrace_preempt_disable();
1182 local_save_flags(flags);
1183 cpu = raw_smp_processor_id();
1184 data = tr->data[cpu];
1185 disabled = atomic_inc_return(&data->disabled);
1186
1187 if (likely(disabled == 1))
1188 trace_function(tr, data, ip, parent_ip, flags, pc);
1189
1190 atomic_dec(&data->disabled);
1191 ftrace_preempt_enable(resched);
1192}
1193
1194static void
1195function_trace_call(unsigned long ip, unsigned long parent_ip)
1196{
1197 struct trace_array *tr = &global_trace;
1198 struct trace_array_cpu *data;
1199 unsigned long flags;
1200 long disabled;
1201 int cpu;
1202 int pc;
1203
1204 if (unlikely(!ftrace_function_enabled))
1205 return;
1206
1207 /*
1208 * Need to use raw, since this must be called before the
1209 * recursive protection is performed.
1210 */
1211 local_irq_save(flags);
1212 cpu = raw_smp_processor_id();
1213 data = tr->data[cpu];
1214 disabled = atomic_inc_return(&data->disabled);
1215
1216 if (likely(disabled == 1)) {
1217 pc = preempt_count();
1218 trace_function(tr, data, ip, parent_ip, flags, pc);
1219 }
1220 1105
1221 atomic_dec(&data->disabled); 1106 atomic_dec(&data->disabled);
1222 local_irq_restore(flags); 1107 local_irq_restore(flags);
@@ -1244,7 +1129,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
1244 disabled = atomic_inc_return(&data->disabled); 1129 disabled = atomic_inc_return(&data->disabled);
1245 if (likely(disabled == 1)) { 1130 if (likely(disabled == 1)) {
1246 pc = preempt_count(); 1131 pc = preempt_count();
1247 __trace_graph_entry(tr, data, trace, flags, pc); 1132 __trace_graph_entry(tr, trace, flags, pc);
1248 } 1133 }
1249 /* Only do the atomic if it is not already set */ 1134 /* Only do the atomic if it is not already set */
1250 if (!test_tsk_trace_graph(current)) 1135 if (!test_tsk_trace_graph(current))
@@ -1270,7 +1155,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
1270 disabled = atomic_inc_return(&data->disabled); 1155 disabled = atomic_inc_return(&data->disabled);
1271 if (likely(disabled == 1)) { 1156 if (likely(disabled == 1)) {
1272 pc = preempt_count(); 1157 pc = preempt_count();
1273 __trace_graph_return(tr, data, trace, flags, pc); 1158 __trace_graph_return(tr, trace, flags, pc);
1274 } 1159 }
1275 if (!trace->depth) 1160 if (!trace->depth)
1276 clear_tsk_trace_graph(current); 1161 clear_tsk_trace_graph(current);
@@ -1279,31 +1164,6 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
1279} 1164}
1280#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 1165#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1281 1166
1282static struct ftrace_ops trace_ops __read_mostly =
1283{
1284 .func = function_trace_call,
1285};
1286
1287void tracing_start_function_trace(void)
1288{
1289 ftrace_function_enabled = 0;
1290
1291 if (trace_flags & TRACE_ITER_PREEMPTONLY)
1292 trace_ops.func = function_trace_call_preempt_only;
1293 else
1294 trace_ops.func = function_trace_call;
1295
1296 register_ftrace_function(&trace_ops);
1297 ftrace_function_enabled = 1;
1298}
1299
1300void tracing_stop_function_trace(void)
1301{
1302 ftrace_function_enabled = 0;
1303 unregister_ftrace_function(&trace_ops);
1304}
1305#endif
1306
1307enum trace_file_type { 1167enum trace_file_type {
1308 TRACE_FILE_LAT_FMT = 1, 1168 TRACE_FILE_LAT_FMT = 1,
1309 TRACE_FILE_ANNOTATE = 2, 1169 TRACE_FILE_ANNOTATE = 2,
@@ -1345,10 +1205,25 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1345{ 1205{
1346 struct ring_buffer *buffer = iter->tr->buffer; 1206 struct ring_buffer *buffer = iter->tr->buffer;
1347 struct trace_entry *ent, *next = NULL; 1207 struct trace_entry *ent, *next = NULL;
1208 int cpu_file = iter->cpu_file;
1348 u64 next_ts = 0, ts; 1209 u64 next_ts = 0, ts;
1349 int next_cpu = -1; 1210 int next_cpu = -1;
1350 int cpu; 1211 int cpu;
1351 1212
1213 /*
1214	 * If we are in a per_cpu trace file, don't bother iterating over
1215	 * all cpus; peek at that cpu's buffer directly.
1216 */
1217 if (cpu_file > TRACE_PIPE_ALL_CPU) {
1218 if (ring_buffer_empty_cpu(buffer, cpu_file))
1219 return NULL;
1220 ent = peek_next_entry(iter, cpu_file, ent_ts);
1221 if (ent_cpu)
1222 *ent_cpu = cpu_file;
1223
1224 return ent;
1225 }
1226
1352 for_each_tracing_cpu(cpu) { 1227 for_each_tracing_cpu(cpu) {
1353 1228
1354 if (ring_buffer_empty_cpu(buffer, cpu)) 1229 if (ring_buffer_empty_cpu(buffer, cpu))
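For illustration only, here is a small userspace sketch (not the kernel code) of the selection logic the new per-cpu branch above short-circuits: when a single cpu is requested, peek at that buffer alone; otherwise scan every buffer and return the entry with the oldest timestamp. The buffer layout, ALL_CPUS constant and names are invented for the example.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define NR_CPUS   4
#define ALL_CPUS (-1)			/* analogue of TRACE_PIPE_ALL_CPU */

struct entry { uint64_t ts; const char *msg; };

struct cpu_buf {
	struct entry *entries;
	size_t head, len;
};

/* Peek at the oldest unconsumed entry of one buffer, NULL if empty. */
static struct entry *peek(struct cpu_buf *b)
{
	return b->head < b->len ? &b->entries[b->head] : NULL;
}

/*
 * For one specific cpu, peek only that buffer; otherwise scan all buffers
 * for the smallest timestamp, mirroring the shortcut in __find_next_entry().
 */
static struct entry *find_next(struct cpu_buf *bufs, int cpu, int *ent_cpu)
{
	struct entry *next = NULL;
	int i;

	if (cpu != ALL_CPUS) {
		if (ent_cpu)
			*ent_cpu = cpu;
		return peek(&bufs[cpu]);
	}

	for (i = 0; i < NR_CPUS; i++) {
		struct entry *e = peek(&bufs[i]);

		if (e && (!next || e->ts < next->ts)) {
			next = e;
			if (ent_cpu)
				*ent_cpu = i;
		}
	}
	return next;
}

int main(void)
{
	struct entry e0[] = { { 30, "cpu0: wakeup" } };
	struct entry e1[] = { { 10, "cpu1: switch" }, { 40, "cpu1: irq" } };
	struct cpu_buf bufs[NR_CPUS] = {
		{ e0, 0, 1 }, { e1, 0, 2 }, { NULL, 0, 0 }, { NULL, 0, 0 },
	};
	int cpu;
	struct entry *e = find_next(bufs, ALL_CPUS, &cpu);

	if (e)
		printf("oldest: ts=%llu on cpu%d: %s\n",
		       (unsigned long long)e->ts, cpu, e->msg);
	return 0;
}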
@@ -1376,8 +1251,8 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1376} 1251}
1377 1252
1378/* Find the next real entry, without updating the iterator itself */ 1253/* Find the next real entry, without updating the iterator itself */
1379static struct trace_entry * 1254struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1380find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) 1255 int *ent_cpu, u64 *ent_ts)
1381{ 1256{
1382 return __find_next_entry(iter, ent_cpu, ent_ts); 1257 return __find_next_entry(iter, ent_cpu, ent_ts);
1383} 1258}
@@ -1426,19 +1301,32 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1426 return ent; 1301 return ent;
1427} 1302}
1428 1303
1304/*
1305 * No locking is necessary here. The worst that can
1306 * happen is losing events that are consumed at the same time
1307 * by a trace_pipe reader.
1308 * Other than that, we don't risk crashing the ring buffer
1309 * because it serializes the readers.
1310 *
1311 * The current tracer is copied to avoid global locking
1312 * all around.
1313 */
1429static void *s_start(struct seq_file *m, loff_t *pos) 1314static void *s_start(struct seq_file *m, loff_t *pos)
1430{ 1315{
1431 struct trace_iterator *iter = m->private; 1316 struct trace_iterator *iter = m->private;
1317 static struct tracer *old_tracer;
1318 int cpu_file = iter->cpu_file;
1432 void *p = NULL; 1319 void *p = NULL;
1433 loff_t l = 0; 1320 loff_t l = 0;
1434 int cpu; 1321 int cpu;
1435 1322
1323 /* copy the tracer to avoid using a global lock all around */
1436 mutex_lock(&trace_types_lock); 1324 mutex_lock(&trace_types_lock);
1437 1325 if (unlikely(old_tracer != current_trace && current_trace)) {
1438 if (!current_trace || current_trace != iter->trace) { 1326 old_tracer = current_trace;
1439 mutex_unlock(&trace_types_lock); 1327 *iter->trace = *current_trace;
1440 return NULL;
1441 } 1328 }
1329 mutex_unlock(&trace_types_lock);
1442 1330
1443 atomic_inc(&trace_record_cmdline_disabled); 1331 atomic_inc(&trace_record_cmdline_disabled);
1444 1332
@@ -1449,9 +1337,12 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1449 1337
1450 ftrace_disable_cpu(); 1338 ftrace_disable_cpu();
1451 1339
1452 for_each_tracing_cpu(cpu) { 1340 if (cpu_file == TRACE_PIPE_ALL_CPU) {
1453 ring_buffer_iter_reset(iter->buffer_iter[cpu]); 1341 for_each_tracing_cpu(cpu)
1454 } 1342 ring_buffer_iter_reset(iter->buffer_iter[cpu]);
1343 } else
1344 ring_buffer_iter_reset(iter->buffer_iter[cpu_file]);
1345
1455 1346
1456 ftrace_enable_cpu(); 1347 ftrace_enable_cpu();
1457 1348
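A hedged userspace sketch of the pattern s_start() adopts above: the reader keeps a private snapshot of the current tracer and only re-copies it, briefly under the lock, when the shared pointer has changed, instead of holding trace_types_lock for the whole read. The pthread mutex and struct names below are stand-ins, not the kernel types.

#include <pthread.h>
#include <stdio.h>

struct tracer { char name[32]; };

static pthread_mutex_t tracer_lock = PTHREAD_MUTEX_INITIALIZER;
static struct tracer *current_tracer;		/* shared, changes rarely */

struct reader {
	const struct tracer *last_seen;		/* what we last copied */
	struct tracer snapshot;			/* private working copy */
};

static void reader_refresh(struct reader *r)
{
	pthread_mutex_lock(&tracer_lock);
	if (current_tracer && r->last_seen != current_tracer) {
		r->last_seen = current_tracer;
		/* struct copy, like *iter->trace = *current_trace */
		r->snapshot = *current_tracer;
	}
	pthread_mutex_unlock(&tracer_lock);
}

int main(void)
{
	static struct tracer nop = { "nop" }, function = { "function" };
	struct reader r = { 0 };

	current_tracer = &nop;
	reader_refresh(&r);
	printf("reading with tracer '%s'\n", r.snapshot.name);

	current_tracer = &function;		/* writer switched tracers */
	reader_refresh(&r);
	printf("reading with tracer '%s'\n", r.snapshot.name);
	return 0;
}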
@@ -1469,155 +1360,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1469static void s_stop(struct seq_file *m, void *p) 1360static void s_stop(struct seq_file *m, void *p)
1470{ 1361{
1471 atomic_dec(&trace_record_cmdline_disabled); 1362 atomic_dec(&trace_record_cmdline_disabled);
1472 mutex_unlock(&trace_types_lock);
1473}
1474
1475#ifdef CONFIG_KRETPROBES
1476static inline const char *kretprobed(const char *name)
1477{
1478 static const char tramp_name[] = "kretprobe_trampoline";
1479 int size = sizeof(tramp_name);
1480
1481 if (strncmp(tramp_name, name, size) == 0)
1482 return "[unknown/kretprobe'd]";
1483 return name;
1484}
1485#else
1486static inline const char *kretprobed(const char *name)
1487{
1488 return name;
1489}
1490#endif /* CONFIG_KRETPROBES */
1491
1492static int
1493seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1494{
1495#ifdef CONFIG_KALLSYMS
1496 char str[KSYM_SYMBOL_LEN];
1497 const char *name;
1498
1499 kallsyms_lookup(address, NULL, NULL, NULL, str);
1500
1501 name = kretprobed(str);
1502
1503 return trace_seq_printf(s, fmt, name);
1504#endif
1505 return 1;
1506}
1507
1508static int
1509seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1510 unsigned long address)
1511{
1512#ifdef CONFIG_KALLSYMS
1513 char str[KSYM_SYMBOL_LEN];
1514 const char *name;
1515
1516 sprint_symbol(str, address);
1517 name = kretprobed(str);
1518
1519 return trace_seq_printf(s, fmt, name);
1520#endif
1521 return 1;
1522}
1523
1524#ifndef CONFIG_64BIT
1525# define IP_FMT "%08lx"
1526#else
1527# define IP_FMT "%016lx"
1528#endif
1529
1530int
1531seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1532{
1533 int ret;
1534
1535 if (!ip)
1536 return trace_seq_printf(s, "0");
1537
1538 if (sym_flags & TRACE_ITER_SYM_OFFSET)
1539 ret = seq_print_sym_offset(s, "%s", ip);
1540 else
1541 ret = seq_print_sym_short(s, "%s", ip);
1542
1543 if (!ret)
1544 return 0;
1545
1546 if (sym_flags & TRACE_ITER_SYM_ADDR)
1547 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1548 return ret;
1549}
1550
1551static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
1552 unsigned long ip, unsigned long sym_flags)
1553{
1554 struct file *file = NULL;
1555 unsigned long vmstart = 0;
1556 int ret = 1;
1557
1558 if (mm) {
1559 const struct vm_area_struct *vma;
1560
1561 down_read(&mm->mmap_sem);
1562 vma = find_vma(mm, ip);
1563 if (vma) {
1564 file = vma->vm_file;
1565 vmstart = vma->vm_start;
1566 }
1567 if (file) {
1568 ret = trace_seq_path(s, &file->f_path);
1569 if (ret)
1570 ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
1571 }
1572 up_read(&mm->mmap_sem);
1573 }
1574 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
1575 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1576 return ret;
1577}
1578
1579static int
1580seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
1581 unsigned long sym_flags)
1582{
1583 struct mm_struct *mm = NULL;
1584 int ret = 1;
1585 unsigned int i;
1586
1587 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
1588 struct task_struct *task;
1589 /*
1590 * we do the lookup on the thread group leader,
1591 * since individual threads might have already quit!
1592 */
1593 rcu_read_lock();
1594 task = find_task_by_vpid(entry->ent.tgid);
1595 if (task)
1596 mm = get_task_mm(task);
1597 rcu_read_unlock();
1598 }
1599
1600 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1601 unsigned long ip = entry->caller[i];
1602
1603 if (ip == ULONG_MAX || !ret)
1604 break;
1605 if (i && ret)
1606 ret = trace_seq_puts(s, " <- ");
1607 if (!ip) {
1608 if (ret)
1609 ret = trace_seq_puts(s, "??");
1610 continue;
1611 }
1612 if (!ret)
1613 break;
1614 if (ret)
1615 ret = seq_print_user_ip(s, mm, ip, sym_flags);
1616 }
1617
1618 if (mm)
1619 mmput(mm);
1620 return ret;
1621} 1363}
1622 1364
1623static void print_lat_help_header(struct seq_file *m) 1365static void print_lat_help_header(struct seq_file *m)
@@ -1704,103 +1446,6 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1704 seq_puts(m, "\n"); 1446 seq_puts(m, "\n");
1705} 1447}
1706 1448
1707static void
1708lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1709{
1710 int hardirq, softirq;
1711 char *comm;
1712
1713 comm = trace_find_cmdline(entry->pid);
1714
1715 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1716 trace_seq_printf(s, "%3d", cpu);
1717 trace_seq_printf(s, "%c%c",
1718 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
1719 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
1720 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1721
1722 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1723 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1724 if (hardirq && softirq) {
1725 trace_seq_putc(s, 'H');
1726 } else {
1727 if (hardirq) {
1728 trace_seq_putc(s, 'h');
1729 } else {
1730 if (softirq)
1731 trace_seq_putc(s, 's');
1732 else
1733 trace_seq_putc(s, '.');
1734 }
1735 }
1736
1737 if (entry->preempt_count)
1738 trace_seq_printf(s, "%x", entry->preempt_count);
1739 else
1740 trace_seq_puts(s, ".");
1741}
1742
1743unsigned long preempt_mark_thresh = 100;
1744
1745static void
1746lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1747 unsigned long rel_usecs)
1748{
1749 trace_seq_printf(s, " %4lldus", abs_usecs);
1750 if (rel_usecs > preempt_mark_thresh)
1751 trace_seq_puts(s, "!: ");
1752 else if (rel_usecs > 1)
1753 trace_seq_puts(s, "+: ");
1754 else
1755 trace_seq_puts(s, " : ");
1756}
1757
1758static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1759
1760static int task_state_char(unsigned long state)
1761{
1762 int bit = state ? __ffs(state) + 1 : 0;
1763
1764 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
1765}
1766
1767/*
1768 * The message is supposed to contain an ending newline.
1769 * If the printing stops prematurely, try to add a newline of our own.
1770 */
1771void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1772{
1773 struct trace_entry *ent;
1774 struct trace_field_cont *cont;
1775 bool ok = true;
1776
1777 ent = peek_next_entry(iter, iter->cpu, NULL);
1778 if (!ent || ent->type != TRACE_CONT) {
1779 trace_seq_putc(s, '\n');
1780 return;
1781 }
1782
1783 do {
1784 cont = (struct trace_field_cont *)ent;
1785 if (ok)
1786 ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
1787
1788 ftrace_disable_cpu();
1789
1790 if (iter->buffer_iter[iter->cpu])
1791 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1792 else
1793 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
1794
1795 ftrace_enable_cpu();
1796
1797 ent = peek_next_entry(iter, iter->cpu, NULL);
1798 } while (ent && ent->type == TRACE_CONT);
1799
1800 if (!ok)
1801 trace_seq_putc(s, '\n');
1802}
1803
1804static void test_cpu_buff_start(struct trace_iterator *iter) 1449static void test_cpu_buff_start(struct trace_iterator *iter)
1805{ 1450{
1806 struct trace_seq *s = &iter->seq; 1451 struct trace_seq *s = &iter->seq;
@@ -1818,138 +1463,31 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
1818 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); 1463 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1819} 1464}
1820 1465
1821static enum print_line_t 1466static enum print_line_t print_lat_fmt(struct trace_iterator *iter)
1822print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1823{ 1467{
1824 struct trace_seq *s = &iter->seq; 1468 struct trace_seq *s = &iter->seq;
1825 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1469 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1826 struct trace_entry *next_entry; 1470 struct trace_event *event;
1827 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1828 struct trace_entry *entry = iter->ent; 1471 struct trace_entry *entry = iter->ent;
1829 unsigned long abs_usecs;
1830 unsigned long rel_usecs;
1831 u64 next_ts;
1832 char *comm;
1833 int S, T;
1834 int i;
1835
1836 if (entry->type == TRACE_CONT)
1837 return TRACE_TYPE_HANDLED;
1838 1472
1839 test_cpu_buff_start(iter); 1473 test_cpu_buff_start(iter);
1840 1474
1841 next_entry = find_next_entry(iter, NULL, &next_ts); 1475 event = ftrace_find_event(entry->type);
1842 if (!next_entry)
1843 next_ts = iter->ts;
1844 rel_usecs = ns2usecs(next_ts - iter->ts);
1845 abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
1846
1847 if (verbose) {
1848 comm = trace_find_cmdline(entry->pid);
1849 trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
1850 " %ld.%03ldms (+%ld.%03ldms): ",
1851 comm,
1852 entry->pid, cpu, entry->flags,
1853 entry->preempt_count, trace_idx,
1854 ns2usecs(iter->ts),
1855 abs_usecs/1000,
1856 abs_usecs % 1000, rel_usecs/1000,
1857 rel_usecs % 1000);
1858 } else {
1859 lat_print_generic(s, entry, cpu);
1860 lat_print_timestamp(s, abs_usecs, rel_usecs);
1861 }
1862 switch (entry->type) {
1863 case TRACE_FN: {
1864 struct ftrace_entry *field;
1865
1866 trace_assign_type(field, entry);
1867
1868 seq_print_ip_sym(s, field->ip, sym_flags);
1869 trace_seq_puts(s, " (");
1870 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1871 trace_seq_puts(s, ")\n");
1872 break;
1873 }
1874 case TRACE_CTX:
1875 case TRACE_WAKE: {
1876 struct ctx_switch_entry *field;
1877
1878 trace_assign_type(field, entry);
1879
1880 T = task_state_char(field->next_state);
1881 S = task_state_char(field->prev_state);
1882 comm = trace_find_cmdline(field->next_pid);
1883 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
1884 field->prev_pid,
1885 field->prev_prio,
1886 S, entry->type == TRACE_CTX ? "==>" : " +",
1887 field->next_cpu,
1888 field->next_pid,
1889 field->next_prio,
1890 T, comm);
1891 break;
1892 }
1893 case TRACE_SPECIAL: {
1894 struct special_entry *field;
1895
1896 trace_assign_type(field, entry);
1897
1898 trace_seq_printf(s, "# %ld %ld %ld\n",
1899 field->arg1,
1900 field->arg2,
1901 field->arg3);
1902 break;
1903 }
1904 case TRACE_STACK: {
1905 struct stack_entry *field;
1906
1907 trace_assign_type(field, entry);
1908
1909 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1910 if (i)
1911 trace_seq_puts(s, " <= ");
1912 seq_print_ip_sym(s, field->caller[i], sym_flags);
1913 }
1914 trace_seq_puts(s, "\n");
1915 break;
1916 }
1917 case TRACE_PRINT: {
1918 struct print_entry *field;
1919
1920 trace_assign_type(field, entry);
1921 1476
1922 seq_print_ip_sym(s, field->ip, sym_flags); 1477 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1923 trace_seq_printf(s, ": %s", field->buf); 1478 if (!trace_print_lat_context(iter))
1924 if (entry->flags & TRACE_FLAG_CONT) 1479 goto partial;
1925 trace_seq_print_cont(s, iter);
1926 break;
1927 } 1480 }
1928 case TRACE_BRANCH: {
1929 struct trace_branch *field;
1930 1481
1931 trace_assign_type(field, entry); 1482 if (event)
1483 return event->latency_trace(iter, sym_flags);
1932 1484
1933 trace_seq_printf(s, "[%s] %s:%s:%d\n", 1485 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
1934 field->correct ? " ok " : " MISS ", 1486 goto partial;
1935 field->func,
1936 field->file,
1937 field->line);
1938 break;
1939 }
1940 case TRACE_USER_STACK: {
1941 struct userstack_entry *field;
1942
1943 trace_assign_type(field, entry);
1944 1487
1945 seq_print_userip_objs(field, s, sym_flags);
1946 trace_seq_putc(s, '\n');
1947 break;
1948 }
1949 default:
1950 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1951 }
1952 return TRACE_TYPE_HANDLED; 1488 return TRACE_TYPE_HANDLED;
1489partial:
1490 return TRACE_TYPE_PARTIAL_LINE;
1953} 1491}
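The old per-type switch statements are being replaced here by a lookup through ftrace_find_event() plus per-event callbacks. Below is a minimal userspace sketch of that dispatch shape, with invented event types, handlers and a simple linear registry; it is not the kernel's registration code.

#include <stdio.h>
#include <stddef.h>

enum print_line_t { TRACE_TYPE_PARTIAL_LINE, TRACE_TYPE_HANDLED };

struct trace_entry { int type; };

struct trace_event {
	int type;
	enum print_line_t (*trace)(const struct trace_entry *ent);
};

static enum print_line_t print_fn(const struct trace_entry *ent)
{
	(void)ent;
	printf("function entry\n");
	return TRACE_TYPE_HANDLED;
}

static enum print_line_t print_ctx(const struct trace_entry *ent)
{
	(void)ent;
	printf("context switch\n");
	return TRACE_TYPE_HANDLED;
}

static struct trace_event events[] = {
	{ .type = 1, .trace = print_fn },
	{ .type = 2, .trace = print_ctx },
};

static struct trace_event *find_event(int type)
{
	size_t i;

	for (i = 0; i < sizeof(events) / sizeof(events[0]); i++)
		if (events[i].type == type)
			return &events[i];
	return NULL;
}

static enum print_line_t print_line(const struct trace_entry *ent)
{
	struct trace_event *event = find_event(ent->type);

	if (event)
		return event->trace(ent);

	printf("Unknown type %d\n", ent->type);
	return TRACE_TYPE_HANDLED;
}

int main(void)
{
	struct trace_entry e1 = { 1 }, e2 = { 2 }, e3 = { 99 };

	print_line(&e1);
	print_line(&e2);
	print_line(&e3);
	return 0;
}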
1954 1492
1955static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 1493static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
@@ -1957,313 +1495,78 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1957 struct trace_seq *s = &iter->seq; 1495 struct trace_seq *s = &iter->seq;
1958 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1496 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1959 struct trace_entry *entry; 1497 struct trace_entry *entry;
1960 unsigned long usec_rem; 1498 struct trace_event *event;
1961 unsigned long long t;
1962 unsigned long secs;
1963 char *comm;
1964 int ret;
1965 int S, T;
1966 int i;
1967 1499
1968 entry = iter->ent; 1500 entry = iter->ent;
1969 1501
1970 if (entry->type == TRACE_CONT)
1971 return TRACE_TYPE_HANDLED;
1972
1973 test_cpu_buff_start(iter); 1502 test_cpu_buff_start(iter);
1974 1503
1975 comm = trace_find_cmdline(iter->ent->pid); 1504 event = ftrace_find_event(entry->type);
1976
1977 t = ns2usecs(iter->ts);
1978 usec_rem = do_div(t, 1000000ULL);
1979 secs = (unsigned long)t;
1980 1505
1981 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); 1506 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1982 if (!ret) 1507 if (!trace_print_context(iter))
1983 return TRACE_TYPE_PARTIAL_LINE; 1508 goto partial;
1984 ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
1985 if (!ret)
1986 return TRACE_TYPE_PARTIAL_LINE;
1987 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1988 if (!ret)
1989 return TRACE_TYPE_PARTIAL_LINE;
1990
1991 switch (entry->type) {
1992 case TRACE_FN: {
1993 struct ftrace_entry *field;
1994
1995 trace_assign_type(field, entry);
1996
1997 ret = seq_print_ip_sym(s, field->ip, sym_flags);
1998 if (!ret)
1999 return TRACE_TYPE_PARTIAL_LINE;
2000 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
2001 field->parent_ip) {
2002 ret = trace_seq_printf(s, " <-");
2003 if (!ret)
2004 return TRACE_TYPE_PARTIAL_LINE;
2005 ret = seq_print_ip_sym(s,
2006 field->parent_ip,
2007 sym_flags);
2008 if (!ret)
2009 return TRACE_TYPE_PARTIAL_LINE;
2010 }
2011 ret = trace_seq_printf(s, "\n");
2012 if (!ret)
2013 return TRACE_TYPE_PARTIAL_LINE;
2014 break;
2015 }
2016 case TRACE_CTX:
2017 case TRACE_WAKE: {
2018 struct ctx_switch_entry *field;
2019
2020 trace_assign_type(field, entry);
2021
2022 T = task_state_char(field->next_state);
2023 S = task_state_char(field->prev_state);
2024 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
2025 field->prev_pid,
2026 field->prev_prio,
2027 S,
2028 entry->type == TRACE_CTX ? "==>" : " +",
2029 field->next_cpu,
2030 field->next_pid,
2031 field->next_prio,
2032 T);
2033 if (!ret)
2034 return TRACE_TYPE_PARTIAL_LINE;
2035 break;
2036 }
2037 case TRACE_SPECIAL: {
2038 struct special_entry *field;
2039
2040 trace_assign_type(field, entry);
2041
2042 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
2043 field->arg1,
2044 field->arg2,
2045 field->arg3);
2046 if (!ret)
2047 return TRACE_TYPE_PARTIAL_LINE;
2048 break;
2049 }
2050 case TRACE_STACK: {
2051 struct stack_entry *field;
2052
2053 trace_assign_type(field, entry);
2054
2055 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
2056 if (i) {
2057 ret = trace_seq_puts(s, " <= ");
2058 if (!ret)
2059 return TRACE_TYPE_PARTIAL_LINE;
2060 }
2061 ret = seq_print_ip_sym(s, field->caller[i],
2062 sym_flags);
2063 if (!ret)
2064 return TRACE_TYPE_PARTIAL_LINE;
2065 }
2066 ret = trace_seq_puts(s, "\n");
2067 if (!ret)
2068 return TRACE_TYPE_PARTIAL_LINE;
2069 break;
2070 }
2071 case TRACE_PRINT: {
2072 struct print_entry *field;
2073
2074 trace_assign_type(field, entry);
2075
2076 seq_print_ip_sym(s, field->ip, sym_flags);
2077 trace_seq_printf(s, ": %s", field->buf);
2078 if (entry->flags & TRACE_FLAG_CONT)
2079 trace_seq_print_cont(s, iter);
2080 break;
2081 }
2082 case TRACE_GRAPH_RET: {
2083 return print_graph_function(iter);
2084 }
2085 case TRACE_GRAPH_ENT: {
2086 return print_graph_function(iter);
2087 } 1509 }
2088 case TRACE_BRANCH: {
2089 struct trace_branch *field;
2090 1510
2091 trace_assign_type(field, entry); 1511 if (event)
1512 return event->trace(iter, sym_flags);
2092 1513
2093 trace_seq_printf(s, "[%s] %s:%s:%d\n", 1514 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2094 field->correct ? " ok " : " MISS ", 1515 goto partial;
2095 field->func,
2096 field->file,
2097 field->line);
2098 break;
2099 }
2100 case TRACE_USER_STACK: {
2101 struct userstack_entry *field;
2102 1516
2103 trace_assign_type(field, entry);
2104
2105 ret = seq_print_userip_objs(field, s, sym_flags);
2106 if (!ret)
2107 return TRACE_TYPE_PARTIAL_LINE;
2108 ret = trace_seq_putc(s, '\n');
2109 if (!ret)
2110 return TRACE_TYPE_PARTIAL_LINE;
2111 break;
2112 }
2113 }
2114 return TRACE_TYPE_HANDLED; 1517 return TRACE_TYPE_HANDLED;
1518partial:
1519 return TRACE_TYPE_PARTIAL_LINE;
2115} 1520}
2116 1521
2117static enum print_line_t print_raw_fmt(struct trace_iterator *iter) 1522static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2118{ 1523{
2119 struct trace_seq *s = &iter->seq; 1524 struct trace_seq *s = &iter->seq;
2120 struct trace_entry *entry; 1525 struct trace_entry *entry;
2121 int ret; 1526 struct trace_event *event;
2122 int S, T;
2123 1527
2124 entry = iter->ent; 1528 entry = iter->ent;
2125 1529
2126 if (entry->type == TRACE_CONT) 1530 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2127 return TRACE_TYPE_HANDLED; 1531 if (!trace_seq_printf(s, "%d %d %llu ",
2128 1532 entry->pid, iter->cpu, iter->ts))
2129 ret = trace_seq_printf(s, "%d %d %llu ", 1533 goto partial;
2130 entry->pid, iter->cpu, iter->ts);
2131 if (!ret)
2132 return TRACE_TYPE_PARTIAL_LINE;
2133
2134 switch (entry->type) {
2135 case TRACE_FN: {
2136 struct ftrace_entry *field;
2137
2138 trace_assign_type(field, entry);
2139
2140 ret = trace_seq_printf(s, "%x %x\n",
2141 field->ip,
2142 field->parent_ip);
2143 if (!ret)
2144 return TRACE_TYPE_PARTIAL_LINE;
2145 break;
2146 } 1534 }
2147 case TRACE_CTX:
2148 case TRACE_WAKE: {
2149 struct ctx_switch_entry *field;
2150
2151 trace_assign_type(field, entry);
2152
2153 T = task_state_char(field->next_state);
2154 S = entry->type == TRACE_WAKE ? '+' :
2155 task_state_char(field->prev_state);
2156 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
2157 field->prev_pid,
2158 field->prev_prio,
2159 S,
2160 field->next_cpu,
2161 field->next_pid,
2162 field->next_prio,
2163 T);
2164 if (!ret)
2165 return TRACE_TYPE_PARTIAL_LINE;
2166 break;
2167 }
2168 case TRACE_SPECIAL:
2169 case TRACE_USER_STACK:
2170 case TRACE_STACK: {
2171 struct special_entry *field;
2172 1535
2173 trace_assign_type(field, entry); 1536 event = ftrace_find_event(entry->type);
1537 if (event)
1538 return event->raw(iter, 0);
2174 1539
2175 ret = trace_seq_printf(s, "# %ld %ld %ld\n", 1540 if (!trace_seq_printf(s, "%d ?\n", entry->type))
2176 field->arg1, 1541 goto partial;
2177 field->arg2,
2178 field->arg3);
2179 if (!ret)
2180 return TRACE_TYPE_PARTIAL_LINE;
2181 break;
2182 }
2183 case TRACE_PRINT: {
2184 struct print_entry *field;
2185 1542
2186 trace_assign_type(field, entry);
2187
2188 trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
2189 if (entry->flags & TRACE_FLAG_CONT)
2190 trace_seq_print_cont(s, iter);
2191 break;
2192 }
2193 }
2194 return TRACE_TYPE_HANDLED; 1543 return TRACE_TYPE_HANDLED;
1544partial:
1545 return TRACE_TYPE_PARTIAL_LINE;
2195} 1546}
2196 1547
2197#define SEQ_PUT_FIELD_RET(s, x) \
2198do { \
2199 if (!trace_seq_putmem(s, &(x), sizeof(x))) \
2200 return 0; \
2201} while (0)
2202
2203#define SEQ_PUT_HEX_FIELD_RET(s, x) \
2204do { \
2205 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
2206 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
2207 return 0; \
2208} while (0)
2209
2210static enum print_line_t print_hex_fmt(struct trace_iterator *iter) 1548static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2211{ 1549{
2212 struct trace_seq *s = &iter->seq; 1550 struct trace_seq *s = &iter->seq;
2213 unsigned char newline = '\n'; 1551 unsigned char newline = '\n';
2214 struct trace_entry *entry; 1552 struct trace_entry *entry;
2215 int S, T; 1553 struct trace_event *event;
2216 1554
2217 entry = iter->ent; 1555 entry = iter->ent;
2218 1556
2219 if (entry->type == TRACE_CONT) 1557 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2220 return TRACE_TYPE_HANDLED; 1558 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2221 1559 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2222 SEQ_PUT_HEX_FIELD_RET(s, entry->pid); 1560 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2223 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2224 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2225
2226 switch (entry->type) {
2227 case TRACE_FN: {
2228 struct ftrace_entry *field;
2229
2230 trace_assign_type(field, entry);
2231
2232 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
2233 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
2234 break;
2235 }
2236 case TRACE_CTX:
2237 case TRACE_WAKE: {
2238 struct ctx_switch_entry *field;
2239
2240 trace_assign_type(field, entry);
2241
2242 T = task_state_char(field->next_state);
2243 S = entry->type == TRACE_WAKE ? '+' :
2244 task_state_char(field->prev_state);
2245 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
2246 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
2247 SEQ_PUT_HEX_FIELD_RET(s, S);
2248 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
2249 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
2250 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
2251 SEQ_PUT_HEX_FIELD_RET(s, T);
2252 break;
2253 } 1561 }
2254 case TRACE_SPECIAL:
2255 case TRACE_USER_STACK:
2256 case TRACE_STACK: {
2257 struct special_entry *field;
2258
2259 trace_assign_type(field, entry);
2260 1562
2261 SEQ_PUT_HEX_FIELD_RET(s, field->arg1); 1563 event = ftrace_find_event(entry->type);
2262 SEQ_PUT_HEX_FIELD_RET(s, field->arg2); 1564 if (event) {
2263 SEQ_PUT_HEX_FIELD_RET(s, field->arg3); 1565 enum print_line_t ret = event->hex(iter, 0);
2264 break; 1566 if (ret != TRACE_TYPE_HANDLED)
2265 } 1567 return ret;
2266 } 1568 }
1569
2267 SEQ_PUT_FIELD_RET(s, newline); 1570 SEQ_PUT_FIELD_RET(s, newline);
2268 1571
2269 return TRACE_TYPE_HANDLED; 1572 return TRACE_TYPE_HANDLED;
@@ -2278,13 +1581,10 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
2278 1581
2279 trace_assign_type(field, entry); 1582 trace_assign_type(field, entry);
2280 1583
2281 ret = trace_seq_printf(s, field->buf); 1584 ret = trace_seq_printf(s, "%s", field->buf);
2282 if (!ret) 1585 if (!ret)
2283 return TRACE_TYPE_PARTIAL_LINE; 1586 return TRACE_TYPE_PARTIAL_LINE;
2284 1587
2285 if (entry->flags & TRACE_FLAG_CONT)
2286 trace_seq_print_cont(s, iter);
2287
2288 return TRACE_TYPE_HANDLED; 1588 return TRACE_TYPE_HANDLED;
2289} 1589}
2290 1590
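The one-line change above in print_printk_msg_only() matters because field->buf is arbitrary trace text: passing it as the format string would let any '%' in the message be interpreted as a conversion. A tiny userspace illustration of the same distinction:

#include <stdio.h>

int main(void)
{
	const char *msg = "loaded 100% of module %s";

	/* printf(msg) would try to read a nonexistent argument for %s. */
	printf("%s\n", msg);	/* safe: msg is treated as plain data */
	return 0;
}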
@@ -2292,53 +1592,18 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2292{ 1592{
2293 struct trace_seq *s = &iter->seq; 1593 struct trace_seq *s = &iter->seq;
2294 struct trace_entry *entry; 1594 struct trace_entry *entry;
1595 struct trace_event *event;
2295 1596
2296 entry = iter->ent; 1597 entry = iter->ent;
2297 1598
2298 if (entry->type == TRACE_CONT) 1599 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2299 return TRACE_TYPE_HANDLED; 1600 SEQ_PUT_FIELD_RET(s, entry->pid);
2300 1601 SEQ_PUT_FIELD_RET(s, iter->cpu);
2301 SEQ_PUT_FIELD_RET(s, entry->pid); 1602 SEQ_PUT_FIELD_RET(s, iter->ts);
2302 SEQ_PUT_FIELD_RET(s, entry->cpu);
2303 SEQ_PUT_FIELD_RET(s, iter->ts);
2304
2305 switch (entry->type) {
2306 case TRACE_FN: {
2307 struct ftrace_entry *field;
2308
2309 trace_assign_type(field, entry);
2310
2311 SEQ_PUT_FIELD_RET(s, field->ip);
2312 SEQ_PUT_FIELD_RET(s, field->parent_ip);
2313 break;
2314 } 1603 }
2315 case TRACE_CTX: {
2316 struct ctx_switch_entry *field;
2317 1604
2318 trace_assign_type(field, entry); 1605 event = ftrace_find_event(entry->type);
2319 1606 return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED;
2320 SEQ_PUT_FIELD_RET(s, field->prev_pid);
2321 SEQ_PUT_FIELD_RET(s, field->prev_prio);
2322 SEQ_PUT_FIELD_RET(s, field->prev_state);
2323 SEQ_PUT_FIELD_RET(s, field->next_pid);
2324 SEQ_PUT_FIELD_RET(s, field->next_prio);
2325 SEQ_PUT_FIELD_RET(s, field->next_state);
2326 break;
2327 }
2328 case TRACE_SPECIAL:
2329 case TRACE_USER_STACK:
2330 case TRACE_STACK: {
2331 struct special_entry *field;
2332
2333 trace_assign_type(field, entry);
2334
2335 SEQ_PUT_FIELD_RET(s, field->arg1);
2336 SEQ_PUT_FIELD_RET(s, field->arg2);
2337 SEQ_PUT_FIELD_RET(s, field->arg3);
2338 break;
2339 }
2340 }
2341 return 1;
2342} 1607}
2343 1608
2344static int trace_empty(struct trace_iterator *iter) 1609static int trace_empty(struct trace_iterator *iter)
@@ -2383,7 +1648,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
2383 return print_raw_fmt(iter); 1648 return print_raw_fmt(iter);
2384 1649
2385 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 1650 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2386 return print_lat_fmt(iter, iter->idx, iter->cpu); 1651 return print_lat_fmt(iter);
2387 1652
2388 return print_trace_fmt(iter); 1653 return print_trace_fmt(iter);
2389} 1654}
@@ -2426,30 +1691,40 @@ static struct seq_operations tracer_seq_ops = {
2426}; 1691};
2427 1692
2428static struct trace_iterator * 1693static struct trace_iterator *
2429__tracing_open(struct inode *inode, struct file *file, int *ret) 1694__tracing_open(struct inode *inode, struct file *file)
2430{ 1695{
1696 long cpu_file = (long) inode->i_private;
1697 void *fail_ret = ERR_PTR(-ENOMEM);
2431 struct trace_iterator *iter; 1698 struct trace_iterator *iter;
2432 struct seq_file *m; 1699 struct seq_file *m;
2433 int cpu; 1700 int cpu, ret;
2434 1701
2435 if (tracing_disabled) { 1702 if (tracing_disabled)
2436 *ret = -ENODEV; 1703 return ERR_PTR(-ENODEV);
2437 return NULL;
2438 }
2439 1704
2440 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 1705 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2441 if (!iter) { 1706 if (!iter)
2442 *ret = -ENOMEM; 1707 return ERR_PTR(-ENOMEM);
2443 goto out;
2444 }
2445 1708
1709 /*
1710 * We make a copy of the current tracer to avoid concurrent
1711 * changes to it while we are reading.
1712 */
2446 mutex_lock(&trace_types_lock); 1713 mutex_lock(&trace_types_lock);
1714 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
1715 if (!iter->trace)
1716 goto fail;
1717
1718 if (current_trace)
1719 *iter->trace = *current_trace;
1720
2447 if (current_trace && current_trace->print_max) 1721 if (current_trace && current_trace->print_max)
2448 iter->tr = &max_tr; 1722 iter->tr = &max_tr;
2449 else 1723 else
2450 iter->tr = inode->i_private; 1724 iter->tr = &global_trace;
2451 iter->trace = current_trace;
2452 iter->pos = -1; 1725 iter->pos = -1;
1726 mutex_init(&iter->mutex);
1727 iter->cpu_file = cpu_file;
2453 1728
2454 /* Notify the tracer early; before we stop tracing. */ 1729 /* Notify the tracer early; before we stop tracing. */
2455 if (iter->trace && iter->trace->open) 1730 if (iter->trace && iter->trace->open)
@@ -2459,20 +1734,30 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
2459 if (ring_buffer_overruns(iter->tr->buffer)) 1734 if (ring_buffer_overruns(iter->tr->buffer))
2460 iter->iter_flags |= TRACE_FILE_ANNOTATE; 1735 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2461 1736
1737 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
1738 for_each_tracing_cpu(cpu) {
2462 1739
2463 for_each_tracing_cpu(cpu) { 1740 iter->buffer_iter[cpu] =
1741 ring_buffer_read_start(iter->tr->buffer, cpu);
2464 1742
1743 if (!iter->buffer_iter[cpu])
1744 goto fail_buffer;
1745 }
1746 } else {
1747 cpu = iter->cpu_file;
2465 iter->buffer_iter[cpu] = 1748 iter->buffer_iter[cpu] =
2466 ring_buffer_read_start(iter->tr->buffer, cpu); 1749 ring_buffer_read_start(iter->tr->buffer, cpu);
2467 1750
2468 if (!iter->buffer_iter[cpu]) 1751 if (!iter->buffer_iter[cpu])
2469 goto fail_buffer; 1752 goto fail;
2470 } 1753 }
2471 1754
2472 /* TODO stop tracer */ 1755 /* TODO stop tracer */
2473 *ret = seq_open(file, &tracer_seq_ops); 1756 ret = seq_open(file, &tracer_seq_ops);
2474 if (*ret) 1757 if (ret < 0) {
1758 fail_ret = ERR_PTR(ret);
2475 goto fail_buffer; 1759 goto fail_buffer;
1760 }
2476 1761
2477 m = file->private_data; 1762 m = file->private_data;
2478 m->private = iter; 1763 m->private = iter;
@@ -2482,7 +1767,6 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
2482 1767
2483 mutex_unlock(&trace_types_lock); 1768 mutex_unlock(&trace_types_lock);
2484 1769
2485 out:
2486 return iter; 1770 return iter;
2487 1771
2488 fail_buffer: 1772 fail_buffer:
@@ -2490,10 +1774,12 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
2490 if (iter->buffer_iter[cpu]) 1774 if (iter->buffer_iter[cpu])
2491 ring_buffer_read_finish(iter->buffer_iter[cpu]); 1775 ring_buffer_read_finish(iter->buffer_iter[cpu]);
2492 } 1776 }
1777 fail:
2493 mutex_unlock(&trace_types_lock); 1778 mutex_unlock(&trace_types_lock);
1779 kfree(iter->trace);
2494 kfree(iter); 1780 kfree(iter);
2495 1781
2496 return ERR_PTR(-ENOMEM); 1782 return fail_ret;
2497} 1783}
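__tracing_open() now reports failure through the returned pointer itself rather than an int out-parameter. A self-contained userspace sketch of that ERR_PTR/PTR_ERR/IS_ERR convention follows; the macros below only mimic the kernel ones for the sake of the example.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

struct iterator { int pos; };

static struct iterator *iterator_open(int disabled)
{
	struct iterator *iter;

	if (disabled)
		return ERR_PTR(-ENODEV);	/* error encoded in the pointer */

	iter = calloc(1, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);
	iter->pos = -1;
	return iter;
}

int main(void)
{
	struct iterator *iter = iterator_open(1);

	if (IS_ERR(iter)) {
		printf("open failed: %ld\n", PTR_ERR(iter));
		return 1;
	}
	free(iter);
	return 0;
}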
2498 1784
2499int tracing_open_generic(struct inode *inode, struct file *filp) 1785int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -2505,7 +1791,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
2505 return 0; 1791 return 0;
2506} 1792}
2507 1793
2508int tracing_release(struct inode *inode, struct file *file) 1794static int tracing_release(struct inode *inode, struct file *file)
2509{ 1795{
2510 struct seq_file *m = (struct seq_file *)file->private_data; 1796 struct seq_file *m = (struct seq_file *)file->private_data;
2511 struct trace_iterator *iter = m->private; 1797 struct trace_iterator *iter = m->private;
@@ -2525,15 +1811,20 @@ int tracing_release(struct inode *inode, struct file *file)
2525 mutex_unlock(&trace_types_lock); 1811 mutex_unlock(&trace_types_lock);
2526 1812
2527 seq_release(inode, file); 1813 seq_release(inode, file);
1814 mutex_destroy(&iter->mutex);
1815 kfree(iter->trace);
2528 kfree(iter); 1816 kfree(iter);
2529 return 0; 1817 return 0;
2530} 1818}
2531 1819
2532static int tracing_open(struct inode *inode, struct file *file) 1820static int tracing_open(struct inode *inode, struct file *file)
2533{ 1821{
2534 int ret; 1822 struct trace_iterator *iter;
1823 int ret = 0;
2535 1824
2536 __tracing_open(inode, file, &ret); 1825 iter = __tracing_open(inode, file);
1826 if (IS_ERR(iter))
1827 ret = PTR_ERR(iter);
2537 1828
2538 return ret; 1829 return ret;
2539} 1830}
@@ -2541,11 +1832,13 @@ static int tracing_open(struct inode *inode, struct file *file)
2541static int tracing_lt_open(struct inode *inode, struct file *file) 1832static int tracing_lt_open(struct inode *inode, struct file *file)
2542{ 1833{
2543 struct trace_iterator *iter; 1834 struct trace_iterator *iter;
2544 int ret; 1835 int ret = 0;
2545 1836
2546 iter = __tracing_open(inode, file, &ret); 1837 iter = __tracing_open(inode, file);
2547 1838
2548 if (!ret) 1839 if (IS_ERR(iter))
1840 ret = PTR_ERR(iter);
1841 else
2549 iter->iter_flags |= TRACE_FILE_LAT_FMT; 1842 iter->iter_flags |= TRACE_FILE_LAT_FMT;
2550 1843
2551 return ret; 1844 return ret;
@@ -2740,57 +2033,62 @@ static ssize_t
2740tracing_trace_options_read(struct file *filp, char __user *ubuf, 2033tracing_trace_options_read(struct file *filp, char __user *ubuf,
2741 size_t cnt, loff_t *ppos) 2034 size_t cnt, loff_t *ppos)
2742{ 2035{
2743 int i; 2036 struct tracer_opt *trace_opts;
2037 u32 tracer_flags;
2038 int len = 0;
2744 char *buf; 2039 char *buf;
2745 int r = 0; 2040 int r = 0;
2746 int len = 0; 2041 int i;
2747 u32 tracer_flags = current_trace->flags->val;
2748 struct tracer_opt *trace_opts = current_trace->flags->opts;
2749 2042
2750 2043
2751 /* calulate max size */ 2044 /* calculate max size */
2752 for (i = 0; trace_options[i]; i++) { 2045 for (i = 0; trace_options[i]; i++) {
2753 len += strlen(trace_options[i]); 2046 len += strlen(trace_options[i]);
2754 len += 3; /* "no" and space */ 2047 len += 3; /* "no" and newline */
2755 } 2048 }
2756 2049
2050 mutex_lock(&trace_types_lock);
2051 tracer_flags = current_trace->flags->val;
2052 trace_opts = current_trace->flags->opts;
2053
2757 /* 2054 /*
2758 * Increase the size with names of options specific 2055 * Increase the size with names of options specific
2759 * of the current tracer. 2056 * of the current tracer.
2760 */ 2057 */
2761 for (i = 0; trace_opts[i].name; i++) { 2058 for (i = 0; trace_opts[i].name; i++) {
2762 len += strlen(trace_opts[i].name); 2059 len += strlen(trace_opts[i].name);
2763 len += 3; /* "no" and space */ 2060 len += 3; /* "no" and newline */
2764 } 2061 }
2765 2062
2766 /* +2 for \n and \0 */ 2063 /* +2 for \n and \0 */
2767 buf = kmalloc(len + 2, GFP_KERNEL); 2064 buf = kmalloc(len + 2, GFP_KERNEL);
2768 if (!buf) 2065 if (!buf) {
2066 mutex_unlock(&trace_types_lock);
2769 return -ENOMEM; 2067 return -ENOMEM;
2068 }
2770 2069
2771 for (i = 0; trace_options[i]; i++) { 2070 for (i = 0; trace_options[i]; i++) {
2772 if (trace_flags & (1 << i)) 2071 if (trace_flags & (1 << i))
2773 r += sprintf(buf + r, "%s ", trace_options[i]); 2072 r += sprintf(buf + r, "%s\n", trace_options[i]);
2774 else 2073 else
2775 r += sprintf(buf + r, "no%s ", trace_options[i]); 2074 r += sprintf(buf + r, "no%s\n", trace_options[i]);
2776 } 2075 }
2777 2076
2778 for (i = 0; trace_opts[i].name; i++) { 2077 for (i = 0; trace_opts[i].name; i++) {
2779 if (tracer_flags & trace_opts[i].bit) 2078 if (tracer_flags & trace_opts[i].bit)
2780 r += sprintf(buf + r, "%s ", 2079 r += sprintf(buf + r, "%s\n",
2781 trace_opts[i].name); 2080 trace_opts[i].name);
2782 else 2081 else
2783 r += sprintf(buf + r, "no%s ", 2082 r += sprintf(buf + r, "no%s\n",
2784 trace_opts[i].name); 2083 trace_opts[i].name);
2785 } 2084 }
2085 mutex_unlock(&trace_types_lock);
2786 2086
2787 r += sprintf(buf + r, "\n");
2788 WARN_ON(r >= len + 2); 2087 WARN_ON(r >= len + 2);
2789 2088
2790 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2089 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2791 2090
2792 kfree(buf); 2091 kfree(buf);
2793
2794 return r; 2092 return r;
2795} 2093}
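A userspace sketch of the formatting done by tracing_trace_options_read() above: size the buffer for the worst case ("no" + name + newline per option), then emit one line per flag. The option names and the flag word are invented for the example.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static const char *trace_options[] = { "print-parent", "sym-offset", "block", NULL };

int main(void)
{
	unsigned int trace_flags = (1u << 0) | (1u << 2);	/* print-parent, block */
	size_t len = 0;
	char *buf;
	int r = 0, i;

	/* worst case: "no" + name + '\n' for every option, plus '\0' */
	for (i = 0; trace_options[i]; i++)
		len += strlen(trace_options[i]) + 3;

	buf = malloc(len + 1);
	if (!buf)
		return 1;

	for (i = 0; trace_options[i]; i++) {
		if (trace_flags & (1u << i))
			r += sprintf(buf + r, "%s\n", trace_options[i]);
		else
			r += sprintf(buf + r, "no%s\n", trace_options[i]);
	}

	fputs(buf, stdout);
	free(buf);
	return 0;
}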
2796 2094
@@ -2865,7 +2163,9 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2865 2163
2866 /* If no option could be set, test the specific tracer options */ 2164 /* If no option could be set, test the specific tracer options */
2867 if (!trace_options[i]) { 2165 if (!trace_options[i]) {
2166 mutex_lock(&trace_types_lock);
2868 ret = set_tracer_option(current_trace, cmp, neg); 2167 ret = set_tracer_option(current_trace, cmp, neg);
2168 mutex_unlock(&trace_types_lock);
2869 if (ret) 2169 if (ret)
2870 return ret; 2170 return ret;
2871 } 2171 }
@@ -2930,7 +2230,7 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2930{ 2230{
2931 struct trace_array *tr = filp->private_data; 2231 struct trace_array *tr = filp->private_data;
2932 char buf[64]; 2232 char buf[64];
2933 long val; 2233 unsigned long val;
2934 int ret; 2234 int ret;
2935 2235
2936 if (cnt >= sizeof(buf)) 2236 if (cnt >= sizeof(buf))
@@ -2985,8 +2285,23 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
2985 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2285 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2986} 2286}
2987 2287
2988static int tracing_set_tracer(char *buf) 2288int tracer_init(struct tracer *t, struct trace_array *tr)
2289{
2290 tracing_reset_online_cpus(tr);
2291 return t->init(tr);
2292}
2293
2294struct trace_option_dentry;
2295
2296static struct trace_option_dentry *
2297create_trace_option_files(struct tracer *tracer);
2298
2299static void
2300destroy_trace_option_files(struct trace_option_dentry *topts);
2301
2302static int tracing_set_tracer(const char *buf)
2989{ 2303{
2304 static struct trace_option_dentry *topts;
2990 struct trace_array *tr = &global_trace; 2305 struct trace_array *tr = &global_trace;
2991 struct tracer *t; 2306 struct tracer *t;
2992 int ret = 0; 2307 int ret = 0;
@@ -3007,9 +2322,14 @@ static int tracing_set_tracer(char *buf)
3007 if (current_trace && current_trace->reset) 2322 if (current_trace && current_trace->reset)
3008 current_trace->reset(tr); 2323 current_trace->reset(tr);
3009 2324
2325 destroy_trace_option_files(topts);
2326
3010 current_trace = t; 2327 current_trace = t;
2328
2329 topts = create_trace_option_files(current_trace);
2330
3011 if (t->init) { 2331 if (t->init) {
3012 ret = t->init(tr); 2332 ret = tracer_init(t, tr);
3013 if (ret) 2333 if (ret)
3014 goto out; 2334 goto out;
3015 } 2335 }
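A hedged sketch of the tracer-switch ordering introduced above: reset the old tracer, drop its option files, install the new tracer, recreate the option files, then let tracer_init() reset the buffers before calling ->init(). Everything below is a userspace stand-in with invented tracers; the option-file steps are only marked as comments.

#include <stdio.h>

struct trace_array { const char *name; };

struct tracer {
	const char *name;
	int  (*init)(struct trace_array *tr);
	void (*reset)(struct trace_array *tr);
};

static void tracing_reset_online_cpus(struct trace_array *tr)
{
	printf("  reset buffers of %s\n", tr->name);
}

/* mirrors tracer_init(): always reset the buffers before calling ->init() */
static int tracer_init(struct tracer *t, struct trace_array *tr)
{
	tracing_reset_online_cpus(tr);
	return t->init ? t->init(tr) : 0;
}

static int nop_init(struct trace_array *tr)       { (void)tr; printf("  nop init\n"); return 0; }
static int func_init(struct trace_array *tr)      { (void)tr; printf("  function init\n"); return 0; }
static void generic_reset(struct trace_array *tr) { (void)tr; printf("  reset old tracer\n"); }

static struct tracer nop_tracer  = { "nop",      nop_init,  generic_reset };
static struct tracer func_tracer = { "function", func_init, generic_reset };
static struct tracer *current_trace = &nop_tracer;

static int set_tracer(struct tracer *t, struct trace_array *tr)
{
	if (t == current_trace)
		return 0;
	printf("switching %s -> %s\n", current_trace->name, t->name);
	if (current_trace->reset)
		current_trace->reset(tr);
	/* destroy_trace_option_files(topts) would go here */
	current_trace = t;
	/* topts = create_trace_option_files(current_trace) would go here */
	return tracer_init(t, tr);
}

int main(void)
{
	struct trace_array tr = { "global_trace" };

	return set_tracer(&func_tracer, &tr);
}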
@@ -3072,9 +2392,9 @@ static ssize_t
3072tracing_max_lat_write(struct file *filp, const char __user *ubuf, 2392tracing_max_lat_write(struct file *filp, const char __user *ubuf,
3073 size_t cnt, loff_t *ppos) 2393 size_t cnt, loff_t *ppos)
3074{ 2394{
3075 long *ptr = filp->private_data; 2395 unsigned long *ptr = filp->private_data;
3076 char buf[64]; 2396 char buf[64];
3077 long val; 2397 unsigned long val;
3078 int ret; 2398 int ret;
3079 2399
3080 if (cnt >= sizeof(buf)) 2400 if (cnt >= sizeof(buf))
@@ -3094,54 +2414,96 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
3094 return cnt; 2414 return cnt;
3095} 2415}
3096 2416
3097static atomic_t tracing_reader;
3098
3099static int tracing_open_pipe(struct inode *inode, struct file *filp) 2417static int tracing_open_pipe(struct inode *inode, struct file *filp)
3100{ 2418{
2419 long cpu_file = (long) inode->i_private;
3101 struct trace_iterator *iter; 2420 struct trace_iterator *iter;
2421 int ret = 0;
3102 2422
3103 if (tracing_disabled) 2423 if (tracing_disabled)
3104 return -ENODEV; 2424 return -ENODEV;
3105 2425
3106 /* We only allow for reader of the pipe */ 2426 mutex_lock(&trace_types_lock);
3107 if (atomic_inc_return(&tracing_reader) != 1) { 2427
3108 atomic_dec(&tracing_reader); 2428 /* We only allow one reader per cpu */
3109 return -EBUSY; 2429 if (cpu_file == TRACE_PIPE_ALL_CPU) {
2430 if (!cpumask_empty(tracing_reader_cpumask)) {
2431 ret = -EBUSY;
2432 goto out;
2433 }
2434 cpumask_setall(tracing_reader_cpumask);
2435 } else {
2436 if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2437 cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2438 else {
2439 ret = -EBUSY;
2440 goto out;
2441 }
3110 } 2442 }
3111 2443
3112 /* create a buffer to store the information to pass to userspace */ 2444 /* create a buffer to store the information to pass to userspace */
3113 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 2445 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
3114 if (!iter) 2446 if (!iter) {
3115 return -ENOMEM; 2447 ret = -ENOMEM;
2448 goto out;
2449 }
3116 2450
3117 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { 2451 /*
3118 kfree(iter); 2452 * We make a copy of the current tracer to avoid concurrent
3119 return -ENOMEM; 2453 * changes to it while we are reading.
2454 */
2455 iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
2456 if (!iter->trace) {
2457 ret = -ENOMEM;
2458 goto fail;
3120 } 2459 }
2460 if (current_trace)
2461 *iter->trace = *current_trace;
3121 2462
3122 mutex_lock(&trace_types_lock); 2463 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
2464 ret = -ENOMEM;
2465 goto fail;
2466 }
3123 2467
3124 /* trace pipe does not show start of buffer */ 2468 /* trace pipe does not show start of buffer */
3125 cpumask_setall(iter->started); 2469 cpumask_setall(iter->started);
3126 2470
2471 iter->cpu_file = cpu_file;
3127 iter->tr = &global_trace; 2472 iter->tr = &global_trace;
3128 iter->trace = current_trace; 2473 mutex_init(&iter->mutex);
3129 filp->private_data = iter; 2474 filp->private_data = iter;
3130 2475
3131 if (iter->trace->pipe_open) 2476 if (iter->trace->pipe_open)
3132 iter->trace->pipe_open(iter); 2477 iter->trace->pipe_open(iter);
2478
2479out:
3133 mutex_unlock(&trace_types_lock); 2480 mutex_unlock(&trace_types_lock);
2481 return ret;
3134 2482
3135 return 0; 2483fail:
2484 kfree(iter->trace);
2485 kfree(iter);
2486 mutex_unlock(&trace_types_lock);
2487 return ret;
3136} 2488}
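The cpumask bookkeeping in tracing_open_pipe() enforces one reader per cpu, or a single reader for the whole trace. Below is a single-threaded userspace sketch of that rule using a plain bitmask; the real code additionally holds trace_types_lock around these tests, and the names are invented.

#include <stdio.h>
#include <errno.h>

#define NR_CPUS   8
#define ALL_CPUS (-1)

static unsigned int reader_mask;	/* bit n set => cpu n already has a reader */

static int pipe_open(int cpu)
{
	if (cpu == ALL_CPUS) {
		if (reader_mask)
			return -EBUSY;			/* someone already reads */
		reader_mask = (1u << NR_CPUS) - 1;	/* claim every cpu */
		return 0;
	}
	if (reader_mask & (1u << cpu))
		return -EBUSY;				/* this cpu is taken */
	reader_mask |= 1u << cpu;
	return 0;
}

static void pipe_release(int cpu)
{
	if (cpu == ALL_CPUS)
		reader_mask = 0;
	else
		reader_mask &= ~(1u << cpu);
}

int main(void)
{
	printf("open cpu1:  %d\n", pipe_open(1));		/* 0 */
	printf("open cpu1:  %d\n", pipe_open(1));		/* -EBUSY */
	printf("open all:   %d\n", pipe_open(ALL_CPUS));	/* -EBUSY */
	pipe_release(1);
	printf("open all:   %d\n", pipe_open(ALL_CPUS));	/* 0 */
	return 0;
}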
3137 2489
3138static int tracing_release_pipe(struct inode *inode, struct file *file) 2490static int tracing_release_pipe(struct inode *inode, struct file *file)
3139{ 2491{
3140 struct trace_iterator *iter = file->private_data; 2492 struct trace_iterator *iter = file->private_data;
3141 2493
2494 mutex_lock(&trace_types_lock);
2495
2496 if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
2497 cpumask_clear(tracing_reader_cpumask);
2498 else
2499 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2500
2501 mutex_unlock(&trace_types_lock);
2502
3142 free_cpumask_var(iter->started); 2503 free_cpumask_var(iter->started);
2504 mutex_destroy(&iter->mutex);
2505 kfree(iter->trace);
3143 kfree(iter); 2506 kfree(iter);
3144 atomic_dec(&tracing_reader);
3145 2507
3146 return 0; 2508 return 0;
3147} 2509}
@@ -3167,67 +2529,57 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
3167 } 2529 }
3168} 2530}
3169 2531
3170/* 2532
3171 * Consumer reader. 2533void default_wait_pipe(struct trace_iterator *iter)
3172 */
3173static ssize_t
3174tracing_read_pipe(struct file *filp, char __user *ubuf,
3175 size_t cnt, loff_t *ppos)
3176{ 2534{
3177 struct trace_iterator *iter = filp->private_data; 2535 DEFINE_WAIT(wait);
3178 ssize_t sret;
3179 2536
3180 /* return any leftover data */ 2537 prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
3181 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3182 if (sret != -EBUSY)
3183 return sret;
3184 2538
3185 trace_seq_reset(&iter->seq); 2539 if (trace_empty(iter))
2540 schedule();
3186 2541
3187 mutex_lock(&trace_types_lock); 2542 finish_wait(&trace_wait, &wait);
3188 if (iter->trace->read) { 2543}
3189 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); 2544
3190 if (sret) 2545/*
3191 goto out; 2546 * This is a make-shift waitqueue.
3192 } 2547 * A tracer might use this callback in some rare cases:
2548 *
2549 * 1) the current tracer might hold the runqueue lock when it wakes up
2550 * a reader, hence a deadlock (sched, function, and function graph tracers)
2551 * 2) the function tracers trace all functions, so we don't want
2552 * the overhead of calling wake_up and friends
2553 * (and tracing them too)
2554 *
2555 * Anyway, this is really a very primitive wakeup.
2556 */
2557void poll_wait_pipe(struct trace_iterator *iter)
2558{
2559 set_current_state(TASK_INTERRUPTIBLE);
2560 /* sleep for 100 msecs, and try again. */
2561 schedule_timeout(HZ / 10);
2562}
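poll_wait_pipe() deliberately avoids a real wait queue and just sleeps for 100ms between checks, leaving the re-test to the caller. A userspace analogue of that polling wait, assuming POSIX nanosleep and a made-up emptiness predicate:

#include <stdio.h>
#include <time.h>

static int trace_empty(int *pending)
{
	/* stand-in predicate: pretend data shows up after a few polls */
	return --(*pending) > 0;
}

static void poll_wait_pipe(void)
{
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 };

	nanosleep(&ts, NULL);	/* sleep for 100 msecs, and try again */
}

int main(void)
{
	int pending = 5;

	while (trace_empty(&pending)) {
		poll_wait_pipe();
		printf("polled, still empty\n");
	}
	printf("data available\n");
	return 0;
}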
2563
2564/* Must be called with trace_types_lock mutex held. */
2565static int tracing_wait_pipe(struct file *filp)
2566{
2567 struct trace_iterator *iter = filp->private_data;
3193 2568
3194waitagain:
3195 sret = 0;
3196 while (trace_empty(iter)) { 2569 while (trace_empty(iter)) {
3197 2570
3198 if ((filp->f_flags & O_NONBLOCK)) { 2571 if ((filp->f_flags & O_NONBLOCK)) {
3199 sret = -EAGAIN; 2572 return -EAGAIN;
3200 goto out;
3201 } 2573 }
3202 2574
3203 /* 2575 mutex_unlock(&iter->mutex);
3204 * This is a make-shift waitqueue. The reason we don't use
3205 * an actual wait queue is because:
3206 * 1) we only ever have one waiter
3207 * 2) the tracing, traces all functions, we don't want
3208 * the overhead of calling wake_up and friends
3209 * (and tracing them too)
3210 * Anyway, this is really very primitive wakeup.
3211 */
3212 set_current_state(TASK_INTERRUPTIBLE);
3213 iter->tr->waiter = current;
3214
3215 mutex_unlock(&trace_types_lock);
3216
3217 /* sleep for 100 msecs, and try again. */
3218 schedule_timeout(HZ/10);
3219 2576
3220 mutex_lock(&trace_types_lock); 2577 iter->trace->wait_pipe(iter);
3221 2578
3222 iter->tr->waiter = NULL; 2579 mutex_lock(&iter->mutex);
3223
3224 if (signal_pending(current)) {
3225 sret = -EINTR;
3226 goto out;
3227 }
3228 2580
3229 if (iter->trace != current_trace) 2581 if (signal_pending(current))
3230 goto out; 2582 return -EINTR;
3231 2583
3232 /* 2584 /*
3233 * We block until we read something and tracing is disabled. 2585 * We block until we read something and tracing is disabled.
@@ -3240,13 +2592,59 @@ waitagain:
3240 */ 2592 */
3241 if (!tracer_enabled && iter->pos) 2593 if (!tracer_enabled && iter->pos)
3242 break; 2594 break;
2595 }
3243 2596
3244 continue; 2597 return 1;
2598}
2599
2600/*
2601 * Consumer reader.
2602 */
2603static ssize_t
2604tracing_read_pipe(struct file *filp, char __user *ubuf,
2605 size_t cnt, loff_t *ppos)
2606{
2607 struct trace_iterator *iter = filp->private_data;
2608 static struct tracer *old_tracer;
2609 ssize_t sret;
2610
2611 /* return any leftover data */
2612 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2613 if (sret != -EBUSY)
2614 return sret;
2615
2616 trace_seq_init(&iter->seq);
2617
2618 /* copy the tracer to avoid using a global lock all around */
2619 mutex_lock(&trace_types_lock);
2620 if (unlikely(old_tracer != current_trace && current_trace)) {
2621 old_tracer = current_trace;
2622 *iter->trace = *current_trace;
3245 } 2623 }
2624 mutex_unlock(&trace_types_lock);
2625
2626 /*
2627 * Avoid more than one consumer on a single file descriptor.
2628 * This is just a matter of trace coherency; the ring buffer itself
2629 * is protected.
2630 */
2631 mutex_lock(&iter->mutex);
2632 if (iter->trace->read) {
2633 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2634 if (sret)
2635 goto out;
2636 }
2637
2638waitagain:
2639 sret = tracing_wait_pipe(filp);
2640 if (sret <= 0)
2641 goto out;
3246 2642
3247 /* stop when tracing is finished */ 2643 /* stop when tracing is finished */
3248 if (trace_empty(iter)) 2644 if (trace_empty(iter)) {
2645 sret = 0;
3249 goto out; 2646 goto out;
2647 }
3250 2648
3251 if (cnt >= PAGE_SIZE) 2649 if (cnt >= PAGE_SIZE)
3252 cnt = PAGE_SIZE - 1; 2650 cnt = PAGE_SIZE - 1;
@@ -3267,8 +2665,8 @@ waitagain:
3267 iter->seq.len = len; 2665 iter->seq.len = len;
3268 break; 2666 break;
3269 } 2667 }
3270 2668 if (ret != TRACE_TYPE_NO_CONSUME)
3271 trace_consume(iter); 2669 trace_consume(iter);
3272 2670
3273 if (iter->seq.len >= cnt) 2671 if (iter->seq.len >= cnt)
3274 break; 2672 break;
@@ -3277,7 +2675,7 @@ waitagain:
3277 /* Now copy what we have to the user */ 2675 /* Now copy what we have to the user */
3278 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 2676 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3279 if (iter->seq.readpos >= iter->seq.len) 2677 if (iter->seq.readpos >= iter->seq.len)
3280 trace_seq_reset(&iter->seq); 2678 trace_seq_init(&iter->seq);
3281 2679
3282 /* 2680 /*
3283 * If there was nothing to send to user, in spite of consuming trace 2681
@@ -3287,11 +2685,148 @@ waitagain:
3287 goto waitagain; 2685 goto waitagain;
3288 2686
3289out: 2687out:
3290 mutex_unlock(&trace_types_lock); 2688 mutex_unlock(&iter->mutex);
3291 2689
3292 return sret; 2690 return sret;
3293} 2691}
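A userspace sketch of the "return any leftover data" step at the top of tracing_read_pipe(): the seq buffer remembers how much has already been handed out, so a short read can continue on the next call before any new entries are consumed. The sizes and sample content are invented.

#include <stdio.h>
#include <string.h>
#include <sys/types.h>

struct seq {
	char buf[128];
	size_t len;		/* bytes produced */
	size_t readpos;		/* bytes already handed to the reader */
};

/* Copy at most cnt unread bytes out of the seq buffer; -1 means "empty". */
static ssize_t seq_to_user(struct seq *s, char *ubuf, size_t cnt)
{
	size_t avail = s->len - s->readpos;

	if (!avail)
		return -1;		/* nothing left, caller must refill */
	if (cnt > avail)
		cnt = avail;
	memcpy(ubuf, s->buf + s->readpos, cnt);
	s->readpos += cnt;
	if (s->readpos >= s->len) {
		s->len = 0;		/* fully drained: reset the buffer */
		s->readpos = 0;
	}
	return (ssize_t)cnt;
}

int main(void)
{
	struct seq s = { "one line of trace output\n", 25, 0 };
	char out[10];
	ssize_t n;

	while ((n = seq_to_user(&s, out, sizeof(out))) > 0)
		fwrite(out, 1, (size_t)n, stdout);
	return 0;
}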
3294 2692
2693static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
2694 struct pipe_buffer *buf)
2695{
2696 __free_page(buf->page);
2697}
2698
2699static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
2700 unsigned int idx)
2701{
2702 __free_page(spd->pages[idx]);
2703}
2704
2705static struct pipe_buf_operations tracing_pipe_buf_ops = {
2706 .can_merge = 0,
2707 .map = generic_pipe_buf_map,
2708 .unmap = generic_pipe_buf_unmap,
2709 .confirm = generic_pipe_buf_confirm,
2710 .release = tracing_pipe_buf_release,
2711 .steal = generic_pipe_buf_steal,
2712 .get = generic_pipe_buf_get,
2713};
2714
2715static size_t
2716tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
2717{
2718 size_t count;
2719 int ret;
2720
2721 /* Seq buffer is page-sized, exactly what we need. */
2722 for (;;) {
2723 count = iter->seq.len;
2724 ret = print_trace_line(iter);
2725 count = iter->seq.len - count;
2726 if (rem < count) {
2727 rem = 0;
2728 iter->seq.len -= count;
2729 break;
2730 }
2731 if (ret == TRACE_TYPE_PARTIAL_LINE) {
2732 iter->seq.len -= count;
2733 break;
2734 }
2735
2736 trace_consume(iter);
2737 rem -= count;
2738 if (!find_next_entry_inc(iter)) {
2739 rem = 0;
2740 iter->ent = NULL;
2741 break;
2742 }
2743 }
2744
2745 return rem;
2746}
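A userspace sketch of the budgeted fill loop above: lines are appended to a page-sized buffer while the remaining splice budget (rem) lasts, and a line that would not fit completely ends the page. The tiny page size and sample lines are made up so the cutoff is easy to see.

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 64	/* tiny "page" for the example */

struct seq { char buf[PAGE_SIZE]; size_t len; };

static size_t fill_page(struct seq *s, const char **lines, size_t nlines,
			size_t rem, size_t *consumed)
{
	size_t i;

	s->len = 0;
	for (i = 0; i < nlines && rem; i++) {
		size_t count = strlen(lines[i]);

		if (count > rem || s->len + count > sizeof(s->buf)) {
			rem = 0;	/* line would overflow: stop here */
			break;
		}
		memcpy(s->buf + s->len, lines[i], count);
		s->len += count;
		rem -= count;
	}
	*consumed = i;
	return rem;
}

int main(void)
{
	const char *lines[] = {
		"cpu0: sched_switch prev=bash next=cc1\n",
		"cpu1: sched_wakeup pid=4711\n",
		"cpu0: sched_switch prev=cc1 next=bash\n",
	};
	struct seq s;
	size_t done, rem = fill_page(&s, lines, 3, 90, &done);

	printf("copied %zu lines, %zu bytes, %zu budget left\n",
	       done, s.len, rem);
	return 0;
}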
2747
2748static ssize_t tracing_splice_read_pipe(struct file *filp,
2749 loff_t *ppos,
2750 struct pipe_inode_info *pipe,
2751 size_t len,
2752 unsigned int flags)
2753{
2754 struct page *pages[PIPE_BUFFERS];
2755 struct partial_page partial[PIPE_BUFFERS];
2756 struct trace_iterator *iter = filp->private_data;
2757 struct splice_pipe_desc spd = {
2758 .pages = pages,
2759 .partial = partial,
2760 .nr_pages = 0, /* This gets updated below. */
2761 .flags = flags,
2762 .ops = &tracing_pipe_buf_ops,
2763 .spd_release = tracing_spd_release_pipe,
2764 };
2765 static struct tracer *old_tracer;
2766 ssize_t ret;
2767 size_t rem;
2768 unsigned int i;
2769
2770 /* copy the tracer to avoid using a global lock all around */
2771 mutex_lock(&trace_types_lock);
2772 if (unlikely(old_tracer != current_trace && current_trace)) {
2773 old_tracer = current_trace;
2774 *iter->trace = *current_trace;
2775 }
2776 mutex_unlock(&trace_types_lock);
2777
2778 mutex_lock(&iter->mutex);
2779
2780 if (iter->trace->splice_read) {
2781 ret = iter->trace->splice_read(iter, filp,
2782 ppos, pipe, len, flags);
2783 if (ret)
2784 goto out_err;
2785 }
2786
2787 ret = tracing_wait_pipe(filp);
2788 if (ret <= 0)
2789 goto out_err;
2790
2791 if (!iter->ent && !find_next_entry_inc(iter)) {
2792 ret = -EFAULT;
2793 goto out_err;
2794 }
2795
2796 /* Fill as many pages as possible. */
2797 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
2798 pages[i] = alloc_page(GFP_KERNEL);
2799 if (!pages[i])
2800 break;
2801
2802 rem = tracing_fill_pipe_page(rem, iter);
2803
2804 /* Copy the data into the page, so we can start over. */
2805 ret = trace_seq_to_buffer(&iter->seq,
2806 page_address(pages[i]),
2807 iter->seq.len);
2808 if (ret < 0) {
2809 __free_page(pages[i]);
2810 break;
2811 }
2812 partial[i].offset = 0;
2813 partial[i].len = iter->seq.len;
2814
2815 trace_seq_init(&iter->seq);
2816 }
2817
2818 mutex_unlock(&iter->mutex);
2819
2820 spd.nr_pages = i;
2821
2822 return splice_to_pipe(pipe, &spd);
2823
2824out_err:
2825 mutex_unlock(&iter->mutex);
2826
2827 return ret;
2828}
2829
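The splice path above fills up to PIPE_BUFFERS pages from the iterator's seq buffer and hands them to splice_to_pipe(). A minimal userspace sketch of consuming that interface, assuming the usual debugfs mount point and an illustrative chunk size:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int pfd[2];
        ssize_t n;
        /* Paths are assumptions; debugfs may be mounted elsewhere. */
        int trace = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
        int out = open("trace.bin", O_WRONLY | O_CREAT | O_TRUNC, 0644);

        if (trace < 0 || out < 0 || pipe(pfd) < 0) {
                perror("setup");
                return 1;
        }
        /* Move data trace_pipe -> pipe -> file; the first splice() exercises
         * the .splice_read callback wired up below. */
        while ((n = splice(trace, NULL, pfd[1], NULL, 65536, 0)) > 0)
                splice(pfd[0], NULL, out, NULL, n, 0);
        return 0;
}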
3295static ssize_t 2830static ssize_t
3296tracing_entries_read(struct file *filp, char __user *ubuf, 2831tracing_entries_read(struct file *filp, char __user *ubuf,
3297 size_t cnt, loff_t *ppos) 2832 size_t cnt, loff_t *ppos)
@@ -3455,6 +2990,7 @@ static struct file_operations tracing_pipe_fops = {
3455 .open = tracing_open_pipe, 2990 .open = tracing_open_pipe,
3456 .poll = tracing_poll_pipe, 2991 .poll = tracing_poll_pipe,
3457 .read = tracing_read_pipe, 2992 .read = tracing_read_pipe,
2993 .splice_read = tracing_splice_read_pipe,
3458 .release = tracing_release_pipe, 2994 .release = tracing_release_pipe,
3459}; 2995};
3460 2996
@@ -3526,15 +3062,345 @@ struct dentry *tracing_init_dentry(void)
3526 return d_tracer; 3062 return d_tracer;
3527} 3063}
3528 3064
3065static struct dentry *d_percpu;
3066
3067struct dentry *tracing_dentry_percpu(void)
3068{
3069 static int once;
3070 struct dentry *d_tracer;
3071
3072 if (d_percpu)
3073 return d_percpu;
3074
3075 d_tracer = tracing_init_dentry();
3076
3077 if (!d_tracer)
3078 return NULL;
3079
3080 d_percpu = debugfs_create_dir("per_cpu", d_tracer);
3081
3082 if (!d_percpu && !once) {
3083 once = 1;
3084 pr_warning("Could not create debugfs directory 'per_cpu'\n");
3085 return NULL;
3086 }
3087
3088 return d_percpu;
3089}
3090
3091static void tracing_init_debugfs_percpu(long cpu)
3092{
3093 struct dentry *d_percpu = tracing_dentry_percpu();
3094 struct dentry *entry, *d_cpu;
3095 /* strlen(cpu) + MAX(log10(cpu)) + '\0' */
3096 char cpu_dir[7];
3097
3098 if (cpu > 999 || cpu < 0)
3099 return;
3100
3101 sprintf(cpu_dir, "cpu%ld", cpu);
3102 d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
3103 if (!d_cpu) {
3104 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
3105 return;
3106 }
3107
3108 /* per cpu trace_pipe */
3109 entry = debugfs_create_file("trace_pipe", 0444, d_cpu,
3110 (void *) cpu, &tracing_pipe_fops);
3111 if (!entry)
3112 pr_warning("Could not create debugfs 'trace_pipe' entry\n");
3113
3114 /* per cpu trace */
3115 entry = debugfs_create_file("trace", 0444, d_cpu,
3116 (void *) cpu, &tracing_fops);
3117 if (!entry)
3118 pr_warning("Could not create debugfs 'trace' entry\n");
3119}
3120
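With the per_cpu directory in place, each CPU gets its own trace and trace_pipe file. A small sketch, assuming debugfs is mounted at its usual location, that streams only CPU 0's events:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe",
                      O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* read() blocks until that CPU produces trace data. */
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                write(STDOUT_FILENO, buf, n);
        close(fd);
        return 0;
}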
3529#ifdef CONFIG_FTRACE_SELFTEST 3121#ifdef CONFIG_FTRACE_SELFTEST
3530/* Let selftest have access to static functions in this file */ 3122/* Let selftest have access to static functions in this file */
3531#include "trace_selftest.c" 3123#include "trace_selftest.c"
3532#endif 3124#endif
3533 3125
3126struct trace_option_dentry {
3127 struct tracer_opt *opt;
3128 struct tracer_flags *flags;
3129 struct dentry *entry;
3130};
3131
3132static ssize_t
3133trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
3134 loff_t *ppos)
3135{
3136 struct trace_option_dentry *topt = filp->private_data;
3137 char *buf;
3138
3139 if (topt->flags->val & topt->opt->bit)
3140 buf = "1\n";
3141 else
3142 buf = "0\n";
3143
3144 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
3145}
3146
3147static ssize_t
3148trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3149 loff_t *ppos)
3150{
3151 struct trace_option_dentry *topt = filp->private_data;
3152 unsigned long val;
3153 char buf[64];
3154 int ret;
3155
3156 if (cnt >= sizeof(buf))
3157 return -EINVAL;
3158
3159 if (copy_from_user(&buf, ubuf, cnt))
3160 return -EFAULT;
3161
3162 buf[cnt] = 0;
3163
3164 ret = strict_strtoul(buf, 10, &val);
3165 if (ret < 0)
3166 return ret;
3167
3168 ret = 0;
3169 switch (val) {
3170 case 0:
3171 /* do nothing if already cleared */
3172 if (!(topt->flags->val & topt->opt->bit))
3173 break;
3174
3175 mutex_lock(&trace_types_lock);
3176 if (current_trace->set_flag)
3177 ret = current_trace->set_flag(topt->flags->val,
3178 topt->opt->bit, 0);
3179 mutex_unlock(&trace_types_lock);
3180 if (ret)
3181 return ret;
3182 topt->flags->val &= ~topt->opt->bit;
3183 break;
3184 case 1:
3185 /* do nothing if already set */
3186 if (topt->flags->val & topt->opt->bit)
3187 break;
3188
3189 mutex_lock(&trace_types_lock);
3190 if (current_trace->set_flag)
3191 ret = current_trace->set_flag(topt->flags->val,
3192 topt->opt->bit, 1);
3193 mutex_unlock(&trace_types_lock);
3194 if (ret)
3195 return ret;
3196 topt->flags->val |= topt->opt->bit;
3197 break;
3198
3199 default:
3200 return -EINVAL;
3201 }
3202
3203 *ppos += cnt;
3204
3205 return cnt;
3206}
3207
3208
3209static const struct file_operations trace_options_fops = {
3210 .open = tracing_open_generic,
3211 .read = trace_options_read,
3212 .write = trace_options_write,
3213};
3214
3215static ssize_t
3216trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
3217 loff_t *ppos)
3218{
3219 long index = (long)filp->private_data;
3220 char *buf;
3221
3222 if (trace_flags & (1 << index))
3223 buf = "1\n";
3224 else
3225 buf = "0\n";
3226
3227 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
3228}
3229
3230static ssize_t
3231trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
3232 loff_t *ppos)
3233{
3234 long index = (long)filp->private_data;
3235 char buf[64];
3236 unsigned long val;
3237 int ret;
3238
3239 if (cnt >= sizeof(buf))
3240 return -EINVAL;
3241
3242 if (copy_from_user(&buf, ubuf, cnt))
3243 return -EFAULT;
3244
3245 buf[cnt] = 0;
3246
3247 ret = strict_strtoul(buf, 10, &val);
3248 if (ret < 0)
3249 return ret;
3250
3251 switch (val) {
3252 case 0:
3253 trace_flags &= ~(1 << index);
3254 break;
3255 case 1:
3256 trace_flags |= 1 << index;
3257 break;
3258
3259 default:
3260 return -EINVAL;
3261 }
3262
3263 *ppos += cnt;
3264
3265 return cnt;
3266}
3267
3268static const struct file_operations trace_options_core_fops = {
3269 .open = tracing_open_generic,
3270 .read = trace_options_core_read,
3271 .write = trace_options_core_write,
3272};
3273
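Both write handlers accept only "0" or "1": trace_options_write() toggles a tracer-private flag through set_flag(), while trace_options_core_write() flips a bit in the global trace_flags. A hedged userspace sketch (the option name is illustrative and debugfs is assumed at its usual mount point):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/sys/kernel/debug/tracing/options/sym-addr", O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* Anything other than "0" or "1" is rejected with -EINVAL by the
         * strict_strtoul()-based parsing above. */
        if (write(fd, "1", 1) != 1)
                perror("write");
        close(fd);
        return 0;
}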
3274static struct dentry *trace_options_init_dentry(void)
3275{
3276 struct dentry *d_tracer;
3277 static struct dentry *t_options;
3278
3279 if (t_options)
3280 return t_options;
3281
3282 d_tracer = tracing_init_dentry();
3283 if (!d_tracer)
3284 return NULL;
3285
3286 t_options = debugfs_create_dir("options", d_tracer);
3287 if (!t_options) {
3288 pr_warning("Could not create debugfs directory 'options'\n");
3289 return NULL;
3290 }
3291
3292 return t_options;
3293}
3294
3295static void
3296create_trace_option_file(struct trace_option_dentry *topt,
3297 struct tracer_flags *flags,
3298 struct tracer_opt *opt)
3299{
3300 struct dentry *t_options;
3301 struct dentry *entry;
3302
3303 t_options = trace_options_init_dentry();
3304 if (!t_options)
3305 return;
3306
3307 topt->flags = flags;
3308 topt->opt = opt;
3309
3310 entry = debugfs_create_file(opt->name, 0644, t_options, topt,
3311 &trace_options_fops);
3312
3313 topt->entry = entry;
3314
3315}
3316
3317static struct trace_option_dentry *
3318create_trace_option_files(struct tracer *tracer)
3319{
3320 struct trace_option_dentry *topts;
3321 struct tracer_flags *flags;
3322 struct tracer_opt *opts;
3323 int cnt;
3324
3325 if (!tracer)
3326 return NULL;
3327
3328 flags = tracer->flags;
3329
3330 if (!flags || !flags->opts)
3331 return NULL;
3332
3333 opts = flags->opts;
3334
3335 for (cnt = 0; opts[cnt].name; cnt++)
3336 ;
3337
3338 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
3339 if (!topts)
3340 return NULL;
3341
3342 for (cnt = 0; opts[cnt].name; cnt++)
3343 create_trace_option_file(&topts[cnt], flags,
3344 &opts[cnt]);
3345
3346 return topts;
3347}
3348
3349static void
3350destroy_trace_option_files(struct trace_option_dentry *topts)
3351{
3352 int cnt;
3353
3354 if (!topts)
3355 return;
3356
3357 for (cnt = 0; topts[cnt].opt; cnt++) {
3358 if (topts[cnt].entry)
3359 debugfs_remove(topts[cnt].entry);
3360 }
3361
3362 kfree(topts);
3363}
3364
3365static struct dentry *
3366create_trace_option_core_file(const char *option, long index)
3367{
3368 struct dentry *t_options;
3369 struct dentry *entry;
3370
3371 t_options = trace_options_init_dentry();
3372 if (!t_options)
3373 return NULL;
3374
3375 entry = debugfs_create_file(option, 0644, t_options, (void *)index,
3376 &trace_options_core_fops);
3377
3378 return entry;
3379}
3380
3381static __init void create_trace_options_dir(void)
3382{
3383 struct dentry *t_options;
3384 struct dentry *entry;
3385 int i;
3386
3387 t_options = trace_options_init_dentry();
3388 if (!t_options)
3389 return;
3390
3391 for (i = 0; trace_options[i]; i++) {
3392 entry = create_trace_option_core_file(trace_options[i], i);
3393 if (!entry)
3394 pr_warning("Could not create debugfs %s entry\n",
3395 trace_options[i]);
3396 }
3397}
3398
3534static __init int tracer_init_debugfs(void) 3399static __init int tracer_init_debugfs(void)
3535{ 3400{
3536 struct dentry *d_tracer; 3401 struct dentry *d_tracer;
3537 struct dentry *entry; 3402 struct dentry *entry;
3403 int cpu;
3538 3404
3539 d_tracer = tracing_init_dentry(); 3405 d_tracer = tracing_init_dentry();
3540 3406
@@ -3548,18 +3414,15 @@ static __init int tracer_init_debugfs(void)
3548 if (!entry) 3414 if (!entry)
3549 pr_warning("Could not create debugfs 'trace_options' entry\n"); 3415 pr_warning("Could not create debugfs 'trace_options' entry\n");
3550 3416
3417 create_trace_options_dir();
3418
3551 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, 3419 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
3552 NULL, &tracing_cpumask_fops); 3420 NULL, &tracing_cpumask_fops);
3553 if (!entry) 3421 if (!entry)
3554 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); 3422 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
3555 3423
3556 entry = debugfs_create_file("latency_trace", 0444, d_tracer,
3557 &global_trace, &tracing_lt_fops);
3558 if (!entry)
3559 pr_warning("Could not create debugfs 'latency_trace' entry\n");
3560
3561 entry = debugfs_create_file("trace", 0444, d_tracer, 3424 entry = debugfs_create_file("trace", 0444, d_tracer,
3562 &global_trace, &tracing_fops); 3425 (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
3563 if (!entry) 3426 if (!entry)
3564 pr_warning("Could not create debugfs 'trace' entry\n"); 3427 pr_warning("Could not create debugfs 'trace' entry\n");
3565 3428
@@ -3590,8 +3453,8 @@ static __init int tracer_init_debugfs(void)
3590 if (!entry) 3453 if (!entry)
3591 pr_warning("Could not create debugfs 'README' entry\n"); 3454 pr_warning("Could not create debugfs 'README' entry\n");
3592 3455
3593 entry = debugfs_create_file("trace_pipe", 0644, d_tracer, 3456 entry = debugfs_create_file("trace_pipe", 0444, d_tracer,
3594 NULL, &tracing_pipe_fops); 3457 (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
3595 if (!entry) 3458 if (!entry)
3596 pr_warning("Could not create debugfs " 3459 pr_warning("Could not create debugfs "
3597 "'trace_pipe' entry\n"); 3460 "'trace_pipe' entry\n");
@@ -3619,6 +3482,10 @@ static __init int tracer_init_debugfs(void)
3619#ifdef CONFIG_SYSPROF_TRACER 3482#ifdef CONFIG_SYSPROF_TRACER
3620 init_tracer_sysprof_debugfs(d_tracer); 3483 init_tracer_sysprof_debugfs(d_tracer);
3621#endif 3484#endif
3485
3486 for_each_tracing_cpu(cpu)
3487 tracing_init_debugfs_percpu(cpu);
3488
3622 return 0; 3489 return 0;
3623} 3490}
3624 3491
@@ -3653,18 +3520,16 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
3653 trace_buf[len] = 0; 3520 trace_buf[len] = 0;
3654 3521
3655 size = sizeof(*entry) + len + 1; 3522 size = sizeof(*entry) + len + 1;
3656 event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags); 3523 event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc);
3657 if (!event) 3524 if (!event)
3658 goto out_unlock; 3525 goto out_unlock;
3659 entry = ring_buffer_event_data(event); 3526 entry = ring_buffer_event_data(event);
3660 tracing_generic_entry_update(&entry->ent, irq_flags, pc);
3661 entry->ent.type = TRACE_PRINT;
3662 entry->ip = ip; 3527 entry->ip = ip;
3663 entry->depth = depth; 3528 entry->depth = depth;
3664 3529
3665 memcpy(&entry->buf, trace_buf, len); 3530 memcpy(&entry->buf, trace_buf, len);
3666 entry->buf[len] = 0; 3531 entry->buf[len] = 0;
3667 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 3532 ring_buffer_unlock_commit(tr->buffer, event);
3668 3533
3669 out_unlock: 3534 out_unlock:
3670 spin_unlock_irqrestore(&trace_buf_lock, irq_flags); 3535 spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
@@ -3691,6 +3556,15 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
3691} 3556}
3692EXPORT_SYMBOL_GPL(__ftrace_printk); 3557EXPORT_SYMBOL_GPL(__ftrace_printk);
3693 3558
3559int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
3560{
3561 if (!(trace_flags & TRACE_ITER_PRINTK))
3562 return 0;
3563
3564 return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
3565}
3566EXPORT_SYMBOL_GPL(__ftrace_vprintk);
3567
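__ftrace_vprintk() lets a caller forward its own va_list into the trace buffer. A hypothetical caller-side wrapper, not part of this patch (it assumes the usual kernel headers; _THIS_IP_ is the standard helper for the current instruction pointer):

static int my_trace_printf(const char *fmt, ...)
{
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = __ftrace_vprintk(_THIS_IP_, fmt, ap);
        va_end(ap);

        return ret;
}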
3694static int trace_panic_handler(struct notifier_block *this, 3568static int trace_panic_handler(struct notifier_block *this,
3695 unsigned long event, void *unused) 3569 unsigned long event, void *unused)
3696{ 3570{
@@ -3750,7 +3624,7 @@ trace_printk_seq(struct trace_seq *s)
3750 3624
3751 printk(KERN_TRACE "%s", s->buffer); 3625 printk(KERN_TRACE "%s", s->buffer);
3752 3626
3753 trace_seq_reset(s); 3627 trace_seq_init(s);
3754} 3628}
3755 3629
3756void ftrace_dump(void) 3630void ftrace_dump(void)
@@ -3835,8 +3709,12 @@ __init static int tracer_alloc_buffers(void)
3835 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) 3709 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
3836 goto out_free_buffer_mask; 3710 goto out_free_buffer_mask;
3837 3711
3712 if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
3713 goto out_free_tracing_cpumask;
3714
3838 cpumask_copy(tracing_buffer_mask, cpu_possible_mask); 3715 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
3839 cpumask_copy(tracing_cpumask, cpu_all_mask); 3716 cpumask_copy(tracing_cpumask, cpu_all_mask);
3717 cpumask_clear(tracing_reader_cpumask);
3840 3718
3841 /* TODO: make the number of buffers hot pluggable with CPUS */ 3719 /* TODO: make the number of buffers hot pluggable with CPUS */
3842 global_trace.buffer = ring_buffer_alloc(trace_buf_size, 3720 global_trace.buffer = ring_buffer_alloc(trace_buf_size,
@@ -3871,14 +3749,10 @@ __init static int tracer_alloc_buffers(void)
3871 trace_init_cmdlines(); 3749 trace_init_cmdlines();
3872 3750
3873 register_tracer(&nop_trace); 3751 register_tracer(&nop_trace);
3752 current_trace = &nop_trace;
3874#ifdef CONFIG_BOOT_TRACER 3753#ifdef CONFIG_BOOT_TRACER
3875 register_tracer(&boot_tracer); 3754 register_tracer(&boot_tracer);
3876 current_trace = &boot_tracer;
3877 current_trace->init(&global_trace);
3878#else
3879 current_trace = &nop_trace;
3880#endif 3755#endif
3881
3882 /* All seems OK, enable tracing */ 3756 /* All seems OK, enable tracing */
3883 tracing_disabled = 0; 3757 tracing_disabled = 0;
3884 3758
@@ -3889,11 +3763,34 @@ __init static int tracer_alloc_buffers(void)
3889 ret = 0; 3763 ret = 0;
3890 3764
3891out_free_cpumask: 3765out_free_cpumask:
3766 free_cpumask_var(tracing_reader_cpumask);
3767out_free_tracing_cpumask:
3892 free_cpumask_var(tracing_cpumask); 3768 free_cpumask_var(tracing_cpumask);
3893out_free_buffer_mask: 3769out_free_buffer_mask:
3894 free_cpumask_var(tracing_buffer_mask); 3770 free_cpumask_var(tracing_buffer_mask);
3895out: 3771out:
3896 return ret; 3772 return ret;
3897} 3773}
3774
3775__init static int clear_boot_tracer(void)
3776{
3777 /*
3778 * The default tracer at boot buffer is an init section.
3779 * This function is called in lateinit. If we did not
3780 * find the boot tracer, then clear it out, to prevent
3781 * later registration from accessing the buffer that is
3782 * about to be freed.
3783 */
3784 if (!default_bootup_tracer)
3785 return 0;
3786
3787 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
3788 default_bootup_tracer);
3789 default_bootup_tracer = NULL;
3790
3791 return 0;
3792}
3793
3898early_initcall(tracer_alloc_buffers); 3794early_initcall(tracer_alloc_buffers);
3899fs_initcall(tracer_init_debugfs); 3795fs_initcall(tracer_init_debugfs);
3796late_initcall(clear_boot_tracer);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4d3d381bfd95..e606633fb498 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,6 +9,8 @@
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <trace/boot.h> 11#include <trace/boot.h>
12#include <trace/kmemtrace.h>
13#include <trace/power.h>
12 14
13enum trace_type { 15enum trace_type {
14 __TRACE_FIRST_TYPE = 0, 16 __TRACE_FIRST_TYPE = 0,
@@ -16,7 +18,6 @@ enum trace_type {
16 TRACE_FN, 18 TRACE_FN,
17 TRACE_CTX, 19 TRACE_CTX,
18 TRACE_WAKE, 20 TRACE_WAKE,
19 TRACE_CONT,
20 TRACE_STACK, 21 TRACE_STACK,
21 TRACE_PRINT, 22 TRACE_PRINT,
22 TRACE_SPECIAL, 23 TRACE_SPECIAL,
@@ -29,9 +30,12 @@ enum trace_type {
29 TRACE_GRAPH_ENT, 30 TRACE_GRAPH_ENT,
30 TRACE_USER_STACK, 31 TRACE_USER_STACK,
31 TRACE_HW_BRANCHES, 32 TRACE_HW_BRANCHES,
33 TRACE_KMEM_ALLOC,
34 TRACE_KMEM_FREE,
32 TRACE_POWER, 35 TRACE_POWER,
36 TRACE_BLK,
33 37
34 __TRACE_LAST_TYPE 38 __TRACE_LAST_TYPE,
35}; 39};
36 40
37/* 41/*
@@ -42,7 +46,6 @@ enum trace_type {
42 */ 46 */
43struct trace_entry { 47struct trace_entry {
44 unsigned char type; 48 unsigned char type;
45 unsigned char cpu;
46 unsigned char flags; 49 unsigned char flags;
47 unsigned char preempt_count; 50 unsigned char preempt_count;
48 int pid; 51 int pid;
@@ -60,13 +63,13 @@ struct ftrace_entry {
60 63
61/* Function call entry */ 64/* Function call entry */
62struct ftrace_graph_ent_entry { 65struct ftrace_graph_ent_entry {
63 struct trace_entry ent; 66 struct trace_entry ent;
64 struct ftrace_graph_ent graph_ent; 67 struct ftrace_graph_ent graph_ent;
65}; 68};
66 69
67/* Function return entry */ 70/* Function return entry */
68struct ftrace_graph_ret_entry { 71struct ftrace_graph_ret_entry {
69 struct trace_entry ent; 72 struct trace_entry ent;
70 struct ftrace_graph_ret ret; 73 struct ftrace_graph_ret ret;
71}; 74};
72extern struct tracer boot_tracer; 75extern struct tracer boot_tracer;
@@ -170,6 +173,24 @@ struct trace_power {
170 struct power_trace state_data; 173 struct power_trace state_data;
171}; 174};
172 175
176struct kmemtrace_alloc_entry {
177 struct trace_entry ent;
178 enum kmemtrace_type_id type_id;
179 unsigned long call_site;
180 const void *ptr;
181 size_t bytes_req;
182 size_t bytes_alloc;
183 gfp_t gfp_flags;
184 int node;
185};
186
187struct kmemtrace_free_entry {
188 struct trace_entry ent;
189 enum kmemtrace_type_id type_id;
190 unsigned long call_site;
191 const void *ptr;
192};
193
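A hedged sketch, not taken from this patch, of how a kmemtrace allocation event could be recorded through the trace_buffer_lock_reserve()/trace_buffer_unlock_commit() helpers declared further down in this header (irq flags and preempt count are simplified to 0 here):

static void sketch_kmem_alloc_event(struct trace_array *tr,
                                    enum kmemtrace_type_id type_id,
                                    unsigned long call_site, const void *ptr,
                                    size_t req, size_t alloc,
                                    gfp_t gfp_flags, int node)
{
        struct ring_buffer_event *event;
        struct kmemtrace_alloc_entry *entry;

        event = trace_buffer_lock_reserve(tr, TRACE_KMEM_ALLOC,
                                          sizeof(*entry), 0, 0);
        if (!event)
                return;
        entry = ring_buffer_event_data(event);
        entry->type_id     = type_id;
        entry->call_site   = call_site;
        entry->ptr         = ptr;
        entry->bytes_req   = req;
        entry->bytes_alloc = alloc;
        entry->gfp_flags   = gfp_flags;
        entry->node        = node;
        trace_buffer_unlock_commit(tr, event, 0, 0);
}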
173/* 194/*
174 * trace_flag_type is an enumeration that holds different 195 * trace_flag_type is an enumeration that holds different
175 * states when a trace occurs. These are: 196 * states when a trace occurs. These are:
@@ -178,7 +199,6 @@ struct trace_power {
178 * NEED_RESCED - reschedule is requested 199 * NEED_RESCED - reschedule is requested
179 * HARDIRQ - inside an interrupt handler 200 * HARDIRQ - inside an interrupt handler
180 * SOFTIRQ - inside a softirq handler 201 * SOFTIRQ - inside a softirq handler
181 * CONT - multiple entries hold the trace item
182 */ 202 */
183enum trace_flag_type { 203enum trace_flag_type {
184 TRACE_FLAG_IRQS_OFF = 0x01, 204 TRACE_FLAG_IRQS_OFF = 0x01,
@@ -186,7 +206,6 @@ enum trace_flag_type {
186 TRACE_FLAG_NEED_RESCHED = 0x04, 206 TRACE_FLAG_NEED_RESCHED = 0x04,
187 TRACE_FLAG_HARDIRQ = 0x08, 207 TRACE_FLAG_HARDIRQ = 0x08,
188 TRACE_FLAG_SOFTIRQ = 0x10, 208 TRACE_FLAG_SOFTIRQ = 0x10,
189 TRACE_FLAG_CONT = 0x20,
190}; 209};
191 210
192#define TRACE_BUF_SIZE 1024 211#define TRACE_BUF_SIZE 1024
@@ -262,7 +281,6 @@ extern void __ftrace_bad_type(void);
262 do { \ 281 do { \
263 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ 282 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \
264 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ 283 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
265 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
266 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ 284 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
267 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ 285 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
268 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ 286 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
@@ -280,6 +298,10 @@ extern void __ftrace_bad_type(void);
280 TRACE_GRAPH_RET); \ 298 TRACE_GRAPH_RET); \
281 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ 299 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
282 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ 300 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
301 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
302 TRACE_KMEM_ALLOC); \
303 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
304 TRACE_KMEM_FREE); \
283 __ftrace_bad_type(); \ 305 __ftrace_bad_type(); \
284 } while (0) 306 } while (0)
285 307
@@ -287,7 +309,8 @@ extern void __ftrace_bad_type(void);
287enum print_line_t { 309enum print_line_t {
288 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ 310 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
289 TRACE_TYPE_HANDLED = 1, 311 TRACE_TYPE_HANDLED = 1,
290 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ 312 TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */
313 TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */
291}; 314};
292 315
293 316
@@ -313,22 +336,45 @@ struct tracer_flags {
313/* Makes more easy to define a tracer opt */ 336/* Makes more easy to define a tracer opt */
314#define TRACER_OPT(s, b) .name = #s, .bit = b 337#define TRACER_OPT(s, b) .name = #s, .bit = b
315 338
316/* 339
317 * A specific tracer, represented by methods that operate on a trace array: 340/**
341 * struct tracer - a specific tracer and its callbacks to interact with debugfs
342 * @name: the name chosen to select it on the available_tracers file
343 * @init: called when one switches to this tracer (echo name > current_tracer)
344 * @reset: called when one switches to another tracer
345 * @start: called when tracing is unpaused (echo 1 > tracing_enabled)
346 * @stop: called when tracing is paused (echo 0 > tracing_enabled)
347 * @open: called when the trace file is opened
348 * @pipe_open: called when the trace_pipe file is opened
349 * @wait_pipe: override how the user waits for traces on trace_pipe
350 * @close: called when the trace file is released
351 * @read: override the default read callback on trace_pipe
352 * @splice_read: override the default splice_read callback on trace_pipe
353 * @selftest: selftest to run on boot (see trace_selftest.c)
354 * @print_headers: override the first lines that describe your columns
355 * @print_line: callback that prints a trace
356 * @set_flag: signals one of your private flags changed (trace_options file)
357 * @flags: your private flags
318 */ 358 */
319struct tracer { 359struct tracer {
320 const char *name; 360 const char *name;
321 /* Your tracer should raise a warning if init fails */
322 int (*init)(struct trace_array *tr); 361 int (*init)(struct trace_array *tr);
323 void (*reset)(struct trace_array *tr); 362 void (*reset)(struct trace_array *tr);
324 void (*start)(struct trace_array *tr); 363 void (*start)(struct trace_array *tr);
325 void (*stop)(struct trace_array *tr); 364 void (*stop)(struct trace_array *tr);
326 void (*open)(struct trace_iterator *iter); 365 void (*open)(struct trace_iterator *iter);
327 void (*pipe_open)(struct trace_iterator *iter); 366 void (*pipe_open)(struct trace_iterator *iter);
367 void (*wait_pipe)(struct trace_iterator *iter);
328 void (*close)(struct trace_iterator *iter); 368 void (*close)(struct trace_iterator *iter);
329 ssize_t (*read)(struct trace_iterator *iter, 369 ssize_t (*read)(struct trace_iterator *iter,
330 struct file *filp, char __user *ubuf, 370 struct file *filp, char __user *ubuf,
331 size_t cnt, loff_t *ppos); 371 size_t cnt, loff_t *ppos);
372 ssize_t (*splice_read)(struct trace_iterator *iter,
373 struct file *filp,
374 loff_t *ppos,
375 struct pipe_inode_info *pipe,
376 size_t len,
377 unsigned int flags);
332#ifdef CONFIG_FTRACE_STARTUP_TEST 378#ifdef CONFIG_FTRACE_STARTUP_TEST
333 int (*selftest)(struct tracer *trace, 379 int (*selftest)(struct tracer *trace,
334 struct trace_array *tr); 380 struct trace_array *tr);
@@ -340,6 +386,7 @@ struct tracer {
340 struct tracer *next; 386 struct tracer *next;
341 int print_max; 387 int print_max;
342 struct tracer_flags *flags; 388 struct tracer_flags *flags;
389 struct tracer_stat *stats;
343}; 390};
344 391
345struct trace_seq { 392struct trace_seq {
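Given the callbacks documented above, a minimal plugin needs little more than a name, an init and a reset before calling register_tracer(). A sketch under those assumptions (all names are illustrative):

static int noop_like_init(struct trace_array *tr)
{
        return 0;       /* non-zero here signals that init failed */
}

static void noop_like_reset(struct trace_array *tr)
{
}

static struct tracer noop_like_trace __read_mostly = {
        .name   = "noop_like",
        .init   = noop_like_init,
        .reset  = noop_like_reset,
};

static __init int noop_like_register(void)
{
        return register_tracer(&noop_like_trace);
}
device_initcall(noop_like_register);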
@@ -348,6 +395,16 @@ struct trace_seq {
348 unsigned int readpos; 395 unsigned int readpos;
349}; 396};
350 397
398static inline void
399trace_seq_init(struct trace_seq *s)
400{
401 s->len = 0;
402 s->readpos = 0;
403}
404
405
406#define TRACE_PIPE_ALL_CPU -1
407
351/* 408/*
352 * Trace iterator - used by printout routines who present trace 409 * Trace iterator - used by printout routines who present trace
353 * results to users and which routines might sleep, etc: 410 * results to users and which routines might sleep, etc:
@@ -356,6 +413,8 @@ struct trace_iterator {
356 struct trace_array *tr; 413 struct trace_array *tr;
357 struct tracer *trace; 414 struct tracer *trace;
358 void *private; 415 void *private;
416 int cpu_file;
417 struct mutex mutex;
359 struct ring_buffer_iter *buffer_iter[NR_CPUS]; 418 struct ring_buffer_iter *buffer_iter[NR_CPUS];
360 419
361 /* The below is zeroed out in pipe_read */ 420 /* The below is zeroed out in pipe_read */
@@ -371,6 +430,7 @@ struct trace_iterator {
371 cpumask_var_t started; 430 cpumask_var_t started;
372}; 431};
373 432
433int tracer_init(struct tracer *t, struct trace_array *tr);
374int tracing_is_enabled(void); 434int tracing_is_enabled(void);
375void trace_wake_up(void); 435void trace_wake_up(void);
376void tracing_reset(struct trace_array *tr, int cpu); 436void tracing_reset(struct trace_array *tr, int cpu);
@@ -379,26 +439,48 @@ int tracing_open_generic(struct inode *inode, struct file *filp);
379struct dentry *tracing_init_dentry(void); 439struct dentry *tracing_init_dentry(void);
380void init_tracer_sysprof_debugfs(struct dentry *d_tracer); 440void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
381 441
442struct ring_buffer_event;
443
444struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
445 unsigned char type,
446 unsigned long len,
447 unsigned long flags,
448 int pc);
449void trace_buffer_unlock_commit(struct trace_array *tr,
450 struct ring_buffer_event *event,
451 unsigned long flags, int pc);
452
453struct ring_buffer_event *
454trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
455 unsigned long flags, int pc);
456void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
457 unsigned long flags, int pc);
458
382struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, 459struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
383 struct trace_array_cpu *data); 460 struct trace_array_cpu *data);
461
462struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
463 int *ent_cpu, u64 *ent_ts);
464
384void tracing_generic_entry_update(struct trace_entry *entry, 465void tracing_generic_entry_update(struct trace_entry *entry,
385 unsigned long flags, 466 unsigned long flags,
386 int pc); 467 int pc);
387 468
469void default_wait_pipe(struct trace_iterator *iter);
470void poll_wait_pipe(struct trace_iterator *iter);
471
388void ftrace(struct trace_array *tr, 472void ftrace(struct trace_array *tr,
389 struct trace_array_cpu *data, 473 struct trace_array_cpu *data,
390 unsigned long ip, 474 unsigned long ip,
391 unsigned long parent_ip, 475 unsigned long parent_ip,
392 unsigned long flags, int pc); 476 unsigned long flags, int pc);
393void tracing_sched_switch_trace(struct trace_array *tr, 477void tracing_sched_switch_trace(struct trace_array *tr,
394 struct trace_array_cpu *data,
395 struct task_struct *prev, 478 struct task_struct *prev,
396 struct task_struct *next, 479 struct task_struct *next,
397 unsigned long flags, int pc); 480 unsigned long flags, int pc);
398void tracing_record_cmdline(struct task_struct *tsk); 481void tracing_record_cmdline(struct task_struct *tsk);
399 482
400void tracing_sched_wakeup_trace(struct trace_array *tr, 483void tracing_sched_wakeup_trace(struct trace_array *tr,
401 struct trace_array_cpu *data,
402 struct task_struct *wakee, 484 struct task_struct *wakee,
403 struct task_struct *cur, 485 struct task_struct *cur,
404 unsigned long flags, int pc); 486 unsigned long flags, int pc);
@@ -408,14 +490,12 @@ void trace_special(struct trace_array *tr,
408 unsigned long arg2, 490 unsigned long arg2,
409 unsigned long arg3, int pc); 491 unsigned long arg3, int pc);
410void trace_function(struct trace_array *tr, 492void trace_function(struct trace_array *tr,
411 struct trace_array_cpu *data,
412 unsigned long ip, 493 unsigned long ip,
413 unsigned long parent_ip, 494 unsigned long parent_ip,
414 unsigned long flags, int pc); 495 unsigned long flags, int pc);
415 496
416void trace_graph_return(struct ftrace_graph_ret *trace); 497void trace_graph_return(struct ftrace_graph_ret *trace);
417int trace_graph_entry(struct ftrace_graph_ent *trace); 498int trace_graph_entry(struct ftrace_graph_ent *trace);
418void trace_hw_branch(struct trace_array *tr, u64 from, u64 to);
419 499
420void tracing_start_cmdline_record(void); 500void tracing_start_cmdline_record(void);
421void tracing_stop_cmdline_record(void); 501void tracing_stop_cmdline_record(void);
@@ -434,15 +514,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
434void update_max_tr_single(struct trace_array *tr, 514void update_max_tr_single(struct trace_array *tr,
435 struct task_struct *tsk, int cpu); 515 struct task_struct *tsk, int cpu);
436 516
437extern cycle_t ftrace_now(int cpu); 517void __trace_stack(struct trace_array *tr,
518 unsigned long flags,
519 int skip, int pc);
438 520
439#ifdef CONFIG_FUNCTION_TRACER 521extern cycle_t ftrace_now(int cpu);
440void tracing_start_function_trace(void);
441void tracing_stop_function_trace(void);
442#else
443# define tracing_start_function_trace() do { } while (0)
444# define tracing_stop_function_trace() do { } while (0)
445#endif
446 522
447#ifdef CONFIG_CONTEXT_SWITCH_TRACER 523#ifdef CONFIG_CONTEXT_SWITCH_TRACER
448typedef void 524typedef void
@@ -456,10 +532,10 @@ struct tracer_switch_ops {
456 void *private; 532 void *private;
457 struct tracer_switch_ops *next; 533 struct tracer_switch_ops *next;
458}; 534};
459
460char *trace_find_cmdline(int pid);
461#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 535#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
462 536
537extern char *trace_find_cmdline(int pid);
538
463#ifdef CONFIG_DYNAMIC_FTRACE 539#ifdef CONFIG_DYNAMIC_FTRACE
464extern unsigned long ftrace_update_tot_cnt; 540extern unsigned long ftrace_update_tot_cnt;
465#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func 541#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
@@ -469,6 +545,8 @@ extern int DYN_FTRACE_TEST_NAME(void);
469#ifdef CONFIG_FTRACE_STARTUP_TEST 545#ifdef CONFIG_FTRACE_STARTUP_TEST
470extern int trace_selftest_startup_function(struct tracer *trace, 546extern int trace_selftest_startup_function(struct tracer *trace,
471 struct trace_array *tr); 547 struct trace_array *tr);
548extern int trace_selftest_startup_function_graph(struct tracer *trace,
549 struct trace_array *tr);
472extern int trace_selftest_startup_irqsoff(struct tracer *trace, 550extern int trace_selftest_startup_irqsoff(struct tracer *trace,
473 struct trace_array *tr); 551 struct trace_array *tr);
474extern int trace_selftest_startup_preemptoff(struct tracer *trace, 552extern int trace_selftest_startup_preemptoff(struct tracer *trace,
@@ -488,15 +566,6 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
488#endif /* CONFIG_FTRACE_STARTUP_TEST */ 566#endif /* CONFIG_FTRACE_STARTUP_TEST */
489 567
490extern void *head_page(struct trace_array_cpu *data); 568extern void *head_page(struct trace_array_cpu *data);
491extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
492extern void trace_seq_print_cont(struct trace_seq *s,
493 struct trace_iterator *iter);
494
495extern int
496seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
497 unsigned long sym_flags);
498extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
499 size_t cnt);
500extern long ns2usecs(cycle_t nsec); 569extern long ns2usecs(cycle_t nsec);
501extern int 570extern int
502trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); 571trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
@@ -580,7 +649,8 @@ enum trace_iterator_flags {
580 TRACE_ITER_ANNOTATE = 0x2000, 649 TRACE_ITER_ANNOTATE = 0x2000,
581 TRACE_ITER_USERSTACKTRACE = 0x4000, 650 TRACE_ITER_USERSTACKTRACE = 0x4000,
582 TRACE_ITER_SYM_USEROBJ = 0x8000, 651 TRACE_ITER_SYM_USEROBJ = 0x8000,
583 TRACE_ITER_PRINTK_MSGONLY = 0x10000 652 TRACE_ITER_PRINTK_MSGONLY = 0x10000,
653 TRACE_ITER_CONTEXT_INFO = 0x20000 /* Print pid/cpu/time */
584}; 654};
585 655
586/* 656/*
@@ -601,12 +671,12 @@ extern struct tracer nop_trace;
601 * preempt_enable (after a disable), a schedule might take place 671 * preempt_enable (after a disable), a schedule might take place
602 * causing an infinite recursion. 672 * causing an infinite recursion.
603 * 673 *
604 * To prevent this, we read the need_recshed flag before 674 * To prevent this, we read the need_resched flag before
605 * disabling preemption. When we want to enable preemption we 675 * disabling preemption. When we want to enable preemption we
606 * check the flag, if it is set, then we call preempt_enable_no_resched. 676 * check the flag, if it is set, then we call preempt_enable_no_resched.
607 * Otherwise, we call preempt_enable. 677 * Otherwise, we call preempt_enable.
608 * 678 *
609 * The rational for doing the above is that if need resched is set 679 * The rational for doing the above is that if need_resched is set
610 * and we have yet to reschedule, we are either in an atomic location 680 * and we have yet to reschedule, we are either in an atomic location
611 * (where we do not need to check for scheduling) or we are inside 681 * (where we do not need to check for scheduling) or we are inside
612 * the scheduler and do not want to resched. 682 * the scheduler and do not want to resched.
@@ -627,7 +697,7 @@ static inline int ftrace_preempt_disable(void)
627 * 697 *
628 * This is a scheduler safe way to enable preemption and not miss 698 * This is a scheduler safe way to enable preemption and not miss
629 * any preemption checks. The disabled saved the state of preemption. 699 * any preemption checks. The disabled saved the state of preemption.
630 * If resched is set, then we were either inside an atomic or 700 * If resched is set, then we are either inside an atomic or
631 * are inside the scheduler (we would have already scheduled 701 * are inside the scheduler (we would have already scheduled
632 * otherwise). In this case, we do not want to call normal 702 * otherwise). In this case, we do not want to call normal
633 * preempt_enable, but preempt_enable_no_resched instead. 703 * preempt_enable, but preempt_enable_no_resched instead.
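A short sketch of the pattern these comments describe, as a hypothetical tracing callback would use it: remember whether NEED_RESCHED was already set before disabling preemption, then re-enable accordingly.

static void my_trace_hook(unsigned long ip, unsigned long parent_ip)
{
        int resched;

        resched = ftrace_preempt_disable();

        /* ... record the event; nothing here may schedule ... */

        ftrace_preempt_enable(resched);
}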
@@ -664,4 +734,31 @@ static inline void trace_branch_disable(void)
664} 734}
665#endif /* CONFIG_BRANCH_TRACER */ 735#endif /* CONFIG_BRANCH_TRACER */
666 736
737/* trace event type bit fields, not numeric */
738enum {
739 TRACE_EVENT_TYPE_PRINTF = 1,
740 TRACE_EVENT_TYPE_RAW = 2,
741};
742
743struct ftrace_event_call {
744 char *name;
745 char *system;
746 struct dentry *dir;
747 int enabled;
748 int (*regfunc)(void);
749 void (*unregfunc)(void);
750 int id;
751 struct dentry *raw_dir;
752 int raw_enabled;
753 int type;
754 int (*raw_init)(void);
755 int (*raw_reg)(void);
756 void (*raw_unreg)(void);
757 int (*show_format)(struct trace_seq *s);
758};
759
760void event_trace_printk(unsigned long ip, const char *fmt, ...);
761extern struct ftrace_event_call __start_ftrace_events[];
762extern struct ftrace_event_call __stop_ftrace_events[];
763
667#endif /* _LINUX_KERNEL_TRACE_H */ 764#endif /* _LINUX_KERNEL_TRACE_H */
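The __start_ftrace_events/__stop_ftrace_events linker symbols declared at the end of trace.h above bracket the section holding every ftrace_event_call. A hedged sketch of walking it, mirroring the iteration trace_events.c uses below:

static void list_event_calls(void)
{
        struct ftrace_event_call *call;

        for (call = __start_ftrace_events;
             (unsigned long)call < (unsigned long)__stop_ftrace_events;
             call++)
                printk(KERN_INFO "event %s:%s enabled=%d\n",
                       call->system, call->name, call->enabled);
}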
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 366c8c333e13..7a30fc4c3642 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -11,6 +11,7 @@
11#include <linux/kallsyms.h> 11#include <linux/kallsyms.h>
12 12
13#include "trace.h" 13#include "trace.h"
14#include "trace_output.h"
14 15
15static struct trace_array *boot_trace; 16static struct trace_array *boot_trace;
16static bool pre_initcalls_finished; 17static bool pre_initcalls_finished;
@@ -27,13 +28,13 @@ void start_boot_trace(void)
27 28
28void enable_boot_trace(void) 29void enable_boot_trace(void)
29{ 30{
30 if (pre_initcalls_finished) 31 if (boot_trace && pre_initcalls_finished)
31 tracing_start_sched_switch_record(); 32 tracing_start_sched_switch_record();
32} 33}
33 34
34void disable_boot_trace(void) 35void disable_boot_trace(void)
35{ 36{
36 if (pre_initcalls_finished) 37 if (boot_trace && pre_initcalls_finished)
37 tracing_stop_sched_switch_record(); 38 tracing_stop_sched_switch_record();
38} 39}
39 40
@@ -42,6 +43,9 @@ static int boot_trace_init(struct trace_array *tr)
42 int cpu; 43 int cpu;
43 boot_trace = tr; 44 boot_trace = tr;
44 45
46 if (!tr)
47 return 0;
48
45 for_each_cpu(cpu, cpu_possible_mask) 49 for_each_cpu(cpu, cpu_possible_mask)
46 tracing_reset(tr, cpu); 50 tracing_reset(tr, cpu);
47 51
@@ -128,10 +132,9 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
128{ 132{
129 struct ring_buffer_event *event; 133 struct ring_buffer_event *event;
130 struct trace_boot_call *entry; 134 struct trace_boot_call *entry;
131 unsigned long irq_flags;
132 struct trace_array *tr = boot_trace; 135 struct trace_array *tr = boot_trace;
133 136
134 if (!pre_initcalls_finished) 137 if (!tr || !pre_initcalls_finished)
135 return; 138 return;
136 139
137 /* Get its name now since this function could 140 /* Get its name now since this function could
@@ -140,18 +143,13 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
140 sprint_symbol(bt->func, (unsigned long)fn); 143 sprint_symbol(bt->func, (unsigned long)fn);
141 preempt_disable(); 144 preempt_disable();
142 145
143 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 146 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL,
144 &irq_flags); 147 sizeof(*entry), 0, 0);
145 if (!event) 148 if (!event)
146 goto out; 149 goto out;
147 entry = ring_buffer_event_data(event); 150 entry = ring_buffer_event_data(event);
148 tracing_generic_entry_update(&entry->ent, 0, 0);
149 entry->ent.type = TRACE_BOOT_CALL;
150 entry->boot_call = *bt; 151 entry->boot_call = *bt;
151 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 152 trace_buffer_unlock_commit(tr, event, 0, 0);
152
153 trace_wake_up();
154
155 out: 153 out:
156 preempt_enable(); 154 preempt_enable();
157} 155}
@@ -160,27 +158,21 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
160{ 158{
161 struct ring_buffer_event *event; 159 struct ring_buffer_event *event;
162 struct trace_boot_ret *entry; 160 struct trace_boot_ret *entry;
163 unsigned long irq_flags;
164 struct trace_array *tr = boot_trace; 161 struct trace_array *tr = boot_trace;
165 162
166 if (!pre_initcalls_finished) 163 if (!tr || !pre_initcalls_finished)
167 return; 164 return;
168 165
169 sprint_symbol(bt->func, (unsigned long)fn); 166 sprint_symbol(bt->func, (unsigned long)fn);
170 preempt_disable(); 167 preempt_disable();
171 168
172 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 169 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET,
173 &irq_flags); 170 sizeof(*entry), 0, 0);
174 if (!event) 171 if (!event)
175 goto out; 172 goto out;
176 entry = ring_buffer_event_data(event); 173 entry = ring_buffer_event_data(event);
177 tracing_generic_entry_update(&entry->ent, 0, 0);
178 entry->ent.type = TRACE_BOOT_RET;
179 entry->boot_ret = *bt; 174 entry->boot_ret = *bt;
180 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 175 trace_buffer_unlock_commit(tr, event, 0, 0);
181
182 trace_wake_up();
183
184 out: 176 out:
185 preempt_enable(); 177 preempt_enable();
186} 178}
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 6c00feb3bac7..c2e68d440c4d 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -14,12 +14,17 @@
14#include <linux/hash.h> 14#include <linux/hash.h>
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <asm/local.h> 16#include <asm/local.h>
17
17#include "trace.h" 18#include "trace.h"
19#include "trace_stat.h"
20#include "trace_output.h"
18 21
19#ifdef CONFIG_BRANCH_TRACER 22#ifdef CONFIG_BRANCH_TRACER
20 23
24static struct tracer branch_trace;
21static int branch_tracing_enabled __read_mostly; 25static int branch_tracing_enabled __read_mostly;
22static DEFINE_MUTEX(branch_tracing_mutex); 26static DEFINE_MUTEX(branch_tracing_mutex);
27
23static struct trace_array *branch_tracer; 28static struct trace_array *branch_tracer;
24 29
25static void 30static void
@@ -28,7 +33,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
28 struct trace_array *tr = branch_tracer; 33 struct trace_array *tr = branch_tracer;
29 struct ring_buffer_event *event; 34 struct ring_buffer_event *event;
30 struct trace_branch *entry; 35 struct trace_branch *entry;
31 unsigned long flags, irq_flags; 36 unsigned long flags;
32 int cpu, pc; 37 int cpu, pc;
33 const char *p; 38 const char *p;
34 39
@@ -47,15 +52,13 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
47 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) 52 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
48 goto out; 53 goto out;
49 54
50 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 55 pc = preempt_count();
51 &irq_flags); 56 event = trace_buffer_lock_reserve(tr, TRACE_BRANCH,
57 sizeof(*entry), flags, pc);
52 if (!event) 58 if (!event)
53 goto out; 59 goto out;
54 60
55 pc = preempt_count();
56 entry = ring_buffer_event_data(event); 61 entry = ring_buffer_event_data(event);
57 tracing_generic_entry_update(&entry->ent, flags, pc);
58 entry->ent.type = TRACE_BRANCH;
59 62
60 /* Strip off the path, only save the file */ 63 /* Strip off the path, only save the file */
61 p = f->file + strlen(f->file); 64 p = f->file + strlen(f->file);
@@ -70,7 +73,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
70 entry->line = f->line; 73 entry->line = f->line;
71 entry->correct = val == expect; 74 entry->correct = val == expect;
72 75
73 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 76 ring_buffer_unlock_commit(tr->buffer, event);
74 77
75 out: 78 out:
76 atomic_dec(&tr->data[cpu]->disabled); 79 atomic_dec(&tr->data[cpu]->disabled);
@@ -88,8 +91,6 @@ void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
88 91
89int enable_branch_tracing(struct trace_array *tr) 92int enable_branch_tracing(struct trace_array *tr)
90{ 93{
91 int ret = 0;
92
93 mutex_lock(&branch_tracing_mutex); 94 mutex_lock(&branch_tracing_mutex);
94 branch_tracer = tr; 95 branch_tracer = tr;
95 /* 96 /*
@@ -100,7 +101,7 @@ int enable_branch_tracing(struct trace_array *tr)
100 branch_tracing_enabled++; 101 branch_tracing_enabled++;
101 mutex_unlock(&branch_tracing_mutex); 102 mutex_unlock(&branch_tracing_mutex);
102 103
103 return ret; 104 return 0;
104} 105}
105 106
106void disable_branch_tracing(void) 107void disable_branch_tracing(void)
@@ -128,11 +129,6 @@ static void stop_branch_trace(struct trace_array *tr)
128 129
129static int branch_trace_init(struct trace_array *tr) 130static int branch_trace_init(struct trace_array *tr)
130{ 131{
131 int cpu;
132
133 for_each_online_cpu(cpu)
134 tracing_reset(tr, cpu);
135
136 start_branch_trace(tr); 132 start_branch_trace(tr);
137 return 0; 133 return 0;
138} 134}
@@ -142,22 +138,54 @@ static void branch_trace_reset(struct trace_array *tr)
142 stop_branch_trace(tr); 138 stop_branch_trace(tr);
143} 139}
144 140
145struct tracer branch_trace __read_mostly = 141static enum print_line_t trace_branch_print(struct trace_iterator *iter,
142 int flags)
143{
144 struct trace_branch *field;
145
146 trace_assign_type(field, iter->ent);
147
148 if (trace_seq_printf(&iter->seq, "[%s] %s:%s:%d\n",
149 field->correct ? " ok " : " MISS ",
150 field->func,
151 field->file,
152 field->line))
153 return TRACE_TYPE_PARTIAL_LINE;
154
155 return TRACE_TYPE_HANDLED;
156}
157
158
159static struct trace_event trace_branch_event = {
160 .type = TRACE_BRANCH,
161 .trace = trace_branch_print,
162 .latency_trace = trace_branch_print,
163};
164
165static struct tracer branch_trace __read_mostly =
146{ 166{
147 .name = "branch", 167 .name = "branch",
148 .init = branch_trace_init, 168 .init = branch_trace_init,
149 .reset = branch_trace_reset, 169 .reset = branch_trace_reset,
150#ifdef CONFIG_FTRACE_SELFTEST 170#ifdef CONFIG_FTRACE_SELFTEST
151 .selftest = trace_selftest_startup_branch, 171 .selftest = trace_selftest_startup_branch,
152#endif 172#endif /* CONFIG_FTRACE_SELFTEST */
153}; 173};
154 174
155__init static int init_branch_trace(void) 175__init static int init_branch_tracer(void)
156{ 176{
177 int ret;
178
179 ret = register_ftrace_event(&trace_branch_event);
180 if (!ret) {
181 printk(KERN_WARNING "Warning: could not register "
182 "branch events\n");
183 return 1;
184 }
157 return register_tracer(&branch_trace); 185 return register_tracer(&branch_trace);
158} 186}
187device_initcall(init_branch_tracer);
159 188
160device_initcall(init_branch_trace);
161#else 189#else
162static inline 190static inline
163void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) 191void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
@@ -183,66 +211,39 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
183} 211}
184EXPORT_SYMBOL(ftrace_likely_update); 212EXPORT_SYMBOL(ftrace_likely_update);
185 213
186struct ftrace_pointer { 214extern unsigned long __start_annotated_branch_profile[];
187 void *start; 215extern unsigned long __stop_annotated_branch_profile[];
188 void *stop;
189 int hit;
190};
191 216
192static void * 217static int annotated_branch_stat_headers(struct seq_file *m)
193t_next(struct seq_file *m, void *v, loff_t *pos)
194{ 218{
195 const struct ftrace_pointer *f = m->private; 219 seq_printf(m, " correct incorrect %% ");
196 struct ftrace_branch_data *p = v; 220 seq_printf(m, " Function "
197 221 " File Line\n"
198 (*pos)++; 222 " ------- --------- - "
199 223 " -------- "
200 if (v == (void *)1) 224 " ---- ----\n");
201 return f->start; 225 return 0;
202
203 ++p;
204
205 if ((void *)p >= (void *)f->stop)
206 return NULL;
207
208 return p;
209} 226}
210 227
211static void *t_start(struct seq_file *m, loff_t *pos) 228static inline long get_incorrect_percent(struct ftrace_branch_data *p)
212{ 229{
213 void *t = (void *)1; 230 long percent;
214 loff_t l = 0;
215
216 for (; t && l < *pos; t = t_next(m, t, &l))
217 ;
218 231
219 return t; 232 if (p->correct) {
220} 233 percent = p->incorrect * 100;
234 percent /= p->correct + p->incorrect;
235 } else
236 percent = p->incorrect ? 100 : -1;
221 237
222static void t_stop(struct seq_file *m, void *p) 238 return percent;
223{
224} 239}
225 240
226static int t_show(struct seq_file *m, void *v) 241static int branch_stat_show(struct seq_file *m, void *v)
227{ 242{
228 const struct ftrace_pointer *fp = m->private;
229 struct ftrace_branch_data *p = v; 243 struct ftrace_branch_data *p = v;
230 const char *f; 244 const char *f;
231 long percent; 245 long percent;
232 246
233 if (v == (void *)1) {
234 if (fp->hit)
235 seq_printf(m, " miss hit %% ");
236 else
237 seq_printf(m, " correct incorrect %% ");
238 seq_printf(m, " Function "
239 " File Line\n"
240 " ------- --------- - "
241 " -------- "
242 " ---- ----\n");
243 return 0;
244 }
245
246 /* Only print the file, not the path */ 247 /* Only print the file, not the path */
247 f = p->file + strlen(p->file); 248 f = p->file + strlen(p->file);
248 while (f >= p->file && *f != '/') 249 while (f >= p->file && *f != '/')
@@ -252,11 +253,7 @@ static int t_show(struct seq_file *m, void *v)
252 /* 253 /*
253 * The miss is overlayed on correct, and hit on incorrect. 254 * The miss is overlayed on correct, and hit on incorrect.
254 */ 255 */
255 if (p->correct) { 256 percent = get_incorrect_percent(p);
256 percent = p->incorrect * 100;
257 percent /= p->correct + p->incorrect;
258 } else
259 percent = p->incorrect ? 100 : -1;
260 257
261 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect); 258 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect);
262 if (percent < 0) 259 if (percent < 0)
@@ -267,76 +264,118 @@ static int t_show(struct seq_file *m, void *v)
267 return 0; 264 return 0;
268} 265}
269 266
270static struct seq_operations tracing_likely_seq_ops = { 267static void *annotated_branch_stat_start(void)
271 .start = t_start, 268{
272 .next = t_next, 269 return __start_annotated_branch_profile;
273 .stop = t_stop, 270}
274 .show = t_show, 271
272static void *
273annotated_branch_stat_next(void *v, int idx)
274{
275 struct ftrace_branch_data *p = v;
276
277 ++p;
278
279 if ((void *)p >= (void *)__stop_annotated_branch_profile)
280 return NULL;
281
282 return p;
283}
284
285static int annotated_branch_stat_cmp(void *p1, void *p2)
286{
287 struct ftrace_branch_data *a = p1;
288 struct ftrace_branch_data *b = p2;
289
290 long percent_a, percent_b;
291
292 percent_a = get_incorrect_percent(a);
293 percent_b = get_incorrect_percent(b);
294
295 if (percent_a < percent_b)
296 return -1;
297 if (percent_a > percent_b)
298 return 1;
299 else
300 return 0;
301}
302
303static struct tracer_stat annotated_branch_stats = {
304 .name = "branch_annotated",
305 .stat_start = annotated_branch_stat_start,
306 .stat_next = annotated_branch_stat_next,
307 .stat_cmp = annotated_branch_stat_cmp,
308 .stat_headers = annotated_branch_stat_headers,
309 .stat_show = branch_stat_show
275}; 310};
276 311
277static int tracing_branch_open(struct inode *inode, struct file *file) 312__init static int init_annotated_branch_stats(void)
278{ 313{
279 int ret; 314 int ret;
280 315
281 ret = seq_open(file, &tracing_likely_seq_ops); 316 ret = register_stat_tracer(&annotated_branch_stats);
282 if (!ret) { 317 if (!ret) {
283 struct seq_file *m = file->private_data; 318 printk(KERN_WARNING "Warning: could not register "
284 m->private = (void *)inode->i_private; 319 "annotated branches stats\n");
320 return 1;
285 } 321 }
286 322 return 0;
287 return ret;
288} 323}
289 324fs_initcall(init_annotated_branch_stats);
290static const struct file_operations tracing_branch_fops = {
291 .open = tracing_branch_open,
292 .read = seq_read,
293 .llseek = seq_lseek,
294};
295 325
296#ifdef CONFIG_PROFILE_ALL_BRANCHES 326#ifdef CONFIG_PROFILE_ALL_BRANCHES
327
297extern unsigned long __start_branch_profile[]; 328extern unsigned long __start_branch_profile[];
298extern unsigned long __stop_branch_profile[]; 329extern unsigned long __stop_branch_profile[];
299 330
300static const struct ftrace_pointer ftrace_branch_pos = { 331static int all_branch_stat_headers(struct seq_file *m)
301 .start = __start_branch_profile, 332{
302 .stop = __stop_branch_profile, 333 seq_printf(m, " miss hit %% ");
303 .hit = 1, 334 seq_printf(m, " Function "
304}; 335 " File Line\n"
336 " ------- --------- - "
337 " -------- "
338 " ---- ----\n");
339 return 0;
340}
305 341
306#endif /* CONFIG_PROFILE_ALL_BRANCHES */ 342static void *all_branch_stat_start(void)
343{
344 return __start_branch_profile;
345}
307 346
308extern unsigned long __start_annotated_branch_profile[]; 347static void *
309extern unsigned long __stop_annotated_branch_profile[]; 348all_branch_stat_next(void *v, int idx)
349{
350 struct ftrace_branch_data *p = v;
310 351
311static const struct ftrace_pointer ftrace_annotated_branch_pos = { 352 ++p;
312 .start = __start_annotated_branch_profile,
313 .stop = __stop_annotated_branch_profile,
314};
315 353
316static __init int ftrace_branch_init(void) 354 if ((void *)p >= (void *)__stop_branch_profile)
317{ 355 return NULL;
318 struct dentry *d_tracer;
319 struct dentry *entry;
320 356
321 d_tracer = tracing_init_dentry(); 357 return p;
358}
322 359
323 entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer, 360static struct tracer_stat all_branch_stats = {
324 (void *)&ftrace_annotated_branch_pos, 361 .name = "branch_all",
325 &tracing_branch_fops); 362 .stat_start = all_branch_stat_start,
326 if (!entry) 363 .stat_next = all_branch_stat_next,
327 pr_warning("Could not create debugfs " 364 .stat_headers = all_branch_stat_headers,
328 "'profile_annotatet_branch' entry\n"); 365 .stat_show = branch_stat_show
366};
329 367
330#ifdef CONFIG_PROFILE_ALL_BRANCHES 368__init static int all_annotated_branch_stats(void)
331 entry = debugfs_create_file("profile_branch", 0444, d_tracer, 369{
332 (void *)&ftrace_branch_pos, 370 int ret;
333 &tracing_branch_fops);
334 if (!entry)
335 pr_warning("Could not create debugfs"
336 " 'profile_branch' entry\n");
337#endif
338 371
372 ret = register_stat_tracer(&all_branch_stats);
373 if (!ret) {
374 printk(KERN_WARNING "Warning: could not register "
375 "all branches stats\n");
376 return 1;
377 }
339 return 0; 378 return 0;
340} 379}
341 380fs_initcall(all_annotated_branch_stats);
342device_initcall(ftrace_branch_init); 381#endif /* CONFIG_PROFILE_ALL_BRANCHES */
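For reference, get_incorrect_percent() above reports incorrect * 100 / (correct + incorrect), 100 when only incorrect hits were recorded, and -1 when the branch never fired at all. A standalone worked example of the same arithmetic:

#include <stdio.h>

static long incorrect_percent(unsigned long correct, unsigned long incorrect)
{
        if (correct)
                return (long)(incorrect * 100 / (correct + incorrect));
        return incorrect ? 100 : -1;
}

int main(void)
{
        printf("%ld\n", incorrect_percent(3, 1));       /* 25 */
        printf("%ld\n", incorrect_percent(0, 5));       /* 100 */
        printf("%ld\n", incorrect_percent(0, 0));       /* -1 */
        return 0;
}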
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
new file mode 100644
index 000000000000..2d4953f93560
--- /dev/null
+++ b/kernel/trace/trace_clock.c
@@ -0,0 +1,101 @@
1/*
2 * tracing clocks
3 *
4 * Copyright (C) 2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
5 *
6 * Implements 3 trace clock variants, with differing scalability/precision
7 * tradeoffs:
8 *
9 * - local: CPU-local trace clock
10 * - medium: scalable global clock with some jitter
11 * - global: globally monotonic, serialized clock
12 *
13 * Tracer plugins will choose a default from these clocks.
14 */
15#include <linux/spinlock.h>
16#include <linux/hardirq.h>
17#include <linux/module.h>
18#include <linux/percpu.h>
19#include <linux/sched.h>
20#include <linux/ktime.h>
21
22/*
23 * trace_clock_local(): the simplest and least coherent tracing clock.
24 *
25 * Useful for tracing that does not cross to other CPUs nor
26 * does it go through idle events.
27 */
28u64 notrace trace_clock_local(void)
29{
30 /*
31 * sched_clock() is an architecture implemented, fast, scalable,
32 * lockless clock. It is not guaranteed to be coherent across
33 * CPUs, nor across CPU idle events.
34 */
35 return sched_clock();
36}
37
38/*
39 * trace_clock(): 'inbetween' trace clock. Not completely serialized,
40 * but not completely incorrect when crossing CPUs either.
41 *
42 * This is based on cpu_clock(), which will allow at most ~1 jiffy of
43 * jitter between CPUs. So it's a pretty scalable clock, but there
44 * can be offsets in the trace data.
45 */
46u64 notrace trace_clock(void)
47{
48 return cpu_clock(raw_smp_processor_id());
49}
50
51
52/*
53 * trace_clock_global(): special globally coherent trace clock
54 *
55 * It has higher overhead than the other trace clocks but is still
56 * an order of magnitude faster than GTOD derived hardware clocks.
57 *
58 * Used by plugins that need globally coherent timestamps.
59 */
60
61static u64 prev_trace_clock_time;
62
63static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp =
64 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
65
66u64 notrace trace_clock_global(void)
67{
68 unsigned long flags;
69 int this_cpu;
70 u64 now;
71
72 raw_local_irq_save(flags);
73
74 this_cpu = raw_smp_processor_id();
75 now = cpu_clock(this_cpu);
76 /*
77 * If in an NMI context then don't risk lockups and return the
78 * cpu_clock() time:
79 */
80 if (unlikely(in_nmi()))
81 goto out;
82
83 __raw_spin_lock(&trace_clock_lock);
84
85 /*
86 * TODO: if this happens often then maybe we should reset
87 * my_scd->clock to prev_trace_clock_time+1, to make sure
88 * we start ticking with the local clock from now on?
89 */
90 if ((s64)(now - prev_trace_clock_time) < 0)
91 now = prev_trace_clock_time + 1;
92
93 prev_trace_clock_time = now;
94
95 __raw_spin_unlock(&trace_clock_lock);
96
97 out:
98 raw_local_irq_restore(flags);
99
100 return now;
101}
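
trace_clock_global() above keeps its timestamps globally monotonic by clamping each new reading against prev_trace_clock_time under a raw spinlock. Below is a minimal userspace sketch of that clamp, assuming clock_gettime() stands in for cpu_clock() and a pthread mutex stands in for the raw spinlock; every name is illustrative, this is not kernel code.

#include <pthread.h>
#include <stdio.h>
#include <time.h>

/* Sketch of the "never go backwards" clamp used by trace_clock_global(). */
static unsigned long long prev_time;
static pthread_mutex_t clock_lock = PTHREAD_MUTEX_INITIALIZER;

static unsigned long long local_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

static unsigned long long sketch_clock_global(void)
{
	unsigned long long now = local_ns();

	pthread_mutex_lock(&clock_lock);

	/* If this reading is behind the last value handed out, clamp it. */
	if ((long long)(now - prev_time) < 0)
		now = prev_time + 1;

	prev_time = now;

	pthread_mutex_unlock(&clock_lock);

	return now;
}

int main(void)
{
	unsigned long long a = sketch_clock_global();
	unsigned long long b = sketch_clock_global();

	printf("%llu\n%llu\n", a, b);
	return 0;
}

The kernel version additionally bails out to the unclamped cpu_clock() value when called from NMI context, since even this short lock could deadlock there; the sketch leaves that case out.
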
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
new file mode 100644
index 000000000000..210e71ff82db
--- /dev/null
+++ b/kernel/trace/trace_events.c
@@ -0,0 +1,731 @@
1/*
2 * event tracer
3 *
4 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5 *
6 * - Added format output of fields of the trace point.
7 * This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
8 *
9 */
10
11#include <linux/debugfs.h>
12#include <linux/uaccess.h>
13#include <linux/module.h>
14#include <linux/ctype.h>
15
16#include "trace_output.h"
17
18#define TRACE_SYSTEM "TRACE_SYSTEM"
19
20static DEFINE_MUTEX(event_mutex);
21
22#define events_for_each(event) \
23 for (event = __start_ftrace_events; \
24 (unsigned long)event < (unsigned long)__stop_ftrace_events; \
25 event++)
26
27void event_trace_printk(unsigned long ip, const char *fmt, ...)
28{
29 va_list ap;
30
31 va_start(ap, fmt);
32 tracing_record_cmdline(current);
33 trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
34 va_end(ap);
35}
36
37static void ftrace_clear_events(void)
38{
39 struct ftrace_event_call *call = (void *)__start_ftrace_events;
40
41
42 while ((unsigned long)call < (unsigned long)__stop_ftrace_events) {
43
44 if (call->enabled) {
45 call->enabled = 0;
46 call->unregfunc();
47 }
48 call++;
49 }
50}
51
52static void ftrace_event_enable_disable(struct ftrace_event_call *call,
53 int enable)
54{
55
56 switch (enable) {
57 case 0:
58 if (call->enabled) {
59 call->enabled = 0;
60 call->unregfunc();
61 }
62 if (call->raw_enabled) {
63 call->raw_enabled = 0;
64 call->raw_unreg();
65 }
66 break;
67 case 1:
68 if (!call->enabled &&
69 (call->type & TRACE_EVENT_TYPE_PRINTF)) {
70 call->enabled = 1;
71 call->regfunc();
72 }
73 if (!call->raw_enabled &&
74 (call->type & TRACE_EVENT_TYPE_RAW)) {
75 call->raw_enabled = 1;
76 call->raw_reg();
77 }
78 break;
79 }
80}
81
82static int ftrace_set_clr_event(char *buf, int set)
83{
84 struct ftrace_event_call *call = __start_ftrace_events;
85 char *event = NULL, *sub = NULL, *match;
86 int ret = -EINVAL;
87
88 /*
89 * The buf format can be <subsystem>:<event-name>
90 * *:<event-name> means any event by that name.
91 * :<event-name> is the same.
92 *
93 * <subsystem>:* means all events in that subsystem
94 * <subsystem>: means the same.
95 *
96 * <name> (no ':') means all events in a subsystem with
97 * the name <name> or any event that matches <name>
98 */
99
100 match = strsep(&buf, ":");
101 if (buf) {
102 sub = match;
103 event = buf;
104 match = NULL;
105
106 if (!strlen(sub) || strcmp(sub, "*") == 0)
107 sub = NULL;
108 if (!strlen(event) || strcmp(event, "*") == 0)
109 event = NULL;
110 }
111
112 mutex_lock(&event_mutex);
113 events_for_each(call) {
114
115 if (!call->name)
116 continue;
117
118 if (match &&
119 strcmp(match, call->name) != 0 &&
120 strcmp(match, call->system) != 0)
121 continue;
122
123 if (sub && strcmp(sub, call->system) != 0)
124 continue;
125
126 if (event && strcmp(event, call->name) != 0)
127 continue;
128
129 ftrace_event_enable_disable(call, set);
130
131 ret = 0;
132 }
133 mutex_unlock(&event_mutex);
134
135 return ret;
136}
137
138/* 128 should be much more than enough */
139#define EVENT_BUF_SIZE 127
140
141static ssize_t
142ftrace_event_write(struct file *file, const char __user *ubuf,
143 size_t cnt, loff_t *ppos)
144{
145 size_t read = 0;
146 int i, set = 1;
147 ssize_t ret;
148 char *buf;
149 char ch;
150
151 if (!cnt || cnt < 0)
152 return 0;
153
154 ret = get_user(ch, ubuf++);
155 if (ret)
156 return ret;
157 read++;
158 cnt--;
159
160 /* skip white space */
161 while (cnt && isspace(ch)) {
162 ret = get_user(ch, ubuf++);
163 if (ret)
164 return ret;
165 read++;
166 cnt--;
167 }
168
169 /* Only white space found? */
170 if (isspace(ch)) {
171 file->f_pos += read;
172 ret = read;
173 return ret;
174 }
175
176 buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL);
177 if (!buf)
178 return -ENOMEM;
179
180 if (cnt > EVENT_BUF_SIZE)
181 cnt = EVENT_BUF_SIZE;
182
183 i = 0;
184 while (cnt && !isspace(ch)) {
185 if (!i && ch == '!')
186 set = 0;
187 else
188 buf[i++] = ch;
189
190 ret = get_user(ch, ubuf++);
191 if (ret)
192 goto out_free;
193 read++;
194 cnt--;
195 }
196 buf[i] = 0;
197
198 file->f_pos += read;
199
200 ret = ftrace_set_clr_event(buf, set);
201 if (ret)
202 goto out_free;
203
204 ret = read;
205
206 out_free:
207 kfree(buf);
208
209 return ret;
210}
211
212static void *
213t_next(struct seq_file *m, void *v, loff_t *pos)
214{
215 struct ftrace_event_call *call = m->private;
216 struct ftrace_event_call *next = call;
217
218 (*pos)++;
219
220 if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
221 return NULL;
222
223 m->private = ++next;
224
225 return call;
226}
227
228static void *t_start(struct seq_file *m, loff_t *pos)
229{
230 return t_next(m, NULL, pos);
231}
232
233static void *
234s_next(struct seq_file *m, void *v, loff_t *pos)
235{
236 struct ftrace_event_call *call = m->private;
237 struct ftrace_event_call *next;
238
239 (*pos)++;
240
241 retry:
242 if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
243 return NULL;
244
245 if (!call->enabled) {
246 call++;
247 goto retry;
248 }
249
250 next = call;
251 m->private = ++next;
252
253 return call;
254}
255
256static void *s_start(struct seq_file *m, loff_t *pos)
257{
258 return s_next(m, NULL, pos);
259}
260
261static int t_show(struct seq_file *m, void *v)
262{
263 struct ftrace_event_call *call = v;
264
265 if (strcmp(call->system, TRACE_SYSTEM) != 0)
266 seq_printf(m, "%s:", call->system);
267 seq_printf(m, "%s\n", call->name);
268
269 return 0;
270}
271
272static void t_stop(struct seq_file *m, void *p)
273{
274}
275
276static int
277ftrace_event_seq_open(struct inode *inode, struct file *file)
278{
279 int ret;
280 const struct seq_operations *seq_ops;
281
282 if ((file->f_mode & FMODE_WRITE) &&
283 !(file->f_flags & O_APPEND))
284 ftrace_clear_events();
285
286 seq_ops = inode->i_private;
287 ret = seq_open(file, seq_ops);
288 if (!ret) {
289 struct seq_file *m = file->private_data;
290
291 m->private = __start_ftrace_events;
292 }
293 return ret;
294}
295
296static ssize_t
297event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
298 loff_t *ppos)
299{
300 struct ftrace_event_call *call = filp->private_data;
301 char *buf;
302
303 if (call->enabled || call->raw_enabled)
304 buf = "1\n";
305 else
306 buf = "0\n";
307
308 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
309}
310
311static ssize_t
312event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
313 loff_t *ppos)
314{
315 struct ftrace_event_call *call = filp->private_data;
316 char buf[64];
317 unsigned long val;
318 int ret;
319
320 if (cnt >= sizeof(buf))
321 return -EINVAL;
322
323 if (copy_from_user(&buf, ubuf, cnt))
324 return -EFAULT;
325
326 buf[cnt] = 0;
327
328 ret = strict_strtoul(buf, 10, &val);
329 if (ret < 0)
330 return ret;
331
332 switch (val) {
333 case 0:
334 case 1:
335 mutex_lock(&event_mutex);
336 ftrace_event_enable_disable(call, val);
337 mutex_unlock(&event_mutex);
338 break;
339
340 default:
341 return -EINVAL;
342 }
343
344 *ppos += cnt;
345
346 return cnt;
347}
348
349static ssize_t
350event_type_read(struct file *filp, char __user *ubuf, size_t cnt,
351 loff_t *ppos)
352{
353 struct ftrace_event_call *call = filp->private_data;
354 char buf[16];
355 int r = 0;
356
357 if (call->type & TRACE_EVENT_TYPE_PRINTF)
358 r += sprintf(buf, "printf\n");
359
360 if (call->type & TRACE_EVENT_TYPE_RAW)
361 r += sprintf(buf+r, "raw\n");
362
363 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
364}
365
366static ssize_t
367event_type_write(struct file *filp, const char __user *ubuf, size_t cnt,
368 loff_t *ppos)
369{
370 struct ftrace_event_call *call = filp->private_data;
371 char buf[64];
372
373 /*
374 * If there's only one type, we can't change it.
375 * And currently we always have printf type, and we
376 * may or may not have raw type.
377 *
378 * This is a redundant check; the file should be read-only
379 * if that is the case anyway.
380 */
381
382 if (!call->raw_init)
383 return -EPERM;
384
385 if (cnt >= sizeof(buf))
386 return -EINVAL;
387
388 if (copy_from_user(&buf, ubuf, cnt))
389 return -EFAULT;
390
391 buf[cnt] = 0;
392
393 if (!strncmp(buf, "printf", 6) &&
394 (!buf[6] || isspace(buf[6]))) {
395
396 call->type = TRACE_EVENT_TYPE_PRINTF;
397
398 /*
399 * If raw is enabled, then disable it and enable
400 * the printf type.
401 */
402 if (call->raw_enabled) {
403 call->raw_enabled = 0;
404 call->raw_unreg();
405
406 call->enabled = 1;
407 call->regfunc();
408 }
409
410 } else if (!strncmp(buf, "raw", 3) &&
411 (!buf[3] || isspace(buf[3]))) {
412
413 call->type = TRACE_EVENT_TYPE_RAW;
414
415 /*
416 * If printf is enabled, then disable it and enable
417 * the raw type.
418 */
419 if (call->enabled) {
420 call->enabled = 0;
421 call->unregfunc();
422
423 call->raw_enabled = 1;
424 call->raw_reg();
425 }
426 } else
427 return -EINVAL;
428
429 *ppos += cnt;
430
431 return cnt;
432}
433
434static ssize_t
435event_available_types_read(struct file *filp, char __user *ubuf, size_t cnt,
436 loff_t *ppos)
437{
438 struct ftrace_event_call *call = filp->private_data;
439 char buf[16];
440 int r = 0;
441
442 r += sprintf(buf, "printf\n");
443
444 if (call->raw_init)
445 r += sprintf(buf+r, "raw\n");
446
447 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
448}
449
450#undef FIELD
451#define FIELD(type, name) \
452 #type, #name, offsetof(typeof(field), name), sizeof(field.name)
453
454static int trace_write_header(struct trace_seq *s)
455{
456 struct trace_entry field;
457
458 /* struct trace_entry */
459 return trace_seq_printf(s,
460 "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n"
461 "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n"
462 "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n"
463 "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n"
464 "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n"
465 "\n",
466 FIELD(unsigned char, type),
467 FIELD(unsigned char, flags),
468 FIELD(unsigned char, preempt_count),
469 FIELD(int, pid),
470 FIELD(int, tgid));
471}
472static ssize_t
473event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
474 loff_t *ppos)
475{
476 struct ftrace_event_call *call = filp->private_data;
477 struct trace_seq *s;
478 char *buf;
479 int r;
480
481 s = kmalloc(sizeof(*s), GFP_KERNEL);
482 if (!s)
483 return -ENOMEM;
484
485 trace_seq_init(s);
486
487 if (*ppos)
488 return 0;
489
490 /* If any of the first writes fail, so will the show_format. */
491
492 trace_seq_printf(s, "name: %s\n", call->name);
493 trace_seq_printf(s, "ID: %d\n", call->id);
494 trace_seq_printf(s, "format:\n");
495 trace_write_header(s);
496
497 r = call->show_format(s);
498 if (!r) {
499 /*
500 * ug! The format output is bigger than a PAGE!!
501 */
502 buf = "FORMAT TOO BIG\n";
503 r = simple_read_from_buffer(ubuf, cnt, ppos,
504 buf, strlen(buf));
505 goto out;
506 }
507
508 r = simple_read_from_buffer(ubuf, cnt, ppos,
509 s->buffer, s->len);
510 out:
511 kfree(s);
512 return r;
513}
514
515static const struct seq_operations show_event_seq_ops = {
516 .start = t_start,
517 .next = t_next,
518 .show = t_show,
519 .stop = t_stop,
520};
521
522static const struct seq_operations show_set_event_seq_ops = {
523 .start = s_start,
524 .next = s_next,
525 .show = t_show,
526 .stop = t_stop,
527};
528
529static const struct file_operations ftrace_avail_fops = {
530 .open = ftrace_event_seq_open,
531 .read = seq_read,
532 .llseek = seq_lseek,
533 .release = seq_release,
534};
535
536static const struct file_operations ftrace_set_event_fops = {
537 .open = ftrace_event_seq_open,
538 .read = seq_read,
539 .write = ftrace_event_write,
540 .llseek = seq_lseek,
541 .release = seq_release,
542};
543
544static const struct file_operations ftrace_enable_fops = {
545 .open = tracing_open_generic,
546 .read = event_enable_read,
547 .write = event_enable_write,
548};
549
550static const struct file_operations ftrace_type_fops = {
551 .open = tracing_open_generic,
552 .read = event_type_read,
553 .write = event_type_write,
554};
555
556static const struct file_operations ftrace_available_types_fops = {
557 .open = tracing_open_generic,
558 .read = event_available_types_read,
559};
560
561static const struct file_operations ftrace_event_format_fops = {
562 .open = tracing_open_generic,
563 .read = event_format_read,
564};
565
566static struct dentry *event_trace_events_dir(void)
567{
568 static struct dentry *d_tracer;
569 static struct dentry *d_events;
570
571 if (d_events)
572 return d_events;
573
574 d_tracer = tracing_init_dentry();
575 if (!d_tracer)
576 return NULL;
577
578 d_events = debugfs_create_dir("events", d_tracer);
579 if (!d_events)
580 pr_warning("Could not create debugfs "
581 "'events' directory\n");
582
583 return d_events;
584}
585
586struct event_subsystem {
587 struct list_head list;
588 const char *name;
589 struct dentry *entry;
590};
591
592static LIST_HEAD(event_subsystems);
593
594static struct dentry *
595event_subsystem_dir(const char *name, struct dentry *d_events)
596{
597 struct event_subsystem *system;
598
599 /* First see if we did not already create this dir */
600 list_for_each_entry(system, &event_subsystems, list) {
601 if (strcmp(system->name, name) == 0)
602 return system->entry;
603 }
604
605 /* need to create new entry */
606 system = kmalloc(sizeof(*system), GFP_KERNEL);
607 if (!system) {
608 pr_warning("No memory to create event subsystem %s\n",
609 name);
610 return d_events;
611 }
612
613 system->entry = debugfs_create_dir(name, d_events);
614 if (!system->entry) {
615 pr_warning("Could not create event subsystem %s\n",
616 name);
617 kfree(system);
618 return d_events;
619 }
620
621 system->name = name;
622 list_add(&system->list, &event_subsystems);
623
624 return system->entry;
625}
626
627static int
628event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
629{
630 struct dentry *entry;
631 int ret;
632
633 /*
634 * If the trace point header did not define TRACE_SYSTEM
635 * then the system would be called "TRACE_SYSTEM".
636 */
637 if (strcmp(call->system, "TRACE_SYSTEM") != 0)
638 d_events = event_subsystem_dir(call->system, d_events);
639
640 if (call->raw_init) {
641 ret = call->raw_init();
642 if (ret < 0) {
643 pr_warning("Could not initialize trace point"
644 " events/%s\n", call->name);
645 return ret;
646 }
647 }
648
649 /* default the output to printf */
650 call->type = TRACE_EVENT_TYPE_PRINTF;
651
652 call->dir = debugfs_create_dir(call->name, d_events);
653 if (!call->dir) {
654 pr_warning("Could not create debugfs "
655 "'%s' directory\n", call->name);
656 return -1;
657 }
658
659 entry = debugfs_create_file("enable", 0644, call->dir, call,
660 &ftrace_enable_fops);
661 if (!entry)
662 pr_warning("Could not create debugfs "
663 "'%s/enable' entry\n", call->name);
664
665 /* Only let type be writable, if we can change it */
666 entry = debugfs_create_file("type",
667 call->raw_init ? 0644 : 0444,
668 call->dir, call,
669 &ftrace_type_fops);
670 if (!entry)
671 pr_warning("Could not create debugfs "
672 "'%s/type' entry\n", call->name);
673
674 entry = debugfs_create_file("available_types", 0444, call->dir, call,
675 &ftrace_available_types_fops);
676 if (!entry)
677 pr_warning("Could not create debugfs "
678 "'%s/available_types' entry\n", call->name);
679
680 /* A trace may not want to export its format */
681 if (!call->show_format)
682 return 0;
683
684 entry = debugfs_create_file("format", 0444, call->dir, call,
685 &ftrace_event_format_fops);
686 if (!entry)
687 pr_warning("Could not create debugfs "
688 "'%s/format' entry\n", call->name);
689
690 return 0;
691}
692
693static __init int event_trace_init(void)
694{
695 struct ftrace_event_call *call = __start_ftrace_events;
696 struct dentry *d_tracer;
697 struct dentry *entry;
698 struct dentry *d_events;
699
700 d_tracer = tracing_init_dentry();
701 if (!d_tracer)
702 return 0;
703
704 entry = debugfs_create_file("available_events", 0444, d_tracer,
705 (void *)&show_event_seq_ops,
706 &ftrace_avail_fops);
707 if (!entry)
708 pr_warning("Could not create debugfs "
709 "'available_events' entry\n");
710
711 entry = debugfs_create_file("set_event", 0644, d_tracer,
712 (void *)&show_set_event_seq_ops,
713 &ftrace_set_event_fops);
714 if (!entry)
715 pr_warning("Could not create debugfs "
716 "'set_event' entry\n");
717
718 d_events = event_trace_events_dir();
719 if (!d_events)
720 return 0;
721
722 events_for_each(call) {
723 /* The linker may leave blanks */
724 if (!call->name)
725 continue;
726 event_create_dir(call, d_events);
727 }
728
729 return 0;
730}
731fs_initcall(event_trace_init);
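
ftrace_set_clr_event() above accepts "<subsystem>:<event>", "<subsystem>:", ":<event>" and bare names, and ftrace_event_write() treats a leading '!' as "disable". Here is a standalone sketch of the same strsep()-based split so the matching rules can be exercised outside the kernel; parse_event_spec() is a made-up helper for this example only.

#define _DEFAULT_SOURCE		/* for strsep() on glibc */
#include <stdio.h>
#include <string.h>

static void parse_event_spec(char *buf)
{
	char *match, *sub = NULL, *event = NULL;

	match = strsep(&buf, ":");
	if (buf) {
		/* A ':' was present: the left side names the subsystem. */
		sub = match;
		event = buf;
		match = NULL;

		if (!strlen(sub) || strcmp(sub, "*") == 0)
			sub = NULL;
		if (!strlen(event) || strcmp(event, "*") == 0)
			event = NULL;
	}

	printf("match=%-14s sub=%-10s event=%-14s\n",
	       match ? match : "(any)",
	       sub   ? sub   : "(any)",
	       event ? event : "(any)");
}

int main(void)
{
	char a[] = "sched:sched_switch";
	char b[] = "sched:";
	char c[] = ":sched_switch";
	char d[] = "sched_switch";

	parse_event_spec(a);
	parse_event_spec(b);
	parse_event_spec(c);
	parse_event_spec(d);
	return 0;
}

With the set_event file wired to ftrace_event_write() above, this parsing is what lets a write such as "<subsystem>:*" enable a whole subsystem and a leading '!' disable it again (the subsystem name "sched" in the sketch is only an example).
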
diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h
new file mode 100644
index 000000000000..3830a731424c
--- /dev/null
+++ b/kernel/trace/trace_events_stage_1.h
@@ -0,0 +1,36 @@
1/*
2 * Stage 1 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * struct ftrace_raw_<call> {
7 * struct trace_entry ent;
8 * <type> <item>;
9 * [...]
10 * };
11 *
12 * The <type> <item> is created by the TRACE_FIELD(type, item, assign)
13 * macro. We simply do "type item;", and that will create the fields
14 * in the structure.
15 */
16
17#undef TRACE_FORMAT
18#define TRACE_FORMAT(call, proto, args, fmt)
19
20#undef TRACE_EVENT_FORMAT
21#define TRACE_EVENT_FORMAT(name, proto, args, fmt, tstruct, tpfmt) \
22 struct ftrace_raw_##name { \
23 struct trace_entry ent; \
24 tstruct \
25 }; \
26 static struct ftrace_event_call event_##name
27
28#undef TRACE_STRUCT
29#define TRACE_STRUCT(args...) args
30
31#define TRACE_FIELD(type, item, assign) \
32 type item;
33#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
34 type_item;
35
36#include <trace/trace_event_types.h>
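
The stage_1/2/3 headers work by re-including <trace/trace_event_types.h> several times with TRACE_EVENT_FORMAT and TRACE_FIELD redefined before each pass, so a single event description expands first into a struct, then into output and format helpers, then into registration glue. The following is a compact userspace sketch of the same multi-pass expansion, using an X-macro list instead of re-including a header; SAMPLE_EVENT_FIELDS and all generated names are invented for the illustration.

#include <stddef.h>
#include <stdio.h>

/* One event description, expanded several times with different macros. */
#define SAMPLE_EVENT_FIELDS(F)		\
	F(int,           pid)		\
	F(unsigned long, ip)

/* Pass 1: generate the record structure (the stage-1 job). */
#define DEFINE_FIELD(type, name)	type name;
struct sample_event {
	SAMPLE_EVENT_FIELDS(DEFINE_FIELD)
};
#undef DEFINE_FIELD

/* Pass 2: generate a "format" dump of offsets and sizes (the stage-2 job). */
#define SHOW_FIELD(type, name)						\
	printf("\tfield:" #type " " #name ";\toffset:%zu;\tsize:%zu;\n", \
	       offsetof(struct sample_event, name),			\
	       sizeof(((struct sample_event *)0)->name));

static void sample_event_show_format(void)
{
	SAMPLE_EVENT_FIELDS(SHOW_FIELD)
}
#undef SHOW_FIELD

int main(void)
{
	sample_event_show_format();
	return 0;
}

Stage 3's job, registration and the actual probe bodies, follows the same pattern with yet another set of macro definitions before the final include.
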
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
new file mode 100644
index 000000000000..b1cebba1d9b4
--- /dev/null
+++ b/kernel/trace/trace_events_stage_2.h
@@ -0,0 +1,130 @@
1/*
2 * Stage 2 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * enum print_line_t
7 * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
8 * {
9 * struct trace_seq *s = &iter->seq;
10 * struct ftrace_raw_<call> *field; <-- defined in stage 1
11 * struct trace_entry *entry;
12 * int ret;
13 *
14 * entry = iter->ent;
15 *
16 * if (entry->type != event_<call>.id) {
17 * WARN_ON_ONCE(1);
18 * return TRACE_TYPE_UNHANDLED;
19 * }
20 *
21 * field = (typeof(field))entry;
22 *
23 * ret = trace_seq_printf(s, <TPRAWFMT> "%s", <ARGS> "\n");
24 * if (!ret)
25 * return TRACE_TYPE_PARTIAL_LINE;
26 *
27 * return TRACE_TYPE_HANDLED;
28 * }
29 *
30 * This is the method used to print the raw event to the trace
31 * output format. Note, this is not needed if the data is read
32 * in binary.
33 */
34
35#undef TRACE_STRUCT
36#define TRACE_STRUCT(args...) args
37
38#undef TRACE_FIELD
39#define TRACE_FIELD(type, item, assign) \
40 field->item,
41
42#undef TRACE_FIELD_SPECIAL
43#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
44 field->item,
45
46
47#undef TPRAWFMT
48#define TPRAWFMT(args...) args
49
50#undef TRACE_EVENT_FORMAT
51#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
52enum print_line_t \
53ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
54{ \
55 struct trace_seq *s = &iter->seq; \
56 struct ftrace_raw_##call *field; \
57 struct trace_entry *entry; \
58 int ret; \
59 \
60 entry = iter->ent; \
61 \
62 if (entry->type != event_##call.id) { \
63 WARN_ON_ONCE(1); \
64 return TRACE_TYPE_UNHANDLED; \
65 } \
66 \
67 field = (typeof(field))entry; \
68 \
69 ret = trace_seq_printf(s, tpfmt "%s", tstruct "\n"); \
70 if (!ret) \
71 return TRACE_TYPE_PARTIAL_LINE; \
72 \
73 return TRACE_TYPE_HANDLED; \
74}
75
76#include <trace/trace_event_types.h>
77
78/*
79 * Set up the output format of the trace point.
80 *
81 * int
82 * ftrace_format_##call(struct trace_seq *s)
83 * {
84 * struct ftrace_raw_##call field;
85 * int ret;
86 *
87 * ret = trace_seq_printf(s, #type " " #item ";"
88 * " size:%d; offset:%d;\n",
89 * sizeof(field.type),
90 * offsetof(struct ftrace_raw_##call,
91 * item));
92 *
93 * }
94 */
95
96#undef TRACE_FIELD
97#define TRACE_FIELD(type, item, assign) \
98 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
99 "offset:%lu;\tsize:%lu;\n", \
100 offsetof(typeof(field), item), \
101 sizeof(field.item)); \
102 if (!ret) \
103 return 0;
104
105
106#undef TRACE_FIELD_SPECIAL
107#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
108 ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \
109 "offset:%lu;\tsize:%lu;\n", \
110 offsetof(typeof(field), item), \
111 sizeof(field.item)); \
112 if (!ret) \
113 return 0;
114
115#undef TRACE_EVENT_FORMAT
116#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
117int \
118ftrace_format_##call(struct trace_seq *s) \
119{ \
120 struct ftrace_raw_##call field; \
121 int ret; \
122 \
123 tstruct; \
124 \
125 trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \
126 \
127 return ret; \
128}
129
130#include <trace/trace_event_types.h>
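
The generated ftrace_raw_output_<call>() above checks the entry's type id against event_<call>.id and, only when it matches, casts the generic entry to the concrete ftrace_raw_<call> layout before printing. Here is a small userspace sketch of that check-the-id-then-cast pattern; the types, ids and names are invented for the illustration.

#include <stdio.h>

struct sample_entry {
	unsigned short	type;		/* plays the role of trace_entry */
};

struct sample_sched_switch {
	struct sample_entry	ent;	/* must come first, as in stage 1 */
	int			prev_pid;
	int			next_pid;
};

#define SAMPLE_SCHED_SWITCH_ID	1

static int output_sched_switch(struct sample_entry *entry)
{
	struct sample_sched_switch *field;

	if (entry->type != SAMPLE_SCHED_SWITCH_ID)
		return 0;		/* not our event: leave it unhandled */

	field = (struct sample_sched_switch *)entry;
	return printf("prev=%d next=%d\n", field->prev_pid, field->next_pid);
}

int main(void)
{
	struct sample_sched_switch rec = {
		.ent	  = { .type = SAMPLE_SCHED_SWITCH_ID },
		.prev_pid = 1,
		.next_pid = 2,
	};

	output_sched_switch(&rec.ent);
	return 0;
}
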
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
new file mode 100644
index 000000000000..041789ffbac1
--- /dev/null
+++ b/kernel/trace/trace_events_stage_3.h
@@ -0,0 +1,235 @@
1/*
2 * Stage 3 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * static void ftrace_event_<call>(proto)
7 * {
8 * event_trace_printk(_RET_IP_, "(<call>) " <fmt>);
9 * }
10 *
11 * static int ftrace_reg_event_<call>(void)
12 * {
13 * int ret;
14 *
15 * ret = register_trace_<call>(ftrace_event_<call>);
16 * if (ret)
17 * pr_info("event trace: Could not activate trace point "
18 * "probe to <call>");
19 * return ret;
20 * }
21 *
22 * static void ftrace_unreg_event_<call>(void)
23 * {
24 * unregister_trace_<call>(ftrace_event_<call>);
25 * }
26 *
27 * For those macros defined with TRACE_FORMAT:
28 *
29 * static struct ftrace_event_call __used
30 * __attribute__((__aligned__(4)))
31 * __attribute__((section("_ftrace_events"))) event_<call> = {
32 * .name = "<call>",
33 * .regfunc = ftrace_reg_event_<call>,
34 * .unregfunc = ftrace_unreg_event_<call>,
35 * }
36 *
37 *
38 * For those macros defined with TRACE_EVENT_FORMAT:
39 *
40 * static struct ftrace_event_call event_<call>;
41 *
42 * static void ftrace_raw_event_<call>(proto)
43 * {
44 * struct ring_buffer_event *event;
45 * struct ftrace_raw_<call> *entry; <-- defined in stage 1
46 * unsigned long irq_flags;
47 * int pc;
48 *
49 * local_save_flags(irq_flags);
50 * pc = preempt_count();
51 *
52 * event = trace_current_buffer_lock_reserve(event_<call>.id,
53 * sizeof(struct ftrace_raw_<call>),
54 * irq_flags, pc);
55 * if (!event)
56 * return;
57 * entry = ring_buffer_event_data(event);
58 *
59 * <tstruct>; <-- Here we assign the entries by the TRACE_FIELD.
60 *
61 * trace_current_buffer_unlock_commit(event, irq_flags, pc);
62 * }
63 *
64 * static int ftrace_raw_reg_event_<call>(void)
65 * {
66 * int ret;
67 *
68 * ret = register_trace_<call>(ftrace_raw_event_<call>);
69 * if (ret)
70 * pr_info("event trace: Could not activate trace point "
71 * "probe to <call>");
72 * return ret;
73 * }
74 *
75 * static void ftrace_unreg_event_<call>(void)
76 * {
77 * unregister_trace_<call>(ftrace_raw_event_<call>);
78 * }
79 *
80 * static struct trace_event ftrace_event_type_<call> = {
81 * .trace = ftrace_raw_output_<call>, <-- stage 2
82 * };
83 *
84 * static int ftrace_raw_init_event_<call>(void)
85 * {
86 * int id;
87 *
88 * id = register_ftrace_event(&ftrace_event_type_<call>);
89 * if (!id)
90 * return -ENODEV;
91 * event_<call>.id = id;
92 * return 0;
93 * }
94 *
95 * static struct ftrace_event_call __used
96 * __attribute__((__aligned__(4)))
97 * __attribute__((section("_ftrace_events"))) event_<call> = {
98 * .name = "<call>",
99 * .regfunc = ftrace_reg_event_<call>,
100 * .unregfunc = ftrace_unreg_event_<call>,
101 * .raw_init = ftrace_raw_init_event_<call>,
102 * .raw_reg = ftrace_raw_reg_event_<call>,
103 * .raw_unreg = ftrace_raw_unreg_event_<call>,
104 * .show_format = ftrace_format_<call>,
105 * }
106 *
107 */
108
109#undef TPFMT
110#define TPFMT(fmt, args...) fmt "\n", ##args
111
112#define _TRACE_FORMAT(call, proto, args, fmt) \
113static void ftrace_event_##call(proto) \
114{ \
115 event_trace_printk(_RET_IP_, "(" #call ") " fmt); \
116} \
117 \
118static int ftrace_reg_event_##call(void) \
119{ \
120 int ret; \
121 \
122 ret = register_trace_##call(ftrace_event_##call); \
123 if (ret) \
124 pr_info("event trace: Could not activate trace point " \
125 "probe to " #call "\n"); \
126 return ret; \
127} \
128 \
129static void ftrace_unreg_event_##call(void) \
130{ \
131 unregister_trace_##call(ftrace_event_##call); \
132} \
133
134
135#undef TRACE_FORMAT
136#define TRACE_FORMAT(call, proto, args, fmt) \
137_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \
138static struct ftrace_event_call __used \
139__attribute__((__aligned__(4))) \
140__attribute__((section("_ftrace_events"))) event_##call = { \
141 .name = #call, \
142 .system = STR(TRACE_SYSTEM), \
143 .regfunc = ftrace_reg_event_##call, \
144 .unregfunc = ftrace_unreg_event_##call, \
145}
146
147#undef TRACE_FIELD
148#define TRACE_FIELD(type, item, assign)\
149 entry->item = assign;
150
151#undef TRACE_FIELD
152#define TRACE_FIELD(type, item, assign)\
153 entry->item = assign;
154
155#undef TPCMD
156#define TPCMD(cmd...) cmd
157
158#undef TRACE_ENTRY
159#define TRACE_ENTRY entry
160
161#undef TRACE_FIELD_SPECIAL
162#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
163 cmd;
164
165#undef TRACE_EVENT_FORMAT
166#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
167_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \
168 \
169static struct ftrace_event_call event_##call; \
170 \
171static void ftrace_raw_event_##call(proto) \
172{ \
173 struct ring_buffer_event *event; \
174 struct ftrace_raw_##call *entry; \
175 unsigned long irq_flags; \
176 int pc; \
177 \
178 local_save_flags(irq_flags); \
179 pc = preempt_count(); \
180 \
181 event = trace_current_buffer_lock_reserve(event_##call.id, \
182 sizeof(struct ftrace_raw_##call), \
183 irq_flags, pc); \
184 if (!event) \
185 return; \
186 entry = ring_buffer_event_data(event); \
187 \
188 tstruct; \
189 \
190 trace_current_buffer_unlock_commit(event, irq_flags, pc); \
191} \
192 \
193static int ftrace_raw_reg_event_##call(void) \
194{ \
195 int ret; \
196 \
197 ret = register_trace_##call(ftrace_raw_event_##call); \
198 if (ret) \
199 pr_info("event trace: Could not activate trace point " \
200 "probe to " #call "\n"); \
201 return ret; \
202} \
203 \
204static void ftrace_raw_unreg_event_##call(void) \
205{ \
206 unregister_trace_##call(ftrace_raw_event_##call); \
207} \
208 \
209static struct trace_event ftrace_event_type_##call = { \
210 .trace = ftrace_raw_output_##call, \
211}; \
212 \
213static int ftrace_raw_init_event_##call(void) \
214{ \
215 int id; \
216 \
217 id = register_ftrace_event(&ftrace_event_type_##call); \
218 if (!id) \
219 return -ENODEV; \
220 event_##call.id = id; \
221 return 0; \
222} \
223 \
224static struct ftrace_event_call __used \
225__attribute__((__aligned__(4))) \
226__attribute__((section("_ftrace_events"))) event_##call = { \
227 .name = #call, \
228 .system = STR(TRACE_SYSTEM), \
229 .regfunc = ftrace_reg_event_##call, \
230 .unregfunc = ftrace_unreg_event_##call, \
231 .raw_init = ftrace_raw_init_event_##call, \
232 .raw_reg = ftrace_raw_reg_event_##call, \
233 .raw_unreg = ftrace_raw_unreg_event_##call, \
234 .show_format = ftrace_format_##call, \
235}
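
Stage 3 drops every event_<call> structure into the "_ftrace_events" section, and trace_events.c walks that array between __start_ftrace_events and __stop_ftrace_events with events_for_each(). The same trick can be tried in userspace: on ELF targets GNU ld emits __start_<section>/__stop_<section> symbols for sections whose names are valid C identifiers, while the kernel gets its equivalents from the linker script. A sketch follows; the section and struct names are invented for the example.

#include <stdio.h>

struct sample_call {
	const char *name;
};

#define DEFINE_SAMPLE_CALL(call)					\
	static struct sample_call __attribute__((used))			\
	__attribute__((section("sample_calls")))			\
	sample_call_##call = {						\
		.name = #call,						\
	}

DEFINE_SAMPLE_CALL(irq_handler_entry);
DEFINE_SAMPLE_CALL(sched_switch);

/* Provided automatically by the linker for the section above. */
extern struct sample_call __start_sample_calls[];
extern struct sample_call __stop_sample_calls[];

int main(void)
{
	struct sample_call *call;

	/* Same shape as the events_for_each() loop in trace_events.c. */
	for (call = __start_sample_calls; call < __stop_sample_calls; call++)
		printf("%s\n", call->name);
	return 0;
}

The kernel macros additionally force __aligned__(4) on each entry, presumably so the structures placed in the section pack into a directly walkable array; the single-pointer struct in the sketch needs no extra care.
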
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 9236d7e25a16..c9a0b7df44ff 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -9,6 +9,7 @@
9 * Copyright (C) 2004-2006 Ingo Molnar 9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III 10 * Copyright (C) 2004 William Lee Irwin III
11 */ 11 */
12#include <linux/ring_buffer.h>
12#include <linux/debugfs.h> 13#include <linux/debugfs.h>
13#include <linux/uaccess.h> 14#include <linux/uaccess.h>
14#include <linux/ftrace.h> 15#include <linux/ftrace.h>
@@ -16,52 +17,388 @@
16 17
17#include "trace.h" 18#include "trace.h"
18 19
19static void start_function_trace(struct trace_array *tr) 20/* function tracing enabled */
21static int ftrace_function_enabled;
22
23static struct trace_array *func_trace;
24
25static void tracing_start_function_trace(void);
26static void tracing_stop_function_trace(void);
27
28static int function_trace_init(struct trace_array *tr)
20{ 29{
30 func_trace = tr;
21 tr->cpu = get_cpu(); 31 tr->cpu = get_cpu();
22 tracing_reset_online_cpus(tr);
23 put_cpu(); 32 put_cpu();
24 33
25 tracing_start_cmdline_record(); 34 tracing_start_cmdline_record();
26 tracing_start_function_trace(); 35 tracing_start_function_trace();
36 return 0;
27} 37}
28 38
29static void stop_function_trace(struct trace_array *tr) 39static void function_trace_reset(struct trace_array *tr)
30{ 40{
31 tracing_stop_function_trace(); 41 tracing_stop_function_trace();
32 tracing_stop_cmdline_record(); 42 tracing_stop_cmdline_record();
33} 43}
34 44
35static int function_trace_init(struct trace_array *tr) 45static void function_trace_start(struct trace_array *tr)
36{ 46{
37 start_function_trace(tr); 47 tracing_reset_online_cpus(tr);
38 return 0;
39} 48}
40 49
41static void function_trace_reset(struct trace_array *tr) 50static void
51function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
52{
53 struct trace_array *tr = func_trace;
54 struct trace_array_cpu *data;
55 unsigned long flags;
56 long disabled;
57 int cpu, resched;
58 int pc;
59
60 if (unlikely(!ftrace_function_enabled))
61 return;
62
63 pc = preempt_count();
64 resched = ftrace_preempt_disable();
65 local_save_flags(flags);
66 cpu = raw_smp_processor_id();
67 data = tr->data[cpu];
68 disabled = atomic_inc_return(&data->disabled);
69
70 if (likely(disabled == 1))
71 trace_function(tr, ip, parent_ip, flags, pc);
72
73 atomic_dec(&data->disabled);
74 ftrace_preempt_enable(resched);
75}
76
77static void
78function_trace_call(unsigned long ip, unsigned long parent_ip)
42{ 79{
43 stop_function_trace(tr); 80 struct trace_array *tr = func_trace;
81 struct trace_array_cpu *data;
82 unsigned long flags;
83 long disabled;
84 int cpu;
85 int pc;
86
87 if (unlikely(!ftrace_function_enabled))
88 return;
89
90 /*
91 * Need to use raw, since this must be called before the
92 * recursive protection is performed.
93 */
94 local_irq_save(flags);
95 cpu = raw_smp_processor_id();
96 data = tr->data[cpu];
97 disabled = atomic_inc_return(&data->disabled);
98
99 if (likely(disabled == 1)) {
100 pc = preempt_count();
101 trace_function(tr, ip, parent_ip, flags, pc);
102 }
103
104 atomic_dec(&data->disabled);
105 local_irq_restore(flags);
44} 106}
45 107
46static void function_trace_start(struct trace_array *tr) 108static void
109function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
47{ 110{
48 tracing_reset_online_cpus(tr); 111 struct trace_array *tr = func_trace;
112 struct trace_array_cpu *data;
113 unsigned long flags;
114 long disabled;
115 int cpu;
116 int pc;
117
118 if (unlikely(!ftrace_function_enabled))
119 return;
120
121 /*
122 * Need to use raw, since this must be called before the
123 * recursive protection is performed.
124 */
125 local_irq_save(flags);
126 cpu = raw_smp_processor_id();
127 data = tr->data[cpu];
128 disabled = atomic_inc_return(&data->disabled);
129
130 if (likely(disabled == 1)) {
131 pc = preempt_count();
132 trace_function(tr, ip, parent_ip, flags, pc);
133 /*
134 * skip over 5 funcs:
135 * __ftrace_trace_stack,
136 * __trace_stack,
137 * function_stack_trace_call
138 * ftrace_list_func
139 * ftrace_call
140 */
141 __trace_stack(tr, flags, 5, pc);
142 }
143
144 atomic_dec(&data->disabled);
145 local_irq_restore(flags);
146}
147
148
149static struct ftrace_ops trace_ops __read_mostly =
150{
151 .func = function_trace_call,
152};
153
154static struct ftrace_ops trace_stack_ops __read_mostly =
155{
156 .func = function_stack_trace_call,
157};
158
159/* Our two options */
160enum {
161 TRACE_FUNC_OPT_STACK = 0x1,
162};
163
164static struct tracer_opt func_opts[] = {
165#ifdef CONFIG_STACKTRACE
166 { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
167#endif
168 { } /* Always set a last empty entry */
169};
170
171static struct tracer_flags func_flags = {
172 .val = 0, /* By default: all flags disabled */
173 .opts = func_opts
174};
175
176static void tracing_start_function_trace(void)
177{
178 ftrace_function_enabled = 0;
179
180 if (trace_flags & TRACE_ITER_PREEMPTONLY)
181 trace_ops.func = function_trace_call_preempt_only;
182 else
183 trace_ops.func = function_trace_call;
184
185 if (func_flags.val & TRACE_FUNC_OPT_STACK)
186 register_ftrace_function(&trace_stack_ops);
187 else
188 register_ftrace_function(&trace_ops);
189
190 ftrace_function_enabled = 1;
191}
192
193static void tracing_stop_function_trace(void)
194{
195 ftrace_function_enabled = 0;
196 /* OK if they are not registered */
197 unregister_ftrace_function(&trace_stack_ops);
198 unregister_ftrace_function(&trace_ops);
199}
200
201static int func_set_flag(u32 old_flags, u32 bit, int set)
202{
203 if (bit == TRACE_FUNC_OPT_STACK) {
204 /* do nothing if already set */
205 if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
206 return 0;
207
208 if (set) {
209 unregister_ftrace_function(&trace_ops);
210 register_ftrace_function(&trace_stack_ops);
211 } else {
212 unregister_ftrace_function(&trace_stack_ops);
213 register_ftrace_function(&trace_ops);
214 }
215
216 return 0;
217 }
218
219 return -EINVAL;
49} 220}
50 221
51static struct tracer function_trace __read_mostly = 222static struct tracer function_trace __read_mostly =
52{ 223{
53 .name = "function", 224 .name = "function",
54 .init = function_trace_init, 225 .init = function_trace_init,
55 .reset = function_trace_reset, 226 .reset = function_trace_reset,
56 .start = function_trace_start, 227 .start = function_trace_start,
228 .wait_pipe = poll_wait_pipe,
229 .flags = &func_flags,
230 .set_flag = func_set_flag,
57#ifdef CONFIG_FTRACE_SELFTEST 231#ifdef CONFIG_FTRACE_SELFTEST
58 .selftest = trace_selftest_startup_function, 232 .selftest = trace_selftest_startup_function,
59#endif 233#endif
60}; 234};
61 235
236#ifdef CONFIG_DYNAMIC_FTRACE
237static void
238ftrace_traceon(unsigned long ip, unsigned long parent_ip, void **data)
239{
240 long *count = (long *)data;
241
242 if (tracing_is_on())
243 return;
244
245 if (!*count)
246 return;
247
248 if (*count != -1)
249 (*count)--;
250
251 tracing_on();
252}
253
254static void
255ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data)
256{
257 long *count = (long *)data;
258
259 if (!tracing_is_on())
260 return;
261
262 if (!*count)
263 return;
264
265 if (*count != -1)
266 (*count)--;
267
268 tracing_off();
269}
270
271static int
272ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
273 struct ftrace_probe_ops *ops, void *data);
274
275static struct ftrace_probe_ops traceon_probe_ops = {
276 .func = ftrace_traceon,
277 .print = ftrace_trace_onoff_print,
278};
279
280static struct ftrace_probe_ops traceoff_probe_ops = {
281 .func = ftrace_traceoff,
282 .print = ftrace_trace_onoff_print,
283};
284
285static int
286ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
287 struct ftrace_probe_ops *ops, void *data)
288{
289 char str[KSYM_SYMBOL_LEN];
290 long count = (long)data;
291
292 kallsyms_lookup(ip, NULL, NULL, NULL, str);
293 seq_printf(m, "%s:", str);
294
295 if (ops == &traceon_probe_ops)
296 seq_printf(m, "traceon");
297 else
298 seq_printf(m, "traceoff");
299
300 if (count == -1)
301 seq_printf(m, ":unlimited\n");
302 else
303 seq_printf(m, ":count=%ld", count);
304 seq_putc(m, '\n');
305
306 return 0;
307}
308
309static int
310ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param)
311{
312 struct ftrace_probe_ops *ops;
313
314 /* we register both traceon and traceoff to this callback */
315 if (strcmp(cmd, "traceon") == 0)
316 ops = &traceon_probe_ops;
317 else
318 ops = &traceoff_probe_ops;
319
320 unregister_ftrace_function_probe_func(glob, ops);
321
322 return 0;
323}
324
325static int
326ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
327{
328 struct ftrace_probe_ops *ops;
329 void *count = (void *)-1;
330 char *number;
331 int ret;
332
333 /* hash funcs only work with set_ftrace_filter */
334 if (!enable)
335 return -EINVAL;
336
337 if (glob[0] == '!')
338 return ftrace_trace_onoff_unreg(glob+1, cmd, param);
339
340 /* we register both traceon and traceoff to this callback */
341 if (strcmp(cmd, "traceon") == 0)
342 ops = &traceon_probe_ops;
343 else
344 ops = &traceoff_probe_ops;
345
346 if (!param)
347 goto out_reg;
348
349 number = strsep(&param, ":");
350
351 if (!strlen(number))
352 goto out_reg;
353
354 /*
355 * We use the callback data field (which is a pointer)
356 * as our counter.
357 */
358 ret = strict_strtoul(number, 0, (unsigned long *)&count);
359 if (ret)
360 return ret;
361
362 out_reg:
363 ret = register_ftrace_function_probe(glob, ops, count);
364
365 return ret;
366}
367
368static struct ftrace_func_command ftrace_traceon_cmd = {
369 .name = "traceon",
370 .func = ftrace_trace_onoff_callback,
371};
372
373static struct ftrace_func_command ftrace_traceoff_cmd = {
374 .name = "traceoff",
375 .func = ftrace_trace_onoff_callback,
376};
377
378static int __init init_func_cmd_traceon(void)
379{
380 int ret;
381
382 ret = register_ftrace_command(&ftrace_traceoff_cmd);
383 if (ret)
384 return ret;
385
386 ret = register_ftrace_command(&ftrace_traceon_cmd);
387 if (ret)
388 unregister_ftrace_command(&ftrace_traceoff_cmd);
389 return ret;
390}
391#else
392static inline int init_func_cmd_traceon(void)
393{
394 return 0;
395}
396#endif /* CONFIG_DYNAMIC_FTRACE */
397
62static __init int init_function_trace(void) 398static __init int init_function_trace(void)
63{ 399{
400 init_func_cmd_traceon();
64 return register_tracer(&function_trace); 401 return register_tracer(&function_trace);
65} 402}
66
67device_initcall(init_function_trace); 403device_initcall(init_function_trace);
404
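
The traceon/traceoff probes above store their remaining-hit budget in the probe's data pointer itself: registration passes the parsed count as a void *, and ftrace_traceon()/ftrace_traceoff() cast &data back to long * and decrement it until it reaches zero, with -1 meaning unlimited. The following is a userspace sketch of just that counter-in-a-pointer bookkeeping; probe_hit() and the rest are invented, and the trick assumes sizeof(long) == sizeof(void *).

#include <stdio.h>
#include <stdlib.h>

static int tracing_enabled = 1;

static void probe_hit(void **data)
{
	long *count = (long *)data;	/* the pointer slot is the counter */

	if (!tracing_enabled)
		return;

	if (!*count)
		return;			/* budget used up: do nothing */

	if (*count != -1)
		(*count)--;

	tracing_enabled = 0;		/* the "traceoff" action */
}

int main(int argc, char **argv)
{
	/* Optional ":count" parameter, -1 (unlimited) by default. */
	long init = (argc > 1) ? strtol(argv[1], NULL, 0) : -1;
	void *data = (void *)init;
	int i;

	for (i = 0; i < 5; i++) {
		tracing_enabled = 1;
		probe_hit(&data);
		printf("hit %d: count=%ld tracing=%d\n",
		       i, (long)data, tracing_enabled);
	}
	return 0;
}

On the kernel side this is what makes a set_ftrace_filter expression along the lines of "<function>:traceoff:<count>" act only on the first <count> hits.
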
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 930c08e5b38e..c009553a8e81 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * 2 *
3 * Function graph tracer. 3 * Function graph tracer.
4 * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com> 4 * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
5 * Mostly borrowed from function tracer which 5 * Mostly borrowed from function tracer which
6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com> 6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
7 * 7 *
@@ -12,6 +12,7 @@
12#include <linux/fs.h> 12#include <linux/fs.h>
13 13
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h"
15 16
16#define TRACE_GRAPH_INDENT 2 17#define TRACE_GRAPH_INDENT 2
17 18
@@ -20,9 +21,11 @@
20#define TRACE_GRAPH_PRINT_CPU 0x2 21#define TRACE_GRAPH_PRINT_CPU 0x2
21#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 22#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
22#define TRACE_GRAPH_PRINT_PROC 0x8 23#define TRACE_GRAPH_PRINT_PROC 0x8
24#define TRACE_GRAPH_PRINT_DURATION 0x10
25#define TRACE_GRAPH_PRINT_ABS_TIME 0X20
23 26
24static struct tracer_opt trace_opts[] = { 27static struct tracer_opt trace_opts[] = {
25 /* Display overruns ? */ 28 /* Display overruns? (for self-debug purpose) */
26 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) }, 29 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
27 /* Display CPU ? */ 30 /* Display CPU ? */
28 { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) }, 31 { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
@@ -30,26 +33,101 @@ static struct tracer_opt trace_opts[] = {
30 { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) }, 33 { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
31 /* Display proc name/pid */ 34 /* Display proc name/pid */
32 { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) }, 35 { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
36 /* Display duration of execution */
37 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
38 /* Display absolute time of an entry */
39 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
33 { } /* Empty entry */ 40 { } /* Empty entry */
34}; 41};
35 42
36static struct tracer_flags tracer_flags = { 43static struct tracer_flags tracer_flags = {
37 /* Don't display overruns and proc by default */ 44 /* Don't display overruns and proc by default */
38 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD, 45 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
46 TRACE_GRAPH_PRINT_DURATION,
39 .opts = trace_opts 47 .opts = trace_opts
40}; 48};
41 49
42/* pid on the last trace processed */ 50/* pid on the last trace processed */
43static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 };
44 51
45static int graph_trace_init(struct trace_array *tr) 52
53/* Add a function return address to the trace stack on thread info.*/
54int
55ftrace_push_return_trace(unsigned long ret, unsigned long long time,
56 unsigned long func, int *depth)
57{
58 int index;
59
60 if (!current->ret_stack)
61 return -EBUSY;
62
63 /* The return trace stack is full */
64 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
65 atomic_inc(&current->trace_overrun);
66 return -EBUSY;
67 }
68
69 index = ++current->curr_ret_stack;
70 barrier();
71 current->ret_stack[index].ret = ret;
72 current->ret_stack[index].func = func;
73 current->ret_stack[index].calltime = time;
74 *depth = index;
75
76 return 0;
77}
78
79/* Retrieve a function return address from the trace stack on thread info. */
80void
81ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
46{ 82{
47 int cpu, ret; 83 int index;
84
85 index = current->curr_ret_stack;
48 86
49 for_each_online_cpu(cpu) 87 if (unlikely(index < 0)) {
50 tracing_reset(tr, cpu); 88 ftrace_graph_stop();
89 WARN_ON(1);
90 /* Might as well panic, otherwise we have nowhere to go */
91 *ret = (unsigned long)panic;
92 return;
93 }
94
95 *ret = current->ret_stack[index].ret;
96 trace->func = current->ret_stack[index].func;
97 trace->calltime = current->ret_stack[index].calltime;
98 trace->overrun = atomic_read(&current->trace_overrun);
99 trace->depth = index;
100 barrier();
101 current->curr_ret_stack--;
102
103}
104
105/*
106 * Send the trace to the ring-buffer.
107 * @return the original return address.
108 */
109unsigned long ftrace_return_to_handler(void)
110{
111 struct ftrace_graph_ret trace;
112 unsigned long ret;
113
114 ftrace_pop_return_trace(&trace, &ret);
115 trace.rettime = cpu_clock(raw_smp_processor_id());
116 ftrace_graph_return(&trace);
117
118 if (unlikely(!ret)) {
119 ftrace_graph_stop();
120 WARN_ON(1);
121 /* Might as well panic. What else to do? */
122 ret = (unsigned long)panic;
123 }
124
125 return ret;
126}
51 127
52 ret = register_ftrace_graph(&trace_graph_return, 128static int graph_trace_init(struct trace_array *tr)
129{
130 int ret = register_ftrace_graph(&trace_graph_return,
53 &trace_graph_entry); 131 &trace_graph_entry);
54 if (ret) 132 if (ret)
55 return ret; 133 return ret;
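
ftrace_push_return_trace() and ftrace_pop_return_trace() in the hunk above maintain a small per-task shadow stack: the entry hook pushes the real return address, the traced function and a timestamp, and the return handler pops them back to recover the return address and compute the duration. Below is a simplified userspace sketch of that push/pop bookkeeping with a fixed depth and no overrun counter; all names are invented.

#include <stdio.h>

#define RET_STACK_DEPTH	8

struct ret_entry {
	unsigned long		ret;		/* saved return address */
	unsigned long		func;		/* traced function      */
	unsigned long long	calltime;
};

static struct ret_entry ret_stack[RET_STACK_DEPTH];
static int curr_ret = -1;			/* -1 means empty       */

static int push_return(unsigned long ret, unsigned long func,
		       unsigned long long time)
{
	if (curr_ret == RET_STACK_DEPTH - 1)
		return -1;			/* stack full: give up  */

	curr_ret++;
	ret_stack[curr_ret].ret = ret;
	ret_stack[curr_ret].func = func;
	ret_stack[curr_ret].calltime = time;
	return 0;
}

static unsigned long pop_return(unsigned long long now,
				unsigned long long *duration)
{
	struct ret_entry *e;

	if (curr_ret < 0)
		return 0;			/* underflow: caller must bail */

	e = &ret_stack[curr_ret--];
	*duration = now - e->calltime;
	return e->ret;
}

int main(void)
{
	unsigned long long duration;
	unsigned long ret;

	push_return(0x1000, 0x2000, 10);
	ret = pop_return(25, &duration);
	printf("ret=%#lx duration=%llu\n", ret, duration);
	return 0;
}
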
@@ -153,17 +231,25 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
153 231
154/* If the pid changed since the last trace, output this event */ 232/* If the pid changed since the last trace, output this event */
155static enum print_line_t 233static enum print_line_t
156verif_pid(struct trace_seq *s, pid_t pid, int cpu) 234verif_pid(struct trace_seq *s, pid_t pid, int cpu, pid_t *last_pids_cpu)
157{ 235{
158 pid_t prev_pid; 236 pid_t prev_pid;
237 pid_t *last_pid;
159 int ret; 238 int ret;
160 239
161 if (last_pid[cpu] != -1 && last_pid[cpu] == pid) 240 if (!last_pids_cpu)
241 return TRACE_TYPE_HANDLED;
242
243 last_pid = per_cpu_ptr(last_pids_cpu, cpu);
244
245 if (*last_pid == pid)
162 return TRACE_TYPE_HANDLED; 246 return TRACE_TYPE_HANDLED;
163 247
164 prev_pid = last_pid[cpu]; 248 prev_pid = *last_pid;
165 last_pid[cpu] = pid; 249 *last_pid = pid;
166 250
251 if (prev_pid == -1)
252 return TRACE_TYPE_HANDLED;
167/* 253/*
168 * Context-switch trace line: 254 * Context-switch trace line:
169 255
@@ -175,34 +261,34 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu)
175 ret = trace_seq_printf(s, 261 ret = trace_seq_printf(s,
176 " ------------------------------------------\n"); 262 " ------------------------------------------\n");
177 if (!ret) 263 if (!ret)
178 TRACE_TYPE_PARTIAL_LINE; 264 return TRACE_TYPE_PARTIAL_LINE;
179 265
180 ret = print_graph_cpu(s, cpu); 266 ret = print_graph_cpu(s, cpu);
181 if (ret == TRACE_TYPE_PARTIAL_LINE) 267 if (ret == TRACE_TYPE_PARTIAL_LINE)
182 TRACE_TYPE_PARTIAL_LINE; 268 return TRACE_TYPE_PARTIAL_LINE;
183 269
184 ret = print_graph_proc(s, prev_pid); 270 ret = print_graph_proc(s, prev_pid);
185 if (ret == TRACE_TYPE_PARTIAL_LINE) 271 if (ret == TRACE_TYPE_PARTIAL_LINE)
186 TRACE_TYPE_PARTIAL_LINE; 272 return TRACE_TYPE_PARTIAL_LINE;
187 273
188 ret = trace_seq_printf(s, " => "); 274 ret = trace_seq_printf(s, " => ");
189 if (!ret) 275 if (!ret)
190 TRACE_TYPE_PARTIAL_LINE; 276 return TRACE_TYPE_PARTIAL_LINE;
191 277
192 ret = print_graph_proc(s, pid); 278 ret = print_graph_proc(s, pid);
193 if (ret == TRACE_TYPE_PARTIAL_LINE) 279 if (ret == TRACE_TYPE_PARTIAL_LINE)
194 TRACE_TYPE_PARTIAL_LINE; 280 return TRACE_TYPE_PARTIAL_LINE;
195 281
196 ret = trace_seq_printf(s, 282 ret = trace_seq_printf(s,
197 "\n ------------------------------------------\n\n"); 283 "\n ------------------------------------------\n\n");
198 if (!ret) 284 if (!ret)
199 TRACE_TYPE_PARTIAL_LINE; 285 return TRACE_TYPE_PARTIAL_LINE;
200 286
201 return ret; 287 return TRACE_TYPE_HANDLED;
202} 288}
203 289
204static bool 290static struct ftrace_graph_ret_entry *
205trace_branch_is_leaf(struct trace_iterator *iter, 291get_return_for_leaf(struct trace_iterator *iter,
206 struct ftrace_graph_ent_entry *curr) 292 struct ftrace_graph_ent_entry *curr)
207{ 293{
208 struct ring_buffer_iter *ring_iter; 294 struct ring_buffer_iter *ring_iter;
@@ -211,65 +297,123 @@ trace_branch_is_leaf(struct trace_iterator *iter,
211 297
212 ring_iter = iter->buffer_iter[iter->cpu]; 298 ring_iter = iter->buffer_iter[iter->cpu];
213 299
214 if (!ring_iter) 300 /* First peek to compare current entry and the next one */
215 return false; 301 if (ring_iter)
216 302 event = ring_buffer_iter_peek(ring_iter, NULL);
217 event = ring_buffer_iter_peek(ring_iter, NULL); 303 else {
304 /* We need to consume the current entry to see the next one */
305 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
306 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
307 NULL);
308 }
218 309
219 if (!event) 310 if (!event)
220 return false; 311 return NULL;
221 312
222 next = ring_buffer_event_data(event); 313 next = ring_buffer_event_data(event);
223 314
224 if (next->ent.type != TRACE_GRAPH_RET) 315 if (next->ent.type != TRACE_GRAPH_RET)
225 return false; 316 return NULL;
226 317
227 if (curr->ent.pid != next->ent.pid || 318 if (curr->ent.pid != next->ent.pid ||
228 curr->graph_ent.func != next->ret.func) 319 curr->graph_ent.func != next->ret.func)
229 return false; 320 return NULL;
321
322 /* this is a leaf, now advance the iterator */
323 if (ring_iter)
324 ring_buffer_read(ring_iter, NULL);
325
326 return next;
327}
328
329/* Signal an execution-time overhead to the output */
330static int
331print_graph_overhead(unsigned long long duration, struct trace_seq *s)
332{
333 /* If the duration is not displayed, we don't need anything */
334 if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION))
335 return 1;
336
337 /* Non nested entry or return */
338 if (duration == -1)
339 return trace_seq_printf(s, " ");
340
341 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
342 /* Duration exceeded 100 usecs */
343 if (duration > 100000ULL)
344 return trace_seq_printf(s, "! ");
345
346 /* Duration exceeded 10 usecs */
347 if (duration > 10000ULL)
348 return trace_seq_printf(s, "+ ");
349 }
230 350
231 return true; 351 return trace_seq_printf(s, " ");
352}
353
354static int print_graph_abs_time(u64 t, struct trace_seq *s)
355{
356 unsigned long usecs_rem;
357
358 usecs_rem = do_div(t, NSEC_PER_SEC);
359 usecs_rem /= 1000;
360
361 return trace_seq_printf(s, "%5lu.%06lu | ",
362 (unsigned long)t, usecs_rem);
232} 363}
233 364
234static enum print_line_t 365static enum print_line_t
235print_graph_irq(struct trace_seq *s, unsigned long addr, 366print_graph_irq(struct trace_iterator *iter, unsigned long addr,
236 enum trace_type type, int cpu, pid_t pid) 367 enum trace_type type, int cpu, pid_t pid)
237{ 368{
238 int ret; 369 int ret;
370 struct trace_seq *s = &iter->seq;
239 371
240 if (addr < (unsigned long)__irqentry_text_start || 372 if (addr < (unsigned long)__irqentry_text_start ||
241 addr >= (unsigned long)__irqentry_text_end) 373 addr >= (unsigned long)__irqentry_text_end)
242 return TRACE_TYPE_UNHANDLED; 374 return TRACE_TYPE_UNHANDLED;
243 375
244 if (type == TRACE_GRAPH_ENT) { 376 /* Absolute time */
245 ret = trace_seq_printf(s, "==========> | "); 377 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
246 } else { 378 ret = print_graph_abs_time(iter->ts, s);
247 /* Cpu */ 379 if (!ret)
248 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 380 return TRACE_TYPE_PARTIAL_LINE;
249 ret = print_graph_cpu(s, cpu); 381 }
250 if (ret == TRACE_TYPE_PARTIAL_LINE)
251 return TRACE_TYPE_PARTIAL_LINE;
252 }
253 /* Proc */
254 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
255 ret = print_graph_proc(s, pid);
256 if (ret == TRACE_TYPE_PARTIAL_LINE)
257 return TRACE_TYPE_PARTIAL_LINE;
258 382
259 ret = trace_seq_printf(s, " | "); 383 /* Cpu */
260 if (!ret) 384 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
261 return TRACE_TYPE_PARTIAL_LINE; 385 ret = print_graph_cpu(s, cpu);
262 } 386 if (ret == TRACE_TYPE_PARTIAL_LINE)
387 return TRACE_TYPE_PARTIAL_LINE;
388 }
389 /* Proc */
390 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
391 ret = print_graph_proc(s, pid);
392 if (ret == TRACE_TYPE_PARTIAL_LINE)
393 return TRACE_TYPE_PARTIAL_LINE;
394 ret = trace_seq_printf(s, " | ");
395 if (!ret)
396 return TRACE_TYPE_PARTIAL_LINE;
397 }
263 398
264 /* No overhead */ 399 /* No overhead */
265 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 400 ret = print_graph_overhead(-1, s);
266 ret = trace_seq_printf(s, " "); 401 if (!ret)
267 if (!ret) 402 return TRACE_TYPE_PARTIAL_LINE;
268 return TRACE_TYPE_PARTIAL_LINE; 403
269 } 404 if (type == TRACE_GRAPH_ENT)
405 ret = trace_seq_printf(s, "==========>");
406 else
407 ret = trace_seq_printf(s, "<==========");
408
409 if (!ret)
410 return TRACE_TYPE_PARTIAL_LINE;
411
412 /* Don't close the duration column if haven't one */
413 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
414 trace_seq_printf(s, " |");
415 ret = trace_seq_printf(s, "\n");
270 416
271 ret = trace_seq_printf(s, "<========== |\n");
272 }
273 if (!ret) 417 if (!ret)
274 return TRACE_TYPE_PARTIAL_LINE; 418 return TRACE_TYPE_PARTIAL_LINE;
275 return TRACE_TYPE_HANDLED; 419 return TRACE_TYPE_HANDLED;
@@ -288,7 +432,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
288 sprintf(msecs_str, "%lu", (unsigned long) duration); 432 sprintf(msecs_str, "%lu", (unsigned long) duration);
289 433
290 /* Print msecs */ 434 /* Print msecs */
291 ret = trace_seq_printf(s, msecs_str); 435 ret = trace_seq_printf(s, "%s", msecs_str);
292 if (!ret) 436 if (!ret)
293 return TRACE_TYPE_PARTIAL_LINE; 437 return TRACE_TYPE_PARTIAL_LINE;
294 438
@@ -321,51 +465,33 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
321 465
322} 466}
323 467
324/* Signal a overhead of time execution to the output */
325static int
326print_graph_overhead(unsigned long long duration, struct trace_seq *s)
327{
328 /* Duration exceeded 100 msecs */
329 if (duration > 100000ULL)
330 return trace_seq_printf(s, "! ");
331
332 /* Duration exceeded 10 msecs */
333 if (duration > 10000ULL)
334 return trace_seq_printf(s, "+ ");
335
336 return trace_seq_printf(s, " ");
337}
338
339/* Case of a leaf function on its call entry */ 468/* Case of a leaf function on its call entry */
340static enum print_line_t 469static enum print_line_t
341print_graph_entry_leaf(struct trace_iterator *iter, 470print_graph_entry_leaf(struct trace_iterator *iter,
342 struct ftrace_graph_ent_entry *entry, struct trace_seq *s) 471 struct ftrace_graph_ent_entry *entry,
472 struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s)
343{ 473{
344 struct ftrace_graph_ret_entry *ret_entry;
345 struct ftrace_graph_ret *graph_ret; 474 struct ftrace_graph_ret *graph_ret;
346 struct ring_buffer_event *event;
347 struct ftrace_graph_ent *call; 475 struct ftrace_graph_ent *call;
348 unsigned long long duration; 476 unsigned long long duration;
349 int ret; 477 int ret;
350 int i; 478 int i;
351 479
352 event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
353 ret_entry = ring_buffer_event_data(event);
354 graph_ret = &ret_entry->ret; 480 graph_ret = &ret_entry->ret;
355 call = &entry->graph_ent; 481 call = &entry->graph_ent;
356 duration = graph_ret->rettime - graph_ret->calltime; 482 duration = graph_ret->rettime - graph_ret->calltime;
357 483
358 /* Overhead */ 484 /* Overhead */
359 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 485 ret = print_graph_overhead(duration, s);
360 ret = print_graph_overhead(duration, s); 486 if (!ret)
361 if (!ret) 487 return TRACE_TYPE_PARTIAL_LINE;
362 return TRACE_TYPE_PARTIAL_LINE;
363 }
364 488
365 /* Duration */ 489 /* Duration */
366 ret = print_graph_duration(duration, s); 490 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
367 if (ret == TRACE_TYPE_PARTIAL_LINE) 491 ret = print_graph_duration(duration, s);
368 return TRACE_TYPE_PARTIAL_LINE; 492 if (ret == TRACE_TYPE_PARTIAL_LINE)
493 return TRACE_TYPE_PARTIAL_LINE;
494 }
369 495
370 /* Function */ 496 /* Function */
371 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 497 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
@@ -394,25 +520,17 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
394 struct ftrace_graph_ent *call = &entry->graph_ent; 520 struct ftrace_graph_ent *call = &entry->graph_ent;
395 521
396 /* No overhead */ 522 /* No overhead */
397 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 523 ret = print_graph_overhead(-1, s);
398 ret = trace_seq_printf(s, " "); 524 if (!ret)
399 if (!ret) 525 return TRACE_TYPE_PARTIAL_LINE;
400 return TRACE_TYPE_PARTIAL_LINE;
401 }
402 526
403 /* Interrupt */ 527 /* No time */
404 ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, pid); 528 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
405 if (ret == TRACE_TYPE_UNHANDLED) {
406 /* No time */
407 ret = trace_seq_printf(s, " | "); 529 ret = trace_seq_printf(s, " | ");
408 if (!ret) 530 if (!ret)
409 return TRACE_TYPE_PARTIAL_LINE; 531 return TRACE_TYPE_PARTIAL_LINE;
410 } else {
411 if (ret == TRACE_TYPE_PARTIAL_LINE)
412 return TRACE_TYPE_PARTIAL_LINE;
413 } 532 }
414 533
415
416 /* Function */ 534 /* Function */
417 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 535 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
418 ret = trace_seq_printf(s, " "); 536 ret = trace_seq_printf(s, " ");
@@ -428,20 +546,40 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
428 if (!ret) 546 if (!ret)
429 return TRACE_TYPE_PARTIAL_LINE; 547 return TRACE_TYPE_PARTIAL_LINE;
430 548
431 return TRACE_TYPE_HANDLED; 549 /*
550 * we already consumed the current entry to check the next one
551 * and see if this is a leaf.
552 */
553 return TRACE_TYPE_NO_CONSUME;
432} 554}
433 555
434static enum print_line_t 556static enum print_line_t
435print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 557print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
436 struct trace_iterator *iter, int cpu) 558 struct trace_iterator *iter)
437{ 559{
438 int ret; 560 int ret;
561 int cpu = iter->cpu;
562 pid_t *last_entry = iter->private;
439 struct trace_entry *ent = iter->ent; 563 struct trace_entry *ent = iter->ent;
564 struct ftrace_graph_ent *call = &field->graph_ent;
565 struct ftrace_graph_ret_entry *leaf_ret;
440 566
441 /* Pid */ 567 /* Pid */
442 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) 568 if (verif_pid(s, ent->pid, cpu, last_entry) == TRACE_TYPE_PARTIAL_LINE)
443 return TRACE_TYPE_PARTIAL_LINE; 569 return TRACE_TYPE_PARTIAL_LINE;
444 570
571 /* Interrupt */
572 ret = print_graph_irq(iter, call->func, TRACE_GRAPH_ENT, cpu, ent->pid);
573 if (ret == TRACE_TYPE_PARTIAL_LINE)
574 return TRACE_TYPE_PARTIAL_LINE;
575
576 /* Absolute time */
577 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
578 ret = print_graph_abs_time(iter->ts, s);
579 if (!ret)
580 return TRACE_TYPE_PARTIAL_LINE;
581 }
582
445 /* Cpu */ 583 /* Cpu */
446 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 584 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
447 ret = print_graph_cpu(s, cpu); 585 ret = print_graph_cpu(s, cpu);
@@ -460,8 +598,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
460 return TRACE_TYPE_PARTIAL_LINE; 598 return TRACE_TYPE_PARTIAL_LINE;
461 } 599 }
462 600
463 if (trace_branch_is_leaf(iter, field)) 601 leaf_ret = get_return_for_leaf(iter, field);
464 return print_graph_entry_leaf(iter, field, s); 602 if (leaf_ret)
603 return print_graph_entry_leaf(iter, field, leaf_ret, s);
465 else 604 else
466 return print_graph_entry_nested(field, s, iter->ent->pid, cpu); 605 return print_graph_entry_nested(field, s, iter->ent->pid, cpu);
467 606
@@ -469,16 +608,25 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
469 608
470static enum print_line_t 609static enum print_line_t
471print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, 610print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
472 struct trace_entry *ent, int cpu) 611 struct trace_entry *ent, struct trace_iterator *iter)
473{ 612{
474 int i; 613 int i;
475 int ret; 614 int ret;
615 int cpu = iter->cpu;
616 pid_t *last_pid = iter->private, pid = ent->pid;
476 unsigned long long duration = trace->rettime - trace->calltime; 617 unsigned long long duration = trace->rettime - trace->calltime;
477 618
478 /* Pid */ 619 /* Pid */
479 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) 620 if (verif_pid(s, pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
480 return TRACE_TYPE_PARTIAL_LINE; 621 return TRACE_TYPE_PARTIAL_LINE;
481 622
623 /* Absolute time */
624 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
625 ret = print_graph_abs_time(iter->ts, s);
626 if (!ret)
627 return TRACE_TYPE_PARTIAL_LINE;
628 }
629
482 /* Cpu */ 630 /* Cpu */
483 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 631 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
484 ret = print_graph_cpu(s, cpu); 632 ret = print_graph_cpu(s, cpu);
@@ -498,16 +646,16 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
498 } 646 }
499 647
500 /* Overhead */ 648 /* Overhead */
501 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 649 ret = print_graph_overhead(duration, s);
502 ret = print_graph_overhead(duration, s); 650 if (!ret)
503 if (!ret) 651 return TRACE_TYPE_PARTIAL_LINE;
504 return TRACE_TYPE_PARTIAL_LINE;
505 }
506 652
507 /* Duration */ 653 /* Duration */
508 ret = print_graph_duration(duration, s); 654 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
509 if (ret == TRACE_TYPE_PARTIAL_LINE) 655 ret = print_graph_duration(duration, s);
510 return TRACE_TYPE_PARTIAL_LINE; 656 if (ret == TRACE_TYPE_PARTIAL_LINE)
657 return TRACE_TYPE_PARTIAL_LINE;
658 }
511 659
512 /* Closing brace */ 660 /* Closing brace */
513 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { 661 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
@@ -528,7 +676,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
528 return TRACE_TYPE_PARTIAL_LINE; 676 return TRACE_TYPE_PARTIAL_LINE;
529 } 677 }
530 678
531 ret = print_graph_irq(s, trace->func, TRACE_GRAPH_RET, cpu, ent->pid); 679 ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, cpu, pid);
532 if (ret == TRACE_TYPE_PARTIAL_LINE) 680 if (ret == TRACE_TYPE_PARTIAL_LINE)
533 return TRACE_TYPE_PARTIAL_LINE; 681 return TRACE_TYPE_PARTIAL_LINE;
534 682
@@ -541,14 +689,23 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
541{ 689{
542 int i; 690 int i;
543 int ret; 691 int ret;
692 int cpu = iter->cpu;
693 pid_t *last_pid = iter->private;
544 694
545 /* Pid */ 695 /* Pid */
546 if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE) 696 if (verif_pid(s, ent->pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
547 return TRACE_TYPE_PARTIAL_LINE; 697 return TRACE_TYPE_PARTIAL_LINE;
548 698
699 /* Absolute time */
700 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
701 ret = print_graph_abs_time(iter->ts, s);
702 if (!ret)
703 return TRACE_TYPE_PARTIAL_LINE;
704 }
705
549 /* Cpu */ 706 /* Cpu */
550 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 707 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
551 ret = print_graph_cpu(s, iter->cpu); 708 ret = print_graph_cpu(s, cpu);
552 if (ret == TRACE_TYPE_PARTIAL_LINE) 709 if (ret == TRACE_TYPE_PARTIAL_LINE)
553 return TRACE_TYPE_PARTIAL_LINE; 710 return TRACE_TYPE_PARTIAL_LINE;
554 } 711 }
@@ -565,17 +722,17 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
565 } 722 }
566 723
567 /* No overhead */ 724 /* No overhead */
568 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 725 ret = print_graph_overhead(-1, s);
569 ret = trace_seq_printf(s, " "); 726 if (!ret)
727 return TRACE_TYPE_PARTIAL_LINE;
728
729 /* No time */
730 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
731 ret = trace_seq_printf(s, " | ");
570 if (!ret) 732 if (!ret)
571 return TRACE_TYPE_PARTIAL_LINE; 733 return TRACE_TYPE_PARTIAL_LINE;
572 } 734 }
573 735
574 /* No time */
575 ret = trace_seq_printf(s, " | ");
576 if (!ret)
577 return TRACE_TYPE_PARTIAL_LINE;
578
579 /* Indentation */ 736 /* Indentation */
580 if (trace->depth > 0) 737 if (trace->depth > 0)
581 for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) { 738 for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) {
@@ -589,8 +746,11 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
589 if (!ret) 746 if (!ret)
590 return TRACE_TYPE_PARTIAL_LINE; 747 return TRACE_TYPE_PARTIAL_LINE;
591 748
592 if (ent->flags & TRACE_FLAG_CONT) 749 /* Strip ending newline */
593 trace_seq_print_cont(s, iter); 750 if (s->buffer[s->len - 1] == '\n') {
751 s->buffer[s->len - 1] = '\0';
752 s->len--;
753 }
594 754
595 ret = trace_seq_printf(s, " */\n"); 755 ret = trace_seq_printf(s, " */\n");
596 if (!ret) 756 if (!ret)
@@ -610,13 +770,12 @@ print_graph_function(struct trace_iterator *iter)
610 case TRACE_GRAPH_ENT: { 770 case TRACE_GRAPH_ENT: {
611 struct ftrace_graph_ent_entry *field; 771 struct ftrace_graph_ent_entry *field;
612 trace_assign_type(field, entry); 772 trace_assign_type(field, entry);
613 return print_graph_entry(field, s, iter, 773 return print_graph_entry(field, s, iter);
614 iter->cpu);
615 } 774 }
616 case TRACE_GRAPH_RET: { 775 case TRACE_GRAPH_RET: {
617 struct ftrace_graph_ret_entry *field; 776 struct ftrace_graph_ret_entry *field;
618 trace_assign_type(field, entry); 777 trace_assign_type(field, entry);
619 return print_graph_return(&field->ret, s, entry, iter->cpu); 778 return print_graph_return(&field->ret, s, entry, iter);
620 } 779 }
621 case TRACE_PRINT: { 780 case TRACE_PRINT: {
622 struct print_entry *field; 781 struct print_entry *field;
@@ -632,33 +791,64 @@ static void print_graph_headers(struct seq_file *s)
632{ 791{
633 /* 1st line */ 792 /* 1st line */
634 seq_printf(s, "# "); 793 seq_printf(s, "# ");
794 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
795 seq_printf(s, " TIME ");
635 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 796 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
636 seq_printf(s, "CPU "); 797 seq_printf(s, "CPU");
637 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 798 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
638 seq_printf(s, "TASK/PID "); 799 seq_printf(s, " TASK/PID ");
639 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) 800 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
640 seq_printf(s, "OVERHEAD/"); 801 seq_printf(s, " DURATION ");
641 seq_printf(s, "DURATION FUNCTION CALLS\n"); 802 seq_printf(s, " FUNCTION CALLS\n");
642 803
643 /* 2nd line */ 804 /* 2nd line */
644 seq_printf(s, "# "); 805 seq_printf(s, "# ");
806 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
807 seq_printf(s, " | ");
645 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 808 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
646 seq_printf(s, "| "); 809 seq_printf(s, "| ");
647 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 810 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
648 seq_printf(s, "| | "); 811 seq_printf(s, " | | ");
649 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 812 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
650 seq_printf(s, "| "); 813 seq_printf(s, " | | ");
651 seq_printf(s, "| | | | |\n"); 814 seq_printf(s, " | | | |\n");
652 } else
653 seq_printf(s, " | | | | |\n");
654} 815}
816
817static void graph_trace_open(struct trace_iterator *iter)
818{
819 /* pid on the last trace processed */
820 pid_t *last_pid = alloc_percpu(pid_t);
821 int cpu;
822
823 if (!last_pid)
824 pr_warning("function graph tracer: not enough memory\n");
825 else
826 for_each_possible_cpu(cpu) {
827 pid_t *pid = per_cpu_ptr(last_pid, cpu);
828 *pid = -1;
829 }
830
831 iter->private = last_pid;
832}
833
834static void graph_trace_close(struct trace_iterator *iter)
835{
836 percpu_free(iter->private);
837}
838
655static struct tracer graph_trace __read_mostly = { 839static struct tracer graph_trace __read_mostly = {
656 .name = "function_graph", 840 .name = "function_graph",
841 .open = graph_trace_open,
842 .close = graph_trace_close,
843 .wait_pipe = poll_wait_pipe,
657 .init = graph_trace_init, 844 .init = graph_trace_init,
658 .reset = graph_trace_reset, 845 .reset = graph_trace_reset,
659 .print_line = print_graph_function, 846 .print_line = print_graph_function,
660 .print_header = print_graph_headers, 847 .print_header = print_graph_headers,
661 .flags = &tracer_flags, 848 .flags = &tracer_flags,
849#ifdef CONFIG_FTRACE_SELFTEST
850 .selftest = trace_selftest_startup_function_graph,
851#endif
662}; 852};
663 853
664static __init int init_graph_trace(void) 854static __init int init_graph_trace(void)
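Across these graph-tracer hunks the per-cpu "last pid seen" bookkeeping now lives in iter->private: graph_trace_open() allocates and initializes it, graph_trace_close() frees it, and verif_pid() takes it as a parameter. A minimal sketch of how a print handler reaches that storage, assuming the same alloc_percpu(pid_t) layout (the helper below is illustrative, not part of the patch):

	static pid_t *last_pid_for(struct trace_iterator *iter)
	{
		/* .open may have failed to allocate; callers must tolerate NULL */
		if (!iter->private)
			return NULL;
		return per_cpu_ptr((pid_t *)iter->private, iter->cpu);
	}

verif_pid() compares the current entry's pid against this per-cpu value, which is why every print path above now passes iter->private through.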
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 649df22d435f..7bfdf4c2347f 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -1,30 +1,53 @@
1/* 1/*
2 * h/w branch tracer for x86 based on bts 2 * h/w branch tracer for x86 based on bts
3 * 3 *
4 * Copyright (C) 2008 Markus Metzger <markus.t.metzger@gmail.com> 4 * Copyright (C) 2008-2009 Intel Corporation.
5 * 5 * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
6 */ 6 */
7 7#include <linux/spinlock.h>
8#include <linux/module.h> 8#include <linux/kallsyms.h>
9#include <linux/fs.h>
10#include <linux/debugfs.h> 9#include <linux/debugfs.h>
11#include <linux/ftrace.h> 10#include <linux/ftrace.h>
12#include <linux/kallsyms.h> 11#include <linux/module.h>
12#include <linux/cpu.h>
13#include <linux/smp.h>
14#include <linux/fs.h>
13 15
14#include <asm/ds.h> 16#include <asm/ds.h>
15 17
16#include "trace.h" 18#include "trace.h"
19#include "trace_output.h"
17 20
18 21
19#define SIZEOF_BTS (1 << 13) 22#define SIZEOF_BTS (1 << 13)
20 23
24/*
25 * The tracer lock protects the below per-cpu tracer array.
26 * It needs to be held to:
27 * - start tracing on all cpus
28 * - stop tracing on all cpus
29 * - start tracing on a single hotplug cpu
30 * - stop tracing on a single hotplug cpu
31 * - read the trace from all cpus
32 * - read the trace from a single cpu
33 */
34static DEFINE_SPINLOCK(bts_tracer_lock);
21static DEFINE_PER_CPU(struct bts_tracer *, tracer); 35static DEFINE_PER_CPU(struct bts_tracer *, tracer);
22static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); 36static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
23 37
24#define this_tracer per_cpu(tracer, smp_processor_id()) 38#define this_tracer per_cpu(tracer, smp_processor_id())
25#define this_buffer per_cpu(buffer, smp_processor_id()) 39#define this_buffer per_cpu(buffer, smp_processor_id())
26 40
41static int __read_mostly trace_hw_branches_enabled;
42static struct trace_array *hw_branch_trace __read_mostly;
43
27 44
45/*
46 * Start tracing on the current cpu.
47 * The argument is ignored.
48 *
49 * pre: bts_tracer_lock must be locked.
50 */
28static void bts_trace_start_cpu(void *arg) 51static void bts_trace_start_cpu(void *arg)
29{ 52{
30 if (this_tracer) 53 if (this_tracer)
@@ -42,14 +65,20 @@ static void bts_trace_start_cpu(void *arg)
42 65
43static void bts_trace_start(struct trace_array *tr) 66static void bts_trace_start(struct trace_array *tr)
44{ 67{
45 int cpu; 68 spin_lock(&bts_tracer_lock);
46 69
47 tracing_reset_online_cpus(tr); 70 on_each_cpu(bts_trace_start_cpu, NULL, 1);
71 trace_hw_branches_enabled = 1;
48 72
49 for_each_cpu(cpu, cpu_possible_mask) 73 spin_unlock(&bts_tracer_lock);
50 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
51} 74}
52 75
76/*
77 * Stop tracing on the current cpu.
78 * The argument is ignored.
79 *
80 * pre: bts_tracer_lock must be locked.
81 */
53static void bts_trace_stop_cpu(void *arg) 82static void bts_trace_stop_cpu(void *arg)
54{ 83{
55 if (this_tracer) { 84 if (this_tracer) {
@@ -60,26 +89,60 @@ static void bts_trace_stop_cpu(void *arg)
60 89
61static void bts_trace_stop(struct trace_array *tr) 90static void bts_trace_stop(struct trace_array *tr)
62{ 91{
63 int cpu; 92 spin_lock(&bts_tracer_lock);
93
94 trace_hw_branches_enabled = 0;
95 on_each_cpu(bts_trace_stop_cpu, NULL, 1);
96
97 spin_unlock(&bts_tracer_lock);
98}
99
100static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
101 unsigned long action, void *hcpu)
102{
103 unsigned int cpu = (unsigned long)hcpu;
64 104
65 for_each_cpu(cpu, cpu_possible_mask) 105 spin_lock(&bts_tracer_lock);
106
107 if (!trace_hw_branches_enabled)
108 goto out;
109
110 switch (action) {
111 case CPU_ONLINE:
112 case CPU_DOWN_FAILED:
113 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
114 break;
115 case CPU_DOWN_PREPARE:
66 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); 116 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
117 break;
118 }
119
120 out:
121 spin_unlock(&bts_tracer_lock);
122 return NOTIFY_DONE;
67} 123}
68 124
125static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
126 .notifier_call = bts_hotcpu_handler
127};
128
69static int bts_trace_init(struct trace_array *tr) 129static int bts_trace_init(struct trace_array *tr)
70{ 130{
71 tracing_reset_online_cpus(tr); 131 hw_branch_trace = tr;
132
72 bts_trace_start(tr); 133 bts_trace_start(tr);
73 134
74 return 0; 135 return 0;
75} 136}
76 137
138static void bts_trace_reset(struct trace_array *tr)
139{
140 bts_trace_stop(tr);
141}
142
77static void bts_trace_print_header(struct seq_file *m) 143static void bts_trace_print_header(struct seq_file *m)
78{ 144{
79 seq_puts(m, 145 seq_puts(m, "# CPU# TO <- FROM\n");
80 "# CPU# FROM TO FUNCTION\n");
81 seq_puts(m,
82 "# | | | |\n");
83} 146}
84 147
85static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) 148static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
@@ -87,15 +150,15 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
87 struct trace_entry *entry = iter->ent; 150 struct trace_entry *entry = iter->ent;
88 struct trace_seq *seq = &iter->seq; 151 struct trace_seq *seq = &iter->seq;
89 struct hw_branch_entry *it; 152 struct hw_branch_entry *it;
153 unsigned long symflags = TRACE_ITER_SYM_OFFSET;
90 154
91 trace_assign_type(it, entry); 155 trace_assign_type(it, entry);
92 156
93 if (entry->type == TRACE_HW_BRANCHES) { 157 if (entry->type == TRACE_HW_BRANCHES) {
94 if (trace_seq_printf(seq, "%4d ", entry->cpu) && 158 if (trace_seq_printf(seq, "%4d ", iter->cpu) &&
95 trace_seq_printf(seq, "0x%016llx -> 0x%016llx ", 159 seq_print_ip_sym(seq, it->to, symflags) &&
96 it->from, it->to) && 160 trace_seq_printf(seq, "\t <- ") &&
97 (!it->from || 161 seq_print_ip_sym(seq, it->from, symflags) &&
98 seq_print_ip_sym(seq, it->from, /* sym_flags = */ 0)) &&
99 trace_seq_printf(seq, "\n")) 162 trace_seq_printf(seq, "\n"))
100 return TRACE_TYPE_HANDLED; 163 return TRACE_TYPE_HANDLED;
 101 return TRACE_TYPE_PARTIAL_LINE; 164 return TRACE_TYPE_PARTIAL_LINE;
@@ -103,26 +166,42 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
103 return TRACE_TYPE_UNHANDLED; 166 return TRACE_TYPE_UNHANDLED;
104} 167}
105 168
106void trace_hw_branch(struct trace_array *tr, u64 from, u64 to) 169void trace_hw_branch(u64 from, u64 to)
107{ 170{
171 struct trace_array *tr = hw_branch_trace;
108 struct ring_buffer_event *event; 172 struct ring_buffer_event *event;
109 struct hw_branch_entry *entry; 173 struct hw_branch_entry *entry;
110 unsigned long irq; 174 unsigned long irq1;
175 int cpu;
111 176
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq); 177 if (unlikely(!tr))
113 if (!event)
114 return; 178 return;
179
180 if (unlikely(!trace_hw_branches_enabled))
181 return;
182
183 local_irq_save(irq1);
184 cpu = raw_smp_processor_id();
185 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
186 goto out;
187
188 event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES,
189 sizeof(*entry), 0, 0);
190 if (!event)
191 goto out;
115 entry = ring_buffer_event_data(event); 192 entry = ring_buffer_event_data(event);
116 tracing_generic_entry_update(&entry->ent, 0, from); 193 tracing_generic_entry_update(&entry->ent, 0, from);
117 entry->ent.type = TRACE_HW_BRANCHES; 194 entry->ent.type = TRACE_HW_BRANCHES;
118 entry->ent.cpu = smp_processor_id();
119 entry->from = from; 195 entry->from = from;
120 entry->to = to; 196 entry->to = to;
121 ring_buffer_unlock_commit(tr->buffer, event, irq); 197 trace_buffer_unlock_commit(tr, event, 0, 0);
198
199 out:
200 atomic_dec(&tr->data[cpu]->disabled);
201 local_irq_restore(irq1);
122} 202}
123 203
124static void trace_bts_at(struct trace_array *tr, 204static void trace_bts_at(const struct bts_trace *trace, void *at)
125 const struct bts_trace *trace, void *at)
126{ 205{
127 struct bts_struct bts; 206 struct bts_struct bts;
128 int err = 0; 207 int err = 0;
@@ -137,18 +216,29 @@ static void trace_bts_at(struct trace_array *tr,
137 216
138 switch (bts.qualifier) { 217 switch (bts.qualifier) {
139 case BTS_BRANCH: 218 case BTS_BRANCH:
140 trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to); 219 trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to);
141 break; 220 break;
142 } 221 }
143} 222}
144 223
224/*
225 * Collect the trace on the current cpu and write it into the ftrace buffer.
226 *
227 * pre: bts_tracer_lock must be locked
228 */
145static void trace_bts_cpu(void *arg) 229static void trace_bts_cpu(void *arg)
146{ 230{
147 struct trace_array *tr = (struct trace_array *) arg; 231 struct trace_array *tr = (struct trace_array *) arg;
148 const struct bts_trace *trace; 232 const struct bts_trace *trace;
149 unsigned char *at; 233 unsigned char *at;
150 234
151 if (!this_tracer) 235 if (unlikely(!tr))
236 return;
237
238 if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled)))
239 return;
240
241 if (unlikely(!this_tracer))
152 return; 242 return;
153 243
154 ds_suspend_bts(this_tracer); 244 ds_suspend_bts(this_tracer);
@@ -158,11 +248,11 @@ static void trace_bts_cpu(void *arg)
158 248
159 for (at = trace->ds.top; (void *)at < trace->ds.end; 249 for (at = trace->ds.top; (void *)at < trace->ds.end;
160 at += trace->ds.size) 250 at += trace->ds.size)
161 trace_bts_at(tr, trace, at); 251 trace_bts_at(trace, at);
162 252
163 for (at = trace->ds.begin; (void *)at < trace->ds.top; 253 for (at = trace->ds.begin; (void *)at < trace->ds.top;
164 at += trace->ds.size) 254 at += trace->ds.size)
165 trace_bts_at(tr, trace, at); 255 trace_bts_at(trace, at);
166 256
167out: 257out:
168 ds_resume_bts(this_tracer); 258 ds_resume_bts(this_tracer);
@@ -170,26 +260,43 @@ out:
170 260
171static void trace_bts_prepare(struct trace_iterator *iter) 261static void trace_bts_prepare(struct trace_iterator *iter)
172{ 262{
173 int cpu; 263 spin_lock(&bts_tracer_lock);
264
265 on_each_cpu(trace_bts_cpu, iter->tr, 1);
266
267 spin_unlock(&bts_tracer_lock);
268}
269
270static void trace_bts_close(struct trace_iterator *iter)
271{
272 tracing_reset_online_cpus(iter->tr);
273}
274
275void trace_hw_branch_oops(void)
276{
277 spin_lock(&bts_tracer_lock);
278
279 trace_bts_cpu(hw_branch_trace);
174 280
175 for_each_cpu(cpu, cpu_possible_mask) 281 spin_unlock(&bts_tracer_lock);
176 smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
177} 282}
178 283
179struct tracer bts_tracer __read_mostly = 284struct tracer bts_tracer __read_mostly =
180{ 285{
181 .name = "hw-branch-tracer", 286 .name = "hw-branch-tracer",
182 .init = bts_trace_init, 287 .init = bts_trace_init,
183 .reset = bts_trace_stop, 288 .reset = bts_trace_reset,
184 .print_header = bts_trace_print_header, 289 .print_header = bts_trace_print_header,
185 .print_line = bts_trace_print_line, 290 .print_line = bts_trace_print_line,
186 .start = bts_trace_start, 291 .start = bts_trace_start,
187 .stop = bts_trace_stop, 292 .stop = bts_trace_stop,
188 .open = trace_bts_prepare 293 .open = trace_bts_prepare,
294 .close = trace_bts_close
189}; 295};
190 296
191__init static int init_bts_trace(void) 297__init static int init_bts_trace(void)
192{ 298{
299 register_hotcpu_notifier(&bts_hotcpu_notifier);
193 return register_tracer(&bts_tracer); 300 return register_tracer(&bts_tracer);
194} 301}
195device_initcall(init_bts_trace); 302device_initcall(init_bts_trace);
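The bts_tracer_lock comment above states a contract rather than a mechanism: every operation that starts, stops or collects the trace across CPUs takes the lock around the IPI. A minimal sketch of that shape (the helper name is illustrative, not part of the patch):

	/* hypothetical helper; shows the required locking shape only */
	static void bts_run_on_all_cpus(void (*fn)(void *))
	{
		spin_lock(&bts_tracer_lock);
		on_each_cpu(fn, NULL, 1);	/* e.g. bts_trace_start_cpu */
		spin_unlock(&bts_tracer_lock);
	}

bts_trace_start(), bts_trace_stop() and trace_bts_prepare() above are each an instance of this pattern; the hotplug notifier does the single-CPU variant with smp_call_function_single() under the same lock.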
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 62a78d943534..9e5ebd844158 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * trace irqs off criticall timings 2 * trace irqs off critical timings
3 * 3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> 4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> 5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
@@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
95 disabled = atomic_inc_return(&data->disabled); 95 disabled = atomic_inc_return(&data->disabled);
96 96
97 if (likely(disabled == 1)) 97 if (likely(disabled == 1))
98 trace_function(tr, data, ip, parent_ip, flags, preempt_count()); 98 trace_function(tr, ip, parent_ip, flags, preempt_count());
99 99
100 atomic_dec(&data->disabled); 100 atomic_dec(&data->disabled);
101} 101}
@@ -153,7 +153,7 @@ check_critical_timing(struct trace_array *tr,
153 if (!report_latency(delta)) 153 if (!report_latency(delta))
154 goto out_unlock; 154 goto out_unlock;
155 155
156 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); 156 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
157 157
158 latency = nsecs_to_usecs(delta); 158 latency = nsecs_to_usecs(delta);
159 159
@@ -177,7 +177,7 @@ out:
177 data->critical_sequence = max_sequence; 177 data->critical_sequence = max_sequence;
178 data->preempt_timestamp = ftrace_now(cpu); 178 data->preempt_timestamp = ftrace_now(cpu);
179 tracing_reset(tr, cpu); 179 tracing_reset(tr, cpu);
180 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); 180 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
181} 181}
182 182
183static inline void 183static inline void
@@ -210,7 +210,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
210 210
211 local_save_flags(flags); 211 local_save_flags(flags);
212 212
213 trace_function(tr, data, ip, parent_ip, flags, preempt_count()); 213 trace_function(tr, ip, parent_ip, flags, preempt_count());
214 214
215 per_cpu(tracing_cpu, cpu) = 1; 215 per_cpu(tracing_cpu, cpu) = 1;
216 216
@@ -244,7 +244,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
244 atomic_inc(&data->disabled); 244 atomic_inc(&data->disabled);
245 245
246 local_save_flags(flags); 246 local_save_flags(flags);
247 trace_function(tr, data, ip, parent_ip, flags, preempt_count()); 247 trace_function(tr, ip, parent_ip, flags, preempt_count());
248 check_critical_timing(tr, data, parent_ip ? : ip, cpu); 248 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
249 data->critical_start = 0; 249 data->critical_start = 0;
250 atomic_dec(&data->disabled); 250 atomic_dec(&data->disabled);
@@ -353,28 +353,18 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
353} 353}
354#endif /* CONFIG_PREEMPT_TRACER */ 354#endif /* CONFIG_PREEMPT_TRACER */
355 355
356/*
357 * save_tracer_enabled is used to save the state of the tracer_enabled
358 * variable when we disable it when we open a trace output file.
359 */
360static int save_tracer_enabled;
361
362static void start_irqsoff_tracer(struct trace_array *tr) 356static void start_irqsoff_tracer(struct trace_array *tr)
363{ 357{
364 register_ftrace_function(&trace_ops); 358 register_ftrace_function(&trace_ops);
365 if (tracing_is_enabled()) { 359 if (tracing_is_enabled())
366 tracer_enabled = 1; 360 tracer_enabled = 1;
367 save_tracer_enabled = 1; 361 else
368 } else {
369 tracer_enabled = 0; 362 tracer_enabled = 0;
370 save_tracer_enabled = 0;
371 }
372} 363}
373 364
374static void stop_irqsoff_tracer(struct trace_array *tr) 365static void stop_irqsoff_tracer(struct trace_array *tr)
375{ 366{
376 tracer_enabled = 0; 367 tracer_enabled = 0;
377 save_tracer_enabled = 0;
378 unregister_ftrace_function(&trace_ops); 368 unregister_ftrace_function(&trace_ops);
379} 369}
380 370
@@ -395,25 +385,11 @@ static void irqsoff_tracer_reset(struct trace_array *tr)
395static void irqsoff_tracer_start(struct trace_array *tr) 385static void irqsoff_tracer_start(struct trace_array *tr)
396{ 386{
397 tracer_enabled = 1; 387 tracer_enabled = 1;
398 save_tracer_enabled = 1;
399} 388}
400 389
401static void irqsoff_tracer_stop(struct trace_array *tr) 390static void irqsoff_tracer_stop(struct trace_array *tr)
402{ 391{
403 tracer_enabled = 0; 392 tracer_enabled = 0;
404 save_tracer_enabled = 0;
405}
406
407static void irqsoff_tracer_open(struct trace_iterator *iter)
408{
409 /* stop the trace while dumping */
410 tracer_enabled = 0;
411}
412
413static void irqsoff_tracer_close(struct trace_iterator *iter)
414{
415 /* restart tracing */
416 tracer_enabled = save_tracer_enabled;
417} 393}
418 394
419#ifdef CONFIG_IRQSOFF_TRACER 395#ifdef CONFIG_IRQSOFF_TRACER
@@ -431,8 +407,6 @@ static struct tracer irqsoff_tracer __read_mostly =
431 .reset = irqsoff_tracer_reset, 407 .reset = irqsoff_tracer_reset,
432 .start = irqsoff_tracer_start, 408 .start = irqsoff_tracer_start,
433 .stop = irqsoff_tracer_stop, 409 .stop = irqsoff_tracer_stop,
434 .open = irqsoff_tracer_open,
435 .close = irqsoff_tracer_close,
436 .print_max = 1, 410 .print_max = 1,
437#ifdef CONFIG_FTRACE_SELFTEST 411#ifdef CONFIG_FTRACE_SELFTEST
438 .selftest = trace_selftest_startup_irqsoff, 412 .selftest = trace_selftest_startup_irqsoff,
@@ -459,8 +433,6 @@ static struct tracer preemptoff_tracer __read_mostly =
459 .reset = irqsoff_tracer_reset, 433 .reset = irqsoff_tracer_reset,
460 .start = irqsoff_tracer_start, 434 .start = irqsoff_tracer_start,
461 .stop = irqsoff_tracer_stop, 435 .stop = irqsoff_tracer_stop,
462 .open = irqsoff_tracer_open,
463 .close = irqsoff_tracer_close,
464 .print_max = 1, 436 .print_max = 1,
465#ifdef CONFIG_FTRACE_SELFTEST 437#ifdef CONFIG_FTRACE_SELFTEST
466 .selftest = trace_selftest_startup_preemptoff, 438 .selftest = trace_selftest_startup_preemptoff,
@@ -489,8 +461,6 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
489 .reset = irqsoff_tracer_reset, 461 .reset = irqsoff_tracer_reset,
490 .start = irqsoff_tracer_start, 462 .start = irqsoff_tracer_start,
491 .stop = irqsoff_tracer_stop, 463 .stop = irqsoff_tracer_stop,
492 .open = irqsoff_tracer_open,
493 .close = irqsoff_tracer_close,
494 .print_max = 1, 464 .print_max = 1,
495#ifdef CONFIG_FTRACE_SELFTEST 465#ifdef CONFIG_FTRACE_SELFTEST
496 .selftest = trace_selftest_startup_preemptirqsoff, 466 .selftest = trace_selftest_startup_preemptirqsoff,
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 80e503ef6136..c401b908e805 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -12,6 +12,7 @@
12#include <asm/atomic.h> 12#include <asm/atomic.h>
13 13
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h"
15 16
16struct header_iter { 17struct header_iter {
17 struct pci_dev *dev; 18 struct pci_dev *dev;
@@ -183,21 +184,22 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
183 switch (rw->opcode) { 184 switch (rw->opcode) {
184 case MMIO_READ: 185 case MMIO_READ:
185 ret = trace_seq_printf(s, 186 ret = trace_seq_printf(s,
186 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 187 "R %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
187 rw->width, secs, usec_rem, rw->map_id, 188 rw->width, secs, usec_rem, rw->map_id,
188 (unsigned long long)rw->phys, 189 (unsigned long long)rw->phys,
189 rw->value, rw->pc, 0); 190 rw->value, rw->pc, 0);
190 break; 191 break;
191 case MMIO_WRITE: 192 case MMIO_WRITE:
192 ret = trace_seq_printf(s, 193 ret = trace_seq_printf(s,
193 "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 194 "W %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
194 rw->width, secs, usec_rem, rw->map_id, 195 rw->width, secs, usec_rem, rw->map_id,
195 (unsigned long long)rw->phys, 196 (unsigned long long)rw->phys,
196 rw->value, rw->pc, 0); 197 rw->value, rw->pc, 0);
197 break; 198 break;
198 case MMIO_UNKNOWN_OP: 199 case MMIO_UNKNOWN_OP:
199 ret = trace_seq_printf(s, 200 ret = trace_seq_printf(s,
200 "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n", 201 "UNKNOWN %u.%06lu %d 0x%llx %02lx,%02lx,"
202 "%02lx 0x%lx %d\n",
201 secs, usec_rem, rw->map_id, 203 secs, usec_rem, rw->map_id,
202 (unsigned long long)rw->phys, 204 (unsigned long long)rw->phys,
203 (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, 205 (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff,
@@ -229,14 +231,14 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
229 switch (m->opcode) { 231 switch (m->opcode) {
230 case MMIO_PROBE: 232 case MMIO_PROBE:
231 ret = trace_seq_printf(s, 233 ret = trace_seq_printf(s,
232 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", 234 "MAP %u.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
233 secs, usec_rem, m->map_id, 235 secs, usec_rem, m->map_id,
234 (unsigned long long)m->phys, m->virt, m->len, 236 (unsigned long long)m->phys, m->virt, m->len,
235 0UL, 0); 237 0UL, 0);
236 break; 238 break;
237 case MMIO_UNPROBE: 239 case MMIO_UNPROBE:
238 ret = trace_seq_printf(s, 240 ret = trace_seq_printf(s,
239 "UNMAP %lu.%06lu %d 0x%lx %d\n", 241 "UNMAP %u.%06lu %d 0x%lx %d\n",
240 secs, usec_rem, m->map_id, 0UL, 0); 242 secs, usec_rem, m->map_id, 0UL, 0);
241 break; 243 break;
242 default: 244 default:
@@ -260,13 +262,10 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
260 int ret; 262 int ret;
261 263
262 /* The trailing newline must be in the message. */ 264 /* The trailing newline must be in the message. */
263 ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg); 265 ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg);
264 if (!ret) 266 if (!ret)
265 return TRACE_TYPE_PARTIAL_LINE; 267 return TRACE_TYPE_PARTIAL_LINE;
266 268
267 if (entry->flags & TRACE_FLAG_CONT)
268 trace_seq_print_cont(s, iter);
269
270 return TRACE_TYPE_HANDLED; 269 return TRACE_TYPE_HANDLED;
271} 270}
272 271
@@ -308,21 +307,17 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
308{ 307{
309 struct ring_buffer_event *event; 308 struct ring_buffer_event *event;
310 struct trace_mmiotrace_rw *entry; 309 struct trace_mmiotrace_rw *entry;
311 unsigned long irq_flags; 310 int pc = preempt_count();
312 311
313 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 312 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW,
314 &irq_flags); 313 sizeof(*entry), 0, pc);
315 if (!event) { 314 if (!event) {
316 atomic_inc(&dropped_count); 315 atomic_inc(&dropped_count);
317 return; 316 return;
318 } 317 }
319 entry = ring_buffer_event_data(event); 318 entry = ring_buffer_event_data(event);
320 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
321 entry->ent.type = TRACE_MMIO_RW;
322 entry->rw = *rw; 319 entry->rw = *rw;
323 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 320 trace_buffer_unlock_commit(tr, event, 0, pc);
324
325 trace_wake_up();
326} 321}
327 322
328void mmio_trace_rw(struct mmiotrace_rw *rw) 323void mmio_trace_rw(struct mmiotrace_rw *rw)
@@ -338,21 +333,17 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
338{ 333{
339 struct ring_buffer_event *event; 334 struct ring_buffer_event *event;
340 struct trace_mmiotrace_map *entry; 335 struct trace_mmiotrace_map *entry;
341 unsigned long irq_flags; 336 int pc = preempt_count();
342 337
343 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 338 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP,
344 &irq_flags); 339 sizeof(*entry), 0, pc);
345 if (!event) { 340 if (!event) {
346 atomic_inc(&dropped_count); 341 atomic_inc(&dropped_count);
347 return; 342 return;
348 } 343 }
349 entry = ring_buffer_event_data(event); 344 entry = ring_buffer_event_data(event);
350 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
351 entry->ent.type = TRACE_MMIO_MAP;
352 entry->map = *map; 345 entry->map = *map;
353 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 346 trace_buffer_unlock_commit(tr, event, 0, pc);
354
355 trace_wake_up();
356} 347}
357 348
358void mmio_trace_mapping(struct mmiotrace_map *map) 349void mmio_trace_mapping(struct mmiotrace_map *map)
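Both mmiotrace write paths (and trace_hw_branch() earlier) now go through the trace_buffer_lock_reserve()/trace_buffer_unlock_commit() pair; judging by what the call sites drop, the helpers fill the generic entry header (type, flags, preempt count) and wake any waiting readers, so the open-coded tracing_generic_entry_update() and trace_wake_up() calls disappear. A minimal sketch of the resulting idiom, with a hypothetical entry type standing in for TRACE_MMIO_RW/TRACE_MMIO_MAP:

	/* TRACE_FOO and struct foo_entry are illustrative, not from the patch */
	static void trace_foo(struct trace_array *tr, unsigned long val)
	{
		struct ring_buffer_event *event;
		struct foo_entry *entry;
		int pc = preempt_count();

		event = trace_buffer_lock_reserve(tr, TRACE_FOO,
						  sizeof(*entry), 0, pc);
		if (!event)
			return;			/* buffer full or tracing disabled */
		entry = ring_buffer_event_data(event);
		entry->val = val;		/* payload only; header already filled */
		trace_buffer_unlock_commit(tr, event, 0, pc);
	}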
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index b9767acd30ac..9aa84bde23cd 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -47,12 +47,7 @@ static void stop_nop_trace(struct trace_array *tr)
47 47
48static int nop_trace_init(struct trace_array *tr) 48static int nop_trace_init(struct trace_array *tr)
49{ 49{
50 int cpu;
51 ctx_trace = tr; 50 ctx_trace = tr;
52
53 for_each_online_cpu(cpu)
54 tracing_reset(tr, cpu);
55
56 start_nop_trace(tr); 51 start_nop_trace(tr);
57 return 0; 52 return 0;
58} 53}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
new file mode 100644
index 000000000000..9fc815031b09
--- /dev/null
+++ b/kernel/trace/trace_output.c
@@ -0,0 +1,919 @@
1/*
2 * trace_output.c
3 *
4 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/mutex.h>
10#include <linux/ftrace.h>
11
12#include "trace_output.h"
13
14/* must be a power of 2 */
15#define EVENT_HASHSIZE 128
16
17static DEFINE_MUTEX(trace_event_mutex);
18static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
19
20static int next_event_type = __TRACE_LAST_TYPE + 1;
21
22/**
23 * trace_seq_printf - sequence printing of trace information
24 * @s: trace sequence descriptor
25 * @fmt: printf format string
26 *
27 * The tracer may use either sequence operations or its own
 28 * copy to user routines. To simplify formatting of a trace,
29 * trace_seq_printf is used to store strings into a special
30 * buffer (@s). Then the output may be either used by
31 * the sequencer or pulled into another buffer.
32 */
33int
34trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
35{
36 int len = (PAGE_SIZE - 1) - s->len;
37 va_list ap;
38 int ret;
39
40 if (!len)
41 return 0;
42
43 va_start(ap, fmt);
44 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
45 va_end(ap);
46
47 /* If we can't write it all, don't bother writing anything */
48 if (ret >= len)
49 return 0;
50
51 s->len += ret;
52
53 return len;
54}
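/*
 * Hedged usage sketch, not part of this file: a tracer's print_line
 * handler builds a line with trace_seq_printf() and relies on the zero
 * return above to report an overflowing (partial) line.
 *
 *	static enum print_line_t my_print_line(struct trace_iterator *iter)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		if (!trace_seq_printf(s, "[%03d] something happened\n", iter->cpu))
 *			return TRACE_TYPE_PARTIAL_LINE;
 *		return TRACE_TYPE_HANDLED;
 *	}
 */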
55
56/**
57 * trace_seq_puts - trace sequence printing of simple string
58 * @s: trace sequence descriptor
59 * @str: simple string to record
60 *
61 * The tracer may use either the sequence operations or its own
62 * copy to user routines. This function records a simple string
63 * into a special buffer (@s) for later retrieval by a sequencer
64 * or other mechanism.
65 */
66int trace_seq_puts(struct trace_seq *s, const char *str)
67{
68 int len = strlen(str);
69
70 if (len > ((PAGE_SIZE - 1) - s->len))
71 return 0;
72
73 memcpy(s->buffer + s->len, str, len);
74 s->len += len;
75
76 return len;
77}
78
79int trace_seq_putc(struct trace_seq *s, unsigned char c)
80{
81 if (s->len >= (PAGE_SIZE - 1))
82 return 0;
83
84 s->buffer[s->len++] = c;
85
86 return 1;
87}
88
89int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
90{
91 if (len > ((PAGE_SIZE - 1) - s->len))
92 return 0;
93
94 memcpy(s->buffer + s->len, mem, len);
95 s->len += len;
96
97 return len;
98}
99
100int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
101{
102 unsigned char hex[HEX_CHARS];
103 unsigned char *data = mem;
104 int i, j;
105
106#ifdef __BIG_ENDIAN
107 for (i = 0, j = 0; i < len; i++) {
108#else
109 for (i = len-1, j = 0; i >= 0; i--) {
110#endif
111 hex[j++] = hex_asc_hi(data[i]);
112 hex[j++] = hex_asc_lo(data[i]);
113 }
114 hex[j++] = ' ';
115
116 return trace_seq_putmem(s, hex, j);
117}
118
119int trace_seq_path(struct trace_seq *s, struct path *path)
120{
121 unsigned char *p;
122
123 if (s->len >= (PAGE_SIZE - 1))
124 return 0;
125 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
126 if (!IS_ERR(p)) {
127 p = mangle_path(s->buffer + s->len, p, "\n");
128 if (p) {
129 s->len = p - s->buffer;
130 return 1;
131 }
132 } else {
133 s->buffer[s->len++] = '?';
134 return 1;
135 }
136
137 return 0;
138}
139
140#ifdef CONFIG_KRETPROBES
141static inline const char *kretprobed(const char *name)
142{
143 static const char tramp_name[] = "kretprobe_trampoline";
144 int size = sizeof(tramp_name);
145
146 if (strncmp(tramp_name, name, size) == 0)
147 return "[unknown/kretprobe'd]";
148 return name;
149}
150#else
151static inline const char *kretprobed(const char *name)
152{
153 return name;
154}
155#endif /* CONFIG_KRETPROBES */
156
157static int
158seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
159{
160#ifdef CONFIG_KALLSYMS
161 char str[KSYM_SYMBOL_LEN];
162 const char *name;
163
164 kallsyms_lookup(address, NULL, NULL, NULL, str);
165
166 name = kretprobed(str);
167
168 return trace_seq_printf(s, fmt, name);
169#endif
170 return 1;
171}
172
173static int
174seq_print_sym_offset(struct trace_seq *s, const char *fmt,
175 unsigned long address)
176{
177#ifdef CONFIG_KALLSYMS
178 char str[KSYM_SYMBOL_LEN];
179 const char *name;
180
181 sprint_symbol(str, address);
182 name = kretprobed(str);
183
184 return trace_seq_printf(s, fmt, name);
185#endif
186 return 1;
187}
188
189#ifndef CONFIG_64BIT
190# define IP_FMT "%08lx"
191#else
192# define IP_FMT "%016lx"
193#endif
194
195int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
196 unsigned long ip, unsigned long sym_flags)
197{
198 struct file *file = NULL;
199 unsigned long vmstart = 0;
200 int ret = 1;
201
202 if (mm) {
203 const struct vm_area_struct *vma;
204
205 down_read(&mm->mmap_sem);
206 vma = find_vma(mm, ip);
207 if (vma) {
208 file = vma->vm_file;
209 vmstart = vma->vm_start;
210 }
211 if (file) {
212 ret = trace_seq_path(s, &file->f_path);
213 if (ret)
214 ret = trace_seq_printf(s, "[+0x%lx]",
215 ip - vmstart);
216 }
217 up_read(&mm->mmap_sem);
218 }
219 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
220 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
221 return ret;
222}
223
224int
225seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
226 unsigned long sym_flags)
227{
228 struct mm_struct *mm = NULL;
229 int ret = 1;
230 unsigned int i;
231
232 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
233 struct task_struct *task;
234 /*
235 * we do the lookup on the thread group leader,
236 * since individual threads might have already quit!
237 */
238 rcu_read_lock();
239 task = find_task_by_vpid(entry->ent.tgid);
240 if (task)
241 mm = get_task_mm(task);
242 rcu_read_unlock();
243 }
244
245 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
246 unsigned long ip = entry->caller[i];
247
248 if (ip == ULONG_MAX || !ret)
249 break;
250 if (i && ret)
251 ret = trace_seq_puts(s, " <- ");
252 if (!ip) {
253 if (ret)
254 ret = trace_seq_puts(s, "??");
255 continue;
256 }
257 if (!ret)
258 break;
259 if (ret)
260 ret = seq_print_user_ip(s, mm, ip, sym_flags);
261 }
262
263 if (mm)
264 mmput(mm);
265 return ret;
266}
267
268int
269seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
270{
271 int ret;
272
273 if (!ip)
274 return trace_seq_printf(s, "0");
275
276 if (sym_flags & TRACE_ITER_SYM_OFFSET)
277 ret = seq_print_sym_offset(s, "%s", ip);
278 else
279 ret = seq_print_sym_short(s, "%s", ip);
280
281 if (!ret)
282 return 0;
283
284 if (sym_flags & TRACE_ITER_SYM_ADDR)
285 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
286 return ret;
287}
288
289static int
290lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
291{
292 int hardirq, softirq;
293 char *comm;
294
295 comm = trace_find_cmdline(entry->pid);
296 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
297 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
298
299 if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c",
300 comm, entry->pid, cpu,
301 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
302 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
303 'X' : '.',
304 (entry->flags & TRACE_FLAG_NEED_RESCHED) ?
305 'N' : '.',
306 (hardirq && softirq) ? 'H' :
307 hardirq ? 'h' : softirq ? 's' : '.'))
308 return 0;
309
310 if (entry->preempt_count)
311 return trace_seq_printf(s, "%x", entry->preempt_count);
312 return trace_seq_puts(s, ".");
313}
314
315static unsigned long preempt_mark_thresh = 100;
316
317static int
318lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
319 unsigned long rel_usecs)
320{
321 return trace_seq_printf(s, " %4lldus%c: ", abs_usecs,
322 rel_usecs > preempt_mark_thresh ? '!' :
323 rel_usecs > 1 ? '+' : ' ');
324}
325
326int trace_print_context(struct trace_iterator *iter)
327{
328 struct trace_seq *s = &iter->seq;
329 struct trace_entry *entry = iter->ent;
330 char *comm = trace_find_cmdline(entry->pid);
331 unsigned long long t = ns2usecs(iter->ts);
332 unsigned long usec_rem = do_div(t, USEC_PER_SEC);
333 unsigned long secs = (unsigned long)t;
334
335 return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ",
336 comm, entry->pid, iter->cpu, secs, usec_rem);
337}
338
339int trace_print_lat_context(struct trace_iterator *iter)
340{
341 u64 next_ts;
342 int ret;
343 struct trace_seq *s = &iter->seq;
344 struct trace_entry *entry = iter->ent,
345 *next_entry = trace_find_next_entry(iter, NULL,
346 &next_ts);
347 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
348 unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
349 unsigned long rel_usecs;
350
351 if (!next_entry)
352 next_ts = iter->ts;
353 rel_usecs = ns2usecs(next_ts - iter->ts);
354
355 if (verbose) {
356 char *comm = trace_find_cmdline(entry->pid);
357 ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
358 " %ld.%03ldms (+%ld.%03ldms): ", comm,
359 entry->pid, iter->cpu, entry->flags,
360 entry->preempt_count, iter->idx,
361 ns2usecs(iter->ts),
362 abs_usecs / USEC_PER_MSEC,
363 abs_usecs % USEC_PER_MSEC,
364 rel_usecs / USEC_PER_MSEC,
365 rel_usecs % USEC_PER_MSEC);
366 } else {
367 ret = lat_print_generic(s, entry, iter->cpu);
368 if (ret)
369 ret = lat_print_timestamp(s, abs_usecs, rel_usecs);
370 }
371
372 return ret;
373}
374
375static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
376
377static int task_state_char(unsigned long state)
378{
379 int bit = state ? __ffs(state) + 1 : 0;
380
381 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
382}
383
384/**
385 * ftrace_find_event - find a registered event
386 * @type: the type of event to look for
387 *
388 * Returns an event of type @type otherwise NULL
389 */
390struct trace_event *ftrace_find_event(int type)
391{
392 struct trace_event *event;
393 struct hlist_node *n;
394 unsigned key;
395
396 key = type & (EVENT_HASHSIZE - 1);
397
398 hlist_for_each_entry_rcu(event, n, &event_hash[key], node) {
399 if (event->type == type)
400 return event;
401 }
402
403 return NULL;
404}
405
406/**
407 * register_ftrace_event - register output for an event type
408 * @event: the event type to register
409 *
410 * Event types are stored in a hash and this hash is used to
411 * find a way to print an event. If the @event->type is set
412 * then it will use that type, otherwise it will assign a
413 * type to use.
414 *
415 * If you assign your own type, please make sure it is added
416 * to the trace_type enum in trace.h, to avoid collisions
417 * with the dynamic types.
418 *
419 * Returns the event type number or zero on error.
420 */
421int register_ftrace_event(struct trace_event *event)
422{
423 unsigned key;
424 int ret = 0;
425
426 mutex_lock(&trace_event_mutex);
427
428 if (!event->type)
429 event->type = next_event_type++;
430 else if (event->type > __TRACE_LAST_TYPE) {
431 printk(KERN_WARNING "Need to add type to trace.h\n");
432 WARN_ON(1);
433 }
434
435 if (ftrace_find_event(event->type))
436 goto out;
437
438 if (event->trace == NULL)
439 event->trace = trace_nop_print;
440 if (event->latency_trace == NULL)
441 event->latency_trace = trace_nop_print;
442 if (event->raw == NULL)
443 event->raw = trace_nop_print;
444 if (event->hex == NULL)
445 event->hex = trace_nop_print;
446 if (event->binary == NULL)
447 event->binary = trace_nop_print;
448
449 key = event->type & (EVENT_HASHSIZE - 1);
450
451 hlist_add_head_rcu(&event->node, &event_hash[key]);
452
453 ret = event->type;
454 out:
455 mutex_unlock(&trace_event_mutex);
456
457 return ret;
458}
459
460/**
461 * unregister_ftrace_event - remove a no longer used event
462 * @event: the event to remove
463 */
464int unregister_ftrace_event(struct trace_event *event)
465{
466 mutex_lock(&trace_event_mutex);
467 hlist_del(&event->node);
468 mutex_unlock(&trace_event_mutex);
469
470 return 0;
471}
472
473/*
474 * Standard events
475 */
476
477enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags)
478{
479 return TRACE_TYPE_HANDLED;
480}
481
482/* TRACE_FN */
483static enum print_line_t trace_fn_latency(struct trace_iterator *iter,
484 int flags)
485{
486 struct ftrace_entry *field;
487 struct trace_seq *s = &iter->seq;
488
489 trace_assign_type(field, iter->ent);
490
491 if (!seq_print_ip_sym(s, field->ip, flags))
492 goto partial;
493 if (!trace_seq_puts(s, " ("))
494 goto partial;
495 if (!seq_print_ip_sym(s, field->parent_ip, flags))
496 goto partial;
497 if (!trace_seq_puts(s, ")\n"))
498 goto partial;
499
500 return TRACE_TYPE_HANDLED;
501
502 partial:
503 return TRACE_TYPE_PARTIAL_LINE;
504}
505
506static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags)
507{
508 struct ftrace_entry *field;
509 struct trace_seq *s = &iter->seq;
510
511 trace_assign_type(field, iter->ent);
512
513 if (!seq_print_ip_sym(s, field->ip, flags))
514 goto partial;
515
516 if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) {
517 if (!trace_seq_printf(s, " <-"))
518 goto partial;
519 if (!seq_print_ip_sym(s,
520 field->parent_ip,
521 flags))
522 goto partial;
523 }
524 if (!trace_seq_printf(s, "\n"))
525 goto partial;
526
527 return TRACE_TYPE_HANDLED;
528
529 partial:
530 return TRACE_TYPE_PARTIAL_LINE;
531}
532
533static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags)
534{
535 struct ftrace_entry *field;
536
537 trace_assign_type(field, iter->ent);
538
539 if (!trace_seq_printf(&iter->seq, "%lx %lx\n",
540 field->ip,
541 field->parent_ip))
542 return TRACE_TYPE_PARTIAL_LINE;
543
544 return TRACE_TYPE_HANDLED;
545}
546
547static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags)
548{
549 struct ftrace_entry *field;
550 struct trace_seq *s = &iter->seq;
551
552 trace_assign_type(field, iter->ent);
553
554 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
555 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
556
557 return TRACE_TYPE_HANDLED;
558}
559
560static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags)
561{
562 struct ftrace_entry *field;
563 struct trace_seq *s = &iter->seq;
564
565 trace_assign_type(field, iter->ent);
566
567 SEQ_PUT_FIELD_RET(s, field->ip);
568 SEQ_PUT_FIELD_RET(s, field->parent_ip);
569
570 return TRACE_TYPE_HANDLED;
571}
572
573static struct trace_event trace_fn_event = {
574 .type = TRACE_FN,
575 .trace = trace_fn_trace,
576 .latency_trace = trace_fn_latency,
577 .raw = trace_fn_raw,
578 .hex = trace_fn_hex,
579 .binary = trace_fn_bin,
580};
581
 582/* TRACE_CTX and TRACE_WAKE */
583static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
584 char *delim)
585{
586 struct ctx_switch_entry *field;
587 char *comm;
588 int S, T;
589
590 trace_assign_type(field, iter->ent);
591
592 T = task_state_char(field->next_state);
593 S = task_state_char(field->prev_state);
594 comm = trace_find_cmdline(field->next_pid);
595 if (!trace_seq_printf(&iter->seq,
596 " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
597 field->prev_pid,
598 field->prev_prio,
599 S, delim,
600 field->next_cpu,
601 field->next_pid,
602 field->next_prio,
603 T, comm))
604 return TRACE_TYPE_PARTIAL_LINE;
605
606 return TRACE_TYPE_HANDLED;
607}
608
609static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags)
610{
611 return trace_ctxwake_print(iter, "==>");
612}
613
614static enum print_line_t trace_wake_print(struct trace_iterator *iter,
615 int flags)
616{
617 return trace_ctxwake_print(iter, " +");
618}
619
620static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
621{
622 struct ctx_switch_entry *field;
623 int T;
624
625 trace_assign_type(field, iter->ent);
626
627 if (!S)
 628 		S = task_state_char(field->prev_state);
629 T = task_state_char(field->next_state);
630 if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
631 field->prev_pid,
632 field->prev_prio,
633 S,
634 field->next_cpu,
635 field->next_pid,
636 field->next_prio,
637 T))
638 return TRACE_TYPE_PARTIAL_LINE;
639
640 return TRACE_TYPE_HANDLED;
641}
642
643static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags)
644{
645 return trace_ctxwake_raw(iter, 0);
646}
647
648static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags)
649{
650 return trace_ctxwake_raw(iter, '+');
651}
652
653
654static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
655{
656 struct ctx_switch_entry *field;
657 struct trace_seq *s = &iter->seq;
658 int T;
659
660 trace_assign_type(field, iter->ent);
661
662 if (!S)
 663 		S = task_state_char(field->prev_state);
664 T = task_state_char(field->next_state);
665
666 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
667 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
668 SEQ_PUT_HEX_FIELD_RET(s, S);
669 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
670 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
671 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
672 SEQ_PUT_HEX_FIELD_RET(s, T);
673
674 return TRACE_TYPE_HANDLED;
675}
676
677static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags)
678{
679 return trace_ctxwake_hex(iter, 0);
680}
681
682static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags)
683{
684 return trace_ctxwake_hex(iter, '+');
685}
686
687static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
688 int flags)
689{
690 struct ctx_switch_entry *field;
691 struct trace_seq *s = &iter->seq;
692
693 trace_assign_type(field, iter->ent);
694
695 SEQ_PUT_FIELD_RET(s, field->prev_pid);
696 SEQ_PUT_FIELD_RET(s, field->prev_prio);
697 SEQ_PUT_FIELD_RET(s, field->prev_state);
698 SEQ_PUT_FIELD_RET(s, field->next_pid);
699 SEQ_PUT_FIELD_RET(s, field->next_prio);
700 SEQ_PUT_FIELD_RET(s, field->next_state);
701
702 return TRACE_TYPE_HANDLED;
703}
704
705static struct trace_event trace_ctx_event = {
706 .type = TRACE_CTX,
707 .trace = trace_ctx_print,
708 .latency_trace = trace_ctx_print,
709 .raw = trace_ctx_raw,
710 .hex = trace_ctx_hex,
711 .binary = trace_ctxwake_bin,
712};
713
714static struct trace_event trace_wake_event = {
715 .type = TRACE_WAKE,
716 .trace = trace_wake_print,
717 .latency_trace = trace_wake_print,
718 .raw = trace_wake_raw,
719 .hex = trace_wake_hex,
720 .binary = trace_ctxwake_bin,
721};
722
723/* TRACE_SPECIAL */
724static enum print_line_t trace_special_print(struct trace_iterator *iter,
725 int flags)
726{
727 struct special_entry *field;
728
729 trace_assign_type(field, iter->ent);
730
731 if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n",
732 field->arg1,
733 field->arg2,
734 field->arg3))
735 return TRACE_TYPE_PARTIAL_LINE;
736
737 return TRACE_TYPE_HANDLED;
738}
739
740static enum print_line_t trace_special_hex(struct trace_iterator *iter,
741 int flags)
742{
743 struct special_entry *field;
744 struct trace_seq *s = &iter->seq;
745
746 trace_assign_type(field, iter->ent);
747
748 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
749 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
750 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
751
752 return TRACE_TYPE_HANDLED;
753}
754
755static enum print_line_t trace_special_bin(struct trace_iterator *iter,
756 int flags)
757{
758 struct special_entry *field;
759 struct trace_seq *s = &iter->seq;
760
761 trace_assign_type(field, iter->ent);
762
763 SEQ_PUT_FIELD_RET(s, field->arg1);
764 SEQ_PUT_FIELD_RET(s, field->arg2);
765 SEQ_PUT_FIELD_RET(s, field->arg3);
766
767 return TRACE_TYPE_HANDLED;
768}
769
770static struct trace_event trace_special_event = {
771 .type = TRACE_SPECIAL,
772 .trace = trace_special_print,
773 .latency_trace = trace_special_print,
774 .raw = trace_special_print,
775 .hex = trace_special_hex,
776 .binary = trace_special_bin,
777};
778
779/* TRACE_STACK */
780
781static enum print_line_t trace_stack_print(struct trace_iterator *iter,
782 int flags)
783{
784 struct stack_entry *field;
785 struct trace_seq *s = &iter->seq;
786 int i;
787
788 trace_assign_type(field, iter->ent);
789
790 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
791 if (i) {
792 if (!trace_seq_puts(s, " <= "))
793 goto partial;
794
795 if (!seq_print_ip_sym(s, field->caller[i], flags))
796 goto partial;
797 }
798 if (!trace_seq_puts(s, "\n"))
799 goto partial;
800 }
801
802 return TRACE_TYPE_HANDLED;
803
804 partial:
805 return TRACE_TYPE_PARTIAL_LINE;
806}
807
808static struct trace_event trace_stack_event = {
809 .type = TRACE_STACK,
810 .trace = trace_stack_print,
811 .latency_trace = trace_stack_print,
812 .raw = trace_special_print,
813 .hex = trace_special_hex,
814 .binary = trace_special_bin,
815};
816
817/* TRACE_USER_STACK */
818static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
819 int flags)
820{
821 struct userstack_entry *field;
822 struct trace_seq *s = &iter->seq;
823
824 trace_assign_type(field, iter->ent);
825
826 if (!seq_print_userip_objs(field, s, flags))
827 goto partial;
828
829 if (!trace_seq_putc(s, '\n'))
830 goto partial;
831
832 return TRACE_TYPE_HANDLED;
833
834 partial:
835 return TRACE_TYPE_PARTIAL_LINE;
836}
837
838static struct trace_event trace_user_stack_event = {
839 .type = TRACE_USER_STACK,
840 .trace = trace_user_stack_print,
841 .latency_trace = trace_user_stack_print,
842 .raw = trace_special_print,
843 .hex = trace_special_hex,
844 .binary = trace_special_bin,
845};
846
847/* TRACE_PRINT */
848static enum print_line_t trace_print_print(struct trace_iterator *iter,
849 int flags)
850{
851 struct print_entry *field;
852 struct trace_seq *s = &iter->seq;
853
854 trace_assign_type(field, iter->ent);
855
856 if (!seq_print_ip_sym(s, field->ip, flags))
857 goto partial;
858
859 if (!trace_seq_printf(s, ": %s", field->buf))
860 goto partial;
861
862 return TRACE_TYPE_HANDLED;
863
864 partial:
865 return TRACE_TYPE_PARTIAL_LINE;
866}
867
868static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
869{
870 struct print_entry *field;
871
872 trace_assign_type(field, iter->ent);
873
874 if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf))
875 goto partial;
876
877 return TRACE_TYPE_HANDLED;
878
879 partial:
880 return TRACE_TYPE_PARTIAL_LINE;
881}
882
883static struct trace_event trace_print_event = {
884 .type = TRACE_PRINT,
885 .trace = trace_print_print,
886 .latency_trace = trace_print_print,
887 .raw = trace_print_raw,
888};
889
890static struct trace_event *events[] __initdata = {
891 &trace_fn_event,
892 &trace_ctx_event,
893 &trace_wake_event,
894 &trace_special_event,
895 &trace_stack_event,
896 &trace_user_stack_event,
897 &trace_print_event,
898 NULL
899};
900
901__init static int init_events(void)
902{
903 struct trace_event *event;
904 int i, ret;
905
906 for (i = 0; events[i]; i++) {
907 event = events[i];
908
909 ret = register_ftrace_event(event);
910 if (!ret) {
911 printk(KERN_WARNING "event %d failed to register\n",
912 event->type);
913 WARN_ON_ONCE(1);
914 }
915 }
916
917 return 0;
918}
919device_initcall(init_events);
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
new file mode 100644
index 000000000000..551a25a72217
--- /dev/null
+++ b/kernel/trace/trace_output.h
@@ -0,0 +1,62 @@
1#ifndef __TRACE_EVENTS_H
2#define __TRACE_EVENTS_H
3
4#include "trace.h"
5
6typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
7 int flags);
8
9struct trace_event {
10 struct hlist_node node;
11 int type;
12 trace_print_func trace;
13 trace_print_func latency_trace;
14 trace_print_func raw;
15 trace_print_func hex;
16 trace_print_func binary;
17};
18
19extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
20 __attribute__ ((format (printf, 2, 3)));
21extern int
22seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
23 unsigned long sym_flags);
24extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
25 size_t cnt);
26int trace_seq_puts(struct trace_seq *s, const char *str);
27int trace_seq_putc(struct trace_seq *s, unsigned char c);
28int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len);
29int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len);
30int trace_seq_path(struct trace_seq *s, struct path *path);
31int seq_print_userip_objs(const struct userstack_entry *entry,
32 struct trace_seq *s, unsigned long sym_flags);
33int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
34 unsigned long ip, unsigned long sym_flags);
35
36int trace_print_context(struct trace_iterator *iter);
37int trace_print_lat_context(struct trace_iterator *iter);
38
39struct trace_event *ftrace_find_event(int type);
40int register_ftrace_event(struct trace_event *event);
41int unregister_ftrace_event(struct trace_event *event);
42
43enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags);
44
45#define MAX_MEMHEX_BYTES 8
46#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
47
48#define SEQ_PUT_FIELD_RET(s, x) \
49do { \
50 if (!trace_seq_putmem(s, &(x), sizeof(x))) \
51 return TRACE_TYPE_PARTIAL_LINE; \
52} while (0)
53
54#define SEQ_PUT_HEX_FIELD_RET(s, x) \
55do { \
56 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
57 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
58 return TRACE_TYPE_PARTIAL_LINE; \
59} while (0)
60
61#endif
62
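
The header above is the whole interface for pluggable event formatters: fill a struct trace_event with print callbacks for the output modes you care about and hand it to register_ftrace_event(), exactly as init_events() does for the built-in entries. A minimal sketch of such a consumer follows; TRACE_MY_EVENT and the output text are stand-ins (a real event adds its id to the trace entry enum in trace.h), and the check mirrors the fact that register_ftrace_event() returns the event type on success and 0 on failure, which is what the init_events() loop above relies on.

	#include <linux/kernel.h>
	#include <linux/init.h>
	#include "trace_output.h"

	#define TRACE_MY_EVENT	100	/* stand-in for an id added to the enum in trace.h */

	/* Emit one line for the event; report a full seq buffer so the core can retry */
	static enum print_line_t my_event_print(struct trace_iterator *iter, int flags)
	{
		if (!trace_seq_printf(&iter->seq, "my_event hit\n"))
			return TRACE_TYPE_PARTIAL_LINE;
		return TRACE_TYPE_HANDLED;
	}

	static struct trace_event my_trace_event = {
		.type		= TRACE_MY_EVENT,
		.trace		= my_event_print,
		.latency_trace	= my_event_print,
		.raw		= my_event_print,
		.hex		= my_event_print,
		.binary		= my_event_print,
	};

	__init static int my_event_init(void)
	{
		if (!register_ftrace_event(&my_trace_event))
			pr_warning("my_event failed to register\n");
		return 0;
	}
	device_initcall(my_event_init);
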
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 7bda248daf55..91ce672fb037 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -11,24 +11,126 @@
11 11
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/debugfs.h> 13#include <linux/debugfs.h>
14#include <linux/ftrace.h> 14#include <trace/power.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/module.h> 16#include <linux/module.h>
17 17
18#include "trace.h" 18#include "trace.h"
19#include "trace_output.h"
19 20
20static struct trace_array *power_trace; 21static struct trace_array *power_trace;
21static int __read_mostly trace_power_enabled; 22static int __read_mostly trace_power_enabled;
22 23
24static void probe_power_start(struct power_trace *it, unsigned int type,
25 unsigned int level)
26{
27 if (!trace_power_enabled)
28 return;
29
30 memset(it, 0, sizeof(struct power_trace));
31 it->state = level;
32 it->type = type;
33 it->stamp = ktime_get();
34}
35
36
37static void probe_power_end(struct power_trace *it)
38{
39 struct ring_buffer_event *event;
40 struct trace_power *entry;
41 struct trace_array_cpu *data;
42 struct trace_array *tr = power_trace;
43
44 if (!trace_power_enabled)
45 return;
46
47 preempt_disable();
48 it->end = ktime_get();
49 data = tr->data[smp_processor_id()];
50
51 event = trace_buffer_lock_reserve(tr, TRACE_POWER,
52 sizeof(*entry), 0, 0);
53 if (!event)
54 goto out;
55 entry = ring_buffer_event_data(event);
56 entry->state_data = *it;
57 trace_buffer_unlock_commit(tr, event, 0, 0);
58 out:
59 preempt_enable();
60}
61
62static void probe_power_mark(struct power_trace *it, unsigned int type,
63 unsigned int level)
64{
65 struct ring_buffer_event *event;
66 struct trace_power *entry;
67 struct trace_array_cpu *data;
68 struct trace_array *tr = power_trace;
69
70 if (!trace_power_enabled)
71 return;
72
73 memset(it, 0, sizeof(struct power_trace));
74 it->state = level;
75 it->type = type;
76 it->stamp = ktime_get();
77 preempt_disable();
78 it->end = it->stamp;
79 data = tr->data[smp_processor_id()];
80
81 event = trace_buffer_lock_reserve(tr, TRACE_POWER,
82 sizeof(*entry), 0, 0);
83 if (!event)
84 goto out;
85 entry = ring_buffer_event_data(event);
86 entry->state_data = *it;
87 trace_buffer_unlock_commit(tr, event, 0, 0);
88 out:
89 preempt_enable();
90}
91
92static int tracing_power_register(void)
93{
94 int ret;
95
96 ret = register_trace_power_start(probe_power_start);
97 if (ret) {
98 pr_info("power trace: Couldn't activate tracepoint"
99 " probe to trace_power_start\n");
100 return ret;
101 }
102 ret = register_trace_power_end(probe_power_end);
103 if (ret) {
104 pr_info("power trace: Couldn't activate tracepoint"
105 " probe to trace_power_end\n");
106 goto fail_start;
107 }
108 ret = register_trace_power_mark(probe_power_mark);
109 if (ret) {
110 pr_info("power trace: Couldn't activate tracepoint"
111 " probe to trace_power_mark\n");
112 goto fail_end;
113 }
114 return ret;
115fail_end:
116 unregister_trace_power_end(probe_power_end);
117fail_start:
118 unregister_trace_power_start(probe_power_start);
119 return ret;
120}
23 121
24static void start_power_trace(struct trace_array *tr) 122static void start_power_trace(struct trace_array *tr)
25{ 123{
26 trace_power_enabled = 1; 124 trace_power_enabled = 1;
125 tracing_power_register();
27} 126}
28 127
29static void stop_power_trace(struct trace_array *tr) 128static void stop_power_trace(struct trace_array *tr)
30{ 129{
31 trace_power_enabled = 0; 130 trace_power_enabled = 0;
131 unregister_trace_power_start(probe_power_start);
132 unregister_trace_power_end(probe_power_end);
133 unregister_trace_power_mark(probe_power_mark);
32} 134}
33 135
34 136
@@ -38,6 +140,7 @@ static int power_trace_init(struct trace_array *tr)
38 power_trace = tr; 140 power_trace = tr;
39 141
40 trace_power_enabled = 1; 142 trace_power_enabled = 1;
143 tracing_power_register();
41 144
42 for_each_cpu(cpu, cpu_possible_mask) 145 for_each_cpu(cpu, cpu_possible_mask)
43 tracing_reset(tr, cpu); 146 tracing_reset(tr, cpu);
@@ -94,86 +197,3 @@ static int init_power_trace(void)
94 return register_tracer(&power_tracer); 197 return register_tracer(&power_tracer);
95} 198}
96device_initcall(init_power_trace); 199device_initcall(init_power_trace);
97
98void trace_power_start(struct power_trace *it, unsigned int type,
99 unsigned int level)
100{
101 if (!trace_power_enabled)
102 return;
103
104 memset(it, 0, sizeof(struct power_trace));
105 it->state = level;
106 it->type = type;
107 it->stamp = ktime_get();
108}
109EXPORT_SYMBOL_GPL(trace_power_start);
110
111
112void trace_power_end(struct power_trace *it)
113{
114 struct ring_buffer_event *event;
115 struct trace_power *entry;
116 struct trace_array_cpu *data;
117 unsigned long irq_flags;
118 struct trace_array *tr = power_trace;
119
120 if (!trace_power_enabled)
121 return;
122
123 preempt_disable();
124 it->end = ktime_get();
125 data = tr->data[smp_processor_id()];
126
127 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
128 &irq_flags);
129 if (!event)
130 goto out;
131 entry = ring_buffer_event_data(event);
132 tracing_generic_entry_update(&entry->ent, 0, 0);
133 entry->ent.type = TRACE_POWER;
134 entry->state_data = *it;
135 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
136
137 trace_wake_up();
138
139 out:
140 preempt_enable();
141}
142EXPORT_SYMBOL_GPL(trace_power_end);
143
144void trace_power_mark(struct power_trace *it, unsigned int type,
145 unsigned int level)
146{
147 struct ring_buffer_event *event;
148 struct trace_power *entry;
149 struct trace_array_cpu *data;
150 unsigned long irq_flags;
151 struct trace_array *tr = power_trace;
152
153 if (!trace_power_enabled)
154 return;
155
156 memset(it, 0, sizeof(struct power_trace));
157 it->state = level;
158 it->type = type;
159 it->stamp = ktime_get();
160 preempt_disable();
161 it->end = it->stamp;
162 data = tr->data[smp_processor_id()];
163
164 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
165 &irq_flags);
166 if (!event)
167 goto out;
168 entry = ring_buffer_event_data(event);
169 tracing_generic_entry_update(&entry->ent, 0, 0);
170 entry->ent.type = TRACE_POWER;
171 entry->state_data = *it;
172 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
173
174 trace_wake_up();
175
176 out:
177 preempt_enable();
178}
179EXPORT_SYMBOL_GPL(trace_power_mark);
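
With this change the power tracer no longer exports trace_power_start()/trace_power_end()/trace_power_mark() as ordinary functions; it attaches the probe_power_*() callbacks above to tracepoints declared in <trace/power.h>. A caller such as a cpuidle or cpufreq driver fires those tracepoints roughly as sketched below; the driver function is hypothetical and POWER_CSTATE is assumed to be one of the power_type values from <trace/power.h>, which is not shown in this hunk.

	#include <trace/power.h>

	static struct power_trace idle_trace;

	/* Hypothetical idle-entry path instrumented with the power tracepoints */
	static void my_enter_idle(unsigned int state)
	{
		/* probe_power_start() records type, level and a start timestamp */
		trace_power_start(&idle_trace, POWER_CSTATE, state);

		/* ... the hardware enters the low-power state here ... */

		/* probe_power_end() stamps the end time and commits a TRACE_POWER event */
		trace_power_end(&idle_trace);
	}
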
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index df175cb4564f..77132c2cf3d9 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -43,7 +43,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
43 data = ctx_trace->data[cpu]; 43 data = ctx_trace->data[cpu];
44 44
45 if (likely(!atomic_read(&data->disabled))) 45 if (likely(!atomic_read(&data->disabled)))
46 tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc); 46 tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc);
47 47
48 local_irq_restore(flags); 48 local_irq_restore(flags);
49} 49}
@@ -66,7 +66,7 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
66 data = ctx_trace->data[cpu]; 66 data = ctx_trace->data[cpu];
67 67
68 if (likely(!atomic_read(&data->disabled))) 68 if (likely(!atomic_read(&data->disabled)))
69 tracing_sched_wakeup_trace(ctx_trace, data, wakee, current, 69 tracing_sched_wakeup_trace(ctx_trace, wakee, current,
70 flags, pc); 70 flags, pc);
71 71
72 local_irq_restore(flags); 72 local_irq_restore(flags);
@@ -93,7 +93,7 @@ static int tracing_sched_register(void)
93 ret = register_trace_sched_switch(probe_sched_switch); 93 ret = register_trace_sched_switch(probe_sched_switch);
94 if (ret) { 94 if (ret) {
95 pr_info("sched trace: Couldn't activate tracepoint" 95 pr_info("sched trace: Couldn't activate tracepoint"
96 " probe to kernel_sched_schedule\n"); 96 " probe to kernel_sched_switch\n");
97 goto fail_deprobe_wake_new; 97 goto fail_deprobe_wake_new;
98 } 98 }
99 99
@@ -185,12 +185,6 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr)
185 ctx_trace = tr; 185 ctx_trace = tr;
186} 186}
187 187
188static void start_sched_trace(struct trace_array *tr)
189{
190 tracing_reset_online_cpus(tr);
191 tracing_start_sched_switch_record();
192}
193
194static void stop_sched_trace(struct trace_array *tr) 188static void stop_sched_trace(struct trace_array *tr)
195{ 189{
196 tracing_stop_sched_switch_record(); 190 tracing_stop_sched_switch_record();
@@ -199,7 +193,7 @@ static void stop_sched_trace(struct trace_array *tr)
199static int sched_switch_trace_init(struct trace_array *tr) 193static int sched_switch_trace_init(struct trace_array *tr)
200{ 194{
201 ctx_trace = tr; 195 ctx_trace = tr;
202 start_sched_trace(tr); 196 tracing_start_sched_switch_record();
203 return 0; 197 return 0;
204} 198}
205 199
@@ -227,6 +221,7 @@ static struct tracer sched_switch_trace __read_mostly =
227 .reset = sched_switch_trace_reset, 221 .reset = sched_switch_trace_reset,
228 .start = sched_switch_trace_start, 222 .start = sched_switch_trace_start,
229 .stop = sched_switch_trace_stop, 223 .stop = sched_switch_trace_stop,
224 .wait_pipe = poll_wait_pipe,
230#ifdef CONFIG_FTRACE_SELFTEST 225#ifdef CONFIG_FTRACE_SELFTEST
231 .selftest = trace_selftest_startup_sched_switch, 226 .selftest = trace_selftest_startup_sched_switch,
232#endif 227#endif
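
The sched_switch tracer now also sets a .wait_pipe callback, which is how a blocked trace_pipe reader waits when the tracer has no event with which to wake it. poll_wait_pipe() itself lives in kernel/trace/trace.c and is not shown in this hunk; a sketch consistent with its name and its use here is simply a periodic poll:

	void poll_wait_pipe(struct trace_iterator *iter)
	{
		set_current_state(TASK_INTERRUPTIBLE);
		/* sleep for roughly 100 msecs, then let the reader re-check the buffer */
		schedule_timeout(HZ / 10);
	}
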
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 42ae1e77b6b3..db55f7aaa640 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -25,6 +25,7 @@ static int __read_mostly tracer_enabled;
25static struct task_struct *wakeup_task; 25static struct task_struct *wakeup_task;
26static int wakeup_cpu; 26static int wakeup_cpu;
27static unsigned wakeup_prio = -1; 27static unsigned wakeup_prio = -1;
28static int wakeup_rt;
28 29
29static raw_spinlock_t wakeup_lock = 30static raw_spinlock_t wakeup_lock =
30 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
@@ -71,7 +72,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
71 if (task_cpu(wakeup_task) != cpu) 72 if (task_cpu(wakeup_task) != cpu)
72 goto unlock; 73 goto unlock;
73 74
74 trace_function(tr, data, ip, parent_ip, flags, pc); 75 trace_function(tr, ip, parent_ip, flags, pc);
75 76
76 unlock: 77 unlock:
77 __raw_spin_unlock(&wakeup_lock); 78 __raw_spin_unlock(&wakeup_lock);
@@ -151,7 +152,8 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
151 if (unlikely(!tracer_enabled || next != wakeup_task)) 152 if (unlikely(!tracer_enabled || next != wakeup_task))
152 goto out_unlock; 153 goto out_unlock;
153 154
154 trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 155 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
156 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
155 157
156 /* 158 /*
157 * usecs conversion is slow so we try to delay the conversion 159 * usecs conversion is slow so we try to delay the conversion
@@ -182,13 +184,10 @@ out:
182 184
183static void __wakeup_reset(struct trace_array *tr) 185static void __wakeup_reset(struct trace_array *tr)
184{ 186{
185 struct trace_array_cpu *data;
186 int cpu; 187 int cpu;
187 188
188 for_each_possible_cpu(cpu) { 189 for_each_possible_cpu(cpu)
189 data = tr->data[cpu];
190 tracing_reset(tr, cpu); 190 tracing_reset(tr, cpu);
191 }
192 191
193 wakeup_cpu = -1; 192 wakeup_cpu = -1;
194 wakeup_prio = -1; 193 wakeup_prio = -1;
@@ -213,6 +212,7 @@ static void wakeup_reset(struct trace_array *tr)
213static void 212static void
214probe_wakeup(struct rq *rq, struct task_struct *p, int success) 213probe_wakeup(struct rq *rq, struct task_struct *p, int success)
215{ 214{
215 struct trace_array_cpu *data;
216 int cpu = smp_processor_id(); 216 int cpu = smp_processor_id();
217 unsigned long flags; 217 unsigned long flags;
218 long disabled; 218 long disabled;
@@ -224,7 +224,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
224 tracing_record_cmdline(p); 224 tracing_record_cmdline(p);
225 tracing_record_cmdline(current); 225 tracing_record_cmdline(current);
226 226
227 if (likely(!rt_task(p)) || 227 if ((wakeup_rt && !rt_task(p)) ||
228 p->prio >= wakeup_prio || 228 p->prio >= wakeup_prio ||
229 p->prio >= current->prio) 229 p->prio >= current->prio)
230 return; 230 return;
@@ -252,9 +252,10 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
252 252
253 local_save_flags(flags); 253 local_save_flags(flags);
254 254
255 wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); 255 data = wakeup_trace->data[wakeup_cpu];
256 trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu], 256 data->preempt_timestamp = ftrace_now(cpu);
257 CALLER_ADDR1, CALLER_ADDR2, flags, pc); 257 tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
258 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
258 259
259out_locked: 260out_locked:
260 __raw_spin_unlock(&wakeup_lock); 261 __raw_spin_unlock(&wakeup_lock);
@@ -262,12 +263,6 @@ out:
262 atomic_dec(&wakeup_trace->data[cpu]->disabled); 263 atomic_dec(&wakeup_trace->data[cpu]->disabled);
263} 264}
264 265
265/*
266 * save_tracer_enabled is used to save the state of the tracer_enabled
267 * variable when we disable it when we open a trace output file.
268 */
269static int save_tracer_enabled;
270
271static void start_wakeup_tracer(struct trace_array *tr) 266static void start_wakeup_tracer(struct trace_array *tr)
272{ 267{
273 int ret; 268 int ret;
@@ -289,7 +284,7 @@ static void start_wakeup_tracer(struct trace_array *tr)
289 ret = register_trace_sched_switch(probe_wakeup_sched_switch); 284 ret = register_trace_sched_switch(probe_wakeup_sched_switch);
290 if (ret) { 285 if (ret) {
291 pr_info("sched trace: Couldn't activate tracepoint" 286 pr_info("sched trace: Couldn't activate tracepoint"
292 " probe to kernel_sched_schedule\n"); 287 " probe to kernel_sched_switch\n");
293 goto fail_deprobe_wake_new; 288 goto fail_deprobe_wake_new;
294 } 289 }
295 290
@@ -306,13 +301,10 @@ static void start_wakeup_tracer(struct trace_array *tr)
306 301
307 register_ftrace_function(&trace_ops); 302 register_ftrace_function(&trace_ops);
308 303
309 if (tracing_is_enabled()) { 304 if (tracing_is_enabled())
310 tracer_enabled = 1; 305 tracer_enabled = 1;
311 save_tracer_enabled = 1; 306 else
312 } else {
313 tracer_enabled = 0; 307 tracer_enabled = 0;
314 save_tracer_enabled = 0;
315 }
316 308
317 return; 309 return;
318fail_deprobe_wake_new: 310fail_deprobe_wake_new:
@@ -324,14 +316,13 @@ fail_deprobe:
324static void stop_wakeup_tracer(struct trace_array *tr) 316static void stop_wakeup_tracer(struct trace_array *tr)
325{ 317{
326 tracer_enabled = 0; 318 tracer_enabled = 0;
327 save_tracer_enabled = 0;
328 unregister_ftrace_function(&trace_ops); 319 unregister_ftrace_function(&trace_ops);
329 unregister_trace_sched_switch(probe_wakeup_sched_switch); 320 unregister_trace_sched_switch(probe_wakeup_sched_switch);
330 unregister_trace_sched_wakeup_new(probe_wakeup); 321 unregister_trace_sched_wakeup_new(probe_wakeup);
331 unregister_trace_sched_wakeup(probe_wakeup); 322 unregister_trace_sched_wakeup(probe_wakeup);
332} 323}
333 324
334static int wakeup_tracer_init(struct trace_array *tr) 325static int __wakeup_tracer_init(struct trace_array *tr)
335{ 326{
336 tracing_max_latency = 0; 327 tracing_max_latency = 0;
337 wakeup_trace = tr; 328 wakeup_trace = tr;
@@ -339,6 +330,18 @@ static int wakeup_tracer_init(struct trace_array *tr)
339 return 0; 330 return 0;
340} 331}
341 332
333static int wakeup_tracer_init(struct trace_array *tr)
334{
335 wakeup_rt = 0;
336 return __wakeup_tracer_init(tr);
337}
338
339static int wakeup_rt_tracer_init(struct trace_array *tr)
340{
341 wakeup_rt = 1;
342 return __wakeup_tracer_init(tr);
343}
344
342static void wakeup_tracer_reset(struct trace_array *tr) 345static void wakeup_tracer_reset(struct trace_array *tr)
343{ 346{
344 stop_wakeup_tracer(tr); 347 stop_wakeup_tracer(tr);
@@ -350,28 +353,11 @@ static void wakeup_tracer_start(struct trace_array *tr)
350{ 353{
351 wakeup_reset(tr); 354 wakeup_reset(tr);
352 tracer_enabled = 1; 355 tracer_enabled = 1;
353 save_tracer_enabled = 1;
354} 356}
355 357
356static void wakeup_tracer_stop(struct trace_array *tr) 358static void wakeup_tracer_stop(struct trace_array *tr)
357{ 359{
358 tracer_enabled = 0; 360 tracer_enabled = 0;
359 save_tracer_enabled = 0;
360}
361
362static void wakeup_tracer_open(struct trace_iterator *iter)
363{
364 /* stop the trace while dumping */
365 tracer_enabled = 0;
366}
367
368static void wakeup_tracer_close(struct trace_iterator *iter)
369{
370 /* forget about any processes we were recording */
371 if (save_tracer_enabled) {
372 wakeup_reset(iter->tr);
373 tracer_enabled = 1;
374 }
375} 361}
376 362
377static struct tracer wakeup_tracer __read_mostly = 363static struct tracer wakeup_tracer __read_mostly =
@@ -381,8 +367,20 @@ static struct tracer wakeup_tracer __read_mostly =
381 .reset = wakeup_tracer_reset, 367 .reset = wakeup_tracer_reset,
382 .start = wakeup_tracer_start, 368 .start = wakeup_tracer_start,
383 .stop = wakeup_tracer_stop, 369 .stop = wakeup_tracer_stop,
384 .open = wakeup_tracer_open, 370 .print_max = 1,
385 .close = wakeup_tracer_close, 371#ifdef CONFIG_FTRACE_SELFTEST
372 .selftest = trace_selftest_startup_wakeup,
373#endif
374};
375
376static struct tracer wakeup_rt_tracer __read_mostly =
377{
378 .name = "wakeup_rt",
379 .init = wakeup_rt_tracer_init,
380 .reset = wakeup_tracer_reset,
381 .start = wakeup_tracer_start,
382 .stop = wakeup_tracer_stop,
383 .wait_pipe = poll_wait_pipe,
386 .print_max = 1, 384 .print_max = 1,
387#ifdef CONFIG_FTRACE_SELFTEST 385#ifdef CONFIG_FTRACE_SELFTEST
388 .selftest = trace_selftest_startup_wakeup, 386 .selftest = trace_selftest_startup_wakeup,
@@ -397,6 +395,10 @@ __init static int init_wakeup_tracer(void)
397 if (ret) 395 if (ret)
398 return ret; 396 return ret;
399 397
398 ret = register_tracer(&wakeup_rt_tracer);
399 if (ret)
400 return ret;
401
400 return 0; 402 return 0;
401} 403}
402device_initcall(init_wakeup_tracer); 404device_initcall(init_wakeup_tracer);
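
This splits the old behaviour into two tracers: "wakeup" no longer restricts itself to RT tasks, while "wakeup_rt" keeps the rt_task() filter through the new wakeup_rt flag. Selecting either one from userspace is a single write to current_tracer; the snippet below assumes debugfs is mounted at /sys/kernel/debug.

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/kernel/debug/tracing/current_tracer", "w");

		if (!f) {
			perror("current_tracer");
			return 1;
		}
		fputs("wakeup_rt\n", f);	/* or "wakeup" for the non-RT variant */
		return fclose(f) ? 1 : 0;
	}
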
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index bc8e80a86bca..7238646b8723 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -9,11 +9,12 @@ static inline int trace_valid_entry(struct trace_entry *entry)
9 case TRACE_FN: 9 case TRACE_FN:
10 case TRACE_CTX: 10 case TRACE_CTX:
11 case TRACE_WAKE: 11 case TRACE_WAKE:
12 case TRACE_CONT:
13 case TRACE_STACK: 12 case TRACE_STACK:
14 case TRACE_PRINT: 13 case TRACE_PRINT:
15 case TRACE_SPECIAL: 14 case TRACE_SPECIAL:
16 case TRACE_BRANCH: 15 case TRACE_BRANCH:
16 case TRACE_GRAPH_ENT:
17 case TRACE_GRAPH_RET:
17 return 1; 18 return 1;
18 } 19 }
19 return 0; 20 return 0;
@@ -125,9 +126,9 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
125 func(); 126 func();
126 127
127 /* 128 /*
128 * Some archs *cough*PowerPC*cough* add charachters to the 129 * Some archs *cough*PowerPC*cough* add characters to the
129 * start of the function names. We simply put a '*' to 130 * start of the function names. We simply put a '*' to
130 * accomodate them. 131 * accommodate them.
131 */ 132 */
132 func_name = "*" STR(DYN_FTRACE_TEST_NAME); 133 func_name = "*" STR(DYN_FTRACE_TEST_NAME);
133 134
@@ -135,7 +136,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
135 ftrace_set_filter(func_name, strlen(func_name), 1); 136 ftrace_set_filter(func_name, strlen(func_name), 1);
136 137
137 /* enable tracing */ 138 /* enable tracing */
138 ret = trace->init(tr); 139 ret = tracer_init(trace, tr);
139 if (ret) { 140 if (ret) {
140 warn_failed_init_tracer(trace, ret); 141 warn_failed_init_tracer(trace, ret);
141 goto out; 142 goto out;
@@ -209,7 +210,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
209 ftrace_enabled = 1; 210 ftrace_enabled = 1;
210 tracer_enabled = 1; 211 tracer_enabled = 1;
211 212
212 ret = trace->init(tr); 213 ret = tracer_init(trace, tr);
213 if (ret) { 214 if (ret) {
214 warn_failed_init_tracer(trace, ret); 215 warn_failed_init_tracer(trace, ret);
215 goto out; 216 goto out;
@@ -247,6 +248,54 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
247} 248}
248#endif /* CONFIG_FUNCTION_TRACER */ 249#endif /* CONFIG_FUNCTION_TRACER */
249 250
251
252#ifdef CONFIG_FUNCTION_GRAPH_TRACER
253/*
254 * Pretty much the same as the function tracer, from which this selftest
255 * has been borrowed.
256 */
257int
258trace_selftest_startup_function_graph(struct tracer *trace,
259 struct trace_array *tr)
260{
261 int ret;
262 unsigned long count;
263
264 ret = tracer_init(trace, tr);
265 if (ret) {
266 warn_failed_init_tracer(trace, ret);
267 goto out;
268 }
269
270 /* Sleep for a 1/10 of a second */
271 msleep(100);
272
273 tracing_stop();
274
275 /* check the trace buffer */
276 ret = trace_test_buffer(tr, &count);
277
278 trace->reset(tr);
279 tracing_start();
280
281 if (!ret && !count) {
282 printk(KERN_CONT ".. no entries found ..");
283 ret = -1;
284 goto out;
285 }
286
287 /* Don't test dynamic tracing, the function tracer already did */
288
289out:
290 /* Stop it if we failed */
291 if (ret)
292 ftrace_graph_stop();
293
294 return ret;
295}
296#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
297
298
250#ifdef CONFIG_IRQSOFF_TRACER 299#ifdef CONFIG_IRQSOFF_TRACER
251int 300int
252trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) 301trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
@@ -256,7 +305,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
256 int ret; 305 int ret;
257 306
258 /* start the tracing */ 307 /* start the tracing */
259 ret = trace->init(tr); 308 ret = tracer_init(trace, tr);
260 if (ret) { 309 if (ret) {
261 warn_failed_init_tracer(trace, ret); 310 warn_failed_init_tracer(trace, ret);
262 return ret; 311 return ret;
@@ -310,7 +359,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
310 } 359 }
311 360
312 /* start the tracing */ 361 /* start the tracing */
313 ret = trace->init(tr); 362 ret = tracer_init(trace, tr);
314 if (ret) { 363 if (ret) {
315 warn_failed_init_tracer(trace, ret); 364 warn_failed_init_tracer(trace, ret);
316 return ret; 365 return ret;
@@ -364,7 +413,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
364 } 413 }
365 414
366 /* start the tracing */ 415 /* start the tracing */
367 ret = trace->init(tr); 416 ret = tracer_init(trace, tr);
368 if (ret) { 417 if (ret) {
369 warn_failed_init_tracer(trace, ret); 418 warn_failed_init_tracer(trace, ret);
370 goto out; 419 goto out;
@@ -496,7 +545,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
496 wait_for_completion(&isrt); 545 wait_for_completion(&isrt);
497 546
498 /* start the tracing */ 547 /* start the tracing */
499 ret = trace->init(tr); 548 ret = tracer_init(trace, tr);
500 if (ret) { 549 if (ret) {
501 warn_failed_init_tracer(trace, ret); 550 warn_failed_init_tracer(trace, ret);
502 return ret; 551 return ret;
@@ -557,7 +606,7 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
557 int ret; 606 int ret;
558 607
559 /* start the tracing */ 608 /* start the tracing */
560 ret = trace->init(tr); 609 ret = tracer_init(trace, tr);
561 if (ret) { 610 if (ret) {
562 warn_failed_init_tracer(trace, ret); 611 warn_failed_init_tracer(trace, ret);
563 return ret; 612 return ret;
@@ -589,10 +638,10 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
589 int ret; 638 int ret;
590 639
591 /* start the tracing */ 640 /* start the tracing */
592 ret = trace->init(tr); 641 ret = tracer_init(trace, tr);
593 if (ret) { 642 if (ret) {
594 warn_failed_init_tracer(trace, ret); 643 warn_failed_init_tracer(trace, ret);
595 return 0; 644 return ret;
596 } 645 }
597 646
598 /* Sleep for a 1/10 of a second */ 647 /* Sleep for a 1/10 of a second */
@@ -604,6 +653,11 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
604 trace->reset(tr); 653 trace->reset(tr);
605 tracing_start(); 654 tracing_start();
606 655
656 if (!ret && !count) {
657 printk(KERN_CONT ".. no entries found ..");
658 ret = -1;
659 }
660
607 return ret; 661 return ret;
608} 662}
609#endif /* CONFIG_SYSPROF_TRACER */ 663#endif /* CONFIG_SYSPROF_TRACER */
@@ -616,7 +670,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
616 int ret; 670 int ret;
617 671
618 /* start the tracing */ 672 /* start the tracing */
619 ret = trace->init(tr); 673 ret = tracer_init(trace, tr);
620 if (ret) { 674 if (ret) {
621 warn_failed_init_tracer(trace, ret); 675 warn_failed_init_tracer(trace, ret);
622 return ret; 676 return ret;
@@ -631,6 +685,11 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
631 trace->reset(tr); 685 trace->reset(tr);
632 tracing_start(); 686 tracing_start();
633 687
688 if (!ret && !count) {
689 printk(KERN_CONT ".. no entries found ..");
690 ret = -1;
691 }
692
634 return ret; 693 return ret;
635} 694}
636#endif /* CONFIG_BRANCH_TRACER */ 695#endif /* CONFIG_BRANCH_TRACER */
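
All of the selftests above now start through tracer_init() instead of calling trace->init() directly. That helper is defined in kernel/trace/trace.c and is not part of this hunk; a sketch of what it is expected to do, consistent with how the selftests use it (begin each run with empty per-cpu buffers, then run the tracer's own init), is:

	int tracer_init(struct tracer *t, struct trace_array *tr)
	{
		tracing_reset_online_cpus(tr);
		return t->init(tr);
	}
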
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
new file mode 100644
index 000000000000..39310e3434ee
--- /dev/null
+++ b/kernel/trace/trace_stat.c
@@ -0,0 +1,319 @@
1/*
2 * Infrastructure for statistic tracing (histogram output).
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 * Based on the code from trace_branch.c which is
7 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
8 *
9 */
10
11
12#include <linux/list.h>
13#include <linux/debugfs.h>
14#include "trace_stat.h"
15#include "trace.h"
16
17
18/* List of stat entries from a tracer */
19struct trace_stat_list {
20 struct list_head list;
21 void *stat;
22};
23
24/* A stat session is the stats output in one file */
25struct tracer_stat_session {
26 struct list_head session_list;
27 struct tracer_stat *ts;
28 struct list_head stat_list;
29 struct mutex stat_mutex;
30 struct dentry *file;
31};
32
33/* All of the sessions currently in use. Each stat file embeds one session */
34static LIST_HEAD(all_stat_sessions);
35static DEFINE_MUTEX(all_stat_sessions_mutex);
36
37/* The root directory for all stat files */
38static struct dentry *stat_dir;
39
40
41static void reset_stat_session(struct tracer_stat_session *session)
42{
43 struct trace_stat_list *node, *next;
44
45 list_for_each_entry_safe(node, next, &session->stat_list, list)
46 kfree(node);
47
48 INIT_LIST_HEAD(&session->stat_list);
49}
50
51static void destroy_session(struct tracer_stat_session *session)
52{
53 debugfs_remove(session->file);
54 reset_stat_session(session);
55 mutex_destroy(&session->stat_mutex);
56 kfree(session);
57}
58
59/*
60 * For tracers that don't provide a stat_cmp callback.
61 * This one will force an immediate insertion at the tail of
62 * the list.
63 */
64static int dummy_cmp(void *p1, void *p2)
65{
66 return 1;
67}
68
69/*
70 * Initialize the stat list at each trace_stat file opening.
71 * All of this copying and sorting is required at each opening
72 * since the stats could have changed between two file sessions.
73 */
74static int stat_seq_init(struct tracer_stat_session *session)
75{
76 struct trace_stat_list *iter_entry, *new_entry;
77 struct tracer_stat *ts = session->ts;
78 void *prev_stat;
79 int ret = 0;
80 int i;
81
82 mutex_lock(&session->stat_mutex);
83 reset_stat_session(session);
84
85 if (!ts->stat_cmp)
86 ts->stat_cmp = dummy_cmp;
87
88 /*
89 * The first entry. Actually this is the second, but the first
90 * one (the stat_list head) is pointless.
91 */
92 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
93 if (!new_entry) {
94 ret = -ENOMEM;
95 goto exit;
96 }
97
98 INIT_LIST_HEAD(&new_entry->list);
99
100 list_add(&new_entry->list, &session->stat_list);
101
102 new_entry->stat = ts->stat_start();
103 prev_stat = new_entry->stat;
104
105 /*
106 * Iterate over the tracer stat entries and store them in a sorted
107 * list.
108 */
109 for (i = 1; ; i++) {
110 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
111 if (!new_entry) {
112 ret = -ENOMEM;
113 goto exit_free_list;
114 }
115
116 INIT_LIST_HEAD(&new_entry->list);
117 new_entry->stat = ts->stat_next(prev_stat, i);
118
119 /* End of insertion */
120 if (!new_entry->stat)
121 break;
122
123 list_for_each_entry(iter_entry, &session->stat_list, list) {
124
125   /* Insert here to keep the list in descending order */
126 if (ts->stat_cmp(new_entry->stat,
127 iter_entry->stat) > 0) {
128
129 list_add_tail(&new_entry->list,
130 &iter_entry->list);
131 break;
132
133   /* The new entry is the smallest so far: append it */
134 } else if (list_is_last(&iter_entry->list,
135 &session->stat_list)) {
136 list_add(&new_entry->list, &iter_entry->list);
137 break;
138 }
139 }
140
141 prev_stat = new_entry->stat;
142 }
143exit:
144 mutex_unlock(&session->stat_mutex);
145 return ret;
146
147exit_free_list:
148 reset_stat_session(session);
149 mutex_unlock(&session->stat_mutex);
150 return ret;
151}
152
153
154static void *stat_seq_start(struct seq_file *s, loff_t *pos)
155{
156 struct tracer_stat_session *session = s->private;
157
158 /* Prevent from tracer switch or stat_list modification */
159 mutex_lock(&session->stat_mutex);
160
161 /* If we are in the beginning of the file, print the headers */
162 if (!*pos && session->ts->stat_headers)
163 session->ts->stat_headers(s);
164
165 return seq_list_start(&session->stat_list, *pos);
166}
167
168static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
169{
170 struct tracer_stat_session *session = s->private;
171
172 return seq_list_next(p, &session->stat_list, pos);
173}
174
175static void stat_seq_stop(struct seq_file *s, void *p)
176{
177 struct tracer_stat_session *session = s->private;
178 mutex_unlock(&session->stat_mutex);
179}
180
181static int stat_seq_show(struct seq_file *s, void *v)
182{
183 struct tracer_stat_session *session = s->private;
184 struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
185
186 return session->ts->stat_show(s, l->stat);
187}
188
189static const struct seq_operations trace_stat_seq_ops = {
190 .start = stat_seq_start,
191 .next = stat_seq_next,
192 .stop = stat_seq_stop,
193 .show = stat_seq_show
194};
195
196/* The session stat is refilled and resorted at each stat file opening */
197static int tracing_stat_open(struct inode *inode, struct file *file)
198{
199 int ret;
200
201 struct tracer_stat_session *session = inode->i_private;
202
203 ret = seq_open(file, &trace_stat_seq_ops);
204 if (!ret) {
205 struct seq_file *m = file->private_data;
206 m->private = session;
207 ret = stat_seq_init(session);
208 }
209
210 return ret;
211}
212
213/*
214 * Avoid consuming memory with our now useless list.
215 */
216static int tracing_stat_release(struct inode *i, struct file *f)
217{
218 struct tracer_stat_session *session = i->i_private;
219
220 mutex_lock(&session->stat_mutex);
221 reset_stat_session(session);
222 mutex_unlock(&session->stat_mutex);
223
224 return 0;
225}
226
227static const struct file_operations tracing_stat_fops = {
228 .open = tracing_stat_open,
229 .read = seq_read,
230 .llseek = seq_lseek,
231 .release = tracing_stat_release
232};
233
234static int tracing_stat_init(void)
235{
236 struct dentry *d_tracing;
237
238 d_tracing = tracing_init_dentry();
239
240 stat_dir = debugfs_create_dir("trace_stat", d_tracing);
241 if (!stat_dir)
242 pr_warning("Could not create debugfs "
243 "'trace_stat' entry\n");
244 return 0;
245}
246
247static int init_stat_file(struct tracer_stat_session *session)
248{
249 if (!stat_dir && tracing_stat_init())
250 return -ENODEV;
251
252 session->file = debugfs_create_file(session->ts->name, 0644,
253 stat_dir,
254 session, &tracing_stat_fops);
255 if (!session->file)
256 return -ENOMEM;
257 return 0;
258}
259
260int register_stat_tracer(struct tracer_stat *trace)
261{
262 struct tracer_stat_session *session, *node, *tmp;
263 int ret;
264
265 if (!trace)
266 return -EINVAL;
267
268 if (!trace->stat_start || !trace->stat_next || !trace->stat_show)
269 return -EINVAL;
270
271 /* Already registered? */
272 mutex_lock(&all_stat_sessions_mutex);
273 list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
274 if (node->ts == trace) {
275 mutex_unlock(&all_stat_sessions_mutex);
276 return -EINVAL;
277 }
278 }
279 mutex_unlock(&all_stat_sessions_mutex);
280
281 /* Init the session */
282 session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL);
283 if (!session)
284 return -ENOMEM;
285
286 session->ts = trace;
287 INIT_LIST_HEAD(&session->session_list);
288 INIT_LIST_HEAD(&session->stat_list);
289 mutex_init(&session->stat_mutex);
290 session->file = NULL;
291
292 ret = init_stat_file(session);
293 if (ret) {
294 destroy_session(session);
295 return ret;
296 }
297
298 /* Register */
299 mutex_lock(&all_stat_sessions_mutex);
300 list_add_tail(&session->session_list, &all_stat_sessions);
301 mutex_unlock(&all_stat_sessions_mutex);
302
303 return 0;
304}
305
306void unregister_stat_tracer(struct tracer_stat *trace)
307{
308 struct tracer_stat_session *node, *tmp;
309
310 mutex_lock(&all_stat_sessions_mutex);
311 list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
312 if (node->ts == trace) {
313 list_del(&node->session_list);
314 destroy_session(node);
315 break;
316 }
317 }
318 mutex_unlock(&all_stat_sessions_mutex);
319}
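
A note on the sort order used by stat_seq_init() above: entries are inserted so that whatever stat_cmp() reports as larger comes first, and dummy_cmp() simply appends when a tracer provides no comparator. A comparator for a hypothetical counter-based statistic would look like this:

	/* Hypothetical per-entry structure, for illustration only */
	struct my_stat {
		unsigned long count;
	};

	/* Return >0 when p1 should be listed before p2 (largest counter first) */
	static int my_stat_cmp(void *p1, void *p2)
	{
		struct my_stat *a = p1;
		struct my_stat *b = p2;

		if (a->count > b->count)
			return 1;
		if (a->count < b->count)
			return -1;
		return 0;
	}
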
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
new file mode 100644
index 000000000000..202274cf7f3d
--- /dev/null
+++ b/kernel/trace/trace_stat.h
@@ -0,0 +1,31 @@
1#ifndef __TRACE_STAT_H
2#define __TRACE_STAT_H
3
4#include <linux/seq_file.h>
5
6/*
7 * If you want to provide a stat file (one-shot statistics), fill in
8 * the stat_start/stat_next iterator callbacks and a stat_show callback.
9 * The other callbacks are optional.
10 */
11struct tracer_stat {
12 /* The name of your stat file */
13 const char *name;
14 /* Iteration over statistic entries */
15 void *(*stat_start)(void);
16 void *(*stat_next)(void *prev, int idx);
17 /* Compare two entries for stats sorting */
18 int (*stat_cmp)(void *p1, void *p2);
19 /* Print a stat entry */
20 int (*stat_show)(struct seq_file *s, void *p);
21 /* Print the headers of your stat entries */
22 int (*stat_headers)(struct seq_file *s);
23};
24
25/*
26 * Destroy or create a stat file
27 */
28extern int register_stat_tracer(struct tracer_stat *trace);
29extern void unregister_stat_tracer(struct tracer_stat *trace);
30
31#endif /* __TRACE_STAT_H */
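
As a usage sketch of this header (the data and names are hypothetical, not part of the patch): a statistic backed by a small static array only needs the three mandatory callbacks plus a register_stat_tracer() call, after which the sorted output shows up in debugfs under trace_stat/<name>.

	#include <linux/init.h>
	#include <linux/seq_file.h>
	#include "trace_stat.h"

	static unsigned long my_counts[4];	/* hypothetical data source */

	static void *my_stat_start(void)
	{
		return &my_counts[0];
	}

	/* idx is 1 for the entry following the one returned by stat_start() */
	static void *my_stat_next(void *prev, int idx)
	{
		return idx < 4 ? &my_counts[idx] : NULL;
	}

	static int my_stat_show(struct seq_file *s, void *p)
	{
		seq_printf(s, "%lu\n", *(unsigned long *)p);
		return 0;
	}

	static struct tracer_stat my_stats = {
		.name		= "my_counts",
		.stat_start	= my_stat_start,
		.stat_next	= my_stat_next,
		.stat_show	= my_stat_show,
	};

	static int __init my_stats_init(void)
	{
		return register_stat_tracer(&my_stats);
	}
	fs_initcall(my_stats_init);
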
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index eaca5ad803ff..c771af4e8f1a 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -88,7 +88,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
88 } 88 }
89} 89}
90 90
91const static struct stacktrace_ops backtrace_ops = { 91static const struct stacktrace_ops backtrace_ops = {
92 .warning = backtrace_warning, 92 .warning = backtrace_warning,
93 .warning_symbol = backtrace_warning_symbol, 93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack, 94 .stack = backtrace_stack,
@@ -226,15 +226,6 @@ static void stop_stack_timers(void)
226 stop_stack_timer(cpu); 226 stop_stack_timer(cpu);
227} 227}
228 228
229static void start_stack_trace(struct trace_array *tr)
230{
231 mutex_lock(&sample_timer_lock);
232 tracing_reset_online_cpus(tr);
233 start_stack_timers();
234 tracer_enabled = 1;
235 mutex_unlock(&sample_timer_lock);
236}
237
238static void stop_stack_trace(struct trace_array *tr) 229static void stop_stack_trace(struct trace_array *tr)
239{ 230{
240 mutex_lock(&sample_timer_lock); 231 mutex_lock(&sample_timer_lock);
@@ -247,12 +238,18 @@ static int stack_trace_init(struct trace_array *tr)
247{ 238{
248 sysprof_trace = tr; 239 sysprof_trace = tr;
249 240
250 start_stack_trace(tr); 241 tracing_start_cmdline_record();
242
243 mutex_lock(&sample_timer_lock);
244 start_stack_timers();
245 tracer_enabled = 1;
246 mutex_unlock(&sample_timer_lock);
251 return 0; 247 return 0;
252} 248}
253 249
254static void stack_trace_reset(struct trace_array *tr) 250static void stack_trace_reset(struct trace_array *tr)
255{ 251{
252 tracing_stop_cmdline_record();
256 stop_stack_trace(tr); 253 stop_stack_trace(tr);
257} 254}
258 255
@@ -330,5 +327,5 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
330 d_tracer, NULL, &sysprof_sample_fops); 327 d_tracer, NULL, &sysprof_sample_fops);
331 if (entry) 328 if (entry)
332 return; 329 return;
333 pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n"); 330 pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n");
334} 331}
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
new file mode 100644
index 000000000000..4664990fe9c5
--- /dev/null
+++ b/kernel/trace/trace_workqueue.c
@@ -0,0 +1,281 @@
1/*
2 * Workqueue statistical tracer.
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8
9#include <trace/workqueue.h>
10#include <linux/list.h>
11#include <linux/percpu.h>
12#include "trace_stat.h"
13#include "trace.h"
14
15
16/* A cpu workqueue thread */
17struct cpu_workqueue_stats {
18 struct list_head list;
19/* Useful to know if we print the cpu headers */
20 bool first_entry;
21 int cpu;
22 pid_t pid;
23/* Can be inserted from interrupt or user context, need to be atomic */
24 atomic_t inserted;
25/*
26 * Doesn't need to be atomic: work items are serialized in a single workqueue thread
27 * on a single CPU.
28 */
29 unsigned int executed;
30};
31
32/* List of workqueue threads on one cpu */
33struct workqueue_global_stats {
34 struct list_head list;
35 spinlock_t lock;
36};
37
38/* No global lock is needed: this is allocated before the workqueues exist
39 * and is never freed.
40 */
41static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
42#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
43
44/* Insertion of a work */
45static void
46probe_workqueue_insertion(struct task_struct *wq_thread,
47 struct work_struct *work)
48{
49 int cpu = cpumask_first(&wq_thread->cpus_allowed);
50 struct cpu_workqueue_stats *node, *next;
51 unsigned long flags;
52
53 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
54 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
55 list) {
56 if (node->pid == wq_thread->pid) {
57 atomic_inc(&node->inserted);
58 goto found;
59 }
60 }
61 pr_debug("trace_workqueue: entry not found\n");
62found:
63 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
64}
65
66/* Execution of a work */
67static void
68probe_workqueue_execution(struct task_struct *wq_thread,
69 struct work_struct *work)
70{
71 int cpu = cpumask_first(&wq_thread->cpus_allowed);
72 struct cpu_workqueue_stats *node, *next;
73 unsigned long flags;
74
75 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
76 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
77 list) {
78 if (node->pid == wq_thread->pid) {
79 node->executed++;
80 goto found;
81 }
82 }
83 pr_debug("trace_workqueue: entry not found\n");
84found:
85 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
86}
87
88/* Creation of a cpu workqueue thread */
89static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
90{
91 struct cpu_workqueue_stats *cws;
92 unsigned long flags;
93
94 WARN_ON(cpu < 0 || cpu >= num_possible_cpus());
95
96 /* Workqueues are sometimes created in atomic context */
97 cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);
98 if (!cws) {
99 pr_warning("trace_workqueue: not enough memory\n");
100 return;
101 }
102 tracing_record_cmdline(wq_thread);
103
104 INIT_LIST_HEAD(&cws->list);
105 cws->cpu = cpu;
106
107 cws->pid = wq_thread->pid;
108
109 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
110 if (list_empty(&workqueue_cpu_stat(cpu)->list))
111 cws->first_entry = true;
112 list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
113 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
114}
115
116/* Destruction of a cpu workqueue thread */
117static void probe_workqueue_destruction(struct task_struct *wq_thread)
118{
119 /* A workqueue thread only executes on one cpu */
120 int cpu = cpumask_first(&wq_thread->cpus_allowed);
121 struct cpu_workqueue_stats *node, *next;
122 unsigned long flags;
123
124 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
125 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
126 list) {
127 if (node->pid == wq_thread->pid) {
128 list_del(&node->list);
129 kfree(node);
130 goto found;
131 }
132 }
133
134 pr_debug("trace_workqueue: couldn't find workqueue to destroy\n");
135found:
136 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
137
138}
139
140static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
141{
142 unsigned long flags;
143 struct cpu_workqueue_stats *ret = NULL;
144
145
146 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
147
148 if (!list_empty(&workqueue_cpu_stat(cpu)->list))
149 ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
150 struct cpu_workqueue_stats, list);
151
152 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
153
154 return ret;
155}
156
157static void *workqueue_stat_start(void)
158{
159 int cpu;
160 void *ret = NULL;
161
162 for_each_possible_cpu(cpu) {
163 ret = workqueue_stat_start_cpu(cpu);
164 if (ret)
165 return ret;
166 }
167 return NULL;
168}
169
170static void *workqueue_stat_next(void *prev, int idx)
171{
172 struct cpu_workqueue_stats *prev_cws = prev;
173 int cpu = prev_cws->cpu;
174 unsigned long flags;
175 void *ret = NULL;
176
177 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
178 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
179 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
180 for (++cpu ; cpu < num_possible_cpus(); cpu++) {
181 ret = workqueue_stat_start_cpu(cpu);
182 if (ret)
183 return ret;
184 }
185 return NULL;
186 }
187 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
188
189 return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
190 list);
191}
192
193static int workqueue_stat_show(struct seq_file *s, void *p)
194{
195 struct cpu_workqueue_stats *cws = p;
196 unsigned long flags;
197 int cpu = cws->cpu;
198
199 seq_printf(s, "%3d %6d %6u %s\n", cws->cpu,
200 atomic_read(&cws->inserted),
201 cws->executed,
202 trace_find_cmdline(cws->pid));
203
204 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
205 if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
206 seq_printf(s, "\n");
207 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
208
209 return 0;
210}
211
212static int workqueue_stat_headers(struct seq_file *s)
213{
214 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
215 seq_printf(s, "# | | | |\n\n");
216 return 0;
217}
218
219struct tracer_stat workqueue_stats __read_mostly = {
220 .name = "workqueues",
221 .stat_start = workqueue_stat_start,
222 .stat_next = workqueue_stat_next,
223 .stat_show = workqueue_stat_show,
224 .stat_headers = workqueue_stat_headers
225};
226
227
228int __init stat_workqueue_init(void)
229{
230 if (register_stat_tracer(&workqueue_stats)) {
231 pr_warning("Unable to register workqueue stat tracer\n");
232 return 1;
233 }
234
235 return 0;
236}
237fs_initcall(stat_workqueue_init);
238
239/*
240 * Workqueues are created very early, just after pre-smp initcalls.
241 * So we must register our tracepoints at this stage.
242 */
243int __init trace_workqueue_early_init(void)
244{
245 int ret, cpu;
246
247 ret = register_trace_workqueue_insertion(probe_workqueue_insertion);
248 if (ret)
249 goto out;
250
251 ret = register_trace_workqueue_execution(probe_workqueue_execution);
252 if (ret)
253 goto no_insertion;
254
255 ret = register_trace_workqueue_creation(probe_workqueue_creation);
256 if (ret)
257 goto no_execution;
258
259 ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
260 if (ret)
261 goto no_creation;
262
263 for_each_possible_cpu(cpu) {
264 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
265 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
266 }
267
268 return 0;
269
270no_creation:
271 unregister_trace_workqueue_creation(probe_workqueue_creation);
272no_execution:
273 unregister_trace_workqueue_execution(probe_workqueue_execution);
274no_insertion:
275 unregister_trace_workqueue_insertion(probe_workqueue_insertion);
276out:
277 pr_warning("trace_workqueue: unable to trace workqueues\n");
278
279 return 1;
280}
281early_initcall(trace_workqueue_early_init);
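
Putting the pieces together, the stat callbacks above produce a per-thread table under trace_stat/workqueues, grouped by CPU. The figures and exact column spacing below are made up purely to illustrate the layout that workqueue_stat_headers() and workqueue_stat_show() generate.

	/*
	 * Hypothetical trace_stat/workqueues output:
	 *
	 * # CPU  INSERTED  EXECUTED   NAME
	 * # |      |         |          |
	 *
	 *   0    157    157 events/0
	 *
	 *   1    212    212 events/1
	 */
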
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1f0c509b40d3..e53ee18ef431 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -33,6 +33,7 @@
33#include <linux/kallsyms.h> 33#include <linux/kallsyms.h>
34#include <linux/debug_locks.h> 34#include <linux/debug_locks.h>
35#include <linux/lockdep.h> 35#include <linux/lockdep.h>
36#include <trace/workqueue.h>
36 37
37/* 38/*
38 * The per-CPU workqueue (if single thread, we always use the first 39 * The per-CPU workqueue (if single thread, we always use the first
@@ -125,9 +126,13 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
125 return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); 126 return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
126} 127}
127 128
129DEFINE_TRACE(workqueue_insertion);
130
128static void insert_work(struct cpu_workqueue_struct *cwq, 131static void insert_work(struct cpu_workqueue_struct *cwq,
129 struct work_struct *work, struct list_head *head) 132 struct work_struct *work, struct list_head *head)
130{ 133{
134 trace_workqueue_insertion(cwq->thread, work);
135
131 set_wq_data(work, cwq); 136 set_wq_data(work, cwq);
132 /* 137 /*
133 * Ensure that we get the right work->data if we see the 138 * Ensure that we get the right work->data if we see the
@@ -259,6 +264,8 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
259} 264}
260EXPORT_SYMBOL_GPL(queue_delayed_work_on); 265EXPORT_SYMBOL_GPL(queue_delayed_work_on);
261 266
267DEFINE_TRACE(workqueue_execution);
268
262static void run_workqueue(struct cpu_workqueue_struct *cwq) 269static void run_workqueue(struct cpu_workqueue_struct *cwq)
263{ 270{
264 spin_lock_irq(&cwq->lock); 271 spin_lock_irq(&cwq->lock);
@@ -284,7 +291,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
284 */ 291 */
285 struct lockdep_map lockdep_map = work->lockdep_map; 292 struct lockdep_map lockdep_map = work->lockdep_map;
286#endif 293#endif
287 294 trace_workqueue_execution(cwq->thread, work);
288 cwq->current_work = work; 295 cwq->current_work = work;
289 list_del_init(cwq->worklist.next); 296 list_del_init(cwq->worklist.next);
290 spin_unlock_irq(&cwq->lock); 297 spin_unlock_irq(&cwq->lock);
@@ -765,6 +772,8 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
765 return cwq; 772 return cwq;
766} 773}
767 774
775DEFINE_TRACE(workqueue_creation);
776
768static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) 777static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
769{ 778{
770 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 779 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -787,6 +796,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
787 sched_setscheduler_nocheck(p, SCHED_FIFO, &param); 796 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
788 cwq->thread = p; 797 cwq->thread = p;
789 798
799 trace_workqueue_creation(cwq->thread, cpu);
800
790 return 0; 801 return 0;
791} 802}
792 803
@@ -868,6 +879,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
868} 879}
869EXPORT_SYMBOL_GPL(__create_workqueue_key); 880EXPORT_SYMBOL_GPL(__create_workqueue_key);
870 881
882DEFINE_TRACE(workqueue_destruction);
883
871static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) 884static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
872{ 885{
873 /* 886 /*
@@ -891,6 +904,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
891 * checks list_empty(), and a "normal" queue_work() can't use 904 * checks list_empty(), and a "normal" queue_work() can't use
892 * a dead CPU. 905 * a dead CPU.
893 */ 906 */
907 trace_workqueue_destruction(cwq->thread);
894 kthread_stop(cwq->thread); 908 kthread_stop(cwq->thread);
895 cwq->thread = NULL; 909 cwq->thread = NULL;
896} 910}