Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                  119
-rw-r--r--  kernel/trace/Makefile                  11
-rw-r--r--  kernel/trace/blktrace.c              1550
-rw-r--r--  kernel/trace/events.c                  15
-rw-r--r--  kernel/trace/ftrace.c                1071
-rw-r--r--  kernel/trace/kmemtrace.c              339
-rw-r--r--  kernel/trace/ring_buffer.c            581
-rw-r--r--  kernel/trace/trace.c                 2941
-rw-r--r--  kernel/trace/trace.h                  243
-rw-r--r--  kernel/trace/trace_boot.c              36
-rw-r--r--  kernel/trace/trace_branch.c           278
-rw-r--r--  kernel/trace/trace_clock.c            108
-rw-r--r--  kernel/trace/trace_event_types.h      175
-rw-r--r--  kernel/trace/trace_events.c           604
-rw-r--r--  kernel/trace/trace_events_stage_1.h    39
-rw-r--r--  kernel/trace/trace_events_stage_2.h   131
-rw-r--r--  kernel/trace/trace_events_stage_3.h   217
-rw-r--r--  kernel/trace/trace_export.c           102
-rw-r--r--  kernel/trace/trace_functions.c        369
-rw-r--r--  kernel/trace/trace_functions_graph.c  504
-rw-r--r--  kernel/trace/trace_hw_branches.c      185
-rw-r--r--  kernel/trace/trace_irqsoff.c           54
-rw-r--r--  kernel/trace/trace_mmiotrace.c         43
-rw-r--r--  kernel/trace/trace_nop.c                5
-rw-r--r--  kernel/trace/trace_output.c           967
-rw-r--r--  kernel/trace/trace_output.h            63
-rw-r--r--  kernel/trace/trace_power.c            194
-rw-r--r--  kernel/trace/trace_printk.c           270
-rw-r--r--  kernel/trace/trace_sched_switch.c      24
-rw-r--r--  kernel/trace/trace_sched_wakeup.c      96
-rw-r--r--  kernel/trace/trace_selftest.c         133
-rw-r--r--  kernel/trace/trace_stack.c             19
-rw-r--r--  kernel/trace/trace_stat.c             319
-rw-r--r--  kernel/trace/trace_stat.h              31
-rw-r--r--  kernel/trace/trace_syscalls.c         250
-rw-r--r--  kernel/trace/trace_sysprof.c           23
-rw-r--r--  kernel/trace/trace_workqueue.c        288
37 files changed, 10013 insertions, 2384 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 34e707e5ab87..b0a46f889659 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -9,6 +9,9 @@ config USER_STACKTRACE_SUPPORT
9config NOP_TRACER 9config NOP_TRACER
10 bool 10 bool
11 11
12config HAVE_FTRACE_NMI_ENTER
13 bool
14
12config HAVE_FUNCTION_TRACER 15config HAVE_FUNCTION_TRACER
13 bool 16 bool
14 17
@@ -31,12 +34,20 @@ config HAVE_FTRACE_MCOUNT_RECORD
31config HAVE_HW_BRANCH_TRACER 34config HAVE_HW_BRANCH_TRACER
32 bool 35 bool
33 36
37config HAVE_FTRACE_SYSCALLS
38 bool
39
34config TRACER_MAX_TRACE 40config TRACER_MAX_TRACE
35 bool 41 bool
36 42
37config RING_BUFFER 43config RING_BUFFER
38 bool 44 bool
39 45
46config FTRACE_NMI_ENTER
47 bool
48 depends on HAVE_FTRACE_NMI_ENTER
49 default y
50
40config TRACING 51config TRACING
41 bool 52 bool
42 select DEBUG_FS 53 select DEBUG_FS
@@ -44,13 +55,25 @@ config TRACING
44 select STACKTRACE if STACKTRACE_SUPPORT 55 select STACKTRACE if STACKTRACE_SUPPORT
45 select TRACEPOINTS 56 select TRACEPOINTS
46 select NOP_TRACER 57 select NOP_TRACER
58 select BINARY_PRINTF
59
60#
61# Minimum requirements an architecture has to meet for us to
62# be able to offer generic tracing facilities:
63#
64config TRACING_SUPPORT
65 bool
66 depends on TRACE_IRQFLAGS_SUPPORT
67 depends on STACKTRACE_SUPPORT
68 default y
69
70if TRACING_SUPPORT
47 71
48menu "Tracers" 72menu "Tracers"
49 73
50config FUNCTION_TRACER 74config FUNCTION_TRACER
51 bool "Kernel Function Tracer" 75 bool "Kernel Function Tracer"
52 depends on HAVE_FUNCTION_TRACER 76 depends on HAVE_FUNCTION_TRACER
53 depends on DEBUG_KERNEL
54 select FRAME_POINTER 77 select FRAME_POINTER
55 select KALLSYMS 78 select KALLSYMS
56 select TRACING 79 select TRACING
@@ -83,7 +106,6 @@ config IRQSOFF_TRACER
83 default n 106 default n
84 depends on TRACE_IRQFLAGS_SUPPORT 107 depends on TRACE_IRQFLAGS_SUPPORT
85 depends on GENERIC_TIME 108 depends on GENERIC_TIME
86 depends on DEBUG_KERNEL
87 select TRACE_IRQFLAGS 109 select TRACE_IRQFLAGS
88 select TRACING 110 select TRACING
89 select TRACER_MAX_TRACE 111 select TRACER_MAX_TRACE
@@ -106,7 +128,6 @@ config PREEMPT_TRACER
106 default n 128 default n
107 depends on GENERIC_TIME 129 depends on GENERIC_TIME
108 depends on PREEMPT 130 depends on PREEMPT
109 depends on DEBUG_KERNEL
110 select TRACING 131 select TRACING
111 select TRACER_MAX_TRACE 132 select TRACER_MAX_TRACE
112 help 133 help
@@ -127,13 +148,13 @@ config SYSPROF_TRACER
127 bool "Sysprof Tracer" 148 bool "Sysprof Tracer"
128 depends on X86 149 depends on X86
129 select TRACING 150 select TRACING
151 select CONTEXT_SWITCH_TRACER
130 help 152 help
131 This tracer provides the trace needed by the 'Sysprof' userspace 153 This tracer provides the trace needed by the 'Sysprof' userspace
132 tool. 154 tool.
133 155
134config SCHED_TRACER 156config SCHED_TRACER
135 bool "Scheduling Latency Tracer" 157 bool "Scheduling Latency Tracer"
136 depends on DEBUG_KERNEL
137 select TRACING 158 select TRACING
138 select CONTEXT_SWITCH_TRACER 159 select CONTEXT_SWITCH_TRACER
139 select TRACER_MAX_TRACE 160 select TRACER_MAX_TRACE
@@ -143,16 +164,30 @@ config SCHED_TRACER
143 164
144config CONTEXT_SWITCH_TRACER 165config CONTEXT_SWITCH_TRACER
145 bool "Trace process context switches" 166 bool "Trace process context switches"
146 depends on DEBUG_KERNEL
147 select TRACING 167 select TRACING
148 select MARKERS 168 select MARKERS
149 help 169 help
150 This tracer gets called from the context switch and records 170 This tracer gets called from the context switch and records
151 all switching of tasks. 171 all switching of tasks.
152 172
173config EVENT_TRACER
174 bool "Trace various events in the kernel"
175 select TRACING
176 help
177 This tracer hooks to various trace points in the kernel
178 allowing the user to pick and choose which trace point they
179 want to trace.
180
181config FTRACE_SYSCALLS
182 bool "Trace syscalls"
183 depends on HAVE_FTRACE_SYSCALLS
184 select TRACING
185 select KALLSYMS
186 help
187 Basic tracer to catch the syscall entry and exit events.
188
153config BOOT_TRACER 189config BOOT_TRACER
154 bool "Trace boot initcalls" 190 bool "Trace boot initcalls"
155 depends on DEBUG_KERNEL
156 select TRACING 191 select TRACING
157 select CONTEXT_SWITCH_TRACER 192 select CONTEXT_SWITCH_TRACER
158 help 193 help
@@ -165,13 +200,11 @@ config BOOT_TRACER
165 representation of the delays during initcalls - but the raw 200 representation of the delays during initcalls - but the raw
166 /debug/tracing/trace text output is readable too. 201 /debug/tracing/trace text output is readable too.
167 202
168 ( Note that tracing self tests can't be enabled if this tracer is 203 You must pass in ftrace=initcall to the kernel command line
169 selected, because the self-tests are an initcall as well and that 204 to enable this on bootup.
170 would invalidate the boot trace. )
171 205
172config TRACE_BRANCH_PROFILING 206config TRACE_BRANCH_PROFILING
173 bool "Trace likely/unlikely profiler" 207 bool "Trace likely/unlikely profiler"
174 depends on DEBUG_KERNEL
175 select TRACING 208 select TRACING
176 help 209 help
 177 This tracer profiles all the likely and unlikely macros 210
@@ -224,7 +257,6 @@ config BRANCH_TRACER
224 257
225config POWER_TRACER 258config POWER_TRACER
226 bool "Trace power consumption behavior" 259 bool "Trace power consumption behavior"
227 depends on DEBUG_KERNEL
228 depends on X86 260 depends on X86
229 select TRACING 261 select TRACING
230 help 262 help
@@ -236,7 +268,6 @@ config POWER_TRACER
236config STACK_TRACER 268config STACK_TRACER
237 bool "Trace max stack" 269 bool "Trace max stack"
238 depends on HAVE_FUNCTION_TRACER 270 depends on HAVE_FUNCTION_TRACER
239 depends on DEBUG_KERNEL
240 select FUNCTION_TRACER 271 select FUNCTION_TRACER
241 select STACKTRACE 272 select STACKTRACE
242 select KALLSYMS 273 select KALLSYMS
@@ -266,11 +297,66 @@ config HW_BRANCH_TRACER
266 This tracer records all branches on the system in a circular 297 This tracer records all branches on the system in a circular
267 buffer giving access to the last N branches for each cpu. 298 buffer giving access to the last N branches for each cpu.
268 299
300config KMEMTRACE
301 bool "Trace SLAB allocations"
302 select TRACING
303 help
304 kmemtrace provides tracing for slab allocator functions, such as
305 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
306 data is then fed to the userspace application in order to analyse
307 allocation hotspots, internal fragmentation and so on, making it
308 possible to see how well an allocator performs, as well as debug
309 and profile kernel code.
310
 311 This requires a userspace application to use. See
312 Documentation/vm/kmemtrace.txt for more information.
313
314 Saying Y will make the kernel somewhat larger and slower. However,
315 if you disable kmemtrace at run-time or boot-time, the performance
316 impact is minimal (depending on the arch the kernel is built for).
317
318 If unsure, say N.
319
320config WORKQUEUE_TRACER
321 bool "Trace workqueues"
322 select TRACING
323 help
 324 The workqueue tracer provides some statistical information
 325 about each cpu workqueue thread, such as the number of
 326 works inserted and executed since its creation. It can help
 327 to evaluate the amount of work each of them has to perform.
 328 For example, it can help a developer decide whether to use
 329 a per-cpu workqueue instead of a singlethreaded one.
330
331config BLK_DEV_IO_TRACE
332 bool "Support for tracing block io actions"
333 depends on SYSFS
334 depends on BLOCK
335 select RELAY
336 select DEBUG_FS
337 select TRACEPOINTS
338 select TRACING
339 select STACKTRACE
340 help
341 Say Y here if you want to be able to trace the block layer actions
342 on a given queue. Tracing allows you to see any traffic happening
343 on a block device queue. For more information (and the userspace
344 support tools needed), fetch the blktrace tools from:
345
346 git://git.kernel.dk/blktrace.git
347
348 Tracing also is possible using the ftrace interface, e.g.:
349
350 echo 1 > /sys/block/sda/sda1/trace/enable
351 echo blk > /sys/kernel/debug/tracing/current_tracer
352 cat /sys/kernel/debug/tracing/trace_pipe
353
354 If unsure, say N.
355
269config DYNAMIC_FTRACE 356config DYNAMIC_FTRACE
270 bool "enable/disable ftrace tracepoints dynamically" 357 bool "enable/disable ftrace tracepoints dynamically"
271 depends on FUNCTION_TRACER 358 depends on FUNCTION_TRACER
272 depends on HAVE_DYNAMIC_FTRACE 359 depends on HAVE_DYNAMIC_FTRACE
273 depends on DEBUG_KERNEL
274 default y 360 default y
275 help 361 help
276 This option will modify all the calls to ftrace dynamically 362 This option will modify all the calls to ftrace dynamically
@@ -296,7 +382,7 @@ config FTRACE_SELFTEST
296 382
297config FTRACE_STARTUP_TEST 383config FTRACE_STARTUP_TEST
298 bool "Perform a startup test on ftrace" 384 bool "Perform a startup test on ftrace"
299 depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER 385 depends on TRACING
300 select FTRACE_SELFTEST 386 select FTRACE_SELFTEST
301 help 387 help
302 This option performs a series of startup tests on ftrace. On bootup 388 This option performs a series of startup tests on ftrace. On bootup
@@ -306,7 +392,7 @@ config FTRACE_STARTUP_TEST
306 392
307config MMIOTRACE 393config MMIOTRACE
308 bool "Memory mapped IO tracing" 394 bool "Memory mapped IO tracing"
309 depends on HAVE_MMIOTRACE_SUPPORT && DEBUG_KERNEL && PCI 395 depends on HAVE_MMIOTRACE_SUPPORT && PCI
310 select TRACING 396 select TRACING
311 help 397 help
312 Mmiotrace traces Memory Mapped I/O access and is meant for 398 Mmiotrace traces Memory Mapped I/O access and is meant for
@@ -328,3 +414,6 @@ config MMIOTRACE_TEST
328 Say N, unless you absolutely know what you are doing. 414 Say N, unless you absolutely know what you are doing.
329 415
330endmenu 416endmenu
417
418endif # TRACING_SUPPORT
419
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 349d5a93653f..c3feea01c3e0 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,10 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
20 20
21obj-$(CONFIG_TRACING) += trace.o 21obj-$(CONFIG_TRACING) += trace.o
22obj-$(CONFIG_TRACING) += trace_clock.o
23obj-$(CONFIG_TRACING) += trace_output.o
24obj-$(CONFIG_TRACING) += trace_stat.o
25obj-$(CONFIG_TRACING) += trace_printk.o
22obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o 26obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
23obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o 27obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
24obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o 28obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
@@ -33,5 +37,12 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
33obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 37obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
34obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o 38obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
35obj-$(CONFIG_POWER_TRACER) += trace_power.o 39obj-$(CONFIG_POWER_TRACER) += trace_power.o
40obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
41obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
42obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
43obj-$(CONFIG_EVENT_TRACER) += trace_events.o
44obj-$(CONFIG_EVENT_TRACER) += events.o
45obj-$(CONFIG_EVENT_TRACER) += trace_export.o
46obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
36 47
37libftrace-y := ftrace.o 48libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
new file mode 100644
index 000000000000..b171778e3863
--- /dev/null
+++ b/kernel/trace/blktrace.c
@@ -0,0 +1,1550 @@
1/*
2 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 *
17 */
18#include <linux/kernel.h>
19#include <linux/blkdev.h>
20#include <linux/blktrace_api.h>
21#include <linux/percpu.h>
22#include <linux/init.h>
23#include <linux/mutex.h>
24#include <linux/debugfs.h>
25#include <linux/time.h>
26#include <trace/block.h>
27#include <linux/uaccess.h>
28#include "trace_output.h"
29
30static unsigned int blktrace_seq __read_mostly = 1;
31
32static struct trace_array *blk_tr;
33static int __read_mostly blk_tracer_enabled;
34
35/* Select an alternative, minimalistic output than the original one */
36#define TRACE_BLK_OPT_CLASSIC 0x1
37
38static struct tracer_opt blk_tracer_opts[] = {
39 /* Default disable the minimalistic output */
40 { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) },
41 { }
42};
43
44static struct tracer_flags blk_tracer_flags = {
45 .val = 0,
46 .opts = blk_tracer_opts,
47};
48
49/* Global reference count of probes */
50static DEFINE_MUTEX(blk_probe_mutex);
51static atomic_t blk_probes_ref = ATOMIC_INIT(0);
52
53static int blk_register_tracepoints(void);
54static void blk_unregister_tracepoints(void);
55
56/*
57 * Send out a notify message.
58 */
59static void trace_note(struct blk_trace *bt, pid_t pid, int action,
60 const void *data, size_t len)
61{
62 struct blk_io_trace *t;
63
64 if (!bt->rchan)
65 return;
66
67 t = relay_reserve(bt->rchan, sizeof(*t) + len);
68 if (t) {
69 const int cpu = smp_processor_id();
70
71 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
72 t->time = ktime_to_ns(ktime_get());
73 t->device = bt->dev;
74 t->action = action;
75 t->pid = pid;
76 t->cpu = cpu;
77 t->pdu_len = len;
78 memcpy((void *) t + sizeof(*t), data, len);
79 }
80}
81
82/*
83 * Send out a notify for this process, if we haven't done so since a trace
84 * started
85 */
86static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk)
87{
88 tsk->btrace_seq = blktrace_seq;
89 trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm));
90}
91
92static void trace_note_time(struct blk_trace *bt)
93{
94 struct timespec now;
95 unsigned long flags;
96 u32 words[2];
97
98 getnstimeofday(&now);
99 words[0] = now.tv_sec;
100 words[1] = now.tv_nsec;
101
102 local_irq_save(flags);
103 trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words));
104 local_irq_restore(flags);
105}
106
107void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
108{
109 int n;
110 va_list args;
111 unsigned long flags;
112 char *buf;
113
114 if (blk_tr) {
115 va_start(args, fmt);
116 ftrace_vprintk(fmt, args);
117 va_end(args);
118 return;
119 }
120
121 if (!bt->msg_data)
122 return;
123
124 local_irq_save(flags);
125 buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
126 va_start(args, fmt);
127 n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
128 va_end(args);
129
130 trace_note(bt, 0, BLK_TN_MESSAGE, buf, n);
131 local_irq_restore(flags);
132}
133EXPORT_SYMBOL_GPL(__trace_note_message);
134
135static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
136 pid_t pid)
137{
138 if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
139 return 1;
140 if (sector < bt->start_lba || sector > bt->end_lba)
141 return 1;
142 if (bt->pid && pid != bt->pid)
143 return 1;
144
145 return 0;
146}
147
148/*
149 * Data direction bit lookup
150 */
151static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ),
152 BLK_TC_ACT(BLK_TC_WRITE) };
153
154/* The ilog2() calls fall out because they're constant */
155#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
156 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name))
157
158/*
159 * The worker for the various blk_add_trace*() types. Fills out a
160 * blk_io_trace structure and places it in a per-cpu subbuffer.
161 */
162static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
163 int rw, u32 what, int error, int pdu_len, void *pdu_data)
164{
165 struct task_struct *tsk = current;
166 struct ring_buffer_event *event = NULL;
167 struct blk_io_trace *t;
168 unsigned long flags = 0;
169 unsigned long *sequence;
170 pid_t pid;
171 int cpu, pc = 0;
172
173 if (unlikely(bt->trace_state != Blktrace_running ||
174 !blk_tracer_enabled))
175 return;
176
177 what |= ddir_act[rw & WRITE];
178 what |= MASK_TC_BIT(rw, BARRIER);
179 what |= MASK_TC_BIT(rw, SYNCIO);
180 what |= MASK_TC_BIT(rw, AHEAD);
181 what |= MASK_TC_BIT(rw, META);
182 what |= MASK_TC_BIT(rw, DISCARD);
183
184 pid = tsk->pid;
185 if (unlikely(act_log_check(bt, what, sector, pid)))
186 return;
187 cpu = raw_smp_processor_id();
188
189 if (blk_tr) {
190 tracing_record_cmdline(current);
191
192 pc = preempt_count();
193 event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK,
194 sizeof(*t) + pdu_len,
195 0, pc);
196 if (!event)
197 return;
198 t = ring_buffer_event_data(event);
199 goto record_it;
200 }
201
202 /*
203 * A word about the locking here - we disable interrupts to reserve
204 * some space in the relay per-cpu buffer, to prevent an irq
205 * from coming in and stepping on our toes.
206 */
207 local_irq_save(flags);
208
209 if (unlikely(tsk->btrace_seq != blktrace_seq))
210 trace_note_tsk(bt, tsk);
211
212 t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
213 if (t) {
214 sequence = per_cpu_ptr(bt->sequence, cpu);
215
216 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
217 t->sequence = ++(*sequence);
218 t->time = ktime_to_ns(ktime_get());
219record_it:
220 /*
221 * These two are not needed in ftrace as they are in the
222 * generic trace_entry, filled by tracing_generic_entry_update,
223 * but for the trace_event->bin() synthesizer benefit we do it
224 * here too.
225 */
226 t->cpu = cpu;
227 t->pid = pid;
228
229 t->sector = sector;
230 t->bytes = bytes;
231 t->action = what;
232 t->device = bt->dev;
233 t->error = error;
234 t->pdu_len = pdu_len;
235
236 if (pdu_len)
237 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
238
239 if (blk_tr) {
240 trace_buffer_unlock_commit(blk_tr, event, 0, pc);
241 return;
242 }
243 }
244
245 local_irq_restore(flags);
246}
247
248static struct dentry *blk_tree_root;
249static DEFINE_MUTEX(blk_tree_mutex);
250
251static void blk_trace_cleanup(struct blk_trace *bt)
252{
253 debugfs_remove(bt->msg_file);
254 debugfs_remove(bt->dropped_file);
255 relay_close(bt->rchan);
256 free_percpu(bt->sequence);
257 free_percpu(bt->msg_data);
258 kfree(bt);
259 mutex_lock(&blk_probe_mutex);
260 if (atomic_dec_and_test(&blk_probes_ref))
261 blk_unregister_tracepoints();
262 mutex_unlock(&blk_probe_mutex);
263}
264
265int blk_trace_remove(struct request_queue *q)
266{
267 struct blk_trace *bt;
268
269 bt = xchg(&q->blk_trace, NULL);
270 if (!bt)
271 return -EINVAL;
272
273 if (bt->trace_state == Blktrace_setup ||
274 bt->trace_state == Blktrace_stopped)
275 blk_trace_cleanup(bt);
276
277 return 0;
278}
279EXPORT_SYMBOL_GPL(blk_trace_remove);
280
281static int blk_dropped_open(struct inode *inode, struct file *filp)
282{
283 filp->private_data = inode->i_private;
284
285 return 0;
286}
287
288static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
289 size_t count, loff_t *ppos)
290{
291 struct blk_trace *bt = filp->private_data;
292 char buf[16];
293
294 snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));
295
296 return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
297}
298
299static const struct file_operations blk_dropped_fops = {
300 .owner = THIS_MODULE,
301 .open = blk_dropped_open,
302 .read = blk_dropped_read,
303};
304
305static int blk_msg_open(struct inode *inode, struct file *filp)
306{
307 filp->private_data = inode->i_private;
308
309 return 0;
310}
311
312static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
313 size_t count, loff_t *ppos)
314{
315 char *msg;
316 struct blk_trace *bt;
317
318 if (count > BLK_TN_MAX_MSG)
319 return -EINVAL;
320
321 msg = kmalloc(count, GFP_KERNEL);
322 if (msg == NULL)
323 return -ENOMEM;
324
325 if (copy_from_user(msg, buffer, count)) {
326 kfree(msg);
327 return -EFAULT;
328 }
329
330 bt = filp->private_data;
331 __trace_note_message(bt, "%s", msg);
332 kfree(msg);
333
334 return count;
335}
336
337static const struct file_operations blk_msg_fops = {
338 .owner = THIS_MODULE,
339 .open = blk_msg_open,
340 .write = blk_msg_write,
341};
342
343/*
344 * Keep track of how many times we encountered a full subbuffer, to aid
345 * the user space app in telling how many lost events there were.
346 */
347static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
348 void *prev_subbuf, size_t prev_padding)
349{
350 struct blk_trace *bt;
351
352 if (!relay_buf_full(buf))
353 return 1;
354
355 bt = buf->chan->private_data;
356 atomic_inc(&bt->dropped);
357 return 0;
358}
359
360static int blk_remove_buf_file_callback(struct dentry *dentry)
361{
362 struct dentry *parent = dentry->d_parent;
363 debugfs_remove(dentry);
364
365 /*
366 * this will fail for all but the last file, but that is ok. what we
367 * care about is the top level buts->name directory going away, when
368 * the last trace file is gone. Then we don't have to rmdir() that
369 * manually on trace stop, so it nicely solves the issue with
370 * force killing of running traces.
371 */
372
373 debugfs_remove(parent);
374 return 0;
375}
376
377static struct dentry *blk_create_buf_file_callback(const char *filename,
378 struct dentry *parent,
379 int mode,
380 struct rchan_buf *buf,
381 int *is_global)
382{
383 return debugfs_create_file(filename, mode, parent, buf,
384 &relay_file_operations);
385}
386
387static struct rchan_callbacks blk_relay_callbacks = {
388 .subbuf_start = blk_subbuf_start_callback,
389 .create_buf_file = blk_create_buf_file_callback,
390 .remove_buf_file = blk_remove_buf_file_callback,
391};
392
393/*
394 * Setup everything required to start tracing
395 */
396int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
397 struct blk_user_trace_setup *buts)
398{
399 struct blk_trace *old_bt, *bt = NULL;
400 struct dentry *dir = NULL;
401 int ret, i;
402
403 if (!buts->buf_size || !buts->buf_nr)
404 return -EINVAL;
405
406 strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
407 buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
408
409 /*
410 * some device names have larger paths - convert the slashes
411 * to underscores for this to work as expected
412 */
413 for (i = 0; i < strlen(buts->name); i++)
414 if (buts->name[i] == '/')
415 buts->name[i] = '_';
416
417 ret = -ENOMEM;
418 bt = kzalloc(sizeof(*bt), GFP_KERNEL);
419 if (!bt)
420 goto err;
421
422 bt->sequence = alloc_percpu(unsigned long);
423 if (!bt->sequence)
424 goto err;
425
426 bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
427 if (!bt->msg_data)
428 goto err;
429
430 ret = -ENOENT;
431
432 if (!blk_tree_root) {
433 blk_tree_root = debugfs_create_dir("block", NULL);
434 if (!blk_tree_root)
435 return -ENOMEM;
436 }
437
438 dir = debugfs_create_dir(buts->name, blk_tree_root);
439
440 if (!dir)
441 goto err;
442
443 bt->dir = dir;
444 bt->dev = dev;
445 atomic_set(&bt->dropped, 0);
446
447 ret = -EIO;
448 bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
449 &blk_dropped_fops);
450 if (!bt->dropped_file)
451 goto err;
452
453 bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
454 if (!bt->msg_file)
455 goto err;
456
457 bt->rchan = relay_open("trace", dir, buts->buf_size,
458 buts->buf_nr, &blk_relay_callbacks, bt);
459 if (!bt->rchan)
460 goto err;
461
462 bt->act_mask = buts->act_mask;
463 if (!bt->act_mask)
464 bt->act_mask = (u16) -1;
465
466 bt->start_lba = buts->start_lba;
467 bt->end_lba = buts->end_lba;
468 if (!bt->end_lba)
469 bt->end_lba = -1ULL;
470
471 bt->pid = buts->pid;
472 bt->trace_state = Blktrace_setup;
473
474 mutex_lock(&blk_probe_mutex);
475 if (atomic_add_return(1, &blk_probes_ref) == 1) {
476 ret = blk_register_tracepoints();
477 if (ret)
478 goto probe_err;
479 }
480 mutex_unlock(&blk_probe_mutex);
481
482 ret = -EBUSY;
483 old_bt = xchg(&q->blk_trace, bt);
484 if (old_bt) {
485 (void) xchg(&q->blk_trace, old_bt);
486 goto err;
487 }
488
489 return 0;
490probe_err:
491 atomic_dec(&blk_probes_ref);
492 mutex_unlock(&blk_probe_mutex);
493err:
494 if (bt) {
495 if (bt->msg_file)
496 debugfs_remove(bt->msg_file);
497 if (bt->dropped_file)
498 debugfs_remove(bt->dropped_file);
499 free_percpu(bt->sequence);
500 free_percpu(bt->msg_data);
501 if (bt->rchan)
502 relay_close(bt->rchan);
503 kfree(bt);
504 }
505 return ret;
506}
507
508int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
509 char __user *arg)
510{
511 struct blk_user_trace_setup buts;
512 int ret;
513
514 ret = copy_from_user(&buts, arg, sizeof(buts));
515 if (ret)
516 return -EFAULT;
517
518 ret = do_blk_trace_setup(q, name, dev, &buts);
519 if (ret)
520 return ret;
521
522 if (copy_to_user(arg, &buts, sizeof(buts)))
523 return -EFAULT;
524
525 return 0;
526}
527EXPORT_SYMBOL_GPL(blk_trace_setup);
528
529int blk_trace_startstop(struct request_queue *q, int start)
530{
531 int ret;
532 struct blk_trace *bt = q->blk_trace;
533
534 if (bt == NULL)
535 return -EINVAL;
536
537 /*
538 * For starting a trace, we can transition from a setup or stopped
539 * trace. For stopping a trace, the state must be running
540 */
541 ret = -EINVAL;
542 if (start) {
543 if (bt->trace_state == Blktrace_setup ||
544 bt->trace_state == Blktrace_stopped) {
545 blktrace_seq++;
546 smp_mb();
547 bt->trace_state = Blktrace_running;
548
549 trace_note_time(bt);
550 ret = 0;
551 }
552 } else {
553 if (bt->trace_state == Blktrace_running) {
554 bt->trace_state = Blktrace_stopped;
555 relay_flush(bt->rchan);
556 ret = 0;
557 }
558 }
559
560 return ret;
561}
562EXPORT_SYMBOL_GPL(blk_trace_startstop);
563
564/**
565 * blk_trace_ioctl: - handle the ioctls associated with tracing
566 * @bdev: the block device
567 * @cmd: the ioctl cmd
568 * @arg: the argument data, if any
569 *
570 **/
571int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
572{
573 struct request_queue *q;
574 int ret, start = 0;
575 char b[BDEVNAME_SIZE];
576
577 q = bdev_get_queue(bdev);
578 if (!q)
579 return -ENXIO;
580
581 mutex_lock(&bdev->bd_mutex);
582
583 switch (cmd) {
584 case BLKTRACESETUP:
585 bdevname(bdev, b);
586 ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
587 break;
588 case BLKTRACESTART:
589 start = 1;
590 case BLKTRACESTOP:
591 ret = blk_trace_startstop(q, start);
592 break;
593 case BLKTRACETEARDOWN:
594 ret = blk_trace_remove(q);
595 break;
596 default:
597 ret = -ENOTTY;
598 break;
599 }
600
601 mutex_unlock(&bdev->bd_mutex);
602 return ret;
603}
604
605/**
606 * blk_trace_shutdown: - stop and cleanup trace structures
607 * @q: the request queue associated with the device
608 *
609 **/
610void blk_trace_shutdown(struct request_queue *q)
611{
612 if (q->blk_trace) {
613 blk_trace_startstop(q, 0);
614 blk_trace_remove(q);
615 }
616}
617
618/*
619 * blktrace probes
620 */
621
622/**
623 * blk_add_trace_rq - Add a trace for a request oriented action
624 * @q: queue the io is for
625 * @rq: the source request
626 * @what: the action
627 *
628 * Description:
629 * Records an action against a request. Will log the bio offset + size.
630 *
631 **/
632static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
633 u32 what)
634{
635 struct blk_trace *bt = q->blk_trace;
636 int rw = rq->cmd_flags & 0x03;
637
638 if (likely(!bt))
639 return;
640
641 if (blk_discard_rq(rq))
642 rw |= (1 << BIO_RW_DISCARD);
643
644 if (blk_pc_request(rq)) {
645 what |= BLK_TC_ACT(BLK_TC_PC);
646 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
647 sizeof(rq->cmd), rq->cmd);
648 } else {
649 what |= BLK_TC_ACT(BLK_TC_FS);
650 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
651 rw, what, rq->errors, 0, NULL);
652 }
653}
654
655static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq)
656{
657 blk_add_trace_rq(q, rq, BLK_TA_ABORT);
658}
659
660static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq)
661{
662 blk_add_trace_rq(q, rq, BLK_TA_INSERT);
663}
664
665static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
666{
667 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
668}
669
670static void blk_add_trace_rq_requeue(struct request_queue *q,
671 struct request *rq)
672{
673 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
674}
675
676static void blk_add_trace_rq_complete(struct request_queue *q,
677 struct request *rq)
678{
679 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
680}
681
682/**
683 * blk_add_trace_bio - Add a trace for a bio oriented action
684 * @q: queue the io is for
685 * @bio: the source bio
686 * @what: the action
687 *
688 * Description:
689 * Records an action against a bio. Will log the bio offset + size.
690 *
691 **/
692static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
693 u32 what)
694{
695 struct blk_trace *bt = q->blk_trace;
696
697 if (likely(!bt))
698 return;
699
700 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
701 !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
702}
703
704static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio)
705{
706 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
707}
708
709static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
710{
711 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
712}
713
714static void blk_add_trace_bio_backmerge(struct request_queue *q,
715 struct bio *bio)
716{
717 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
718}
719
720static void blk_add_trace_bio_frontmerge(struct request_queue *q,
721 struct bio *bio)
722{
723 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
724}
725
726static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
727{
728 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
729}
730
731static void blk_add_trace_getrq(struct request_queue *q,
732 struct bio *bio, int rw)
733{
734 if (bio)
735 blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
736 else {
737 struct blk_trace *bt = q->blk_trace;
738
739 if (bt)
740 __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
741 }
742}
743
744
745static void blk_add_trace_sleeprq(struct request_queue *q,
746 struct bio *bio, int rw)
747{
748 if (bio)
749 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
750 else {
751 struct blk_trace *bt = q->blk_trace;
752
753 if (bt)
754 __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ,
755 0, 0, NULL);
756 }
757}
758
759static void blk_add_trace_plug(struct request_queue *q)
760{
761 struct blk_trace *bt = q->blk_trace;
762
763 if (bt)
764 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
765}
766
767static void blk_add_trace_unplug_io(struct request_queue *q)
768{
769 struct blk_trace *bt = q->blk_trace;
770
771 if (bt) {
772 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
773 __be64 rpdu = cpu_to_be64(pdu);
774
775 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
776 sizeof(rpdu), &rpdu);
777 }
778}
779
780static void blk_add_trace_unplug_timer(struct request_queue *q)
781{
782 struct blk_trace *bt = q->blk_trace;
783
784 if (bt) {
785 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
786 __be64 rpdu = cpu_to_be64(pdu);
787
788 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
789 sizeof(rpdu), &rpdu);
790 }
791}
792
793static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
794 unsigned int pdu)
795{
796 struct blk_trace *bt = q->blk_trace;
797
798 if (bt) {
799 __be64 rpdu = cpu_to_be64(pdu);
800
801 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
802 BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
803 sizeof(rpdu), &rpdu);
804 }
805}
806
807/**
808 * blk_add_trace_remap - Add a trace for a remap operation
809 * @q: queue the io is for
810 * @bio: the source bio
811 * @dev: target device
812 * @from: source sector
813 * @to: target sector
814 *
815 * Description:
816 * Device mapper or raid target sometimes need to split a bio because
817 * it spans a stripe (or similar). Add a trace for that action.
818 *
819 **/
820static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
821 dev_t dev, sector_t from, sector_t to)
822{
823 struct blk_trace *bt = q->blk_trace;
824 struct blk_io_trace_remap r;
825
826 if (likely(!bt))
827 return;
828
829 r.device = cpu_to_be32(dev);
830 r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
831 r.sector = cpu_to_be64(to);
832
833 __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
834 !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
835}
836
837/**
838 * blk_add_driver_data - Add binary message with driver-specific data
839 * @q: queue the io is for
840 * @rq: io request
841 * @data: driver-specific data
842 * @len: length of driver-specific data
843 *
844 * Description:
845 * Some drivers might want to write driver-specific data per request.
846 *
847 **/
848void blk_add_driver_data(struct request_queue *q,
849 struct request *rq,
850 void *data, size_t len)
851{
852 struct blk_trace *bt = q->blk_trace;
853
854 if (likely(!bt))
855 return;
856
857 if (blk_pc_request(rq))
858 __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
859 rq->errors, len, data);
860 else
861 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
862 0, BLK_TA_DRV_DATA, rq->errors, len, data);
863}
864EXPORT_SYMBOL_GPL(blk_add_driver_data);
865
866static int blk_register_tracepoints(void)
867{
868 int ret;
869
870 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort);
871 WARN_ON(ret);
872 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert);
873 WARN_ON(ret);
874 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue);
875 WARN_ON(ret);
876 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue);
877 WARN_ON(ret);
878 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete);
879 WARN_ON(ret);
880 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce);
881 WARN_ON(ret);
882 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete);
883 WARN_ON(ret);
884 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
885 WARN_ON(ret);
886 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
887 WARN_ON(ret);
888 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue);
889 WARN_ON(ret);
890 ret = register_trace_block_getrq(blk_add_trace_getrq);
891 WARN_ON(ret);
892 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq);
893 WARN_ON(ret);
894 ret = register_trace_block_plug(blk_add_trace_plug);
895 WARN_ON(ret);
896 ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer);
897 WARN_ON(ret);
898 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io);
899 WARN_ON(ret);
900 ret = register_trace_block_split(blk_add_trace_split);
901 WARN_ON(ret);
902 ret = register_trace_block_remap(blk_add_trace_remap);
903 WARN_ON(ret);
904 return 0;
905}
906
907static void blk_unregister_tracepoints(void)
908{
909 unregister_trace_block_remap(blk_add_trace_remap);
910 unregister_trace_block_split(blk_add_trace_split);
911 unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
912 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
913 unregister_trace_block_plug(blk_add_trace_plug);
914 unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
915 unregister_trace_block_getrq(blk_add_trace_getrq);
916 unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
917 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
918 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
919 unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
920 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
921 unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
922 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
923 unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
924 unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
925 unregister_trace_block_rq_abort(blk_add_trace_rq_abort);
926
927 tracepoint_synchronize_unregister();
928}
929
930/*
931 * struct blk_io_tracer formatting routines
932 */
933
934static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
935{
936 int i = 0;
937
938 if (t->action & BLK_TC_DISCARD)
939 rwbs[i++] = 'D';
940 else if (t->action & BLK_TC_WRITE)
941 rwbs[i++] = 'W';
942 else if (t->bytes)
943 rwbs[i++] = 'R';
944 else
945 rwbs[i++] = 'N';
946
947 if (t->action & BLK_TC_AHEAD)
948 rwbs[i++] = 'A';
949 if (t->action & BLK_TC_BARRIER)
950 rwbs[i++] = 'B';
951 if (t->action & BLK_TC_SYNC)
952 rwbs[i++] = 'S';
953 if (t->action & BLK_TC_META)
954 rwbs[i++] = 'M';
955
956 rwbs[i] = '\0';
957}
958
959static inline
960const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent)
961{
962 return (const struct blk_io_trace *)ent;
963}
964
965static inline const void *pdu_start(const struct trace_entry *ent)
966{
967 return te_blk_io_trace(ent) + 1;
968}
969
970static inline u32 t_sec(const struct trace_entry *ent)
971{
972 return te_blk_io_trace(ent)->bytes >> 9;
973}
974
975static inline unsigned long long t_sector(const struct trace_entry *ent)
976{
977 return te_blk_io_trace(ent)->sector;
978}
979
980static inline __u16 t_error(const struct trace_entry *ent)
981{
 982 return te_blk_io_trace(ent)->error;
983}
984
985static __u64 get_pdu_int(const struct trace_entry *ent)
986{
987 const __u64 *val = pdu_start(ent);
988 return be64_to_cpu(*val);
989}
990
991static void get_pdu_remap(const struct trace_entry *ent,
992 struct blk_io_trace_remap *r)
993{
994 const struct blk_io_trace_remap *__r = pdu_start(ent);
995 __u64 sector = __r->sector;
996
997 r->device = be32_to_cpu(__r->device);
998 r->device_from = be32_to_cpu(__r->device_from);
999 r->sector = be64_to_cpu(sector);
1000}
1001
1002static int blk_log_action_iter(struct trace_iterator *iter, const char *act)
1003{
1004 char rwbs[6];
1005 unsigned long long ts = ns2usecs(iter->ts);
1006 unsigned long usec_rem = do_div(ts, USEC_PER_SEC);
1007 unsigned secs = (unsigned long)ts;
1008 const struct trace_entry *ent = iter->ent;
1009 const struct blk_io_trace *t = (const struct blk_io_trace *)ent;
1010
1011 fill_rwbs(rwbs, t);
1012
1013 return trace_seq_printf(&iter->seq,
1014 "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ",
1015 MAJOR(t->device), MINOR(t->device), iter->cpu,
1016 secs, usec_rem, ent->pid, act, rwbs);
1017}
1018
1019static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t,
1020 const char *act)
1021{
1022 char rwbs[6];
1023 fill_rwbs(rwbs, t);
1024 return trace_seq_printf(s, "%3d,%-3d %2s %3s ",
1025 MAJOR(t->device), MINOR(t->device), act, rwbs);
1026}
1027
1028static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
1029{
1030 char cmd[TASK_COMM_LEN];
1031
1032 trace_find_cmdline(ent->pid, cmd);
1033
1034 if (t_sec(ent))
1035 return trace_seq_printf(s, "%llu + %u [%s]\n",
1036 t_sector(ent), t_sec(ent), cmd);
1037 return trace_seq_printf(s, "[%s]\n", cmd);
1038}
1039
1040static int blk_log_with_error(struct trace_seq *s,
1041 const struct trace_entry *ent)
1042{
1043 if (t_sec(ent))
1044 return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent),
1045 t_sec(ent), t_error(ent));
1046 return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent));
1047}
1048
1049static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
1050{
1051 struct blk_io_trace_remap r = { .device = 0, };
1052
1053 get_pdu_remap(ent, &r);
1054 return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
1055 t_sector(ent),
1056 t_sec(ent), MAJOR(r.device), MINOR(r.device),
1057 (unsigned long long)r.sector);
1058}
1059
1060static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
1061{
1062 char cmd[TASK_COMM_LEN];
1063
1064 trace_find_cmdline(ent->pid, cmd);
1065
1066 return trace_seq_printf(s, "[%s]\n", cmd);
1067}
1068
1069static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent)
1070{
1071 char cmd[TASK_COMM_LEN];
1072
1073 trace_find_cmdline(ent->pid, cmd);
1074
1075 return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent));
1076}
1077
1078static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent)
1079{
1080 char cmd[TASK_COMM_LEN];
1081
1082 trace_find_cmdline(ent->pid, cmd);
1083
1084 return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent),
1085 get_pdu_int(ent), cmd);
1086}
1087
1088/*
1089 * struct tracer operations
1090 */
1091
1092static void blk_tracer_print_header(struct seq_file *m)
1093{
1094 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
1095 return;
1096 seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG\n"
1097 "# | | | | | |\n");
1098}
1099
1100static void blk_tracer_start(struct trace_array *tr)
1101{
1102 mutex_lock(&blk_probe_mutex);
1103 if (atomic_add_return(1, &blk_probes_ref) == 1)
1104 if (blk_register_tracepoints())
1105 atomic_dec(&blk_probes_ref);
1106 mutex_unlock(&blk_probe_mutex);
1107 trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
1108}
1109
1110static int blk_tracer_init(struct trace_array *tr)
1111{
1112 blk_tr = tr;
1113 blk_tracer_start(tr);
1114 mutex_lock(&blk_probe_mutex);
1115 blk_tracer_enabled++;
1116 mutex_unlock(&blk_probe_mutex);
1117 return 0;
1118}
1119
1120static void blk_tracer_stop(struct trace_array *tr)
1121{
1122 trace_flags |= TRACE_ITER_CONTEXT_INFO;
1123 mutex_lock(&blk_probe_mutex);
1124 if (atomic_dec_and_test(&blk_probes_ref))
1125 blk_unregister_tracepoints();
1126 mutex_unlock(&blk_probe_mutex);
1127}
1128
1129static void blk_tracer_reset(struct trace_array *tr)
1130{
1131 if (!atomic_read(&blk_probes_ref))
1132 return;
1133
1134 mutex_lock(&blk_probe_mutex);
1135 blk_tracer_enabled--;
1136 WARN_ON(blk_tracer_enabled < 0);
1137 mutex_unlock(&blk_probe_mutex);
1138
1139 blk_tracer_stop(tr);
1140}
1141
1142static struct {
1143 const char *act[2];
1144 int (*print)(struct trace_seq *s, const struct trace_entry *ent);
1145} what2act[] __read_mostly = {
1146 [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic },
1147 [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic },
1148 [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic },
1149 [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic },
1150 [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic },
1151 [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error },
1152 [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic },
1153 [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error },
1154 [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug },
1155 [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug },
1156 [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug },
1157 [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic },
1158 [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split },
1159 [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic },
1160 [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap },
1161};
1162
1163static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
1164 int flags)
1165{
1166 struct trace_seq *s = &iter->seq;
1167 const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
1168 const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1169 int ret;
1170
1171 if (!trace_print_context(iter))
1172 return TRACE_TYPE_PARTIAL_LINE;
1173
1174 if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
1175 ret = trace_seq_printf(s, "Bad pc action %x\n", what);
1176 else {
1177 const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1178 ret = blk_log_action_seq(s, t, what2act[what].act[long_act]);
1179 if (ret)
1180 ret = what2act[what].print(s, iter->ent);
1181 }
1182
1183 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1184}
1185
1186static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
1187{
1188 struct trace_seq *s = &iter->seq;
1189 struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
1190 const int offset = offsetof(struct blk_io_trace, sector);
1191 struct blk_io_trace old = {
1192 .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
1193 .time = ns2usecs(iter->ts),
1194 };
1195
1196 if (!trace_seq_putmem(s, &old, offset))
1197 return 0;
1198 return trace_seq_putmem(s, &t->sector,
1199 sizeof(old) - offset + t->pdu_len);
1200}
1201
1202static enum print_line_t
1203blk_trace_event_print_binary(struct trace_iterator *iter, int flags)
1204{
1205 return blk_trace_synthesize_old_trace(iter) ?
1206 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1207}
1208
1209static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
1210{
1211 const struct blk_io_trace *t;
1212 u16 what;
1213 int ret;
1214
1215 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
1216 return TRACE_TYPE_UNHANDLED;
1217
1218 t = (const struct blk_io_trace *)iter->ent;
1219 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1220
1221 if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
1222 ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what);
1223 else {
1224 const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1225 ret = blk_log_action_iter(iter, what2act[what].act[long_act]);
1226 if (ret)
1227 ret = what2act[what].print(&iter->seq, iter->ent);
1228 }
1229
1230 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1231}
1232
1233static struct tracer blk_tracer __read_mostly = {
1234 .name = "blk",
1235 .init = blk_tracer_init,
1236 .reset = blk_tracer_reset,
1237 .start = blk_tracer_start,
1238 .stop = blk_tracer_stop,
1239 .print_header = blk_tracer_print_header,
1240 .print_line = blk_tracer_print_line,
1241 .flags = &blk_tracer_flags,
1242};
1243
1244static struct trace_event trace_blk_event = {
1245 .type = TRACE_BLK,
1246 .trace = blk_trace_event_print,
1247 .binary = blk_trace_event_print_binary,
1248};
1249
1250static int __init init_blk_tracer(void)
1251{
1252 if (!register_ftrace_event(&trace_blk_event)) {
1253 pr_warning("Warning: could not register block events\n");
1254 return 1;
1255 }
1256
1257 if (register_tracer(&blk_tracer) != 0) {
1258 pr_warning("Warning: could not register the block tracer\n");
1259 unregister_ftrace_event(&trace_blk_event);
1260 return 1;
1261 }
1262
1263 return 0;
1264}
1265
1266device_initcall(init_blk_tracer);
1267
1268static int blk_trace_remove_queue(struct request_queue *q)
1269{
1270 struct blk_trace *bt;
1271
1272 bt = xchg(&q->blk_trace, NULL);
1273 if (bt == NULL)
1274 return -EINVAL;
1275
1276 kfree(bt);
1277 return 0;
1278}
1279
1280/*
1281 * Setup everything required to start tracing
1282 */
1283static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
1284{
1285 struct blk_trace *old_bt, *bt = NULL;
1286 int ret;
1287
1288 ret = -ENOMEM;
1289 bt = kzalloc(sizeof(*bt), GFP_KERNEL);
1290 if (!bt)
1291 goto err;
1292
1293 bt->dev = dev;
1294 bt->act_mask = (u16)-1;
1295 bt->end_lba = -1ULL;
1296 bt->trace_state = Blktrace_running;
1297
1298 old_bt = xchg(&q->blk_trace, bt);
1299 if (old_bt != NULL) {
1300 (void)xchg(&q->blk_trace, old_bt);
1301 kfree(bt);
1302 ret = -EBUSY;
1303 }
1304 return 0;
1305err:
1306 return ret;
1307}
1308
1309/*
1310 * sysfs interface to enable and configure tracing
1311 */
1312
1313static ssize_t sysfs_blk_trace_enable_show(struct device *dev,
1314 struct device_attribute *attr,
1315 char *buf)
1316{
1317 struct hd_struct *p = dev_to_part(dev);
1318 struct block_device *bdev;
1319 ssize_t ret = -ENXIO;
1320
1321 lock_kernel();
1322 bdev = bdget(part_devt(p));
1323 if (bdev != NULL) {
1324 struct request_queue *q = bdev_get_queue(bdev);
1325
1326 if (q != NULL) {
1327 mutex_lock(&bdev->bd_mutex);
1328 ret = sprintf(buf, "%u\n", !!q->blk_trace);
1329 mutex_unlock(&bdev->bd_mutex);
1330 }
1331
1332 bdput(bdev);
1333 }
1334
1335 unlock_kernel();
1336 return ret;
1337}
1338
1339static ssize_t sysfs_blk_trace_enable_store(struct device *dev,
1340 struct device_attribute *attr,
1341 const char *buf, size_t count)
1342{
1343 struct block_device *bdev;
1344 struct request_queue *q;
1345 struct hd_struct *p;
1346 int value;
1347 ssize_t ret = -ENXIO;
1348
1349 if (count == 0 || sscanf(buf, "%d", &value) != 1)
1350 goto out;
1351
1352 lock_kernel();
1353 p = dev_to_part(dev);
1354 bdev = bdget(part_devt(p));
1355 if (bdev == NULL)
1356 goto out_unlock_kernel;
1357
1358 q = bdev_get_queue(bdev);
1359 if (q == NULL)
1360 goto out_bdput;
1361
1362 mutex_lock(&bdev->bd_mutex);
1363 if (value)
1364 ret = blk_trace_setup_queue(q, bdev->bd_dev);
1365 else
1366 ret = blk_trace_remove_queue(q);
1367 mutex_unlock(&bdev->bd_mutex);
1368
1369 if (ret == 0)
1370 ret = count;
1371out_bdput:
1372 bdput(bdev);
1373out_unlock_kernel:
1374 unlock_kernel();
1375out:
1376 return ret;
1377}
1378
1379static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1380 struct device_attribute *attr,
1381 char *buf);
1382static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1383 struct device_attribute *attr,
1384 const char *buf, size_t count);
1385#define BLK_TRACE_DEVICE_ATTR(_name) \
1386 DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \
1387 sysfs_blk_trace_attr_show, \
1388 sysfs_blk_trace_attr_store)
1389
1390static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR,
1391 sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store);
1392static BLK_TRACE_DEVICE_ATTR(act_mask);
1393static BLK_TRACE_DEVICE_ATTR(pid);
1394static BLK_TRACE_DEVICE_ATTR(start_lba);
1395static BLK_TRACE_DEVICE_ATTR(end_lba);
1396
1397static struct attribute *blk_trace_attrs[] = {
1398 &dev_attr_enable.attr,
1399 &dev_attr_act_mask.attr,
1400 &dev_attr_pid.attr,
1401 &dev_attr_start_lba.attr,
1402 &dev_attr_end_lba.attr,
1403 NULL
1404};
1405
1406struct attribute_group blk_trace_attr_group = {
1407 .name = "trace",
1408 .attrs = blk_trace_attrs,
1409};
1410
1411static int blk_str2act_mask(const char *str)
1412{
1413 int mask = 0;
1414 char *copy = kstrdup(str, GFP_KERNEL), *s;
1415
1416 if (copy == NULL)
1417 return -ENOMEM;
1418
1419 s = strstrip(copy);
1420
1421 while (1) {
1422 char *sep = strchr(s, ',');
1423
1424 if (sep != NULL)
1425 *sep = '\0';
1426
1427 if (strcasecmp(s, "barrier") == 0)
1428 mask |= BLK_TC_BARRIER;
1429 else if (strcasecmp(s, "complete") == 0)
1430 mask |= BLK_TC_COMPLETE;
1431 else if (strcasecmp(s, "fs") == 0)
1432 mask |= BLK_TC_FS;
1433 else if (strcasecmp(s, "issue") == 0)
1434 mask |= BLK_TC_ISSUE;
1435 else if (strcasecmp(s, "pc") == 0)
1436 mask |= BLK_TC_PC;
1437 else if (strcasecmp(s, "queue") == 0)
1438 mask |= BLK_TC_QUEUE;
1439 else if (strcasecmp(s, "read") == 0)
1440 mask |= BLK_TC_READ;
1441 else if (strcasecmp(s, "requeue") == 0)
1442 mask |= BLK_TC_REQUEUE;
1443 else if (strcasecmp(s, "sync") == 0)
1444 mask |= BLK_TC_SYNC;
1445 else if (strcasecmp(s, "write") == 0)
1446 mask |= BLK_TC_WRITE;
1447
1448 if (sep == NULL)
1449 break;
1450
1451 s = sep + 1;
1452 }
1453 kfree(copy);
1454
1455 return mask;
1456}
1457
1458static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1459 struct device_attribute *attr,
1460 char *buf)
1461{
1462 struct hd_struct *p = dev_to_part(dev);
1463 struct request_queue *q;
1464 struct block_device *bdev;
1465 ssize_t ret = -ENXIO;
1466
1467 lock_kernel();
1468 bdev = bdget(part_devt(p));
1469 if (bdev == NULL)
1470 goto out_unlock_kernel;
1471
1472 q = bdev_get_queue(bdev);
1473 if (q == NULL)
1474 goto out_bdput;
1475 mutex_lock(&bdev->bd_mutex);
1476 if (q->blk_trace == NULL)
1477 ret = sprintf(buf, "disabled\n");
1478 else if (attr == &dev_attr_act_mask)
1479 ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask);
1480 else if (attr == &dev_attr_pid)
1481 ret = sprintf(buf, "%u\n", q->blk_trace->pid);
1482 else if (attr == &dev_attr_start_lba)
1483 ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
1484 else if (attr == &dev_attr_end_lba)
1485 ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
1486 mutex_unlock(&bdev->bd_mutex);
1487out_bdput:
1488 bdput(bdev);
1489out_unlock_kernel:
1490 unlock_kernel();
1491 return ret;
1492}
1493
1494static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1495 struct device_attribute *attr,
1496 const char *buf, size_t count)
1497{
1498 struct block_device *bdev;
1499 struct request_queue *q;
1500 struct hd_struct *p;
1501 u64 value;
1502 ssize_t ret = -ENXIO;
1503
1504 if (count == 0)
1505 goto out;
1506
1507 if (attr == &dev_attr_act_mask) {
1508 if (sscanf(buf, "%llx", &value) != 1) {
1509 /* Assume it is a list of trace category names */
1510 value = blk_str2act_mask(buf);
1511 if (value < 0)
1512 goto out;
1513 }
1514 } else if (sscanf(buf, "%llu", &value) != 1)
1515 goto out;
1516
1517 lock_kernel();
1518 p = dev_to_part(dev);
1519 bdev = bdget(part_devt(p));
1520 if (bdev == NULL)
1521 goto out_unlock_kernel;
1522
1523 q = bdev_get_queue(bdev);
1524 if (q == NULL)
1525 goto out_bdput;
1526
1527 mutex_lock(&bdev->bd_mutex);
1528 ret = 0;
1529 if (q->blk_trace == NULL)
1530 ret = blk_trace_setup_queue(q, bdev->bd_dev);
1531
1532 if (ret == 0) {
1533 if (attr == &dev_attr_act_mask)
1534 q->blk_trace->act_mask = value;
1535 else if (attr == &dev_attr_pid)
1536 q->blk_trace->pid = value;
1537 else if (attr == &dev_attr_start_lba)
1538 q->blk_trace->start_lba = value;
1539 else if (attr == &dev_attr_end_lba)
1540 q->blk_trace->end_lba = value;
1541 ret = count;
1542 }
1543 mutex_unlock(&bdev->bd_mutex);
1544out_bdput:
1545 bdput(bdev);
1546out_unlock_kernel:
1547 unlock_kernel();
1548out:
1549 return ret;
1550}
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
new file mode 100644
index 000000000000..9fc918da404f
--- /dev/null
+++ b/kernel/trace/events.c
@@ -0,0 +1,15 @@
1/*
2 * This is the place to register all trace points as events.
3 */
4
5#include <linux/stringify.h>
6
7#include <trace/trace_events.h>
8
9#include "trace_output.h"
10
11#include "trace_events_stage_1.h"
12#include "trace_events_stage_2.h"
13#include "trace_events_stage_3.h"
14
15#include <trace/trace_event_types.h>
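events.c works by including the same list of event definitions several times, with trace_events_stage_1.h, _2.h and _3.h redefining the generator macros between passes, so a single declaration expands to different code on each pass. A tiny self-contained illustration of that multi-pass ("X-macro") technique follows; the event names and the in-file EVENT_LIST stand in for the kernel's real headers and are purely illustrative:

#include <stdio.h>

/* The single list of definitions, normally kept in its own header. */
#define EVENT_LIST			\
	EVENT(sched_switch)		\
	EVENT(irq_handler_entry)	\
	EVENT(block_rq_issue)

/* Pass 1: expand the list into an enum of event ids. */
#define EVENT(name) EVENT_ID_##name,
enum event_id { EVENT_LIST EVENT_ID_MAX };
#undef EVENT

/* Pass 2: expand the same list into a matching table of names. */
#define EVENT(name) [EVENT_ID_##name] = #name,
static const char *event_names[] = { EVENT_LIST };
#undef EVENT

int main(void)
{
	int i;

	for (i = 0; i < EVENT_ID_MAX; i++)
		printf("%d: %s\n", i, event_names[i]);
	return 0;
}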
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fdf913dfc7e8..7847806eefef 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -27,6 +27,7 @@
27#include <linux/sysctl.h> 27#include <linux/sysctl.h>
28#include <linux/ctype.h> 28#include <linux/ctype.h>
29#include <linux/list.h> 29#include <linux/list.h>
30#include <linux/hash.h>
30 31
31#include <asm/ftrace.h> 32#include <asm/ftrace.h>
32 33
@@ -44,14 +45,14 @@
44 ftrace_kill(); \ 45 ftrace_kill(); \
45 } while (0) 46 } while (0)
46 47
48/* hash bits for specific function selection */
49#define FTRACE_HASH_BITS 7
50#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS)
51
47/* ftrace_enabled is a method to turn ftrace on or off */ 52/* ftrace_enabled is a method to turn ftrace on or off */
48int ftrace_enabled __read_mostly; 53int ftrace_enabled __read_mostly;
49static int last_ftrace_enabled; 54static int last_ftrace_enabled;
50 55
51/* set when tracing only a pid */
52struct pid *ftrace_pid_trace;
53static struct pid * const ftrace_swapper_pid = &init_struct_pid;
54
55/* Quick disabling of function tracer. */ 56/* Quick disabling of function tracer. */
56int function_trace_stop; 57int function_trace_stop;
57 58
@@ -61,9 +62,7 @@ int function_trace_stop;
61 */ 62 */
62static int ftrace_disabled __read_mostly; 63static int ftrace_disabled __read_mostly;
63 64
64static DEFINE_SPINLOCK(ftrace_lock); 65static DEFINE_MUTEX(ftrace_lock);
65static DEFINE_MUTEX(ftrace_sysctl_lock);
66static DEFINE_MUTEX(ftrace_start_lock);
67 66
68static struct ftrace_ops ftrace_list_end __read_mostly = 67static struct ftrace_ops ftrace_list_end __read_mostly =
69{ 68{
@@ -134,9 +133,6 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
134 133
135static int __register_ftrace_function(struct ftrace_ops *ops) 134static int __register_ftrace_function(struct ftrace_ops *ops)
136{ 135{
137 /* should not be called from interrupt context */
138 spin_lock(&ftrace_lock);
139
140 ops->next = ftrace_list; 136 ops->next = ftrace_list;
141 /* 137 /*
142 * We are entering ops into the ftrace_list but another 138 * We are entering ops into the ftrace_list but another
@@ -172,18 +168,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
172#endif 168#endif
173 } 169 }
174 170
175 spin_unlock(&ftrace_lock);
176
177 return 0; 171 return 0;
178} 172}
179 173
180static int __unregister_ftrace_function(struct ftrace_ops *ops) 174static int __unregister_ftrace_function(struct ftrace_ops *ops)
181{ 175{
182 struct ftrace_ops **p; 176 struct ftrace_ops **p;
183 int ret = 0;
184
185 /* should not be called from interrupt context */
186 spin_lock(&ftrace_lock);
187 177
188 /* 178 /*
189 * If we are removing the last function, then simply point 179 * If we are removing the last function, then simply point
@@ -192,17 +182,15 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
192 if (ftrace_list == ops && ops->next == &ftrace_list_end) { 182 if (ftrace_list == ops && ops->next == &ftrace_list_end) {
193 ftrace_trace_function = ftrace_stub; 183 ftrace_trace_function = ftrace_stub;
194 ftrace_list = &ftrace_list_end; 184 ftrace_list = &ftrace_list_end;
195 goto out; 185 return 0;
196 } 186 }
197 187
198 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next) 188 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
199 if (*p == ops) 189 if (*p == ops)
200 break; 190 break;
201 191
202 if (*p != ops) { 192 if (*p != ops)
203 ret = -1; 193 return -1;
204 goto out;
205 }
206 194
207 *p = (*p)->next; 195 *p = (*p)->next;
208 196
@@ -223,21 +211,15 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
223 } 211 }
224 } 212 }
225 213
226 out: 214 return 0;
227 spin_unlock(&ftrace_lock);
228
229 return ret;
230} 215}
231 216
232static void ftrace_update_pid_func(void) 217static void ftrace_update_pid_func(void)
233{ 218{
234 ftrace_func_t func; 219 ftrace_func_t func;
235 220
236 /* should not be called from interrupt context */
237 spin_lock(&ftrace_lock);
238
239 if (ftrace_trace_function == ftrace_stub) 221 if (ftrace_trace_function == ftrace_stub)
240 goto out; 222 return;
241 223
242 func = ftrace_trace_function; 224 func = ftrace_trace_function;
243 225
@@ -254,23 +236,29 @@ static void ftrace_update_pid_func(void)
254#else 236#else
255 __ftrace_trace_function = func; 237 __ftrace_trace_function = func;
256#endif 238#endif
257
258 out:
259 spin_unlock(&ftrace_lock);
260} 239}
261 240
241/* set when tracing only a pid */
242struct pid *ftrace_pid_trace;
243static struct pid * const ftrace_swapper_pid = &init_struct_pid;
244
262#ifdef CONFIG_DYNAMIC_FTRACE 245#ifdef CONFIG_DYNAMIC_FTRACE
246
263#ifndef CONFIG_FTRACE_MCOUNT_RECORD 247#ifndef CONFIG_FTRACE_MCOUNT_RECORD
264# error Dynamic ftrace depends on MCOUNT_RECORD 248# error Dynamic ftrace depends on MCOUNT_RECORD
265#endif 249#endif
266 250
267/* 251static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;
268 * Since MCOUNT_ADDR may point to mcount itself, we do not want 252
269 * to get it confused by reading a reference in the code as we 253struct ftrace_func_probe {
270 * are parsing on objcopy output of text. Use a variable for 254 struct hlist_node node;
271 * it instead. 255 struct ftrace_probe_ops *ops;
272 */ 256 unsigned long flags;
273static unsigned long mcount_addr = MCOUNT_ADDR; 257 unsigned long ip;
258 void *data;
259 struct rcu_head rcu;
260};
261
274 262
275enum { 263enum {
276 FTRACE_ENABLE_CALLS = (1 << 0), 264 FTRACE_ENABLE_CALLS = (1 << 0),
@@ -284,13 +272,13 @@ enum {
284 272
285static int ftrace_filtered; 273static int ftrace_filtered;
286 274
287static LIST_HEAD(ftrace_new_addrs); 275static struct dyn_ftrace *ftrace_new_addrs;
288 276
289static DEFINE_MUTEX(ftrace_regex_lock); 277static DEFINE_MUTEX(ftrace_regex_lock);
290 278
291struct ftrace_page { 279struct ftrace_page {
292 struct ftrace_page *next; 280 struct ftrace_page *next;
293 unsigned long index; 281 int index;
294 struct dyn_ftrace records[]; 282 struct dyn_ftrace records[];
295}; 283};
296 284
@@ -305,6 +293,19 @@ static struct ftrace_page *ftrace_pages;
305 293
306static struct dyn_ftrace *ftrace_free_records; 294static struct dyn_ftrace *ftrace_free_records;
307 295
296/*
 297 * This is a double for loop. Do not use 'break' to break out of the loop,
298 * you must use a goto.
299 */
300#define do_for_each_ftrace_rec(pg, rec) \
301 for (pg = ftrace_pages_start; pg; pg = pg->next) { \
302 int _____i; \
303 for (_____i = 0; _____i < pg->index; _____i++) { \
304 rec = &pg->records[_____i];
305
306#define while_for_each_ftrace_rec() \
307 } \
308 }
308 309
309#ifdef CONFIG_KPROBES 310#ifdef CONFIG_KPROBES
310 311
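
Because do_for_each_ftrace_rec()/while_for_each_ftrace_rec() expand to two nested for loops, a plain break only leaves the inner loop, which is why the comment insists on goto. Below is a minimal sketch of the intended usage, taking ftrace_lock around the walk as the callers converted by this patch do; walk_records_example() itself is hypothetical.

/* Sketch: walk every dyn_ftrace record, stopping early with a goto. */
static void walk_records_example(unsigned long target_ip)
{
	struct ftrace_page *pg;
	struct dyn_ftrace *rec;

	mutex_lock(&ftrace_lock);
	do_for_each_ftrace_rec(pg, rec) {
		if (rec->flags & FTRACE_FL_FREE)
			continue;
		if (rec->ip == target_ip)
			goto out;	/* 'break' would only exit the inner loop */
	} while_for_each_ftrace_rec();
 out:
	mutex_unlock(&ftrace_lock);
}
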
@@ -349,23 +350,17 @@ void ftrace_release(void *start, unsigned long size)
349 struct ftrace_page *pg; 350 struct ftrace_page *pg;
350 unsigned long s = (unsigned long)start; 351 unsigned long s = (unsigned long)start;
351 unsigned long e = s + size; 352 unsigned long e = s + size;
352 int i;
353 353
354 if (ftrace_disabled || !start) 354 if (ftrace_disabled || !start)
355 return; 355 return;
356 356
357 /* should not be called from interrupt context */ 357 mutex_lock(&ftrace_lock);
358 spin_lock(&ftrace_lock); 358 do_for_each_ftrace_rec(pg, rec) {
359 359 if ((rec->ip >= s) && (rec->ip < e) &&
360 for (pg = ftrace_pages_start; pg; pg = pg->next) { 360 !(rec->flags & FTRACE_FL_FREE))
361 for (i = 0; i < pg->index; i++) { 361 ftrace_free_rec(rec);
362 rec = &pg->records[i]; 362 } while_for_each_ftrace_rec();
363 363 mutex_unlock(&ftrace_lock);
364 if ((rec->ip >= s) && (rec->ip < e))
365 ftrace_free_rec(rec);
366 }
367 }
368 spin_unlock(&ftrace_lock);
369} 364}
370 365
371static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 366static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -414,8 +409,8 @@ ftrace_record_ip(unsigned long ip)
414 return NULL; 409 return NULL;
415 410
416 rec->ip = ip; 411 rec->ip = ip;
417 412 rec->flags = (unsigned long)ftrace_new_addrs;
418 list_add(&rec->list, &ftrace_new_addrs); 413 ftrace_new_addrs = rec;
419 414
420 return rec; 415 return rec;
421} 416}
@@ -461,10 +456,10 @@ static void ftrace_bug(int failed, unsigned long ip)
461static int 456static int
462__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 457__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
463{ 458{
464 unsigned long ip, fl;
465 unsigned long ftrace_addr; 459 unsigned long ftrace_addr;
460 unsigned long ip, fl;
466 461
467 ftrace_addr = (unsigned long)ftrace_caller; 462 ftrace_addr = (unsigned long)FTRACE_ADDR;
468 463
469 ip = rec->ip; 464 ip = rec->ip;
470 465
@@ -473,7 +468,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
473 * it is not enabled then do nothing. 468 * it is not enabled then do nothing.
474 * 469 *
475 * If this record is not to be traced and 470 * If this record is not to be traced and
476 * it is enabled then disabled it. 471 * it is enabled then disable it.
477 * 472 *
478 */ 473 */
479 if (rec->flags & FTRACE_FL_NOTRACE) { 474 if (rec->flags & FTRACE_FL_NOTRACE) {
@@ -493,7 +488,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
493 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) 488 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
494 return 0; 489 return 0;
495 490
496 /* Record is not filtered and is not enabled do nothing */ 491 /* Record is not filtered or enabled, do nothing */
497 if (!fl) 492 if (!fl)
498 return 0; 493 return 0;
499 494
@@ -515,7 +510,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
515 510
516 } else { 511 } else {
517 512
518 /* if record is not enabled do nothing */ 513 /* if record is not enabled, do nothing */
519 if (!(rec->flags & FTRACE_FL_ENABLED)) 514 if (!(rec->flags & FTRACE_FL_ENABLED))
520 return 0; 515 return 0;
521 516
@@ -531,41 +526,41 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
531 526
532static void ftrace_replace_code(int enable) 527static void ftrace_replace_code(int enable)
533{ 528{
534 int i, failed;
535 struct dyn_ftrace *rec; 529 struct dyn_ftrace *rec;
536 struct ftrace_page *pg; 530 struct ftrace_page *pg;
531 int failed;
537 532
538 for (pg = ftrace_pages_start; pg; pg = pg->next) { 533 do_for_each_ftrace_rec(pg, rec) {
539 for (i = 0; i < pg->index; i++) { 534 /*
540 rec = &pg->records[i]; 535 * Skip over free records, records that have
541 536 * failed and not converted.
542 /* 537 */
543 * Skip over free records and records that have 538 if (rec->flags & FTRACE_FL_FREE ||
544 * failed. 539 rec->flags & FTRACE_FL_FAILED ||
545 */ 540 !(rec->flags & FTRACE_FL_CONVERTED))
546 if (rec->flags & FTRACE_FL_FREE || 541 continue;
547 rec->flags & FTRACE_FL_FAILED)
548 continue;
549 542
550 /* ignore updates to this record's mcount site */ 543 /* ignore updates to this record's mcount site */
551 if (get_kprobe((void *)rec->ip)) { 544 if (get_kprobe((void *)rec->ip)) {
552 freeze_record(rec); 545 freeze_record(rec);
553 continue; 546 continue;
554 } else { 547 } else {
555 unfreeze_record(rec); 548 unfreeze_record(rec);
556 } 549 }
557 550
558 failed = __ftrace_replace_code(rec, enable); 551 failed = __ftrace_replace_code(rec, enable);
559 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { 552 if (failed) {
560 rec->flags |= FTRACE_FL_FAILED; 553 rec->flags |= FTRACE_FL_FAILED;
561 if ((system_state == SYSTEM_BOOTING) || 554 if ((system_state == SYSTEM_BOOTING) ||
562 !core_kernel_text(rec->ip)) { 555 !core_kernel_text(rec->ip)) {
563 ftrace_free_rec(rec); 556 ftrace_free_rec(rec);
564 } else 557 } else {
565 ftrace_bug(failed, rec->ip); 558 ftrace_bug(failed, rec->ip);
566 } 559 /* Stop processing */
560 return;
561 }
567 } 562 }
568 } 563 } while_for_each_ftrace_rec();
569} 564}
570 565
571static int 566static int
@@ -576,7 +571,7 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
576 571
577 ip = rec->ip; 572 ip = rec->ip;
578 573
579 ret = ftrace_make_nop(mod, rec, mcount_addr); 574 ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
580 if (ret) { 575 if (ret) {
581 ftrace_bug(ret, ip); 576 ftrace_bug(ret, ip);
582 rec->flags |= FTRACE_FL_FAILED; 577 rec->flags |= FTRACE_FL_FAILED;
@@ -585,6 +580,24 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
585 return 1; 580 return 1;
586} 581}
587 582
583/*
584 * archs can override this function if they must do something
 585 * before the code modification is performed.
586 */
587int __weak ftrace_arch_code_modify_prepare(void)
588{
589 return 0;
590}
591
592/*
593 * archs can override this function if they must do something
 594 * after the code modification is performed.
595 */
596int __weak ftrace_arch_code_modify_post_process(void)
597{
598 return 0;
599}
600
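
The two __weak hooks above bracket the stop_machine() text rewrite driven by ftrace_run_update_code() below, so an architecture needing extra preparation can supply its own versions, typically in arch/<arch>/kernel/ftrace.c. The sketch below is hypothetical; set_text_writable()/set_text_readonly() are invented placeholder names for whatever mechanism a given architecture really uses to unprotect and reprotect its kernel text.

/* Hypothetical architecture override sketch.  The two static helpers are
 * placeholders for an arch-specific way to toggle kernel text protection. */
#include <linux/ftrace.h>	/* prototypes for the prepare/post hooks */

static void set_text_writable(void) { /* arch-specific */ }
static void set_text_readonly(void) { /* arch-specific */ }

int ftrace_arch_code_modify_prepare(void)
{
	set_text_writable();
	return 0;
}

int ftrace_arch_code_modify_post_process(void)
{
	set_text_readonly();
	return 0;
}
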
588static int __ftrace_modify_code(void *data) 601static int __ftrace_modify_code(void *data)
589{ 602{
590 int *command = data; 603 int *command = data;
@@ -607,7 +620,17 @@ static int __ftrace_modify_code(void *data)
607 620
608static void ftrace_run_update_code(int command) 621static void ftrace_run_update_code(int command)
609{ 622{
623 int ret;
624
625 ret = ftrace_arch_code_modify_prepare();
626 FTRACE_WARN_ON(ret);
627 if (ret)
628 return;
629
610 stop_machine(__ftrace_modify_code, &command, NULL); 630 stop_machine(__ftrace_modify_code, &command, NULL);
631
632 ret = ftrace_arch_code_modify_post_process();
633 FTRACE_WARN_ON(ret);
611} 634}
612 635
613static ftrace_func_t saved_ftrace_func; 636static ftrace_func_t saved_ftrace_func;
@@ -631,13 +654,10 @@ static void ftrace_startup(int command)
631 if (unlikely(ftrace_disabled)) 654 if (unlikely(ftrace_disabled))
632 return; 655 return;
633 656
634 mutex_lock(&ftrace_start_lock);
635 ftrace_start_up++; 657 ftrace_start_up++;
636 command |= FTRACE_ENABLE_CALLS; 658 command |= FTRACE_ENABLE_CALLS;
637 659
638 ftrace_startup_enable(command); 660 ftrace_startup_enable(command);
639
640 mutex_unlock(&ftrace_start_lock);
641} 661}
642 662
643static void ftrace_shutdown(int command) 663static void ftrace_shutdown(int command)
@@ -645,7 +665,6 @@ static void ftrace_shutdown(int command)
645 if (unlikely(ftrace_disabled)) 665 if (unlikely(ftrace_disabled))
646 return; 666 return;
647 667
648 mutex_lock(&ftrace_start_lock);
649 ftrace_start_up--; 668 ftrace_start_up--;
650 if (!ftrace_start_up) 669 if (!ftrace_start_up)
651 command |= FTRACE_DISABLE_CALLS; 670 command |= FTRACE_DISABLE_CALLS;
@@ -656,11 +675,9 @@ static void ftrace_shutdown(int command)
656 } 675 }
657 676
658 if (!command || !ftrace_enabled) 677 if (!command || !ftrace_enabled)
659 goto out; 678 return;
660 679
661 ftrace_run_update_code(command); 680 ftrace_run_update_code(command);
662 out:
663 mutex_unlock(&ftrace_start_lock);
664} 681}
665 682
666static void ftrace_startup_sysctl(void) 683static void ftrace_startup_sysctl(void)
@@ -670,7 +687,6 @@ static void ftrace_startup_sysctl(void)
670 if (unlikely(ftrace_disabled)) 687 if (unlikely(ftrace_disabled))
671 return; 688 return;
672 689
673 mutex_lock(&ftrace_start_lock);
674 /* Force update next time */ 690 /* Force update next time */
675 saved_ftrace_func = NULL; 691 saved_ftrace_func = NULL;
676 /* ftrace_start_up is true if we want ftrace running */ 692 /* ftrace_start_up is true if we want ftrace running */
@@ -678,7 +694,6 @@ static void ftrace_startup_sysctl(void)
678 command |= FTRACE_ENABLE_CALLS; 694 command |= FTRACE_ENABLE_CALLS;
679 695
680 ftrace_run_update_code(command); 696 ftrace_run_update_code(command);
681 mutex_unlock(&ftrace_start_lock);
682} 697}
683 698
684static void ftrace_shutdown_sysctl(void) 699static void ftrace_shutdown_sysctl(void)
@@ -688,13 +703,11 @@ static void ftrace_shutdown_sysctl(void)
688 if (unlikely(ftrace_disabled)) 703 if (unlikely(ftrace_disabled))
689 return; 704 return;
690 705
691 mutex_lock(&ftrace_start_lock);
692 /* ftrace_start_up is true if ftrace is running */ 706 /* ftrace_start_up is true if ftrace is running */
693 if (ftrace_start_up) 707 if (ftrace_start_up)
694 command |= FTRACE_DISABLE_CALLS; 708 command |= FTRACE_DISABLE_CALLS;
695 709
696 ftrace_run_update_code(command); 710 ftrace_run_update_code(command);
697 mutex_unlock(&ftrace_start_lock);
698} 711}
699 712
700static cycle_t ftrace_update_time; 713static cycle_t ftrace_update_time;
@@ -703,19 +716,21 @@ unsigned long ftrace_update_tot_cnt;
703 716
704static int ftrace_update_code(struct module *mod) 717static int ftrace_update_code(struct module *mod)
705{ 718{
706 struct dyn_ftrace *p, *t; 719 struct dyn_ftrace *p;
707 cycle_t start, stop; 720 cycle_t start, stop;
708 721
709 start = ftrace_now(raw_smp_processor_id()); 722 start = ftrace_now(raw_smp_processor_id());
710 ftrace_update_cnt = 0; 723 ftrace_update_cnt = 0;
711 724
712 list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) { 725 while (ftrace_new_addrs) {
713 726
714 /* If something went wrong, bail without enabling anything */ 727 /* If something went wrong, bail without enabling anything */
715 if (unlikely(ftrace_disabled)) 728 if (unlikely(ftrace_disabled))
716 return -1; 729 return -1;
717 730
718 list_del_init(&p->list); 731 p = ftrace_new_addrs;
732 ftrace_new_addrs = (struct dyn_ftrace *)p->flags;
733 p->flags = 0L;
719 734
720 /* convert record (i.e, patch mcount-call with NOP) */ 735 /* convert record (i.e, patch mcount-call with NOP) */
721 if (ftrace_code_disable(mod, p)) { 736 if (ftrace_code_disable(mod, p)) {
@@ -781,13 +796,16 @@ enum {
781 FTRACE_ITER_CONT = (1 << 1), 796 FTRACE_ITER_CONT = (1 << 1),
782 FTRACE_ITER_NOTRACE = (1 << 2), 797 FTRACE_ITER_NOTRACE = (1 << 2),
783 FTRACE_ITER_FAILURES = (1 << 3), 798 FTRACE_ITER_FAILURES = (1 << 3),
799 FTRACE_ITER_PRINTALL = (1 << 4),
800 FTRACE_ITER_HASH = (1 << 5),
784}; 801};
785 802
786#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 803#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
787 804
788struct ftrace_iterator { 805struct ftrace_iterator {
789 struct ftrace_page *pg; 806 struct ftrace_page *pg;
790 unsigned idx; 807 int hidx;
808 int idx;
791 unsigned flags; 809 unsigned flags;
792 unsigned char buffer[FTRACE_BUFF_MAX+1]; 810 unsigned char buffer[FTRACE_BUFF_MAX+1];
793 unsigned buffer_idx; 811 unsigned buffer_idx;
@@ -795,15 +813,89 @@ struct ftrace_iterator {
795}; 813};
796 814
797static void * 815static void *
816t_hash_next(struct seq_file *m, void *v, loff_t *pos)
817{
818 struct ftrace_iterator *iter = m->private;
819 struct hlist_node *hnd = v;
820 struct hlist_head *hhd;
821
822 WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
823
824 (*pos)++;
825
826 retry:
827 if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
828 return NULL;
829
830 hhd = &ftrace_func_hash[iter->hidx];
831
832 if (hlist_empty(hhd)) {
833 iter->hidx++;
834 hnd = NULL;
835 goto retry;
836 }
837
838 if (!hnd)
839 hnd = hhd->first;
840 else {
841 hnd = hnd->next;
842 if (!hnd) {
843 iter->hidx++;
844 goto retry;
845 }
846 }
847
848 return hnd;
849}
850
851static void *t_hash_start(struct seq_file *m, loff_t *pos)
852{
853 struct ftrace_iterator *iter = m->private;
854 void *p = NULL;
855
856 iter->flags |= FTRACE_ITER_HASH;
857
858 return t_hash_next(m, p, pos);
859}
860
861static int t_hash_show(struct seq_file *m, void *v)
862{
863 struct ftrace_func_probe *rec;
864 struct hlist_node *hnd = v;
865 char str[KSYM_SYMBOL_LEN];
866
867 rec = hlist_entry(hnd, struct ftrace_func_probe, node);
868
869 if (rec->ops->print)
870 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
871
872 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
873 seq_printf(m, "%s:", str);
874
875 kallsyms_lookup((unsigned long)rec->ops->func, NULL, NULL, NULL, str);
876 seq_printf(m, "%s", str);
877
878 if (rec->data)
879 seq_printf(m, ":%p", rec->data);
880 seq_putc(m, '\n');
881
882 return 0;
883}
884
885static void *
798t_next(struct seq_file *m, void *v, loff_t *pos) 886t_next(struct seq_file *m, void *v, loff_t *pos)
799{ 887{
800 struct ftrace_iterator *iter = m->private; 888 struct ftrace_iterator *iter = m->private;
801 struct dyn_ftrace *rec = NULL; 889 struct dyn_ftrace *rec = NULL;
802 890
891 if (iter->flags & FTRACE_ITER_HASH)
892 return t_hash_next(m, v, pos);
893
803 (*pos)++; 894 (*pos)++;
804 895
805 /* should not be called from interrupt context */ 896 if (iter->flags & FTRACE_ITER_PRINTALL)
806 spin_lock(&ftrace_lock); 897 return NULL;
898
807 retry: 899 retry:
808 if (iter->idx >= iter->pg->index) { 900 if (iter->idx >= iter->pg->index) {
809 if (iter->pg->next) { 901 if (iter->pg->next) {
@@ -832,7 +924,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
832 goto retry; 924 goto retry;
833 } 925 }
834 } 926 }
835 spin_unlock(&ftrace_lock);
836 927
837 return rec; 928 return rec;
838} 929}
@@ -842,6 +933,23 @@ static void *t_start(struct seq_file *m, loff_t *pos)
842 struct ftrace_iterator *iter = m->private; 933 struct ftrace_iterator *iter = m->private;
843 void *p = NULL; 934 void *p = NULL;
844 935
936 mutex_lock(&ftrace_lock);
937 /*
938 * For set_ftrace_filter reading, if we have the filter
939 * off, we can short cut and just print out that all
940 * functions are enabled.
941 */
942 if (iter->flags & FTRACE_ITER_FILTER && !ftrace_filtered) {
943 if (*pos > 0)
944 return t_hash_start(m, pos);
945 iter->flags |= FTRACE_ITER_PRINTALL;
946 (*pos)++;
947 return iter;
948 }
949
950 if (iter->flags & FTRACE_ITER_HASH)
951 return t_hash_start(m, pos);
952
845 if (*pos > 0) { 953 if (*pos > 0) {
846 if (iter->idx < 0) 954 if (iter->idx < 0)
847 return p; 955 return p;
@@ -851,18 +959,31 @@ static void *t_start(struct seq_file *m, loff_t *pos)
851 959
852 p = t_next(m, p, pos); 960 p = t_next(m, p, pos);
853 961
962 if (!p)
963 return t_hash_start(m, pos);
964
854 return p; 965 return p;
855} 966}
856 967
857static void t_stop(struct seq_file *m, void *p) 968static void t_stop(struct seq_file *m, void *p)
858{ 969{
970 mutex_unlock(&ftrace_lock);
859} 971}
860 972
861static int t_show(struct seq_file *m, void *v) 973static int t_show(struct seq_file *m, void *v)
862{ 974{
975 struct ftrace_iterator *iter = m->private;
863 struct dyn_ftrace *rec = v; 976 struct dyn_ftrace *rec = v;
864 char str[KSYM_SYMBOL_LEN]; 977 char str[KSYM_SYMBOL_LEN];
865 978
979 if (iter->flags & FTRACE_ITER_HASH)
980 return t_hash_show(m, v);
981
982 if (iter->flags & FTRACE_ITER_PRINTALL) {
983 seq_printf(m, "#### all functions enabled ####\n");
984 return 0;
985 }
986
866 if (!rec) 987 if (!rec)
867 return 0; 988 return 0;
868 989
@@ -941,23 +1062,16 @@ static void ftrace_filter_reset(int enable)
941 struct ftrace_page *pg; 1062 struct ftrace_page *pg;
942 struct dyn_ftrace *rec; 1063 struct dyn_ftrace *rec;
943 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1064 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
944 unsigned i;
945 1065
946 /* should not be called from interrupt context */ 1066 mutex_lock(&ftrace_lock);
947 spin_lock(&ftrace_lock);
948 if (enable) 1067 if (enable)
949 ftrace_filtered = 0; 1068 ftrace_filtered = 0;
950 pg = ftrace_pages_start; 1069 do_for_each_ftrace_rec(pg, rec) {
951 while (pg) { 1070 if (rec->flags & FTRACE_FL_FAILED)
952 for (i = 0; i < pg->index; i++) { 1071 continue;
953 rec = &pg->records[i]; 1072 rec->flags &= ~type;
954 if (rec->flags & FTRACE_FL_FAILED) 1073 } while_for_each_ftrace_rec();
955 continue; 1074 mutex_unlock(&ftrace_lock);
956 rec->flags &= ~type;
957 }
958 pg = pg->next;
959 }
960 spin_unlock(&ftrace_lock);
961} 1075}
962 1076
963static int 1077static int
@@ -1008,16 +1122,6 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
1008 return ftrace_regex_open(inode, file, 0); 1122 return ftrace_regex_open(inode, file, 0);
1009} 1123}
1010 1124
1011static ssize_t
1012ftrace_regex_read(struct file *file, char __user *ubuf,
1013 size_t cnt, loff_t *ppos)
1014{
1015 if (file->f_mode & FMODE_READ)
1016 return seq_read(file, ubuf, cnt, ppos);
1017 else
1018 return -EPERM;
1019}
1020
1021static loff_t 1125static loff_t
1022ftrace_regex_lseek(struct file *file, loff_t offset, int origin) 1126ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
1023{ 1127{
@@ -1038,86 +1142,536 @@ enum {
1038 MATCH_END_ONLY, 1142 MATCH_END_ONLY,
1039}; 1143};
1040 1144
1041static void 1145/*
1042ftrace_match(unsigned char *buff, int len, int enable) 1146 * (static function - no need for kernel doc)
1147 *
1148 * Pass in a buffer containing a glob and this function will
1149 * set search to point to the search part of the buffer and
1150 * return the type of search it is (see enum above).
1151 * This does modify buff.
1152 *
1153 * Returns enum type.
1154 * search returns the pointer to use for comparison.
1155 * not returns 1 if buff started with a '!'
1156 * 0 otherwise.
1157 */
1158static int
1159ftrace_setup_glob(char *buff, int len, char **search, int *not)
1043{ 1160{
1044 char str[KSYM_SYMBOL_LEN];
1045 char *search = NULL;
1046 struct ftrace_page *pg;
1047 struct dyn_ftrace *rec;
1048 int type = MATCH_FULL; 1161 int type = MATCH_FULL;
1049 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1162 int i;
1050 unsigned i, match = 0, search_len = 0;
1051 int not = 0;
1052 1163
1053 if (buff[0] == '!') { 1164 if (buff[0] == '!') {
1054 not = 1; 1165 *not = 1;
1055 buff++; 1166 buff++;
1056 len--; 1167 len--;
1057 } 1168 } else
1169 *not = 0;
1170
1171 *search = buff;
1058 1172
1059 for (i = 0; i < len; i++) { 1173 for (i = 0; i < len; i++) {
1060 if (buff[i] == '*') { 1174 if (buff[i] == '*') {
1061 if (!i) { 1175 if (!i) {
1062 search = buff + i + 1; 1176 *search = buff + 1;
1063 type = MATCH_END_ONLY; 1177 type = MATCH_END_ONLY;
1064 search_len = len - (i + 1);
1065 } else { 1178 } else {
1066 if (type == MATCH_END_ONLY) { 1179 if (type == MATCH_END_ONLY)
1067 type = MATCH_MIDDLE_ONLY; 1180 type = MATCH_MIDDLE_ONLY;
1068 } else { 1181 else
1069 match = i;
1070 type = MATCH_FRONT_ONLY; 1182 type = MATCH_FRONT_ONLY;
1071 }
1072 buff[i] = 0; 1183 buff[i] = 0;
1073 break; 1184 break;
1074 } 1185 }
1075 } 1186 }
1076 } 1187 }
1077 1188
1078 /* should not be called from interrupt context */ 1189 return type;
1079 spin_lock(&ftrace_lock); 1190}
1080 if (enable) 1191
1081 ftrace_filtered = 1; 1192static int ftrace_match(char *str, char *regex, int len, int type)
1082 pg = ftrace_pages_start; 1193{
1083 while (pg) { 1194 int matched = 0;
1084 for (i = 0; i < pg->index; i++) { 1195 char *ptr;
1085 int matched = 0; 1196
1086 char *ptr; 1197 switch (type) {
1087 1198 case MATCH_FULL:
1088 rec = &pg->records[i]; 1199 if (strcmp(str, regex) == 0)
1089 if (rec->flags & FTRACE_FL_FAILED) 1200 matched = 1;
1201 break;
1202 case MATCH_FRONT_ONLY:
1203 if (strncmp(str, regex, len) == 0)
1204 matched = 1;
1205 break;
1206 case MATCH_MIDDLE_ONLY:
1207 if (strstr(str, regex))
1208 matched = 1;
1209 break;
1210 case MATCH_END_ONLY:
1211 ptr = strstr(str, regex);
1212 if (ptr && (ptr[len] == 0))
1213 matched = 1;
1214 break;
1215 }
1216
1217 return matched;
1218}
1219
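
ftrace_setup_glob() classifies a pattern by where its single '*' sits and ftrace_match() compares accordingly: "sched_*" becomes MATCH_FRONT_ONLY, "*_lock" MATCH_END_ONLY, "*rcu*" MATCH_MIDDLE_ONLY, and a bare name MATCH_FULL. The standalone sketch below mirrors that logic in user space for illustration (the '!' negation handling is omitted); it is a simplified reimplementation, not code from this patch.

/* Simplified, user-space mirror of ftrace_setup_glob()/ftrace_match(). */
#include <stdio.h>
#include <string.h>

enum { MATCH_FULL, MATCH_FRONT_ONLY, MATCH_MIDDLE_ONLY, MATCH_END_ONLY };

static int setup_glob(char *buff, char **search)
{
	int type = MATCH_FULL;
	int i, len = strlen(buff);

	*search = buff;
	for (i = 0; i < len; i++) {
		if (buff[i] != '*')
			continue;
		if (!i) {
			*search = buff + 1;	/* leading '*': match the tail */
			type = MATCH_END_ONLY;
		} else {
			type = (type == MATCH_END_ONLY) ?
				MATCH_MIDDLE_ONLY : MATCH_FRONT_ONLY;
			buff[i] = 0;		/* cut the glob at the '*' */
			break;
		}
	}
	return type;
}

static int match(const char *str, const char *regex, int type)
{
	int len = strlen(regex);
	const char *ptr;

	switch (type) {
	case MATCH_FULL:	return strcmp(str, regex) == 0;
	case MATCH_FRONT_ONLY:	return strncmp(str, regex, len) == 0;
	case MATCH_MIDDLE_ONLY:	return strstr(str, regex) != NULL;
	case MATCH_END_ONLY:
		ptr = strstr(str, regex);
		return ptr && ptr[len] == 0;
	}
	return 0;
}

int main(void)
{
	char glob[] = "sched_*";
	char *search;
	int type = setup_glob(glob, &search);

	printf("%d %d\n", match("sched_switch", search, type),	/* 1 */
			  match("io_schedule", search, type));	/* 0 */
	return 0;
}
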
1220static int
1221ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
1222{
1223 char str[KSYM_SYMBOL_LEN];
1224
1225 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1226 return ftrace_match(str, regex, len, type);
1227}
1228
1229static void ftrace_match_records(char *buff, int len, int enable)
1230{
1231 unsigned int search_len;
1232 struct ftrace_page *pg;
1233 struct dyn_ftrace *rec;
1234 unsigned long flag;
1235 char *search;
1236 int type;
1237 int not;
1238
1239 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1240 type = ftrace_setup_glob(buff, len, &search, &not);
1241
1242 search_len = strlen(search);
1243
1244 mutex_lock(&ftrace_lock);
1245 do_for_each_ftrace_rec(pg, rec) {
1246
1247 if (rec->flags & FTRACE_FL_FAILED)
1248 continue;
1249
1250 if (ftrace_match_record(rec, search, search_len, type)) {
1251 if (not)
1252 rec->flags &= ~flag;
1253 else
1254 rec->flags |= flag;
1255 }
1256 /*
1257 * Only enable filtering if we have a function that
1258 * is filtered on.
1259 */
1260 if (enable && (rec->flags & FTRACE_FL_FILTER))
1261 ftrace_filtered = 1;
1262 } while_for_each_ftrace_rec();
1263 mutex_unlock(&ftrace_lock);
1264}
1265
1266static int
1267ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
1268 char *regex, int len, int type)
1269{
1270 char str[KSYM_SYMBOL_LEN];
1271 char *modname;
1272
1273 kallsyms_lookup(rec->ip, NULL, NULL, &modname, str);
1274
1275 if (!modname || strcmp(modname, mod))
1276 return 0;
1277
1278 /* blank search means to match all funcs in the mod */
1279 if (len)
1280 return ftrace_match(str, regex, len, type);
1281 else
1282 return 1;
1283}
1284
1285static void ftrace_match_module_records(char *buff, char *mod, int enable)
1286{
1287 unsigned search_len = 0;
1288 struct ftrace_page *pg;
1289 struct dyn_ftrace *rec;
1290 int type = MATCH_FULL;
1291 char *search = buff;
1292 unsigned long flag;
1293 int not = 0;
1294
1295 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1296
1297 /* blank or '*' mean the same */
1298 if (strcmp(buff, "*") == 0)
1299 buff[0] = 0;
1300
1301 /* handle the case of 'dont filter this module' */
1302 if (strcmp(buff, "!") == 0 || strcmp(buff, "!*") == 0) {
1303 buff[0] = 0;
1304 not = 1;
1305 }
1306
1307 if (strlen(buff)) {
1308 type = ftrace_setup_glob(buff, strlen(buff), &search, &not);
1309 search_len = strlen(search);
1310 }
1311
1312 mutex_lock(&ftrace_lock);
1313 do_for_each_ftrace_rec(pg, rec) {
1314
1315 if (rec->flags & FTRACE_FL_FAILED)
1316 continue;
1317
1318 if (ftrace_match_module_record(rec, mod,
1319 search, search_len, type)) {
1320 if (not)
1321 rec->flags &= ~flag;
1322 else
1323 rec->flags |= flag;
1324 }
1325 if (enable && (rec->flags & FTRACE_FL_FILTER))
1326 ftrace_filtered = 1;
1327
1328 } while_for_each_ftrace_rec();
1329 mutex_unlock(&ftrace_lock);
1330}
1331
1332/*
1333 * We register the module command as a template to show others how
 1334 * to register a command as well.
1335 */
1336
1337static int
1338ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
1339{
1340 char *mod;
1341
1342 /*
1343 * cmd == 'mod' because we only registered this func
1344 * for the 'mod' ftrace_func_command.
1345 * But if you register one func with multiple commands,
1346 * you can tell which command was used by the cmd
1347 * parameter.
1348 */
1349
1350 /* we must have a module name */
1351 if (!param)
1352 return -EINVAL;
1353
1354 mod = strsep(&param, ":");
1355 if (!strlen(mod))
1356 return -EINVAL;
1357
1358 ftrace_match_module_records(func, mod, enable);
1359 return 0;
1360}
1361
1362static struct ftrace_func_command ftrace_mod_cmd = {
1363 .name = "mod",
1364 .func = ftrace_mod_callback,
1365};
1366
1367static int __init ftrace_mod_cmd_init(void)
1368{
1369 return register_ftrace_command(&ftrace_mod_cmd);
1370}
1371device_initcall(ftrace_mod_cmd_init);
1372
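
As the comment above invites, new set_ftrace_filter commands can be added exactly the way the "mod" command is. The sketch below registers a hypothetical "example" command; example_callback(), example_cmd and example_cmd_init() are invented names, and the body simply reuses ftrace_match_records() from this file.

/* Hypothetical command: 'echo "<glob>:example:<param>" > set_ftrace_filter'
 * lands here with func = "<glob>", cmd = "example", param = "<param>". */
static int example_callback(char *func, char *cmd, char *param, int enable)
{
	if (!param)
		return -EINVAL;

	/* nothing clever: apply the glob like a plain filter write */
	ftrace_match_records(func, strlen(func), enable);
	return 0;
}

static struct ftrace_func_command example_cmd = {
	.name	= "example",
	.func	= example_callback,
};

static int __init example_cmd_init(void)
{
	return register_ftrace_command(&example_cmd);
}
device_initcall(example_cmd_init);

ftrace_process_regex() further down dispatches on the text after the first ':', so "hrtimer_*:mod:mymod" reaches ftrace_mod_callback() and "hrtimer_*:example:foo" would reach the callback above.
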
1373static void
1374function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1375{
1376 struct ftrace_func_probe *entry;
1377 struct hlist_head *hhd;
1378 struct hlist_node *n;
1379 unsigned long key;
1380 int resched;
1381
1382 key = hash_long(ip, FTRACE_HASH_BITS);
1383
1384 hhd = &ftrace_func_hash[key];
1385
1386 if (hlist_empty(hhd))
1387 return;
1388
1389 /*
 1390 * Disable preemption for these calls to prevent an RCU grace
1391 * period. This syncs the hash iteration and freeing of items
1392 * on the hash. rcu_read_lock is too dangerous here.
1393 */
1394 resched = ftrace_preempt_disable();
1395 hlist_for_each_entry_rcu(entry, n, hhd, node) {
1396 if (entry->ip == ip)
1397 entry->ops->func(ip, parent_ip, &entry->data);
1398 }
1399 ftrace_preempt_enable(resched);
1400}
1401
1402static struct ftrace_ops trace_probe_ops __read_mostly =
1403{
1404 .func = function_trace_probe_call,
1405};
1406
1407static int ftrace_probe_registered;
1408
1409static void __enable_ftrace_function_probe(void)
1410{
1411 int i;
1412
1413 if (ftrace_probe_registered)
1414 return;
1415
1416 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
1417 struct hlist_head *hhd = &ftrace_func_hash[i];
1418 if (hhd->first)
1419 break;
1420 }
1421 /* Nothing registered? */
1422 if (i == FTRACE_FUNC_HASHSIZE)
1423 return;
1424
1425 __register_ftrace_function(&trace_probe_ops);
1426 ftrace_startup(0);
1427 ftrace_probe_registered = 1;
1428}
1429
1430static void __disable_ftrace_function_probe(void)
1431{
1432 int i;
1433
1434 if (!ftrace_probe_registered)
1435 return;
1436
1437 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
1438 struct hlist_head *hhd = &ftrace_func_hash[i];
1439 if (hhd->first)
1440 return;
1441 }
1442
1443 /* no more funcs left */
1444 __unregister_ftrace_function(&trace_probe_ops);
1445 ftrace_shutdown(0);
1446 ftrace_probe_registered = 0;
1447}
1448
1449
1450static void ftrace_free_entry_rcu(struct rcu_head *rhp)
1451{
1452 struct ftrace_func_probe *entry =
1453 container_of(rhp, struct ftrace_func_probe, rcu);
1454
1455 if (entry->ops->free)
1456 entry->ops->free(&entry->data);
1457 kfree(entry);
1458}
1459
1460
1461int
1462register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
1463 void *data)
1464{
1465 struct ftrace_func_probe *entry;
1466 struct ftrace_page *pg;
1467 struct dyn_ftrace *rec;
1468 int type, len, not;
1469 unsigned long key;
1470 int count = 0;
1471 char *search;
1472
1473 type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
1474 len = strlen(search);
1475
1476 /* we do not support '!' for function probes */
1477 if (WARN_ON(not))
1478 return -EINVAL;
1479
1480 mutex_lock(&ftrace_lock);
1481 do_for_each_ftrace_rec(pg, rec) {
1482
1483 if (rec->flags & FTRACE_FL_FAILED)
1484 continue;
1485
1486 if (!ftrace_match_record(rec, search, len, type))
1487 continue;
1488
1489 entry = kmalloc(sizeof(*entry), GFP_KERNEL);
1490 if (!entry) {
1491 /* If we did not process any, then return error */
1492 if (!count)
1493 count = -ENOMEM;
1494 goto out_unlock;
1495 }
1496
1497 count++;
1498
1499 entry->data = data;
1500
1501 /*
1502 * The caller might want to do something special
1503 * for each function we find. We call the callback
1504 * to give the caller an opportunity to do so.
1505 */
1506 if (ops->callback) {
1507 if (ops->callback(rec->ip, &entry->data) < 0) {
1508 /* caller does not like this func */
1509 kfree(entry);
1090 continue; 1510 continue;
1091 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1092 switch (type) {
1093 case MATCH_FULL:
1094 if (strcmp(str, buff) == 0)
1095 matched = 1;
1096 break;
1097 case MATCH_FRONT_ONLY:
1098 if (memcmp(str, buff, match) == 0)
1099 matched = 1;
1100 break;
1101 case MATCH_MIDDLE_ONLY:
1102 if (strstr(str, search))
1103 matched = 1;
1104 break;
1105 case MATCH_END_ONLY:
1106 ptr = strstr(str, search);
1107 if (ptr && (ptr[search_len] == 0))
1108 matched = 1;
1109 break;
1110 } 1511 }
1111 if (matched) { 1512 }
1112 if (not) 1513
1113 rec->flags &= ~flag; 1514 entry->ops = ops;
1114 else 1515 entry->ip = rec->ip;
1115 rec->flags |= flag; 1516
1517 key = hash_long(entry->ip, FTRACE_HASH_BITS);
1518 hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]);
1519
1520 } while_for_each_ftrace_rec();
1521 __enable_ftrace_function_probe();
1522
1523 out_unlock:
1524 mutex_unlock(&ftrace_lock);
1525
1526 return count;
1527}
1528
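
register_ftrace_function_probe() hooks a struct ftrace_probe_ops into every record matching a glob, keyed into ftrace_func_hash by hash_long() as above. The sketch below is a hypothetical caller that only counts hits; the .func signature is inferred from the call in function_trace_probe_call() (ip, parent_ip, &entry->data), and the optional .callback/.print/.free hooks used elsewhere in this hunk are left unset.

/* Hypothetical probe: count hits on any function matching "schedule*". */
static unsigned long my_probe_hits;

static void my_probe_func(unsigned long ip, unsigned long parent_ip,
			  void **data)
{
	my_probe_hits++;
}

static struct ftrace_probe_ops my_probe_ops = {
	.func	= my_probe_func,
};

static int __init my_probe_init(void)
{
	static char glob[] = "schedule*";	/* writable: the glob is parsed in place */
	int ret;

	/* returns the number of functions hooked, or a negative error */
	ret = register_ftrace_function_probe(glob, &my_probe_ops, NULL);
	return ret < 0 ? ret : 0;
}
device_initcall(my_probe_init);

Teardown would go through unregister_ftrace_function_probe() or one of the wider helpers that follow.
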
1529enum {
1530 PROBE_TEST_FUNC = 1,
1531 PROBE_TEST_DATA = 2
1532};
1533
1534static void
1535__unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
1536 void *data, int flags)
1537{
1538 struct ftrace_func_probe *entry;
1539 struct hlist_node *n, *tmp;
1540 char str[KSYM_SYMBOL_LEN];
1541 int type = MATCH_FULL;
1542 int i, len = 0;
1543 char *search;
1544
1545 if (glob && (strcmp(glob, "*") || !strlen(glob)))
1546 glob = NULL;
1547 else {
1548 int not;
1549
1550 type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
1551 len = strlen(search);
1552
1553 /* we do not support '!' for function probes */
1554 if (WARN_ON(not))
1555 return;
1556 }
1557
1558 mutex_lock(&ftrace_lock);
1559 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
1560 struct hlist_head *hhd = &ftrace_func_hash[i];
1561
1562 hlist_for_each_entry_safe(entry, n, tmp, hhd, node) {
1563
1564 /* break up if statements for readability */
1565 if ((flags & PROBE_TEST_FUNC) && entry->ops != ops)
1566 continue;
1567
1568 if ((flags & PROBE_TEST_DATA) && entry->data != data)
1569 continue;
1570
1571 /* do this last, since it is the most expensive */
1572 if (glob) {
1573 kallsyms_lookup(entry->ip, NULL, NULL,
1574 NULL, str);
1575 if (!ftrace_match(str, glob, len, type))
1576 continue;
1116 } 1577 }
1578
1579 hlist_del(&entry->node);
1580 call_rcu(&entry->rcu, ftrace_free_entry_rcu);
1581 }
1582 }
1583 __disable_ftrace_function_probe();
1584 mutex_unlock(&ftrace_lock);
1585}
1586
1587void
1588unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
1589 void *data)
1590{
1591 __unregister_ftrace_function_probe(glob, ops, data,
1592 PROBE_TEST_FUNC | PROBE_TEST_DATA);
1593}
1594
1595void
1596unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
1597{
1598 __unregister_ftrace_function_probe(glob, ops, NULL, PROBE_TEST_FUNC);
1599}
1600
1601void unregister_ftrace_function_probe_all(char *glob)
1602{
1603 __unregister_ftrace_function_probe(glob, NULL, NULL, 0);
1604}
1605
1606static LIST_HEAD(ftrace_commands);
1607static DEFINE_MUTEX(ftrace_cmd_mutex);
1608
1609int register_ftrace_command(struct ftrace_func_command *cmd)
1610{
1611 struct ftrace_func_command *p;
1612 int ret = 0;
1613
1614 mutex_lock(&ftrace_cmd_mutex);
1615 list_for_each_entry(p, &ftrace_commands, list) {
1616 if (strcmp(cmd->name, p->name) == 0) {
1617 ret = -EBUSY;
1618 goto out_unlock;
1117 } 1619 }
1118 pg = pg->next;
1119 } 1620 }
1120 spin_unlock(&ftrace_lock); 1621 list_add(&cmd->list, &ftrace_commands);
1622 out_unlock:
1623 mutex_unlock(&ftrace_cmd_mutex);
1624
1625 return ret;
1626}
1627
1628int unregister_ftrace_command(struct ftrace_func_command *cmd)
1629{
1630 struct ftrace_func_command *p, *n;
1631 int ret = -ENODEV;
1632
1633 mutex_lock(&ftrace_cmd_mutex);
1634 list_for_each_entry_safe(p, n, &ftrace_commands, list) {
1635 if (strcmp(cmd->name, p->name) == 0) {
1636 ret = 0;
1637 list_del_init(&p->list);
1638 goto out_unlock;
1639 }
1640 }
1641 out_unlock:
1642 mutex_unlock(&ftrace_cmd_mutex);
1643
1644 return ret;
1645}
1646
1647static int ftrace_process_regex(char *buff, int len, int enable)
1648{
1649 char *func, *command, *next = buff;
1650 struct ftrace_func_command *p;
1651 int ret = -EINVAL;
1652
1653 func = strsep(&next, ":");
1654
1655 if (!next) {
1656 ftrace_match_records(func, len, enable);
1657 return 0;
1658 }
1659
1660 /* command found */
1661
1662 command = strsep(&next, ":");
1663
1664 mutex_lock(&ftrace_cmd_mutex);
1665 list_for_each_entry(p, &ftrace_commands, list) {
1666 if (strcmp(p->name, command) == 0) {
1667 ret = p->func(func, command, next, enable);
1668 goto out_unlock;
1669 }
1670 }
1671 out_unlock:
1672 mutex_unlock(&ftrace_cmd_mutex);
1673
1674 return ret;
1121} 1675}
1122 1676
1123static ssize_t 1677static ssize_t
@@ -1187,7 +1741,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
1187 if (isspace(ch)) { 1741 if (isspace(ch)) {
1188 iter->filtered++; 1742 iter->filtered++;
1189 iter->buffer[iter->buffer_idx] = 0; 1743 iter->buffer[iter->buffer_idx] = 0;
1190 ftrace_match(iter->buffer, iter->buffer_idx, enable); 1744 ret = ftrace_process_regex(iter->buffer,
1745 iter->buffer_idx, enable);
1746 if (ret)
1747 goto out;
1191 iter->buffer_idx = 0; 1748 iter->buffer_idx = 0;
1192 } else 1749 } else
1193 iter->flags |= FTRACE_ITER_CONT; 1750 iter->flags |= FTRACE_ITER_CONT;
@@ -1226,7 +1783,7 @@ ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
1226 if (reset) 1783 if (reset)
1227 ftrace_filter_reset(enable); 1784 ftrace_filter_reset(enable);
1228 if (buf) 1785 if (buf)
1229 ftrace_match(buf, len, enable); 1786 ftrace_match_records(buf, len, enable);
1230 mutex_unlock(&ftrace_regex_lock); 1787 mutex_unlock(&ftrace_regex_lock);
1231} 1788}
1232 1789
@@ -1276,15 +1833,13 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1276 if (iter->buffer_idx) { 1833 if (iter->buffer_idx) {
1277 iter->filtered++; 1834 iter->filtered++;
1278 iter->buffer[iter->buffer_idx] = 0; 1835 iter->buffer[iter->buffer_idx] = 0;
1279 ftrace_match(iter->buffer, iter->buffer_idx, enable); 1836 ftrace_match_records(iter->buffer, iter->buffer_idx, enable);
1280 } 1837 }
1281 1838
1282 mutex_lock(&ftrace_sysctl_lock); 1839 mutex_lock(&ftrace_lock);
1283 mutex_lock(&ftrace_start_lock);
1284 if (ftrace_start_up && ftrace_enabled) 1840 if (ftrace_start_up && ftrace_enabled)
1285 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1841 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1286 mutex_unlock(&ftrace_start_lock); 1842 mutex_unlock(&ftrace_lock);
1287 mutex_unlock(&ftrace_sysctl_lock);
1288 1843
1289 kfree(iter); 1844 kfree(iter);
1290 mutex_unlock(&ftrace_regex_lock); 1845 mutex_unlock(&ftrace_regex_lock);
@@ -1303,31 +1858,31 @@ ftrace_notrace_release(struct inode *inode, struct file *file)
1303 return ftrace_regex_release(inode, file, 0); 1858 return ftrace_regex_release(inode, file, 0);
1304} 1859}
1305 1860
1306static struct file_operations ftrace_avail_fops = { 1861static const struct file_operations ftrace_avail_fops = {
1307 .open = ftrace_avail_open, 1862 .open = ftrace_avail_open,
1308 .read = seq_read, 1863 .read = seq_read,
1309 .llseek = seq_lseek, 1864 .llseek = seq_lseek,
1310 .release = ftrace_avail_release, 1865 .release = ftrace_avail_release,
1311}; 1866};
1312 1867
1313static struct file_operations ftrace_failures_fops = { 1868static const struct file_operations ftrace_failures_fops = {
1314 .open = ftrace_failures_open, 1869 .open = ftrace_failures_open,
1315 .read = seq_read, 1870 .read = seq_read,
1316 .llseek = seq_lseek, 1871 .llseek = seq_lseek,
1317 .release = ftrace_avail_release, 1872 .release = ftrace_avail_release,
1318}; 1873};
1319 1874
1320static struct file_operations ftrace_filter_fops = { 1875static const struct file_operations ftrace_filter_fops = {
1321 .open = ftrace_filter_open, 1876 .open = ftrace_filter_open,
1322 .read = ftrace_regex_read, 1877 .read = seq_read,
1323 .write = ftrace_filter_write, 1878 .write = ftrace_filter_write,
1324 .llseek = ftrace_regex_lseek, 1879 .llseek = ftrace_regex_lseek,
1325 .release = ftrace_filter_release, 1880 .release = ftrace_filter_release,
1326}; 1881};
1327 1882
1328static struct file_operations ftrace_notrace_fops = { 1883static const struct file_operations ftrace_notrace_fops = {
1329 .open = ftrace_notrace_open, 1884 .open = ftrace_notrace_open,
1330 .read = ftrace_regex_read, 1885 .read = seq_read,
1331 .write = ftrace_notrace_write, 1886 .write = ftrace_notrace_write,
1332 .llseek = ftrace_regex_lseek, 1887 .llseek = ftrace_regex_lseek,
1333 .release = ftrace_notrace_release, 1888 .release = ftrace_notrace_release,
@@ -1360,6 +1915,10 @@ static void *g_start(struct seq_file *m, loff_t *pos)
1360 1915
1361 mutex_lock(&graph_lock); 1916 mutex_lock(&graph_lock);
1362 1917
 1918 /* Nothing filtered; tell g_show to print that all functions are enabled */
1919 if (!ftrace_graph_count && !*pos)
1920 return (void *)1;
1921
1363 p = g_next(m, p, pos); 1922 p = g_next(m, p, pos);
1364 1923
1365 return p; 1924 return p;
@@ -1378,6 +1937,11 @@ static int g_show(struct seq_file *m, void *v)
1378 if (!ptr) 1937 if (!ptr)
1379 return 0; 1938 return 0;
1380 1939
1940 if (ptr == (unsigned long *)1) {
1941 seq_printf(m, "#### all functions enabled ####\n");
1942 return 0;
1943 }
1944
1381 kallsyms_lookup(*ptr, NULL, NULL, NULL, str); 1945 kallsyms_lookup(*ptr, NULL, NULL, NULL, str);
1382 1946
1383 seq_printf(m, "%s\n", str); 1947 seq_printf(m, "%s\n", str);
@@ -1420,53 +1984,53 @@ ftrace_graph_open(struct inode *inode, struct file *file)
1420 return ret; 1984 return ret;
1421} 1985}
1422 1986
1423static ssize_t
1424ftrace_graph_read(struct file *file, char __user *ubuf,
1425 size_t cnt, loff_t *ppos)
1426{
1427 if (file->f_mode & FMODE_READ)
1428 return seq_read(file, ubuf, cnt, ppos);
1429 else
1430 return -EPERM;
1431}
1432
1433static int 1987static int
1434ftrace_set_func(unsigned long *array, int idx, char *buffer) 1988ftrace_set_func(unsigned long *array, int *idx, char *buffer)
1435{ 1989{
1436 char str[KSYM_SYMBOL_LEN];
1437 struct dyn_ftrace *rec; 1990 struct dyn_ftrace *rec;
1438 struct ftrace_page *pg; 1991 struct ftrace_page *pg;
1992 int search_len;
1439 int found = 0; 1993 int found = 0;
1440 int i, j; 1994 int type, not;
1995 char *search;
1996 bool exists;
1997 int i;
1441 1998
1442 if (ftrace_disabled) 1999 if (ftrace_disabled)
1443 return -ENODEV; 2000 return -ENODEV;
1444 2001
1445 /* should not be called from interrupt context */ 2002 /* decode regex */
1446 spin_lock(&ftrace_lock); 2003 type = ftrace_setup_glob(buffer, strlen(buffer), &search, &not);
2004 if (not)
2005 return -EINVAL;
1447 2006
1448 for (pg = ftrace_pages_start; pg; pg = pg->next) { 2007 search_len = strlen(search);
1449 for (i = 0; i < pg->index; i++) {
1450 rec = &pg->records[i];
1451 2008
1452 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) 2009 mutex_lock(&ftrace_lock);
1453 continue; 2010 do_for_each_ftrace_rec(pg, rec) {
2011
2012 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2013 break;
1454 2014
1455 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 2015 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
1456 if (strcmp(str, buffer) == 0) { 2016 continue;
2017
2018 if (ftrace_match_record(rec, search, search_len, type)) {
2019 /* ensure it is not already in the array */
2020 exists = false;
2021 for (i = 0; i < *idx; i++)
2022 if (array[i] == rec->ip) {
2023 exists = true;
2024 break;
2025 }
2026 if (!exists) {
2027 array[(*idx)++] = rec->ip;
1457 found = 1; 2028 found = 1;
1458 for (j = 0; j < idx; j++)
1459 if (array[j] == rec->ip) {
1460 found = 0;
1461 break;
1462 }
1463 if (found)
1464 array[idx] = rec->ip;
1465 break;
1466 } 2029 }
1467 } 2030 }
1468 } 2031 } while_for_each_ftrace_rec();
1469 spin_unlock(&ftrace_lock); 2032
2033 mutex_unlock(&ftrace_lock);
1470 2034
1471 return found ? 0 : -EINVAL; 2035 return found ? 0 : -EINVAL;
1472} 2036}
@@ -1534,13 +2098,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
1534 } 2098 }
1535 buffer[index] = 0; 2099 buffer[index] = 0;
1536 2100
1537 /* we allow only one at a time */ 2101 /* we allow only one expression at a time */
1538 ret = ftrace_set_func(array, ftrace_graph_count, buffer); 2102 ret = ftrace_set_func(array, &ftrace_graph_count, buffer);
1539 if (ret) 2103 if (ret)
1540 goto out; 2104 goto out;
1541 2105
1542 ftrace_graph_count++;
1543
1544 file->f_pos += read; 2106 file->f_pos += read;
1545 2107
1546 ret = read; 2108 ret = read;
@@ -1552,7 +2114,7 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
1552 2114
1553static const struct file_operations ftrace_graph_fops = { 2115static const struct file_operations ftrace_graph_fops = {
1554 .open = ftrace_graph_open, 2116 .open = ftrace_graph_open,
1555 .read = ftrace_graph_read, 2117 .read = seq_read,
1556 .write = ftrace_graph_write, 2118 .write = ftrace_graph_write,
1557}; 2119};
1558#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 2120#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
@@ -1604,7 +2166,7 @@ static int ftrace_convert_nops(struct module *mod,
1604 unsigned long addr; 2166 unsigned long addr;
1605 unsigned long flags; 2167 unsigned long flags;
1606 2168
1607 mutex_lock(&ftrace_start_lock); 2169 mutex_lock(&ftrace_lock);
1608 p = start; 2170 p = start;
1609 while (p < end) { 2171 while (p < end) {
1610 addr = ftrace_call_adjust(*p++); 2172 addr = ftrace_call_adjust(*p++);
@@ -1623,7 +2185,7 @@ static int ftrace_convert_nops(struct module *mod,
1623 local_irq_save(flags); 2185 local_irq_save(flags);
1624 ftrace_update_code(mod); 2186 ftrace_update_code(mod);
1625 local_irq_restore(flags); 2187 local_irq_restore(flags);
1626 mutex_unlock(&ftrace_start_lock); 2188 mutex_unlock(&ftrace_lock);
1627 2189
1628 return 0; 2190 return 0;
1629} 2191}
@@ -1796,7 +2358,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
1796 if (ret < 0) 2358 if (ret < 0)
1797 return ret; 2359 return ret;
1798 2360
1799 mutex_lock(&ftrace_start_lock); 2361 mutex_lock(&ftrace_lock);
1800 if (val < 0) { 2362 if (val < 0) {
1801 /* disable pid tracing */ 2363 /* disable pid tracing */
1802 if (!ftrace_pid_trace) 2364 if (!ftrace_pid_trace)
@@ -1835,12 +2397,12 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
1835 ftrace_startup_enable(0); 2397 ftrace_startup_enable(0);
1836 2398
1837 out: 2399 out:
1838 mutex_unlock(&ftrace_start_lock); 2400 mutex_unlock(&ftrace_lock);
1839 2401
1840 return cnt; 2402 return cnt;
1841} 2403}
1842 2404
1843static struct file_operations ftrace_pid_fops = { 2405static const struct file_operations ftrace_pid_fops = {
1844 .read = ftrace_pid_read, 2406 .read = ftrace_pid_read,
1845 .write = ftrace_pid_write, 2407 .write = ftrace_pid_write,
1846}; 2408};
@@ -1863,7 +2425,6 @@ static __init int ftrace_init_debugfs(void)
1863 "'set_ftrace_pid' entry\n"); 2425 "'set_ftrace_pid' entry\n");
1864 return 0; 2426 return 0;
1865} 2427}
1866
1867fs_initcall(ftrace_init_debugfs); 2428fs_initcall(ftrace_init_debugfs);
1868 2429
1869/** 2430/**
@@ -1898,17 +2459,17 @@ int register_ftrace_function(struct ftrace_ops *ops)
1898 if (unlikely(ftrace_disabled)) 2459 if (unlikely(ftrace_disabled))
1899 return -1; 2460 return -1;
1900 2461
1901 mutex_lock(&ftrace_sysctl_lock); 2462 mutex_lock(&ftrace_lock);
1902 2463
1903 ret = __register_ftrace_function(ops); 2464 ret = __register_ftrace_function(ops);
1904 ftrace_startup(0); 2465 ftrace_startup(0);
1905 2466
1906 mutex_unlock(&ftrace_sysctl_lock); 2467 mutex_unlock(&ftrace_lock);
1907 return ret; 2468 return ret;
1908} 2469}
1909 2470
1910/** 2471/**
1911 * unregister_ftrace_function - unresgister a function for profiling. 2472 * unregister_ftrace_function - unregister a function for profiling.
1912 * @ops - ops structure that holds the function to unregister 2473 * @ops - ops structure that holds the function to unregister
1913 * 2474 *
1914 * Unregister a function that was added to be called by ftrace profiling. 2475 * Unregister a function that was added to be called by ftrace profiling.
@@ -1917,10 +2478,10 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
1917{ 2478{
1918 int ret; 2479 int ret;
1919 2480
1920 mutex_lock(&ftrace_sysctl_lock); 2481 mutex_lock(&ftrace_lock);
1921 ret = __unregister_ftrace_function(ops); 2482 ret = __unregister_ftrace_function(ops);
1922 ftrace_shutdown(0); 2483 ftrace_shutdown(0);
1923 mutex_unlock(&ftrace_sysctl_lock); 2484 mutex_unlock(&ftrace_lock);
1924 2485
1925 return ret; 2486 return ret;
1926} 2487}
@@ -1935,7 +2496,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1935 if (unlikely(ftrace_disabled)) 2496 if (unlikely(ftrace_disabled))
1936 return -ENODEV; 2497 return -ENODEV;
1937 2498
1938 mutex_lock(&ftrace_sysctl_lock); 2499 mutex_lock(&ftrace_lock);
1939 2500
1940 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 2501 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
1941 2502
@@ -1964,7 +2525,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1964 } 2525 }
1965 2526
1966 out: 2527 out:
1967 mutex_unlock(&ftrace_sysctl_lock); 2528 mutex_unlock(&ftrace_lock);
1968 return ret; 2529 return ret;
1969} 2530}
1970 2531
@@ -2080,7 +2641,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2080{ 2641{
2081 int ret = 0; 2642 int ret = 0;
2082 2643
2083 mutex_lock(&ftrace_sysctl_lock); 2644 mutex_lock(&ftrace_lock);
2084 2645
2085 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; 2646 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
2086 register_pm_notifier(&ftrace_suspend_notifier); 2647 register_pm_notifier(&ftrace_suspend_notifier);
@@ -2098,13 +2659,13 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2098 ftrace_startup(FTRACE_START_FUNC_RET); 2659 ftrace_startup(FTRACE_START_FUNC_RET);
2099 2660
2100out: 2661out:
2101 mutex_unlock(&ftrace_sysctl_lock); 2662 mutex_unlock(&ftrace_lock);
2102 return ret; 2663 return ret;
2103} 2664}
2104 2665
2105void unregister_ftrace_graph(void) 2666void unregister_ftrace_graph(void)
2106{ 2667{
2107 mutex_lock(&ftrace_sysctl_lock); 2668 mutex_lock(&ftrace_lock);
2108 2669
2109 atomic_dec(&ftrace_graph_active); 2670 atomic_dec(&ftrace_graph_active);
2110 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 2671 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
@@ -2112,7 +2673,7 @@ void unregister_ftrace_graph(void)
2112 ftrace_shutdown(FTRACE_STOP_FUNC_RET); 2673 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
2113 unregister_pm_notifier(&ftrace_suspend_notifier); 2674 unregister_pm_notifier(&ftrace_suspend_notifier);
2114 2675
2115 mutex_unlock(&ftrace_sysctl_lock); 2676 mutex_unlock(&ftrace_lock);
2116} 2677}
2117 2678
2118/* Allocate a return stack for newly created task */ 2679/* Allocate a return stack for newly created task */
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
new file mode 100644
index 000000000000..ae201b3eda89
--- /dev/null
+++ b/kernel/trace/kmemtrace.c
@@ -0,0 +1,339 @@
1/*
2 * Memory allocator tracing
3 *
4 * Copyright (C) 2008 Eduard - Gabriel Munteanu
5 * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
6 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
7 */
8
9#include <linux/dcache.h>
10#include <linux/debugfs.h>
11#include <linux/fs.h>
12#include <linux/seq_file.h>
13#include <trace/kmemtrace.h>
14
15#include "trace.h"
16#include "trace_output.h"
17
 18/* Select an alternative, minimalistic output instead of the original one */
19#define TRACE_KMEM_OPT_MINIMAL 0x1
20
21static struct tracer_opt kmem_opts[] = {
 22 /* Disable the minimalistic output by default */
23 { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
24 { }
25};
26
27static struct tracer_flags kmem_tracer_flags = {
28 .val = 0,
29 .opts = kmem_opts
30};
31
32
33static bool kmem_tracing_enabled __read_mostly;
34static struct trace_array *kmemtrace_array;
35
36static int kmem_trace_init(struct trace_array *tr)
37{
38 int cpu;
39 kmemtrace_array = tr;
40
41 for_each_cpu_mask(cpu, cpu_possible_map)
42 tracing_reset(tr, cpu);
43
44 kmem_tracing_enabled = true;
45
46 return 0;
47}
48
49static void kmem_trace_reset(struct trace_array *tr)
50{
51 kmem_tracing_enabled = false;
52}
53
54static void kmemtrace_headers(struct seq_file *s)
55{
56 /* Don't need headers for the original kmemtrace output */
57 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
58 return;
59
60 seq_printf(s, "#\n");
61 seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
62 " POINTER NODE CALLER\n");
63 seq_printf(s, "# FREE | | | | "
64 " | | | |\n");
65 seq_printf(s, "# |\n\n");
66}
67
68/*
69 * The two following functions give the original output from kmemtrace,
70 * or something close to....perhaps they need some missing things
71 */
72static enum print_line_t
73kmemtrace_print_alloc_original(struct trace_iterator *iter,
74 struct kmemtrace_alloc_entry *entry)
75{
76 struct trace_seq *s = &iter->seq;
77 int ret;
78
79 /* Taken from the old linux/kmemtrace.h */
80 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu "
81 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
82 entry->type_id, entry->call_site, (unsigned long) entry->ptr,
83 (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc,
84 (unsigned long) entry->gfp_flags, entry->node);
85
86 if (!ret)
87 return TRACE_TYPE_PARTIAL_LINE;
88
89 return TRACE_TYPE_HANDLED;
90}
91
92static enum print_line_t
93kmemtrace_print_free_original(struct trace_iterator *iter,
94 struct kmemtrace_free_entry *entry)
95{
96 struct trace_seq *s = &iter->seq;
97 int ret;
98
99 /* Taken from the old linux/kmemtrace.h */
100 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n",
101 entry->type_id, entry->call_site, (unsigned long) entry->ptr);
102
103 if (!ret)
104 return TRACE_TYPE_PARTIAL_LINE;
105
106 return TRACE_TYPE_HANDLED;
107}
108
109
 110/* The following two provide a more minimalistic output */
111static enum print_line_t
112kmemtrace_print_alloc_compress(struct trace_iterator *iter,
113 struct kmemtrace_alloc_entry *entry)
114{
115 struct trace_seq *s = &iter->seq;
116 int ret;
117
118 /* Alloc entry */
119 ret = trace_seq_printf(s, " + ");
120 if (!ret)
121 return TRACE_TYPE_PARTIAL_LINE;
122
123 /* Type */
124 switch (entry->type_id) {
125 case KMEMTRACE_TYPE_KMALLOC:
126 ret = trace_seq_printf(s, "K ");
127 break;
128 case KMEMTRACE_TYPE_CACHE:
129 ret = trace_seq_printf(s, "C ");
130 break;
131 case KMEMTRACE_TYPE_PAGES:
132 ret = trace_seq_printf(s, "P ");
133 break;
134 default:
135 ret = trace_seq_printf(s, "? ");
136 }
137
138 if (!ret)
139 return TRACE_TYPE_PARTIAL_LINE;
140
141 /* Requested */
142 ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
143 if (!ret)
144 return TRACE_TYPE_PARTIAL_LINE;
145
146 /* Allocated */
147 ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
148 if (!ret)
149 return TRACE_TYPE_PARTIAL_LINE;
150
151 /* Flags
 152 * TODO: would be better to see the names of the GFP flags
153 */
154 ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
155 if (!ret)
156 return TRACE_TYPE_PARTIAL_LINE;
157
158 /* Pointer to allocated */
159 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
160 if (!ret)
161 return TRACE_TYPE_PARTIAL_LINE;
162
163 /* Node */
164 ret = trace_seq_printf(s, "%4d ", entry->node);
165 if (!ret)
166 return TRACE_TYPE_PARTIAL_LINE;
167
168 /* Call site */
169 ret = seq_print_ip_sym(s, entry->call_site, 0);
170 if (!ret)
171 return TRACE_TYPE_PARTIAL_LINE;
172
173 if (!trace_seq_printf(s, "\n"))
174 return TRACE_TYPE_PARTIAL_LINE;
175
176 return TRACE_TYPE_HANDLED;
177}
178
179static enum print_line_t
180kmemtrace_print_free_compress(struct trace_iterator *iter,
181 struct kmemtrace_free_entry *entry)
182{
183 struct trace_seq *s = &iter->seq;
184 int ret;
185
186 /* Free entry */
187 ret = trace_seq_printf(s, " - ");
188 if (!ret)
189 return TRACE_TYPE_PARTIAL_LINE;
190
191 /* Type */
192 switch (entry->type_id) {
193 case KMEMTRACE_TYPE_KMALLOC:
194 ret = trace_seq_printf(s, "K ");
195 break;
196 case KMEMTRACE_TYPE_CACHE:
197 ret = trace_seq_printf(s, "C ");
198 break;
199 case KMEMTRACE_TYPE_PAGES:
200 ret = trace_seq_printf(s, "P ");
201 break;
202 default:
203 ret = trace_seq_printf(s, "? ");
204 }
205
206 if (!ret)
207 return TRACE_TYPE_PARTIAL_LINE;
208
209 /* Skip requested/allocated/flags */
210 ret = trace_seq_printf(s, " ");
211 if (!ret)
212 return TRACE_TYPE_PARTIAL_LINE;
213
214 /* Pointer to allocated */
215 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
216 if (!ret)
217 return TRACE_TYPE_PARTIAL_LINE;
218
219 /* Skip node */
220 ret = trace_seq_printf(s, " ");
221 if (!ret)
222 return TRACE_TYPE_PARTIAL_LINE;
223
224 /* Call site */
225 ret = seq_print_ip_sym(s, entry->call_site, 0);
226 if (!ret)
227 return TRACE_TYPE_PARTIAL_LINE;
228
229 if (!trace_seq_printf(s, "\n"))
230 return TRACE_TYPE_PARTIAL_LINE;
231
232 return TRACE_TYPE_HANDLED;
233}
234
235static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
236{
237 struct trace_entry *entry = iter->ent;
238
239 switch (entry->type) {
240 case TRACE_KMEM_ALLOC: {
241 struct kmemtrace_alloc_entry *field;
242 trace_assign_type(field, entry);
243 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
244 return kmemtrace_print_alloc_compress(iter, field);
245 else
246 return kmemtrace_print_alloc_original(iter, field);
247 }
248
249 case TRACE_KMEM_FREE: {
250 struct kmemtrace_free_entry *field;
251 trace_assign_type(field, entry);
252 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
253 return kmemtrace_print_free_compress(iter, field);
254 else
255 return kmemtrace_print_free_original(iter, field);
256 }
257
258 default:
259 return TRACE_TYPE_UNHANDLED;
260 }
261}
262
263/* Trace allocations */
264void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
265 unsigned long call_site,
266 const void *ptr,
267 size_t bytes_req,
268 size_t bytes_alloc,
269 gfp_t gfp_flags,
270 int node)
271{
272 struct ring_buffer_event *event;
273 struct kmemtrace_alloc_entry *entry;
274 struct trace_array *tr = kmemtrace_array;
275
276 if (!kmem_tracing_enabled)
277 return;
278
279 event = trace_buffer_lock_reserve(tr, TRACE_KMEM_ALLOC,
280 sizeof(*entry), 0, 0);
281 if (!event)
282 return;
283 entry = ring_buffer_event_data(event);
284
285 entry->call_site = call_site;
286 entry->ptr = ptr;
287 entry->bytes_req = bytes_req;
288 entry->bytes_alloc = bytes_alloc;
289 entry->gfp_flags = gfp_flags;
290 entry->node = node;
291
292 trace_buffer_unlock_commit(tr, event, 0, 0);
293}
294EXPORT_SYMBOL(kmemtrace_mark_alloc_node);
295
296void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
297 unsigned long call_site,
298 const void *ptr)
299{
300 struct ring_buffer_event *event;
301 struct kmemtrace_free_entry *entry;
302 struct trace_array *tr = kmemtrace_array;
303
304 if (!kmem_tracing_enabled)
305 return;
306
307 event = trace_buffer_lock_reserve(tr, TRACE_KMEM_FREE,
308 sizeof(*entry), 0, 0);
309 if (!event)
310 return;
311 entry = ring_buffer_event_data(event);
312 entry->type_id = type_id;
313 entry->call_site = call_site;
314 entry->ptr = ptr;
315
316 trace_buffer_unlock_commit(tr, event, 0, 0);
317}
318EXPORT_SYMBOL(kmemtrace_mark_free);
319
320static struct tracer kmem_tracer __read_mostly = {
321 .name = "kmemtrace",
322 .init = kmem_trace_init,
323 .reset = kmem_trace_reset,
324 .print_line = kmemtrace_print_line,
325 .print_header = kmemtrace_headers,
326 .flags = &kmem_tracer_flags
327};
328
329void kmemtrace_init(void)
330{
331 /* earliest opportunity to start kmem tracing */
332}
333
334static int __init init_kmem_tracer(void)
335{
336 return register_tracer(&kmem_tracer);
337}
338
339device_initcall(init_kmem_tracer);
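/*
 * A hedged sketch (not part of the file above) of how an allocator call
 * site could feed the two exported hooks. Only the kmemtrace_mark_*()
 * signatures and KMEMTRACE_TYPE_KMALLOC come from the listing; the
 * wrapper names below are hypothetical.
 */
static inline void *kmalloc_node_traced(size_t size, gfp_t flags, int node)
{
	void *ptr = kmalloc_node(size, flags, node);

	kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ptr,
				  size, ksize(ptr), flags, node);
	return ptr;
}

static inline void kfree_traced(const void *ptr)
{
	kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ptr);
	kfree(ptr);
}
/*
 * Once registered, the tracer is selected like any other ftrace plugin,
 * e.g. "echo kmemtrace > /sys/kernel/debug/tracing/current_tracer".
 */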
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bd38c5cfd8ad..384ca5d9d729 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,21 +4,92 @@
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> 4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */ 5 */
6#include <linux/ring_buffer.h> 6#include <linux/ring_buffer.h>
7#include <linux/trace_clock.h>
8#include <linux/ftrace_irq.h>
7#include <linux/spinlock.h> 9#include <linux/spinlock.h>
8#include <linux/debugfs.h> 10#include <linux/debugfs.h>
9#include <linux/uaccess.h> 11#include <linux/uaccess.h>
12#include <linux/hardirq.h>
10#include <linux/module.h> 13#include <linux/module.h>
11#include <linux/percpu.h> 14#include <linux/percpu.h>
12#include <linux/mutex.h> 15#include <linux/mutex.h>
13#include <linux/sched.h> /* used for sched_clock() (for now) */
14#include <linux/init.h> 16#include <linux/init.h>
15#include <linux/hash.h> 17#include <linux/hash.h>
16#include <linux/list.h> 18#include <linux/list.h>
19#include <linux/cpu.h>
17#include <linux/fs.h> 20#include <linux/fs.h>
18 21
19#include "trace.h" 22#include "trace.h"
20 23
21/* 24/*
25 * The ring buffer is made up of a list of pages. A separate list of pages is
26 * allocated for each CPU. A writer may only write to a buffer that is
27 * associated with the CPU it is currently executing on. A reader may read
28 * from any per cpu buffer.
29 *
30 * The reader is special. For each per cpu buffer, the reader has its own
31 * reader page. When a reader has read the entire reader page, this reader
32 * page is swapped with another page in the ring buffer.
33 *
34 * Now, as long as the writer is off the reader page, the reader can do what
35 * ever it wants with that page. The writer will never write to that page
36 * again (as long as it is out of the ring buffer).
37 *
38 * Here's some silly ASCII art.
39 *
40 *   +------+
41 *   |reader|          RING BUFFER
42 *   |page  |
43 *   +------+        +---+   +---+   +---+
44 *                   |   |-->|   |-->|   |
45 *                   +---+   +---+   +---+
46 *                     ^               |
47 *                     |               |
48 *                     +---------------+
49 *
50 *
51 *   +------+
52 *   |reader|          RING BUFFER
53 *   |page  |------------------v
54 *   +------+        +---+   +---+   +---+
55 *                   |   |-->|   |-->|   |
56 *                   +---+   +---+   +---+
57 *                     ^               |
58 *                     |               |
59 *                     +---------------+
60 *
61 *
62 *   +------+
63 *   |reader|          RING BUFFER
64 *   |page  |------------------v
65 *   +------+        +---+   +---+   +---+
66 *      ^            |   |-->|   |-->|   |
67 *      |            +---+   +---+   +---+
68 *      |                              |
69 *      |                              |
70 *      +------------------------------+
71 *
72 *
73 *   +------+
74 *   |buffer|          RING BUFFER
75 *   |page  |------------------v
76 *   +------+        +---+   +---+   +---+
77 *      ^            |   |   |   |-->|   |
78 *      |   New      +---+   +---+   +---+
79 *      |  Reader------^               |
80 *      |   page                       |
81 *      +------------------------------+
82 *
83 *
84 * After we make this swap, the reader can hand this page off to the splice
85 * code and be done with it. It can even allocate a new page if it needs to
86 * and swap that into the ring buffer.
87 *
88 * We will be using cmpxchg soon to make all this lockless.
89 *
90 */
91
92/*
22 * A fast way to enable or disable all ring buffers is to 93 * A fast way to enable or disable all ring buffers is to
23 * call tracing_on or tracing_off. Turning off the ring buffers 94 * call tracing_on or tracing_off. Turning off the ring buffers
24 * prevents all ring buffers from being recorded to. 95 * prevents all ring buffers from being recorded to.
@@ -57,7 +128,9 @@ enum {
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, 128 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58}; 129};
59 130
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; 131static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
132
133#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
61 134
62/** 135/**
63 * tracing_on - enable all tracing buffers 136 * tracing_on - enable all tracing buffers
@@ -89,42 +162,26 @@ EXPORT_SYMBOL_GPL(tracing_off);
89 * tracing_off_permanent - permanently disable ring buffers 162 * tracing_off_permanent - permanently disable ring buffers
90 * 163 *
91 * This function, once called, will disable all ring buffers 164 * This function, once called, will disable all ring buffers
92 * permanenty. 165 * permanently.
93 */ 166 */
94void tracing_off_permanent(void) 167void tracing_off_permanent(void)
95{ 168{
96 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); 169 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
97} 170}
98 171
99#include "trace.h" 172/**
100 173 * tracing_is_on - show state of ring buffers enabled
101/* Up this if you want to test the TIME_EXTENTS and normalization */ 174 */
102#define DEBUG_SHIFT 0 175int tracing_is_on(void)
103
104/* FIXME!!! */
105u64 ring_buffer_time_stamp(int cpu)
106{ 176{
107 u64 time; 177 return ring_buffer_flags == RB_BUFFERS_ON;
108
109 preempt_disable_notrace();
110 /* shift to debug/test normalization and TIME_EXTENTS */
111 time = sched_clock() << DEBUG_SHIFT;
112 preempt_enable_no_resched_notrace();
113
114 return time;
115} 178}
116EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); 179EXPORT_SYMBOL_GPL(tracing_is_on);
117 180
118void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) 181#include "trace.h"
119{
120 /* Just stupid testing the normalize function and deltas */
121 *ts >>= DEBUG_SHIFT;
122}
123EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
124 182
125#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) 183#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
126#define RB_ALIGNMENT_SHIFT 2 184#define RB_ALIGNMENT 4U
127#define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT)
128#define RB_MAX_SMALL_DATA 28 185#define RB_MAX_SMALL_DATA 28
129 186
130enum { 187enum {
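/*
 * A hedged sketch of how the tracing_on()/tracing_off()/tracing_is_on()
 * trio above can bracket a region while recording is paused;
 * do_something_noisy() is a hypothetical placeholder.
 */
static void quiet_section(void)
{
	int was_on = tracing_is_on();

	tracing_off();
	do_something_noisy();
	if (was_on)
		tracing_on();
}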
@@ -133,7 +190,7 @@ enum {
133}; 190};
134 191
135/* inline for ring buffer fast paths */ 192/* inline for ring buffer fast paths */
136static inline unsigned 193static unsigned
137rb_event_length(struct ring_buffer_event *event) 194rb_event_length(struct ring_buffer_event *event)
138{ 195{
139 unsigned length; 196 unsigned length;
@@ -151,7 +208,7 @@ rb_event_length(struct ring_buffer_event *event)
151 208
152 case RINGBUF_TYPE_DATA: 209 case RINGBUF_TYPE_DATA:
153 if (event->len) 210 if (event->len)
154 length = event->len << RB_ALIGNMENT_SHIFT; 211 length = event->len * RB_ALIGNMENT;
155 else 212 else
156 length = event->array[0]; 213 length = event->array[0];
157 return length + RB_EVNT_HDR_SIZE; 214 return length + RB_EVNT_HDR_SIZE;
@@ -179,7 +236,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
179EXPORT_SYMBOL_GPL(ring_buffer_event_length); 236EXPORT_SYMBOL_GPL(ring_buffer_event_length);
180 237
181/* inline for ring buffer fast paths */ 238/* inline for ring buffer fast paths */
182static inline void * 239static void *
183rb_event_data(struct ring_buffer_event *event) 240rb_event_data(struct ring_buffer_event *event)
184{ 241{
185 BUG_ON(event->type != RINGBUF_TYPE_DATA); 242 BUG_ON(event->type != RINGBUF_TYPE_DATA);
@@ -209,7 +266,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
209 266
210struct buffer_data_page { 267struct buffer_data_page {
211 u64 time_stamp; /* page time stamp */ 268 u64 time_stamp; /* page time stamp */
212 local_t commit; /* write commited index */ 269 local_t commit; /* write committed index */
213 unsigned char data[]; /* data of buffer page */ 270 unsigned char data[]; /* data of buffer page */
214}; 271};
215 272
@@ -225,14 +282,25 @@ static void rb_init_page(struct buffer_data_page *bpage)
225 local_set(&bpage->commit, 0); 282 local_set(&bpage->commit, 0);
226} 283}
227 284
285/**
286 * ring_buffer_page_len - the size of data on the page.
287 * @page: The page to read
288 *
289 * Returns the amount of data on the page, including buffer page header.
290 */
291size_t ring_buffer_page_len(void *page)
292{
293 return local_read(&((struct buffer_data_page *)page)->commit)
294 + BUF_PAGE_HDR_SIZE;
295}
296
228/* 297/*
229 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 298 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
230 * this issue out. 299 * this issue out.
231 */ 300 */
232static inline void free_buffer_page(struct buffer_page *bpage) 301static void free_buffer_page(struct buffer_page *bpage)
233{ 302{
234 if (bpage->page) 303 free_page((unsigned long)bpage->page);
235 free_page((unsigned long)bpage->page);
236 kfree(bpage); 304 kfree(bpage);
237} 305}
238 306
@@ -246,7 +314,7 @@ static inline int test_time_stamp(u64 delta)
246 return 0; 314 return 0;
247} 315}
248 316
249#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data)) 317#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
250 318
251/* 319/*
252 * head_page == tail_page && head == tail then buffer is empty. 320 * head_page == tail_page && head == tail then buffer is empty.
@@ -260,7 +328,7 @@ struct ring_buffer_per_cpu {
260 struct list_head pages; 328 struct list_head pages;
261 struct buffer_page *head_page; /* read from head */ 329 struct buffer_page *head_page; /* read from head */
262 struct buffer_page *tail_page; /* write to tail */ 330 struct buffer_page *tail_page; /* write to tail */
263 struct buffer_page *commit_page; /* commited pages */ 331 struct buffer_page *commit_page; /* committed pages */
264 struct buffer_page *reader_page; 332 struct buffer_page *reader_page;
265 unsigned long overrun; 333 unsigned long overrun;
266 unsigned long entries; 334 unsigned long entries;
@@ -273,12 +341,17 @@ struct ring_buffer {
273 unsigned pages; 341 unsigned pages;
274 unsigned flags; 342 unsigned flags;
275 int cpus; 343 int cpus;
276 cpumask_var_t cpumask;
277 atomic_t record_disabled; 344 atomic_t record_disabled;
345 cpumask_var_t cpumask;
278 346
279 struct mutex mutex; 347 struct mutex mutex;
280 348
281 struct ring_buffer_per_cpu **buffers; 349 struct ring_buffer_per_cpu **buffers;
350
351#ifdef CONFIG_HOTPLUG_CPU
352 struct notifier_block cpu_notify;
353#endif
354 u64 (*clock)(void);
282}; 355};
283 356
284struct ring_buffer_iter { 357struct ring_buffer_iter {
@@ -299,11 +372,35 @@ struct ring_buffer_iter {
299 _____ret; \ 372 _____ret; \
300 }) 373 })
301 374
375/* Up this if you want to test the TIME_EXTENTS and normalization */
376#define DEBUG_SHIFT 0
377
378u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
379{
380 u64 time;
381
382 preempt_disable_notrace();
383 /* shift to debug/test normalization and TIME_EXTENTS */
384 time = buffer->clock() << DEBUG_SHIFT;
385 preempt_enable_no_resched_notrace();
386
387 return time;
388}
389EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
390
391void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
392 int cpu, u64 *ts)
393{
394 /* Just stupid testing the normalize function and deltas */
395 *ts >>= DEBUG_SHIFT;
396}
397EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
398
302/** 399/**
303 * check_pages - integrity check of buffer pages 400 * check_pages - integrity check of buffer pages
304 * @cpu_buffer: CPU buffer with pages to test 401 * @cpu_buffer: CPU buffer with pages to test
305 * 402 *
306 * As a safty measure we check to make sure the data pages have not 403 * As a safety measure we check to make sure the data pages have not
307 * been corrupted. 404 * been corrupted.
308 */ 405 */
309static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 406static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
@@ -437,6 +534,11 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
437 */ 534 */
438extern int ring_buffer_page_too_big(void); 535extern int ring_buffer_page_too_big(void);
439 536
537#ifdef CONFIG_HOTPLUG_CPU
538static int __cpuinit rb_cpu_notify(struct notifier_block *self,
539 unsigned long action, void *hcpu);
540#endif
541
440/** 542/**
441 * ring_buffer_alloc - allocate a new ring_buffer 543 * ring_buffer_alloc - allocate a new ring_buffer
442 * @size: the size in bytes per cpu that is needed. 544 * @size: the size in bytes per cpu that is needed.
@@ -469,12 +571,23 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
469 571
470 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 572 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
471 buffer->flags = flags; 573 buffer->flags = flags;
574 buffer->clock = trace_clock_local;
472 575
473 /* need at least two pages */ 576 /* need at least two pages */
474 if (buffer->pages == 1) 577 if (buffer->pages == 1)
475 buffer->pages++; 578 buffer->pages++;
476 579
580 /*
581 * Without CPU hotplug, a ring buffer allocated from an early initcall
582 * will never be notified of secondary CPUs coming online, so in that
583 * case we allocate buffers for all possible CPUs.
584 */
585#ifdef CONFIG_HOTPLUG_CPU
586 get_online_cpus();
587 cpumask_copy(buffer->cpumask, cpu_online_mask);
588#else
477 cpumask_copy(buffer->cpumask, cpu_possible_mask); 589 cpumask_copy(buffer->cpumask, cpu_possible_mask);
590#endif
478 buffer->cpus = nr_cpu_ids; 591 buffer->cpus = nr_cpu_ids;
479 592
480 bsize = sizeof(void *) * nr_cpu_ids; 593 bsize = sizeof(void *) * nr_cpu_ids;
@@ -490,6 +603,13 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
490 goto fail_free_buffers; 603 goto fail_free_buffers;
491 } 604 }
492 605
606#ifdef CONFIG_HOTPLUG_CPU
607 buffer->cpu_notify.notifier_call = rb_cpu_notify;
608 buffer->cpu_notify.priority = 0;
609 register_cpu_notifier(&buffer->cpu_notify);
610#endif
611
612 put_online_cpus();
493 mutex_init(&buffer->mutex); 613 mutex_init(&buffer->mutex);
494 614
495 return buffer; 615 return buffer;
@@ -503,6 +623,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
503 623
504 fail_free_cpumask: 624 fail_free_cpumask:
505 free_cpumask_var(buffer->cpumask); 625 free_cpumask_var(buffer->cpumask);
626 put_online_cpus();
506 627
507 fail_free_buffer: 628 fail_free_buffer:
508 kfree(buffer); 629 kfree(buffer);
@@ -519,15 +640,29 @@ ring_buffer_free(struct ring_buffer *buffer)
519{ 640{
520 int cpu; 641 int cpu;
521 642
643 get_online_cpus();
644
645#ifdef CONFIG_HOTPLUG_CPU
646 unregister_cpu_notifier(&buffer->cpu_notify);
647#endif
648
522 for_each_buffer_cpu(buffer, cpu) 649 for_each_buffer_cpu(buffer, cpu)
523 rb_free_cpu_buffer(buffer->buffers[cpu]); 650 rb_free_cpu_buffer(buffer->buffers[cpu]);
524 651
652 put_online_cpus();
653
525 free_cpumask_var(buffer->cpumask); 654 free_cpumask_var(buffer->cpumask);
526 655
527 kfree(buffer); 656 kfree(buffer);
528} 657}
529EXPORT_SYMBOL_GPL(ring_buffer_free); 658EXPORT_SYMBOL_GPL(ring_buffer_free);
530 659
660void ring_buffer_set_clock(struct ring_buffer *buffer,
661 u64 (*clock)(void))
662{
663 buffer->clock = clock;
664}
665
531static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); 666static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
532 667
533static void 668static void
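/*
 * A hedged sketch of the per-buffer clock hook added above: the buffer
 * defaults to trace_clock_local(), and ring_buffer_set_clock() swaps in
 * any other nanosecond-resolution counter. my_trace_clock() is a
 * hypothetical callback used only for illustration.
 */
static u64 my_trace_clock(void)
{
	/* any monotonic, nanosecond-resolution counter will do */
	return trace_clock_local();
}

static void use_custom_clock(struct ring_buffer *buffer)
{
	ring_buffer_set_clock(buffer, my_trace_clock);
}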
@@ -627,16 +762,15 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
627 return size; 762 return size;
628 763
629 mutex_lock(&buffer->mutex); 764 mutex_lock(&buffer->mutex);
765 get_online_cpus();
630 766
631 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 767 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
632 768
633 if (size < buffer_size) { 769 if (size < buffer_size) {
634 770
635 /* easy case, just free pages */ 771 /* easy case, just free pages */
636 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) { 772 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages))
637 mutex_unlock(&buffer->mutex); 773 goto out_fail;
638 return -1;
639 }
640 774
641 rm_pages = buffer->pages - nr_pages; 775 rm_pages = buffer->pages - nr_pages;
642 776
@@ -655,10 +789,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
655 * add these pages to the cpu_buffers. Otherwise we just free 789 * add these pages to the cpu_buffers. Otherwise we just free
656 * them all and return -ENOMEM; 790 * them all and return -ENOMEM;
657 */ 791 */
658 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) { 792 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages))
659 mutex_unlock(&buffer->mutex); 793 goto out_fail;
660 return -1;
661 }
662 794
663 new_pages = nr_pages - buffer->pages; 795 new_pages = nr_pages - buffer->pages;
664 796
@@ -683,13 +815,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
683 rb_insert_pages(cpu_buffer, &pages, new_pages); 815 rb_insert_pages(cpu_buffer, &pages, new_pages);
684 } 816 }
685 817
686 if (RB_WARN_ON(buffer, !list_empty(&pages))) { 818 if (RB_WARN_ON(buffer, !list_empty(&pages)))
687 mutex_unlock(&buffer->mutex); 819 goto out_fail;
688 return -1;
689 }
690 820
691 out: 821 out:
692 buffer->pages = nr_pages; 822 buffer->pages = nr_pages;
823 put_online_cpus();
693 mutex_unlock(&buffer->mutex); 824 mutex_unlock(&buffer->mutex);
694 825
695 return size; 826 return size;
@@ -699,8 +830,18 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
699 list_del_init(&bpage->list); 830 list_del_init(&bpage->list);
700 free_buffer_page(bpage); 831 free_buffer_page(bpage);
701 } 832 }
833 put_online_cpus();
702 mutex_unlock(&buffer->mutex); 834 mutex_unlock(&buffer->mutex);
703 return -ENOMEM; 835 return -ENOMEM;
836
837 /*
838 * Something went totally wrong, and we are too paranoid
839 * to even clean up the mess.
840 */
841 out_fail:
842 put_online_cpus();
843 mutex_unlock(&buffer->mutex);
844 return -1;
704} 845}
705EXPORT_SYMBOL_GPL(ring_buffer_resize); 846EXPORT_SYMBOL_GPL(ring_buffer_resize);
706 847
@@ -811,7 +952,7 @@ rb_event_index(struct ring_buffer_event *event)
811 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); 952 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
812} 953}
813 954
814static inline int 955static int
815rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, 956rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
816 struct ring_buffer_event *event) 957 struct ring_buffer_event *event)
817{ 958{
@@ -825,7 +966,7 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
825 rb_commit_index(cpu_buffer) == index; 966 rb_commit_index(cpu_buffer) == index;
826} 967}
827 968
828static inline void 969static void
829rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, 970rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
830 struct ring_buffer_event *event) 971 struct ring_buffer_event *event)
831{ 972{
@@ -850,7 +991,7 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
850 local_set(&cpu_buffer->commit_page->page->commit, index); 991 local_set(&cpu_buffer->commit_page->page->commit, index);
851} 992}
852 993
853static inline void 994static void
854rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) 995rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
855{ 996{
856 /* 997 /*
@@ -896,7 +1037,7 @@ static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
896 cpu_buffer->reader_page->read = 0; 1037 cpu_buffer->reader_page->read = 0;
897} 1038}
898 1039
899static inline void rb_inc_iter(struct ring_buffer_iter *iter) 1040static void rb_inc_iter(struct ring_buffer_iter *iter)
900{ 1041{
901 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1042 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
902 1043
@@ -926,7 +1067,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
926 * and with this, we can determine what to place into the 1067 * and with this, we can determine what to place into the
927 * data field. 1068 * data field.
928 */ 1069 */
929static inline void 1070static void
930rb_update_event(struct ring_buffer_event *event, 1071rb_update_event(struct ring_buffer_event *event,
931 unsigned type, unsigned length) 1072 unsigned type, unsigned length)
932{ 1073{
@@ -938,15 +1079,11 @@ rb_update_event(struct ring_buffer_event *event,
938 break; 1079 break;
939 1080
940 case RINGBUF_TYPE_TIME_EXTEND: 1081 case RINGBUF_TYPE_TIME_EXTEND:
941 event->len = 1082 event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
942 (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
943 >> RB_ALIGNMENT_SHIFT;
944 break; 1083 break;
945 1084
946 case RINGBUF_TYPE_TIME_STAMP: 1085 case RINGBUF_TYPE_TIME_STAMP:
947 event->len = 1086 event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
948 (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
949 >> RB_ALIGNMENT_SHIFT;
950 break; 1087 break;
951 1088
952 case RINGBUF_TYPE_DATA: 1089 case RINGBUF_TYPE_DATA:
@@ -955,16 +1092,14 @@ rb_update_event(struct ring_buffer_event *event,
955 event->len = 0; 1092 event->len = 0;
956 event->array[0] = length; 1093 event->array[0] = length;
957 } else 1094 } else
958 event->len = 1095 event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
959 (length + (RB_ALIGNMENT-1))
960 >> RB_ALIGNMENT_SHIFT;
961 break; 1096 break;
962 default: 1097 default:
963 BUG(); 1098 BUG();
964 } 1099 }
965} 1100}
966 1101
967static inline unsigned rb_calculate_event_length(unsigned length) 1102static unsigned rb_calculate_event_length(unsigned length)
968{ 1103{
969 struct ring_buffer_event event; /* Used only for sizeof array */ 1104 struct ring_buffer_event event; /* Used only for sizeof array */
970 1105
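/*
 * Worked example (illustration only, not part of the patch): with
 * RB_ALIGNMENT = 4, a 10-byte payload is stored as
 * event->len = DIV_ROUND_UP(10, 4) = 3, and rb_event_length() earlier in
 * this file then reports 3 * RB_ALIGNMENT = 12 bytes of payload space
 * plus RB_EVNT_HDR_SIZE for the event header.
 */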
@@ -990,6 +1125,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
990 struct ring_buffer *buffer = cpu_buffer->buffer; 1125 struct ring_buffer *buffer = cpu_buffer->buffer;
991 struct ring_buffer_event *event; 1126 struct ring_buffer_event *event;
992 unsigned long flags; 1127 unsigned long flags;
1128 bool lock_taken = false;
993 1129
994 commit_page = cpu_buffer->commit_page; 1130 commit_page = cpu_buffer->commit_page;
995 /* we just need to protect against interrupts */ 1131 /* we just need to protect against interrupts */
@@ -1003,7 +1139,30 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1003 struct buffer_page *next_page = tail_page; 1139 struct buffer_page *next_page = tail_page;
1004 1140
1005 local_irq_save(flags); 1141 local_irq_save(flags);
1006 __raw_spin_lock(&cpu_buffer->lock); 1142 /*
1143 * Since the write to the buffer is still not
1144 * fully lockless, we must be careful with NMIs.
1145 * The locks in the writers are taken when a write
1146 * crosses to a new page. The locks protect against
1147 * races with the readers (this will soon be fixed
1148 * with a lockless solution).
1149 *
1150 * Because we can not protect against NMIs, and we
1151 * want to keep traces reentrant, we need to manage
1152 * what happens when we are in an NMI.
1153 *
1154 * NMIs can happen after we take the lock.
1155 * If we are in an NMI, only take the lock
1156 * if it is not already taken. Otherwise
1157 * simply fail.
1158 */
1159 if (unlikely(in_nmi())) {
1160 if (!__raw_spin_trylock(&cpu_buffer->lock))
1161 goto out_reset;
1162 } else
1163 __raw_spin_lock(&cpu_buffer->lock);
1164
1165 lock_taken = true;
1007 1166
1008 rb_inc_page(cpu_buffer, &next_page); 1167 rb_inc_page(cpu_buffer, &next_page);
1009 1168
@@ -1012,7 +1171,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1012 1171
1013 /* we grabbed the lock before incrementing */ 1172 /* we grabbed the lock before incrementing */
1014 if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) 1173 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
1015 goto out_unlock; 1174 goto out_reset;
1016 1175
1017 /* 1176 /*
1018 * If for some reason, we had an interrupt storm that made 1177 * If for some reason, we had an interrupt storm that made
@@ -1021,12 +1180,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1021 */ 1180 */
1022 if (unlikely(next_page == commit_page)) { 1181 if (unlikely(next_page == commit_page)) {
1023 WARN_ON_ONCE(1); 1182 WARN_ON_ONCE(1);
1024 goto out_unlock; 1183 goto out_reset;
1025 } 1184 }
1026 1185
1027 if (next_page == head_page) { 1186 if (next_page == head_page) {
1028 if (!(buffer->flags & RB_FL_OVERWRITE)) 1187 if (!(buffer->flags & RB_FL_OVERWRITE))
1029 goto out_unlock; 1188 goto out_reset;
1030 1189
1031 /* tail_page has not moved yet? */ 1190 /* tail_page has not moved yet? */
1032 if (tail_page == cpu_buffer->tail_page) { 1191 if (tail_page == cpu_buffer->tail_page) {
@@ -1050,7 +1209,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1050 cpu_buffer->tail_page = next_page; 1209 cpu_buffer->tail_page = next_page;
1051 1210
1052 /* reread the time stamp */ 1211 /* reread the time stamp */
1053 *ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1212 *ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu);
1054 cpu_buffer->tail_page->page->time_stamp = *ts; 1213 cpu_buffer->tail_page->page->time_stamp = *ts;
1055 } 1214 }
1056 1215
@@ -1100,12 +1259,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1100 1259
1101 return event; 1260 return event;
1102 1261
1103 out_unlock: 1262 out_reset:
1104 /* reset write */ 1263 /* reset write */
1105 if (tail <= BUF_PAGE_SIZE) 1264 if (tail <= BUF_PAGE_SIZE)
1106 local_set(&tail_page->write, tail); 1265 local_set(&tail_page->write, tail);
1107 1266
1108 __raw_spin_unlock(&cpu_buffer->lock); 1267 if (likely(lock_taken))
1268 __raw_spin_unlock(&cpu_buffer->lock);
1109 local_irq_restore(flags); 1269 local_irq_restore(flags);
1110 return NULL; 1270 return NULL;
1111} 1271}
@@ -1192,7 +1352,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1192 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) 1352 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1193 return NULL; 1353 return NULL;
1194 1354
1195 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1355 ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
1196 1356
1197 /* 1357 /*
1198 * Only the first commit can update the timestamp. 1358 * Only the first commit can update the timestamp.
@@ -1265,7 +1425,6 @@ static DEFINE_PER_CPU(int, rb_need_resched);
1265 * ring_buffer_lock_reserve - reserve a part of the buffer 1425 * ring_buffer_lock_reserve - reserve a part of the buffer
1266 * @buffer: the ring buffer to reserve from 1426 * @buffer: the ring buffer to reserve from
1267 * @length: the length of the data to reserve (excluding event header) 1427 * @length: the length of the data to reserve (excluding event header)
1268 * @flags: a pointer to save the interrupt flags
1269 * 1428 *
1270 * Returns a reseverd event on the ring buffer to copy directly to. 1429 * Returns a reseverd event on the ring buffer to copy directly to.
1271 * The user of this interface will need to get the body to write into 1430 * The user of this interface will need to get the body to write into
@@ -1278,9 +1437,7 @@ static DEFINE_PER_CPU(int, rb_need_resched);
1278 * If NULL is returned, then nothing has been allocated or locked. 1437 * If NULL is returned, then nothing has been allocated or locked.
1279 */ 1438 */
1280struct ring_buffer_event * 1439struct ring_buffer_event *
1281ring_buffer_lock_reserve(struct ring_buffer *buffer, 1440ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1282 unsigned long length,
1283 unsigned long *flags)
1284{ 1441{
1285 struct ring_buffer_per_cpu *cpu_buffer; 1442 struct ring_buffer_per_cpu *cpu_buffer;
1286 struct ring_buffer_event *event; 1443 struct ring_buffer_event *event;
@@ -1347,15 +1504,13 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1347 * ring_buffer_unlock_commit - commit a reserved 1504 * ring_buffer_unlock_commit - commit a reserved
1348 * @buffer: The buffer to commit to 1505 * @buffer: The buffer to commit to
1349 * @event: The event pointer to commit. 1506 * @event: The event pointer to commit.
1350 * @flags: the interrupt flags received from ring_buffer_lock_reserve.
1351 * 1507 *
1352 * This commits the data to the ring buffer, and releases any locks held. 1508 * This commits the data to the ring buffer, and releases any locks held.
1353 * 1509 *
1354 * Must be paired with ring_buffer_lock_reserve. 1510 * Must be paired with ring_buffer_lock_reserve.
1355 */ 1511 */
1356int ring_buffer_unlock_commit(struct ring_buffer *buffer, 1512int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1357 struct ring_buffer_event *event, 1513 struct ring_buffer_event *event)
1358 unsigned long flags)
1359{ 1514{
1360 struct ring_buffer_per_cpu *cpu_buffer; 1515 struct ring_buffer_per_cpu *cpu_buffer;
1361 int cpu = raw_smp_processor_id(); 1516 int cpu = raw_smp_processor_id();
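/*
 * A hedged sketch of the producer side after the API change above: the
 * interrupt-flags argument is gone from both calls, so a caller simply
 * reserves, fills and commits. record_sample() and its u32 payload are
 * hypothetical; only the reserve/commit/event_data calls come from this
 * file.
 */
static void record_sample(struct ring_buffer *buffer, u32 value)
{
	struct ring_buffer_event *event;
	u32 *body;

	event = ring_buffer_lock_reserve(buffer, sizeof(*body));
	if (!event)
		return;		/* recording disabled or no space left */

	body = ring_buffer_event_data(event);
	*body = value;

	ring_buffer_unlock_commit(buffer, event);
}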
@@ -1438,7 +1593,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1438} 1593}
1439EXPORT_SYMBOL_GPL(ring_buffer_write); 1594EXPORT_SYMBOL_GPL(ring_buffer_write);
1440 1595
1441static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) 1596static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1442{ 1597{
1443 struct buffer_page *reader = cpu_buffer->reader_page; 1598 struct buffer_page *reader = cpu_buffer->reader_page;
1444 struct buffer_page *head = cpu_buffer->head_page; 1599 struct buffer_page *head = cpu_buffer->head_page;
@@ -1528,12 +1683,15 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
1528unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) 1683unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1529{ 1684{
1530 struct ring_buffer_per_cpu *cpu_buffer; 1685 struct ring_buffer_per_cpu *cpu_buffer;
1686 unsigned long ret;
1531 1687
1532 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 1688 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1533 return 0; 1689 return 0;
1534 1690
1535 cpu_buffer = buffer->buffers[cpu]; 1691 cpu_buffer = buffer->buffers[cpu];
1536 return cpu_buffer->entries; 1692 ret = cpu_buffer->entries;
1693
1694 return ret;
1537} 1695}
1538EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); 1696EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
1539 1697
@@ -1545,12 +1703,15 @@ EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
1545unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) 1703unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1546{ 1704{
1547 struct ring_buffer_per_cpu *cpu_buffer; 1705 struct ring_buffer_per_cpu *cpu_buffer;
1706 unsigned long ret;
1548 1707
1549 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 1708 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1550 return 0; 1709 return 0;
1551 1710
1552 cpu_buffer = buffer->buffers[cpu]; 1711 cpu_buffer = buffer->buffers[cpu];
1553 return cpu_buffer->overrun; 1712 ret = cpu_buffer->overrun;
1713
1714 return ret;
1554} 1715}
1555EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); 1716EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
1556 1717
@@ -1627,9 +1788,14 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
1627 */ 1788 */
1628void ring_buffer_iter_reset(struct ring_buffer_iter *iter) 1789void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1629{ 1790{
1630 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1791 struct ring_buffer_per_cpu *cpu_buffer;
1631 unsigned long flags; 1792 unsigned long flags;
1632 1793
1794 if (!iter)
1795 return;
1796
1797 cpu_buffer = iter->cpu_buffer;
1798
1633 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 1799 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1634 rb_iter_reset(iter); 1800 rb_iter_reset(iter);
1635 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 1801 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
@@ -1864,9 +2030,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1864 struct buffer_page *reader; 2030 struct buffer_page *reader;
1865 int nr_loops = 0; 2031 int nr_loops = 0;
1866 2032
1867 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1868 return NULL;
1869
1870 cpu_buffer = buffer->buffers[cpu]; 2033 cpu_buffer = buffer->buffers[cpu];
1871 2034
1872 again: 2035 again:
@@ -1906,7 +2069,8 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1906 case RINGBUF_TYPE_DATA: 2069 case RINGBUF_TYPE_DATA:
1907 if (ts) { 2070 if (ts) {
1908 *ts = cpu_buffer->read_stamp + event->time_delta; 2071 *ts = cpu_buffer->read_stamp + event->time_delta;
1909 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); 2072 ring_buffer_normalize_time_stamp(buffer,
2073 cpu_buffer->cpu, ts);
1910 } 2074 }
1911 return event; 2075 return event;
1912 2076
@@ -1967,7 +2131,8 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1967 case RINGBUF_TYPE_DATA: 2131 case RINGBUF_TYPE_DATA:
1968 if (ts) { 2132 if (ts) {
1969 *ts = iter->read_stamp + event->time_delta; 2133 *ts = iter->read_stamp + event->time_delta;
1970 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); 2134 ring_buffer_normalize_time_stamp(buffer,
2135 cpu_buffer->cpu, ts);
1971 } 2136 }
1972 return event; 2137 return event;
1973 2138
@@ -1995,6 +2160,9 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1995 struct ring_buffer_event *event; 2160 struct ring_buffer_event *event;
1996 unsigned long flags; 2161 unsigned long flags;
1997 2162
2163 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2164 return NULL;
2165
1998 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2166 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1999 event = rb_buffer_peek(buffer, cpu, ts); 2167 event = rb_buffer_peek(buffer, cpu, ts);
2000 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2168 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
@@ -2035,24 +2203,31 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2035struct ring_buffer_event * 2203struct ring_buffer_event *
2036ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 2204ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2037{ 2205{
2038 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 2206 struct ring_buffer_per_cpu *cpu_buffer;
2039 struct ring_buffer_event *event; 2207 struct ring_buffer_event *event = NULL;
2040 unsigned long flags; 2208 unsigned long flags;
2041 2209
2210 /* might be called in atomic */
2211 preempt_disable();
2212
2042 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2213 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2043 return NULL; 2214 goto out;
2044 2215
2216 cpu_buffer = buffer->buffers[cpu];
2045 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2217 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2046 2218
2047 event = rb_buffer_peek(buffer, cpu, ts); 2219 event = rb_buffer_peek(buffer, cpu, ts);
2048 if (!event) 2220 if (!event)
2049 goto out; 2221 goto out_unlock;
2050 2222
2051 rb_advance_reader(cpu_buffer); 2223 rb_advance_reader(cpu_buffer);
2052 2224
2053 out: 2225 out_unlock:
2054 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2226 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2055 2227
2228 out:
2229 preempt_enable();
2230
2056 return event; 2231 return event;
2057} 2232}
2058EXPORT_SYMBOL_GPL(ring_buffer_consume); 2233EXPORT_SYMBOL_GPL(ring_buffer_consume);
@@ -2232,6 +2407,7 @@ int ring_buffer_empty(struct ring_buffer *buffer)
2232 if (!rb_per_cpu_empty(cpu_buffer)) 2407 if (!rb_per_cpu_empty(cpu_buffer))
2233 return 0; 2408 return 0;
2234 } 2409 }
2410
2235 return 1; 2411 return 1;
2236} 2412}
2237EXPORT_SYMBOL_GPL(ring_buffer_empty); 2413EXPORT_SYMBOL_GPL(ring_buffer_empty);
@@ -2244,12 +2420,16 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty);
2244int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) 2420int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
2245{ 2421{
2246 struct ring_buffer_per_cpu *cpu_buffer; 2422 struct ring_buffer_per_cpu *cpu_buffer;
2423 int ret;
2247 2424
2248 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2425 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2249 return 1; 2426 return 1;
2250 2427
2251 cpu_buffer = buffer->buffers[cpu]; 2428 cpu_buffer = buffer->buffers[cpu];
2252 return rb_per_cpu_empty(cpu_buffer); 2429 ret = rb_per_cpu_empty(cpu_buffer);
2430
2431
2432 return ret;
2253} 2433}
2254EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); 2434EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
2255 2435
@@ -2268,18 +2448,36 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2268{ 2448{
2269 struct ring_buffer_per_cpu *cpu_buffer_a; 2449 struct ring_buffer_per_cpu *cpu_buffer_a;
2270 struct ring_buffer_per_cpu *cpu_buffer_b; 2450 struct ring_buffer_per_cpu *cpu_buffer_b;
2451 int ret = -EINVAL;
2271 2452
2272 if (!cpumask_test_cpu(cpu, buffer_a->cpumask) || 2453 if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
2273 !cpumask_test_cpu(cpu, buffer_b->cpumask)) 2454 !cpumask_test_cpu(cpu, buffer_b->cpumask))
2274 return -EINVAL; 2455 goto out;
2275 2456
2276 /* At least make sure the two buffers are somewhat the same */ 2457 /* At least make sure the two buffers are somewhat the same */
2277 if (buffer_a->pages != buffer_b->pages) 2458 if (buffer_a->pages != buffer_b->pages)
2278 return -EINVAL; 2459 goto out;
2460
2461 ret = -EAGAIN;
2462
2463 if (ring_buffer_flags != RB_BUFFERS_ON)
2464 goto out;
2465
2466 if (atomic_read(&buffer_a->record_disabled))
2467 goto out;
2468
2469 if (atomic_read(&buffer_b->record_disabled))
2470 goto out;
2279 2471
2280 cpu_buffer_a = buffer_a->buffers[cpu]; 2472 cpu_buffer_a = buffer_a->buffers[cpu];
2281 cpu_buffer_b = buffer_b->buffers[cpu]; 2473 cpu_buffer_b = buffer_b->buffers[cpu];
2282 2474
2475 if (atomic_read(&cpu_buffer_a->record_disabled))
2476 goto out;
2477
2478 if (atomic_read(&cpu_buffer_b->record_disabled))
2479 goto out;
2480
2283 /* 2481 /*
2284 * We can't do a synchronize_sched here because this 2482 * We can't do a synchronize_sched here because this
2285 * function can be called in atomic context. 2483 * function can be called in atomic context.
@@ -2298,18 +2496,21 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2298 atomic_dec(&cpu_buffer_a->record_disabled); 2496 atomic_dec(&cpu_buffer_a->record_disabled);
2299 atomic_dec(&cpu_buffer_b->record_disabled); 2497 atomic_dec(&cpu_buffer_b->record_disabled);
2300 2498
2301 return 0; 2499 ret = 0;
2500out:
2501 return ret;
2302} 2502}
2303EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); 2503EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
2304 2504
2305static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, 2505static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2306 struct buffer_data_page *bpage) 2506 struct buffer_data_page *bpage,
2507 unsigned int offset)
2307{ 2508{
2308 struct ring_buffer_event *event; 2509 struct ring_buffer_event *event;
2309 unsigned long head; 2510 unsigned long head;
2310 2511
2311 __raw_spin_lock(&cpu_buffer->lock); 2512 __raw_spin_lock(&cpu_buffer->lock);
2312 for (head = 0; head < local_read(&bpage->commit); 2513 for (head = offset; head < local_read(&bpage->commit);
2313 head += rb_event_length(event)) { 2514 head += rb_event_length(event)) {
2314 2515
2315 event = __rb_data_page_index(bpage, head); 2516 event = __rb_data_page_index(bpage, head);
@@ -2340,8 +2541,8 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2340 */ 2541 */
2341void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) 2542void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2342{ 2543{
2343 unsigned long addr;
2344 struct buffer_data_page *bpage; 2544 struct buffer_data_page *bpage;
2545 unsigned long addr;
2345 2546
2346 addr = __get_free_page(GFP_KERNEL); 2547 addr = __get_free_page(GFP_KERNEL);
2347 if (!addr) 2548 if (!addr)
@@ -2349,6 +2550,8 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2349 2550
2350 bpage = (void *)addr; 2551 bpage = (void *)addr;
2351 2552
2553 rb_init_page(bpage);
2554
2352 return bpage; 2555 return bpage;
2353} 2556}
2354 2557
@@ -2368,6 +2571,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2368 * ring_buffer_read_page - extract a page from the ring buffer 2571 * ring_buffer_read_page - extract a page from the ring buffer
2369 * @buffer: buffer to extract from 2572 * @buffer: buffer to extract from
2370 * @data_page: the page to use allocated from ring_buffer_alloc_read_page 2573 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
2574 * @len: amount to extract
2371 * @cpu: the cpu of the buffer to extract 2575 * @cpu: the cpu of the buffer to extract
2372 * @full: should the extraction only happen when the page is full. 2576 * @full: should the extraction only happen when the page is full.
2373 * 2577 *
@@ -2377,12 +2581,12 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2377 * to swap with a page in the ring buffer. 2581 * to swap with a page in the ring buffer.
2378 * 2582 *
2379 * for example: 2583 * for example:
2380 * rpage = ring_buffer_alloc_page(buffer); 2584 * rpage = ring_buffer_alloc_read_page(buffer);
2381 * if (!rpage) 2585 * if (!rpage)
2382 * return error; 2586 * return error;
2383 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); 2587 * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
2384 * if (ret) 2588 * if (ret >= 0)
2385 * process_page(rpage); 2589 * process_page(rpage, ret);
2386 * 2590 *
2387 * When @full is set, the function will not return true unless 2591 * When @full is set, the function will not return true unless
2388 * the writer is off the reader page. 2592 * the writer is off the reader page.
@@ -2393,72 +2597,118 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2393 * responsible for that. 2597 * responsible for that.
2394 * 2598 *
2395 * Returns: 2599 * Returns:
2396 * 1 if data has been transferred 2600 * >=0 if data has been transferred, returns the offset of consumed data.
2397 * 0 if no data has been transferred. 2601 * <0 if no data has been transferred.
2398 */ 2602 */
2399int ring_buffer_read_page(struct ring_buffer *buffer, 2603int ring_buffer_read_page(struct ring_buffer *buffer,
2400 void **data_page, int cpu, int full) 2604 void **data_page, size_t len, int cpu, int full)
2401{ 2605{
2402 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 2606 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2403 struct ring_buffer_event *event; 2607 struct ring_buffer_event *event;
2404 struct buffer_data_page *bpage; 2608 struct buffer_data_page *bpage;
2609 struct buffer_page *reader;
2405 unsigned long flags; 2610 unsigned long flags;
2406 int ret = 0; 2611 unsigned int commit;
2612 unsigned int read;
2613 u64 save_timestamp;
2614 int ret = -1;
2615
2616 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2617 goto out;
2618
2619 /*
2620 * If len is not big enough to hold the page header, then
2621 * we can not copy anything.
2622 */
2623 if (len <= BUF_PAGE_HDR_SIZE)
2624 goto out;
2625
2626 len -= BUF_PAGE_HDR_SIZE;
2407 2627
2408 if (!data_page) 2628 if (!data_page)
2409 return 0; 2629 goto out;
2410 2630
2411 bpage = *data_page; 2631 bpage = *data_page;
2412 if (!bpage) 2632 if (!bpage)
2413 return 0; 2633 goto out;
2414 2634
2415 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2635 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2416 2636
2417 /* 2637 reader = rb_get_reader_page(cpu_buffer);
2418 * rb_buffer_peek will get the next ring buffer if 2638 if (!reader)
2419 * the current reader page is empty. 2639 goto out_unlock;
2420 */ 2640
2421 event = rb_buffer_peek(buffer, cpu, NULL); 2641 event = rb_reader_event(cpu_buffer);
2422 if (!event) 2642
2423 goto out; 2643 read = reader->read;
2644 commit = rb_page_commit(reader);
2424 2645
2425 /* check for data */
2426 if (!local_read(&cpu_buffer->reader_page->page->commit))
2427 goto out;
2428 /* 2646 /*
2429 * If the writer is already off of the read page, then simply 2647 * If this page has been partially read or
2430 * switch the read page with the given page. Otherwise 2648 * if len is not big enough to read the rest of the page or
2431 * we need to copy the data from the reader to the writer. 2649 * a writer is still on the page, then
2650 * we must copy the data from the page to the buffer.
2651 * Otherwise, we can simply swap the page with the one passed in.
2432 */ 2652 */
2433 if (cpu_buffer->reader_page == cpu_buffer->commit_page) { 2653 if (read || (len < (commit - read)) ||
2434 unsigned int read = cpu_buffer->reader_page->read; 2654 cpu_buffer->reader_page == cpu_buffer->commit_page) {
2655 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
2656 unsigned int rpos = read;
2657 unsigned int pos = 0;
2658 unsigned int size;
2435 2659
2436 if (full) 2660 if (full)
2437 goto out; 2661 goto out_unlock;
2438 /* The writer is still on the reader page, we must copy */ 2662
2439 bpage = cpu_buffer->reader_page->page; 2663 if (len > (commit - read))
2440 memcpy(bpage->data, 2664 len = (commit - read);
2441 cpu_buffer->reader_page->page->data + read, 2665
2442 local_read(&bpage->commit) - read); 2666 size = rb_event_length(event);
2443 2667
2444 /* consume what was read */ 2668 if (len < size)
2445 cpu_buffer->reader_page += read; 2669 goto out_unlock;
2670
2671 /* save the current timestamp, since the user will need it */
2672 save_timestamp = cpu_buffer->read_stamp;
2673
2674 /* Need to copy one event at a time */
2675 do {
2676 memcpy(bpage->data + pos, rpage->data + rpos, size);
2677
2678 len -= size;
2679
2680 rb_advance_reader(cpu_buffer);
2681 rpos = reader->read;
2682 pos += size;
2683
2684 event = rb_reader_event(cpu_buffer);
2685 size = rb_event_length(event);
2686 } while (len > size);
2687
2688 /* update bpage */
2689 local_set(&bpage->commit, pos);
2690 bpage->time_stamp = save_timestamp;
2446 2691
2692 /* we copied everything to the beginning */
2693 read = 0;
2447 } else { 2694 } else {
2448 /* swap the pages */ 2695 /* swap the pages */
2449 rb_init_page(bpage); 2696 rb_init_page(bpage);
2450 bpage = cpu_buffer->reader_page->page; 2697 bpage = reader->page;
2451 cpu_buffer->reader_page->page = *data_page; 2698 reader->page = *data_page;
2452 cpu_buffer->reader_page->read = 0; 2699 local_set(&reader->write, 0);
2700 reader->read = 0;
2453 *data_page = bpage; 2701 *data_page = bpage;
2702
2703 /* update the entry counter */
2704 rb_remove_entries(cpu_buffer, bpage, read);
2454 } 2705 }
2455 ret = 1; 2706 ret = read;
2456 2707
2457 /* update the entry counter */ 2708 out_unlock:
2458 rb_remove_entries(cpu_buffer, bpage);
2459 out:
2460 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2709 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2461 2710
2711 out:
2462 return ret; 2712 return ret;
2463} 2713}
2464 2714
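/*
 * A hedged sketch of the consumer side under the new return convention
 * above: ring_buffer_read_page() now returns the offset of the consumed
 * data (>= 0), or a negative value when nothing was read. process_page()
 * is a hypothetical consumer (for example the splice path).
 */
static int drain_cpu_page(struct ring_buffer *buffer, int cpu)
{
	void *rpage = ring_buffer_alloc_read_page(buffer);
	int ret;

	if (!rpage)
		return -ENOMEM;

	/* swap out (or copy) up to one page of events for this CPU */
	ret = ring_buffer_read_page(buffer, &rpage, PAGE_SIZE, cpu, 0);
	if (ret >= 0)
		process_page(rpage, ret);

	ring_buffer_free_read_page(buffer, rpage);
	return ret;
}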
@@ -2466,7 +2716,7 @@ static ssize_t
2466rb_simple_read(struct file *filp, char __user *ubuf, 2716rb_simple_read(struct file *filp, char __user *ubuf,
2467 size_t cnt, loff_t *ppos) 2717 size_t cnt, loff_t *ppos)
2468{ 2718{
2469 long *p = filp->private_data; 2719 unsigned long *p = filp->private_data;
2470 char buf[64]; 2720 char buf[64];
2471 int r; 2721 int r;
2472 2722
@@ -2482,9 +2732,9 @@ static ssize_t
2482rb_simple_write(struct file *filp, const char __user *ubuf, 2732rb_simple_write(struct file *filp, const char __user *ubuf,
2483 size_t cnt, loff_t *ppos) 2733 size_t cnt, loff_t *ppos)
2484{ 2734{
2485 long *p = filp->private_data; 2735 unsigned long *p = filp->private_data;
2486 char buf[64]; 2736 char buf[64];
2487 long val; 2737 unsigned long val;
2488 int ret; 2738 int ret;
2489 2739
2490 if (cnt >= sizeof(buf)) 2740 if (cnt >= sizeof(buf))
@@ -2509,7 +2759,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
2509 return cnt; 2759 return cnt;
2510} 2760}
2511 2761
2512static struct file_operations rb_simple_fops = { 2762static const struct file_operations rb_simple_fops = {
2513 .open = tracing_open_generic, 2763 .open = tracing_open_generic,
2514 .read = rb_simple_read, 2764 .read = rb_simple_read,
2515 .write = rb_simple_write, 2765 .write = rb_simple_write,
@@ -2532,3 +2782,42 @@ static __init int rb_init_debugfs(void)
2532} 2782}
2533 2783
2534fs_initcall(rb_init_debugfs); 2784fs_initcall(rb_init_debugfs);
2785
2786#ifdef CONFIG_HOTPLUG_CPU
2787static int __cpuinit rb_cpu_notify(struct notifier_block *self,
2788 unsigned long action, void *hcpu)
2789{
2790 struct ring_buffer *buffer =
2791 container_of(self, struct ring_buffer, cpu_notify);
2792 long cpu = (long)hcpu;
2793
2794 switch (action) {
2795 case CPU_UP_PREPARE:
2796 case CPU_UP_PREPARE_FROZEN:
2797 if (cpu_isset(cpu, *buffer->cpumask))
2798 return NOTIFY_OK;
2799
2800 buffer->buffers[cpu] =
2801 rb_allocate_cpu_buffer(buffer, cpu);
2802 if (!buffer->buffers[cpu]) {
2803 WARN(1, "failed to allocate ring buffer on CPU %ld\n",
2804 cpu);
2805 return NOTIFY_OK;
2806 }
2807 smp_wmb();
2808 cpu_set(cpu, *buffer->cpumask);
2809 break;
2810 case CPU_DOWN_PREPARE:
2811 case CPU_DOWN_PREPARE_FROZEN:
2812 /*
2813 * Do nothing.
2814 * If we were to free the buffer, then the user would
2815 * lose any trace that was in the buffer.
2816 */
2817 break;
2818 default:
2819 break;
2820 }
2821 return NOTIFY_OK;
2822}
2823#endif
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 17bb88d86ac2..c95b7292be70 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -11,32 +11,33 @@
11 * Copyright (C) 2004-2006 Ingo Molnar 11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 William Lee Irwin III 12 * Copyright (C) 2004 William Lee Irwin III
13 */ 13 */
14#include <linux/ring_buffer.h>
14#include <linux/utsrelease.h> 15#include <linux/utsrelease.h>
16#include <linux/stacktrace.h>
17#include <linux/writeback.h>
15#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
16#include <linux/seq_file.h> 19#include <linux/seq_file.h>
17#include <linux/notifier.h> 20#include <linux/notifier.h>
21#include <linux/irqflags.h>
18#include <linux/debugfs.h> 22#include <linux/debugfs.h>
19#include <linux/pagemap.h> 23#include <linux/pagemap.h>
20#include <linux/hardirq.h> 24#include <linux/hardirq.h>
21#include <linux/linkage.h> 25#include <linux/linkage.h>
22#include <linux/uaccess.h> 26#include <linux/uaccess.h>
27#include <linux/kprobes.h>
23#include <linux/ftrace.h> 28#include <linux/ftrace.h>
24#include <linux/module.h> 29#include <linux/module.h>
25#include <linux/percpu.h> 30#include <linux/percpu.h>
31#include <linux/splice.h>
26#include <linux/kdebug.h> 32#include <linux/kdebug.h>
27#include <linux/ctype.h> 33#include <linux/ctype.h>
28#include <linux/init.h> 34#include <linux/init.h>
29#include <linux/poll.h> 35#include <linux/poll.h>
30#include <linux/gfp.h> 36#include <linux/gfp.h>
31#include <linux/fs.h> 37#include <linux/fs.h>
32#include <linux/kprobes.h>
33#include <linux/writeback.h>
34
35#include <linux/stacktrace.h>
36#include <linux/ring_buffer.h>
37#include <linux/irqflags.h>
38 38
39#include "trace.h" 39#include "trace.h"
40#include "trace_output.h"
40 41
41#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) 42#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
42 43
@@ -44,14 +45,25 @@ unsigned long __read_mostly tracing_max_latency;
44unsigned long __read_mostly tracing_thresh; 45unsigned long __read_mostly tracing_thresh;
45 46
46/* 47/*
48 * On boot up, the ring buffer is set to the minimum size, so that
49 * we do not waste memory on systems that are not using tracing.
50 */
51static int ring_buffer_expanded;
52
53/*
47 * We need to change this state when a selftest is running. 54 * We need to change this state when a selftest is running.
48 * A selftest will lurk into the ring-buffer to count the 55 * A selftest will lurk into the ring-buffer to count the
49 * entries inserted during the selftest although some concurrent 56 * entries inserted during the selftest although some concurrent
50 * insertions into the ring-buffer such as ftrace_printk could occur 57 * insertions into the ring-buffer such as trace_printk could occur
51 * at the same time, giving false positive or negative results. 58 * at the same time, giving false positive or negative results.
52 */ 59 */
53static bool __read_mostly tracing_selftest_running; 60static bool __read_mostly tracing_selftest_running;
54 61
62/*
63 * If a tracer is running, we do not want to run SELFTEST.
64 */
65static bool __read_mostly tracing_selftest_disabled;
66
55/* For tracers that don't implement custom flags */ 67/* For tracers that don't implement custom flags */
56static struct tracer_opt dummy_tracer_opt[] = { 68static struct tracer_opt dummy_tracer_opt[] = {
57 { } 69 { }
@@ -73,7 +85,7 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
73 * of the tracer is successful. But that is the only place that sets 85 * of the tracer is successful. But that is the only place that sets
74 * this back to zero. 86 * this back to zero.
75 */ 87 */
76int tracing_disabled = 1; 88static int tracing_disabled = 1;
77 89
78static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 90static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
79 91
@@ -91,6 +103,9 @@ static inline void ftrace_enable_cpu(void)
91 103
92static cpumask_var_t __read_mostly tracing_buffer_mask; 104static cpumask_var_t __read_mostly tracing_buffer_mask;
93 105
106/* Define which cpu buffers are currently read in trace_pipe */
107static cpumask_var_t tracing_reader_cpumask;
108
94#define for_each_tracing_cpu(cpu) \ 109#define for_each_tracing_cpu(cpu) \
95 for_each_cpu(cpu, tracing_buffer_mask) 110 for_each_cpu(cpu, tracing_buffer_mask)
96 111
@@ -109,14 +124,21 @@ static cpumask_var_t __read_mostly tracing_buffer_mask;
109 */ 124 */
110int ftrace_dump_on_oops; 125int ftrace_dump_on_oops;
111 126
112static int tracing_set_tracer(char *buf); 127static int tracing_set_tracer(const char *buf);
128
129#define BOOTUP_TRACER_SIZE 100
130static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata;
131static char *default_bootup_tracer;
113 132
114static int __init set_ftrace(char *str) 133static int __init set_ftrace(char *str)
115{ 134{
116 tracing_set_tracer(str); 135 strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
136 default_bootup_tracer = bootup_tracer_buf;
 137 /* We are using ftrace early; expand the ring buffer */
138 ring_buffer_expanded = 1;
117 return 1; 139 return 1;
118} 140}
119__setup("ftrace", set_ftrace); 141__setup("ftrace=", set_ftrace);
120 142
121static int __init set_ftrace_dump_on_oops(char *str) 143static int __init set_ftrace_dump_on_oops(char *str)
122{ 144{
@@ -133,13 +155,6 @@ ns2usecs(cycle_t nsec)
133 return nsec; 155 return nsec;
134} 156}
135 157
136cycle_t ftrace_now(int cpu)
137{
138 u64 ts = ring_buffer_time_stamp(cpu);
139 ring_buffer_normalize_time_stamp(cpu, &ts);
140 return ts;
141}
142
143/* 158/*
144 * The global_trace is the descriptor that holds the tracing 159 * The global_trace is the descriptor that holds the tracing
145 * buffers for the live tracing. For each CPU, it contains 160 * buffers for the live tracing. For each CPU, it contains
@@ -156,6 +171,20 @@ static struct trace_array global_trace;
156 171
157static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); 172static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
158 173
174cycle_t ftrace_now(int cpu)
175{
176 u64 ts;
177
178 /* Early boot up does not have a buffer yet */
179 if (!global_trace.buffer)
180 return trace_clock_local();
181
182 ts = ring_buffer_time_stamp(global_trace.buffer, cpu);
183 ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts);
184
185 return ts;
186}
187
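
The reworked ftrace_now() above falls back to trace_clock_local() while the global buffer has not been allocated yet. A minimal user-space sketch of that fallback pattern; the names (trace_buffer, buffer_time_stamp) are invented stand-ins, not the kernel's:

#include <stdint.h>
#include <time.h>

struct buffer;                          /* stands in for struct ring_buffer   */
static struct buffer *trace_buffer;     /* stays NULL until tracing is set up */

static uint64_t clock_local_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

/* Placeholder for the buffer's own clock; the kernel would call
 * ring_buffer_time_stamp() and then normalize the value. */
static uint64_t buffer_time_stamp(struct buffer *b, int cpu)
{
	(void)b; (void)cpu;
	return clock_local_ns();
}

static uint64_t trace_now(int cpu)
{
	/* Early boot: no buffer yet, so fall back to the local clock. */
	if (!trace_buffer)
		return clock_local_ns();

	return buffer_time_stamp(trace_buffer, cpu);
}

int main(void)
{
	return trace_now(0) ? 0 : 1;
}
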
159/* 188/*
160 * The max_tr is used to snapshot the global_trace when a maximum 189 * The max_tr is used to snapshot the global_trace when a maximum
161 * latency is reached. Some tracers will use this to store a maximum 190 * latency is reached. Some tracers will use this to store a maximum
@@ -186,9 +215,6 @@ int tracing_is_enabled(void)
186 return tracer_enabled; 215 return tracer_enabled;
187} 216}
188 217
189/* function tracing enabled */
190int ftrace_function_enabled;
191
192/* 218/*
193 * trace_buf_size is the size in bytes that is allocated 219 * trace_buf_size is the size in bytes that is allocated
194 * for a buffer. Note, the number of bytes is always rounded 220 * for a buffer. Note, the number of bytes is always rounded
@@ -229,7 +255,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
229 255
230/* trace_flags holds trace_options default values */ 256/* trace_flags holds trace_options default values */
231unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 257unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
232 TRACE_ITER_ANNOTATE; 258 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO;
233 259
234/** 260/**
235 * trace_wake_up - wake up tasks waiting for trace input 261 * trace_wake_up - wake up tasks waiting for trace input
@@ -280,13 +306,16 @@ static const char *trace_options[] = {
280 "block", 306 "block",
281 "stacktrace", 307 "stacktrace",
282 "sched-tree", 308 "sched-tree",
283 "ftrace_printk", 309 "trace_printk",
284 "ftrace_preempt", 310 "ftrace_preempt",
285 "branch", 311 "branch",
286 "annotate", 312 "annotate",
287 "userstacktrace", 313 "userstacktrace",
288 "sym-userobj", 314 "sym-userobj",
289 "printk-msg-only", 315 "printk-msg-only",
316 "context-info",
317 "latency-format",
318 "global-clock",
290 NULL 319 NULL
291}; 320};
292 321
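
Each string in trace_options[] corresponds to one TRACE_ITER_* bit, so writing "name" or "noname" to the options file flips a single bit in trace_flags. A rough sketch of that name-to-bit parsing; the option subset, default bits, and function name are invented for illustration:

#include <stdio.h>
#include <string.h>

static const char *trace_options[] = {
	"print-parent", "stacktrace", "trace_printk",
	"context-info", "latency-format", "global-clock",
	NULL,
};

static unsigned long trace_flags = 1UL << 0 | 1UL << 3;   /* made-up defaults */

/* Set or clear the bit whose position matches the option's index. */
static int set_trace_option(const char *buf)
{
	int neg = 0, i;

	if (strncmp(buf, "no", 2) == 0) {
		neg = 1;
		buf += 2;
	}
	for (i = 0; trace_options[i]; i++) {
		if (strcmp(buf, trace_options[i]) == 0) {
			if (neg)
				trace_flags &= ~(1UL << i);
			else
				trace_flags |= 1UL << i;
			return 0;
		}
	}
	return -1;                           /* unknown option name */
}

int main(void)
{
	set_trace_option("latency-format");
	set_trace_option("nocontext-info");
	printf("trace_flags = %#lx\n", trace_flags);
	return 0;
}
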
@@ -326,146 +355,37 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
326 data->rt_priority = tsk->rt_priority; 355 data->rt_priority = tsk->rt_priority;
327 356
328 /* record this tasks comm */ 357 /* record this tasks comm */
329 tracing_record_cmdline(current); 358 tracing_record_cmdline(tsk);
330} 359}
331 360
332/** 361ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
333 * trace_seq_printf - sequence printing of trace information
334 * @s: trace sequence descriptor
335 * @fmt: printf format string
336 *
337 * The tracer may use either sequence operations or its own
338 * copy to user routines. To simplify formating of a trace
339 * trace_seq_printf is used to store strings into a special
340 * buffer (@s). Then the output may be either used by
341 * the sequencer or pulled into another buffer.
342 */
343int
344trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
345{ 362{
346 int len = (PAGE_SIZE - 1) - s->len; 363 int len;
347 va_list ap;
348 int ret; 364 int ret;
349 365
350 if (!len) 366 if (!cnt)
351 return 0;
352
353 va_start(ap, fmt);
354 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
355 va_end(ap);
356
357 /* If we can't write it all, don't bother writing anything */
358 if (ret >= len)
359 return 0;
360
361 s->len += ret;
362
363 return len;
364}
365
366/**
367 * trace_seq_puts - trace sequence printing of simple string
368 * @s: trace sequence descriptor
369 * @str: simple string to record
370 *
371 * The tracer may use either the sequence operations or its own
372 * copy to user routines. This function records a simple string
373 * into a special buffer (@s) for later retrieval by a sequencer
374 * or other mechanism.
375 */
376static int
377trace_seq_puts(struct trace_seq *s, const char *str)
378{
379 int len = strlen(str);
380
381 if (len > ((PAGE_SIZE - 1) - s->len))
382 return 0;
383
384 memcpy(s->buffer + s->len, str, len);
385 s->len += len;
386
387 return len;
388}
389
390static int
391trace_seq_putc(struct trace_seq *s, unsigned char c)
392{
393 if (s->len >= (PAGE_SIZE - 1))
394 return 0;
395
396 s->buffer[s->len++] = c;
397
398 return 1;
399}
400
401static int
402trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
403{
404 if (len > ((PAGE_SIZE - 1) - s->len))
405 return 0; 367 return 0;
406 368
407 memcpy(s->buffer + s->len, mem, len); 369 if (s->len <= s->readpos)
408 s->len += len; 370 return -EBUSY;
409
410 return len;
411}
412
413#define MAX_MEMHEX_BYTES 8
414#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
415
416static int
417trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
418{
419 unsigned char hex[HEX_CHARS];
420 unsigned char *data = mem;
421 int i, j;
422
423#ifdef __BIG_ENDIAN
424 for (i = 0, j = 0; i < len; i++) {
425#else
426 for (i = len-1, j = 0; i >= 0; i--) {
427#endif
428 hex[j++] = hex_asc_hi(data[i]);
429 hex[j++] = hex_asc_lo(data[i]);
430 }
431 hex[j++] = ' ';
432
433 return trace_seq_putmem(s, hex, j);
434}
435
436static int
437trace_seq_path(struct trace_seq *s, struct path *path)
438{
439 unsigned char *p;
440 371
441 if (s->len >= (PAGE_SIZE - 1)) 372 len = s->len - s->readpos;
442 return 0; 373 if (cnt > len)
443 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); 374 cnt = len;
444 if (!IS_ERR(p)) { 375 ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
445 p = mangle_path(s->buffer + s->len, p, "\n"); 376 if (ret == cnt)
446 if (p) { 377 return -EFAULT;
447 s->len = p - s->buffer;
448 return 1;
449 }
450 } else {
451 s->buffer[s->len++] = '?';
452 return 1;
453 }
454 378
455 return 0; 379 cnt -= ret;
456}
457 380
458static void 381 s->readpos += cnt;
459trace_seq_reset(struct trace_seq *s) 382 return cnt;
460{
461 s->len = 0;
462 s->readpos = 0;
463} 383}
464 384
465ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) 385ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
466{ 386{
467 int len; 387 int len;
468 int ret; 388 void *ret;
469 389
470 if (s->len <= s->readpos) 390 if (s->len <= s->readpos)
471 return -EBUSY; 391 return -EBUSY;
@@ -473,11 +393,11 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
473 len = s->len - s->readpos; 393 len = s->len - s->readpos;
474 if (cnt > len) 394 if (cnt > len)
475 cnt = len; 395 cnt = len;
476 ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); 396 ret = memcpy(buf, s->buffer + s->readpos, cnt);
477 if (ret) 397 if (!ret)
478 return -EFAULT; 398 return -EFAULT;
479 399
480 s->readpos += len; 400 s->readpos += cnt;
481 return cnt; 401 return cnt;
482} 402}
483 403
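
Both trace_seq_to_user() and the new trace_seq_to_buffer() hand out whatever lies between readpos and len and then advance readpos by the amount actually delivered. A self-contained model of that bookkeeping, with plain memcpy standing in for copy_to_user and an invented struct/field layout:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

#define SEQ_SIZE 4096

struct seq {
	char buffer[SEQ_SIZE];
	size_t len;        /* bytes written into buffer        */
	size_t readpos;    /* bytes already handed to a reader */
};

/* Copy up to cnt unread bytes from s into buf, advancing readpos. */
static ssize_t seq_to_buffer(struct seq *s, void *buf, size_t cnt)
{
	size_t len;

	if (!cnt)
		return 0;
	if (s->len <= s->readpos)
		return -EBUSY;             /* nothing new to read */

	len = s->len - s->readpos;
	if (cnt > len)
		cnt = len;
	memcpy(buf, s->buffer + s->readpos, cnt);

	s->readpos += cnt;
	return (ssize_t)cnt;
}

int main(void)
{
	struct seq s = { .len = 0, .readpos = 0 };
	char out[16];

	s.len = (size_t)snprintf(s.buffer, SEQ_SIZE, "hello trace\n");
	while (seq_to_buffer(&s, out, sizeof(out) - 1) > 0)
		;                          /* a real reader would consume 'out' */
	return 0;
}
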
@@ -489,7 +409,7 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s)
489 s->buffer[len] = 0; 409 s->buffer[len] = 0;
490 seq_puts(m, s->buffer); 410 seq_puts(m, s->buffer);
491 411
492 trace_seq_reset(s); 412 trace_seq_init(s);
493} 413}
494 414
495/** 415/**
@@ -543,7 +463,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
543 463
544 ftrace_enable_cpu(); 464 ftrace_enable_cpu();
545 465
546 WARN_ON_ONCE(ret); 466 WARN_ON_ONCE(ret && ret != -EAGAIN);
547 467
548 __update_max_tr(tr, tsk, cpu); 468 __update_max_tr(tr, tsk, cpu);
549 __raw_spin_unlock(&ftrace_max_lock); 469 __raw_spin_unlock(&ftrace_max_lock);
@@ -556,6 +476,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
556 * Register a new plugin tracer. 476 * Register a new plugin tracer.
557 */ 477 */
558int register_tracer(struct tracer *type) 478int register_tracer(struct tracer *type)
479__releases(kernel_lock)
480__acquires(kernel_lock)
559{ 481{
560 struct tracer *t; 482 struct tracer *t;
561 int len; 483 int len;
@@ -594,9 +516,12 @@ int register_tracer(struct tracer *type)
594 else 516 else
595 if (!type->flags->opts) 517 if (!type->flags->opts)
596 type->flags->opts = dummy_tracer_opt; 518 type->flags->opts = dummy_tracer_opt;
519 if (!type->wait_pipe)
520 type->wait_pipe = default_wait_pipe;
521
597 522
598#ifdef CONFIG_FTRACE_STARTUP_TEST 523#ifdef CONFIG_FTRACE_STARTUP_TEST
599 if (type->selftest) { 524 if (type->selftest && !tracing_selftest_disabled) {
600 struct tracer *saved_tracer = current_trace; 525 struct tracer *saved_tracer = current_trace;
601 struct trace_array *tr = &global_trace; 526 struct trace_array *tr = &global_trace;
602 int i; 527 int i;
@@ -638,8 +563,26 @@ int register_tracer(struct tracer *type)
638 out: 563 out:
639 tracing_selftest_running = false; 564 tracing_selftest_running = false;
640 mutex_unlock(&trace_types_lock); 565 mutex_unlock(&trace_types_lock);
641 lock_kernel();
642 566
567 if (ret || !default_bootup_tracer)
568 goto out_unlock;
569
570 if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE))
571 goto out_unlock;
572
573 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
574 /* Do we want this tracer to start on bootup? */
575 tracing_set_tracer(type->name);
576 default_bootup_tracer = NULL;
 577 /* Disable other selftests, since running them would break this tracer. */
578 tracing_selftest_disabled = 1;
579#ifdef CONFIG_FTRACE_STARTUP_TEST
580 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
581 type->name);
582#endif
583
584 out_unlock:
585 lock_kernel();
643 return ret; 586 return ret;
644} 587}
645 588
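
The tail of register_tracer() now compares every newly registered tracer against the name captured by the ftrace= boot parameter and, on a match, starts it and disables further selftests. A stripped-down sketch of that deferred match-on-registration idea; all names here are invented, and the selftest/locking details are omitted:

#include <stdio.h>
#include <string.h>

#define BOOT_TRACER_SIZE 100

static char boot_tracer_buf[BOOT_TRACER_SIZE];
static char *default_boot_tracer;       /* set by the "boot parameter" */
static int selftest_disabled;

struct tracer {
	const char *name;
	void (*start)(void);
};

/* Mirrors the ftrace= __setup handler: just remember the name for later. */
static void capture_boot_param(const char *str)
{
	strncpy(boot_tracer_buf, str, BOOT_TRACER_SIZE - 1);
	default_boot_tracer = boot_tracer_buf;
}

/* Mirrors the tail of register_tracer() in this patch. */
static void register_tracer(struct tracer *t)
{
	/* ... normal registration and (optional) selftest would go here ... */

	if (!default_boot_tracer)
		return;
	if (strncmp(default_boot_tracer, t->name, BOOT_TRACER_SIZE))
		return;

	printf("Starting tracer '%s'\n", t->name);
	t->start();
	default_boot_tracer = NULL;     /* only start it once                 */
	selftest_disabled = 1;          /* further selftests would break it   */
}

static void start_function_tracer(void) { printf("function tracer running\n"); }

int main(void)
{
	struct tracer fn = { "function", start_function_tracer };

	capture_boot_param("function");  /* as if ftrace=function was given   */
	register_tracer(&fn);            /* tracer shows up later and matches */
	printf("selftest_disabled=%d\n", selftest_disabled);
	return 0;
}
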
@@ -658,6 +601,15 @@ void unregister_tracer(struct tracer *type)
658 601
659 found: 602 found:
660 *t = (*t)->next; 603 *t = (*t)->next;
604
605 if (type == current_trace && tracer_enabled) {
606 tracer_enabled = 0;
607 tracing_stop();
608 if (current_trace->stop)
609 current_trace->stop(&global_trace);
610 current_trace = &nop_trace;
611 }
612
661 if (strlen(type->name) != max_tracer_type_len) 613 if (strlen(type->name) != max_tracer_type_len)
662 goto out; 614 goto out;
663 615
@@ -689,19 +641,20 @@ void tracing_reset_online_cpus(struct trace_array *tr)
689} 641}
690 642
691#define SAVED_CMDLINES 128 643#define SAVED_CMDLINES 128
644#define NO_CMDLINE_MAP UINT_MAX
692static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; 645static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
693static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 646static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
694static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; 647static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
695static int cmdline_idx; 648static int cmdline_idx;
696static DEFINE_SPINLOCK(trace_cmdline_lock); 649static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
697 650
698/* temporary disable recording */ 651/* temporary disable recording */
699atomic_t trace_record_cmdline_disabled __read_mostly; 652static atomic_t trace_record_cmdline_disabled __read_mostly;
700 653
701static void trace_init_cmdlines(void) 654static void trace_init_cmdlines(void)
702{ 655{
703 memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline)); 656 memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
704 memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid)); 657 memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
705 cmdline_idx = 0; 658 cmdline_idx = 0;
706} 659}
707 660
@@ -738,13 +691,12 @@ void tracing_start(void)
738 return; 691 return;
739 692
740 spin_lock_irqsave(&tracing_start_lock, flags); 693 spin_lock_irqsave(&tracing_start_lock, flags);
741 if (--trace_stop_count) 694 if (--trace_stop_count) {
742 goto out; 695 if (trace_stop_count < 0) {
743 696 /* Someone screwed up their debugging */
744 if (trace_stop_count < 0) { 697 WARN_ON_ONCE(1);
745 /* Someone screwed up their debugging */ 698 trace_stop_count = 0;
746 WARN_ON_ONCE(1); 699 }
747 trace_stop_count = 0;
748 goto out; 700 goto out;
749 } 701 }
750 702
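
tracing_start()/tracing_stop() nest: only the outermost start actually re-enables the buffers, and the rewritten code above clamps an underflowed counter instead of acting on it. A tiny model of that nesting discipline, using fprintf where the kernel would WARN_ON_ONCE():

#include <stdio.h>

static int stop_count;              /* > 0 means tracing is stopped        */
static int buffers_enabled = 1;

static void tracing_stop_model(void)
{
	if (stop_count++ == 0)
		buffers_enabled = 0;        /* only the first stop disables */
}

static void tracing_start_model(void)
{
	if (--stop_count) {
		if (stop_count < 0) {       /* unbalanced start: clamp it   */
			fprintf(stderr, "unbalanced tracing_start()\n");
			stop_count = 0;
		}
		return;                     /* still nested, stay stopped   */
	}
	buffers_enabled = 1;                /* outermost start re-enables   */
}

int main(void)
{
	tracing_stop_model();
	tracing_stop_model();
	tracing_start_model();              /* still stopped (nested)       */
	tracing_start_model();              /* now enabled again            */
	printf("enabled=%d\n", buffers_enabled);
	return 0;
}
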
@@ -794,8 +746,7 @@ void trace_stop_cmdline_recording(void);
794 746
795static void trace_save_cmdline(struct task_struct *tsk) 747static void trace_save_cmdline(struct task_struct *tsk)
796{ 748{
797 unsigned map; 749 unsigned pid, idx;
798 unsigned idx;
799 750
800 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT)) 751 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
801 return; 752 return;
@@ -806,17 +757,24 @@ static void trace_save_cmdline(struct task_struct *tsk)
806 * nor do we want to disable interrupts, 757 * nor do we want to disable interrupts,
807 * so if we miss here, then better luck next time. 758 * so if we miss here, then better luck next time.
808 */ 759 */
809 if (!spin_trylock(&trace_cmdline_lock)) 760 if (!__raw_spin_trylock(&trace_cmdline_lock))
810 return; 761 return;
811 762
812 idx = map_pid_to_cmdline[tsk->pid]; 763 idx = map_pid_to_cmdline[tsk->pid];
813 if (idx >= SAVED_CMDLINES) { 764 if (idx == NO_CMDLINE_MAP) {
814 idx = (cmdline_idx + 1) % SAVED_CMDLINES; 765 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
815 766
816 map = map_cmdline_to_pid[idx]; 767 /*
817 if (map <= PID_MAX_DEFAULT) 768 * Check whether the cmdline buffer at idx has a pid
818 map_pid_to_cmdline[map] = (unsigned)-1; 769 * mapped. We are going to overwrite that entry so we
770 * need to clear the map_pid_to_cmdline. Otherwise we
771 * would read the new comm for the old pid.
772 */
773 pid = map_cmdline_to_pid[idx];
774 if (pid != NO_CMDLINE_MAP)
775 map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
819 776
777 map_cmdline_to_pid[idx] = tsk->pid;
820 map_pid_to_cmdline[tsk->pid] = idx; 778 map_pid_to_cmdline[tsk->pid] = idx;
821 779
822 cmdline_idx = idx; 780 cmdline_idx = idx;
@@ -824,33 +782,37 @@ static void trace_save_cmdline(struct task_struct *tsk)
824 782
825 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); 783 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
826 784
827 spin_unlock(&trace_cmdline_lock); 785 __raw_spin_unlock(&trace_cmdline_lock);
828} 786}
829 787
830char *trace_find_cmdline(int pid) 788void trace_find_cmdline(int pid, char comm[])
831{ 789{
832 char *cmdline = "<...>";
833 unsigned map; 790 unsigned map;
834 791
835 if (!pid) 792 if (!pid) {
836 return "<idle>"; 793 strcpy(comm, "<idle>");
794 return;
795 }
837 796
838 if (pid > PID_MAX_DEFAULT) 797 if (pid > PID_MAX_DEFAULT) {
839 goto out; 798 strcpy(comm, "<...>");
799 return;
800 }
840 801
802 __raw_spin_lock(&trace_cmdline_lock);
841 map = map_pid_to_cmdline[pid]; 803 map = map_pid_to_cmdline[pid];
842 if (map >= SAVED_CMDLINES) 804 if (map != NO_CMDLINE_MAP)
843 goto out; 805 strcpy(comm, saved_cmdlines[map]);
844 806 else
845 cmdline = saved_cmdlines[map]; 807 strcpy(comm, "<...>");
846 808
847 out: 809 __raw_spin_unlock(&trace_cmdline_lock);
848 return cmdline;
849} 810}
850 811
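
The saved-cmdline code keeps a small circular table of comms plus two maps (pid to slot, slot to pid); before a slot is reused, the previous owner's pid mapping is cleared so a stale pid can never resolve to the new comm. A self-contained model of that table; the sizes and names are illustrative, and the locking is left out:

#include <limits.h>
#include <stdio.h>
#include <string.h>

#define SAVED_CMDLINES 4            /* tiny on purpose, to force slot reuse */
#define MAX_PID        32768
#define NO_MAP         UINT_MAX
#define COMM_LEN       16

static unsigned pid_to_slot[MAX_PID + 1];
static unsigned slot_to_pid[SAVED_CMDLINES];
static char saved_comm[SAVED_CMDLINES][COMM_LEN];
static unsigned slot_idx;

static void init_map(void)
{
	/* All bytes 0xff makes every entry UINT_MAX, i.e. NO_MAP. */
	memset(pid_to_slot, 0xff, sizeof(pid_to_slot));
	memset(slot_to_pid, 0xff, sizeof(slot_to_pid));
}

static void save_cmdline(unsigned pid, const char *comm)
{
	unsigned idx = pid_to_slot[pid];

	if (idx == NO_MAP) {
		idx = (slot_idx + 1) % SAVED_CMDLINES;
		/* Evict whoever owned this slot so its pid no longer
		 * resolves to the comm we are about to store. */
		if (slot_to_pid[idx] != NO_MAP)
			pid_to_slot[slot_to_pid[idx]] = NO_MAP;
		slot_to_pid[idx] = pid;
		pid_to_slot[pid] = idx;
		slot_idx = idx;
	}
	snprintf(saved_comm[idx], COMM_LEN, "%s", comm);
}

static const char *find_cmdline(unsigned pid)
{
	if (!pid)
		return "<idle>";
	if (pid > MAX_PID || pid_to_slot[pid] == NO_MAP)
		return "<...>";
	return saved_comm[pid_to_slot[pid]];
}

int main(void)
{
	init_map();
	save_cmdline(1, "init");
	save_cmdline(42, "bash");
	printf("%u -> %s, %u -> %s\n", 42u, find_cmdline(42), 7u, find_cmdline(7));
	return 0;
}
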
851void tracing_record_cmdline(struct task_struct *tsk) 812void tracing_record_cmdline(struct task_struct *tsk)
852{ 813{
853 if (atomic_read(&trace_record_cmdline_disabled)) 814 if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled ||
815 !tracing_is_on())
854 return; 816 return;
855 817
856 trace_save_cmdline(tsk); 818 trace_save_cmdline(tsk);
@@ -864,7 +826,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
864 826
865 entry->preempt_count = pc & 0xff; 827 entry->preempt_count = pc & 0xff;
866 entry->pid = (tsk) ? tsk->pid : 0; 828 entry->pid = (tsk) ? tsk->pid : 0;
867 entry->tgid = (tsk) ? tsk->tgid : 0; 829 entry->tgid = (tsk) ? tsk->tgid : 0;
868 entry->flags = 830 entry->flags =
869#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 831#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
870 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 832 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -876,78 +838,114 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
876 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 838 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
877} 839}
878 840
841struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
842 unsigned char type,
843 unsigned long len,
844 unsigned long flags, int pc)
845{
846 struct ring_buffer_event *event;
847
848 event = ring_buffer_lock_reserve(tr->buffer, len);
849 if (event != NULL) {
850 struct trace_entry *ent = ring_buffer_event_data(event);
851
852 tracing_generic_entry_update(ent, flags, pc);
853 ent->type = type;
854 }
855
856 return event;
857}
858static void ftrace_trace_stack(struct trace_array *tr,
859 unsigned long flags, int skip, int pc);
860static void ftrace_trace_userstack(struct trace_array *tr,
861 unsigned long flags, int pc);
862
863void trace_buffer_unlock_commit(struct trace_array *tr,
864 struct ring_buffer_event *event,
865 unsigned long flags, int pc)
866{
867 ring_buffer_unlock_commit(tr->buffer, event);
868
869 ftrace_trace_stack(tr, flags, 6, pc);
870 ftrace_trace_userstack(tr, flags, pc);
871 trace_wake_up();
872}
873
874struct ring_buffer_event *
875trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
876 unsigned long flags, int pc)
877{
878 return trace_buffer_lock_reserve(&global_trace,
879 type, len, flags, pc);
880}
881
882void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
883 unsigned long flags, int pc)
884{
885 return trace_buffer_unlock_commit(&global_trace, event, flags, pc);
886}
887
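
trace_buffer_lock_reserve()/trace_buffer_unlock_commit() fold the "reserve an event, fill the generic header, commit, then record stacks and wake readers" sequence that each trace point used to open-code. A schematic user-space model of how a caller now uses the pair; the flat byte buffer, struct layouts, and numbers are invented, and there is no real ring buffer or locking here:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* A toy flat buffer standing in for the ring buffer. */
static union {
	unsigned char bytes[4096];
	unsigned long align;
} rb;
static size_t rb_used;

struct entry_hdr {                   /* cf. struct trace_entry */
	uint8_t  type;
	uint8_t  flags;
	uint16_t pid;
};

struct fn_entry {                    /* cf. struct ftrace_entry */
	struct entry_hdr hdr;
	unsigned long ip, parent_ip;
};

/* "Reserve": carve out len bytes and pre-fill the generic header. */
static void *buffer_lock_reserve(uint8_t type, size_t len, uint16_t pid)
{
	struct entry_hdr *hdr;

	if (rb_used + len > sizeof(rb.bytes))
		return NULL;             /* caller simply drops the event */

	hdr = (struct entry_hdr *)(rb.bytes + rb_used);
	memset(hdr, 0, len);
	hdr->type = type;
	hdr->pid  = pid;
	return hdr;
}

/* "Commit": publish the event; the kernel helper additionally records
 * kernel/user stack traces and wakes up waiting readers at this point. */
static void buffer_unlock_commit(void *event, size_t len)
{
	(void)event;
	rb_used += len;
}

/* Roughly what trace_function() boils down to with the new helpers. */
static void trace_function_like(unsigned long ip, unsigned long parent_ip)
{
	struct fn_entry *e;

	e = buffer_lock_reserve(1 /* TRACE_FN-ish */, sizeof(*e), 1234);
	if (!e)
		return;
	e->ip = ip;
	e->parent_ip = parent_ip;
	buffer_unlock_commit(e, sizeof(*e));
}

int main(void)
{
	trace_function_like(0xc0ffeeUL, 0xdeadbeefUL);
	printf("%zu bytes recorded\n", rb_used);
	return 0;
}
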
879void 888void
880trace_function(struct trace_array *tr, struct trace_array_cpu *data, 889trace_function(struct trace_array *tr,
881 unsigned long ip, unsigned long parent_ip, unsigned long flags, 890 unsigned long ip, unsigned long parent_ip, unsigned long flags,
882 int pc) 891 int pc)
883{ 892{
884 struct ring_buffer_event *event; 893 struct ring_buffer_event *event;
885 struct ftrace_entry *entry; 894 struct ftrace_entry *entry;
886 unsigned long irq_flags;
887 895
888 /* If we are reading the ring buffer, don't trace */ 896 /* If we are reading the ring buffer, don't trace */
889 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 897 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
890 return; 898 return;
891 899
892 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 900 event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry),
893 &irq_flags); 901 flags, pc);
894 if (!event) 902 if (!event)
895 return; 903 return;
896 entry = ring_buffer_event_data(event); 904 entry = ring_buffer_event_data(event);
897 tracing_generic_entry_update(&entry->ent, flags, pc);
898 entry->ent.type = TRACE_FN;
899 entry->ip = ip; 905 entry->ip = ip;
900 entry->parent_ip = parent_ip; 906 entry->parent_ip = parent_ip;
901 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 907 ring_buffer_unlock_commit(tr->buffer, event);
902} 908}
903 909
904#ifdef CONFIG_FUNCTION_GRAPH_TRACER 910#ifdef CONFIG_FUNCTION_GRAPH_TRACER
905static void __trace_graph_entry(struct trace_array *tr, 911static void __trace_graph_entry(struct trace_array *tr,
906 struct trace_array_cpu *data,
907 struct ftrace_graph_ent *trace, 912 struct ftrace_graph_ent *trace,
908 unsigned long flags, 913 unsigned long flags,
909 int pc) 914 int pc)
910{ 915{
911 struct ring_buffer_event *event; 916 struct ring_buffer_event *event;
912 struct ftrace_graph_ent_entry *entry; 917 struct ftrace_graph_ent_entry *entry;
913 unsigned long irq_flags;
914 918
915 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 919 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
916 return; 920 return;
917 921
918 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), 922 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
919 &irq_flags); 923 sizeof(*entry), flags, pc);
920 if (!event) 924 if (!event)
921 return; 925 return;
922 entry = ring_buffer_event_data(event); 926 entry = ring_buffer_event_data(event);
923 tracing_generic_entry_update(&entry->ent, flags, pc);
924 entry->ent.type = TRACE_GRAPH_ENT;
925 entry->graph_ent = *trace; 927 entry->graph_ent = *trace;
926 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); 928 ring_buffer_unlock_commit(global_trace.buffer, event);
927} 929}
928 930
929static void __trace_graph_return(struct trace_array *tr, 931static void __trace_graph_return(struct trace_array *tr,
930 struct trace_array_cpu *data,
931 struct ftrace_graph_ret *trace, 932 struct ftrace_graph_ret *trace,
932 unsigned long flags, 933 unsigned long flags,
933 int pc) 934 int pc)
934{ 935{
935 struct ring_buffer_event *event; 936 struct ring_buffer_event *event;
936 struct ftrace_graph_ret_entry *entry; 937 struct ftrace_graph_ret_entry *entry;
937 unsigned long irq_flags;
938 938
939 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 939 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
940 return; 940 return;
941 941
942 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), 942 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET,
943 &irq_flags); 943 sizeof(*entry), flags, pc);
944 if (!event) 944 if (!event)
945 return; 945 return;
946 entry = ring_buffer_event_data(event); 946 entry = ring_buffer_event_data(event);
947 tracing_generic_entry_update(&entry->ent, flags, pc);
948 entry->ent.type = TRACE_GRAPH_RET;
949 entry->ret = *trace; 947 entry->ret = *trace;
950 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); 948 ring_buffer_unlock_commit(global_trace.buffer, event);
951} 949}
952#endif 950#endif
953 951
@@ -957,31 +955,23 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
957 int pc) 955 int pc)
958{ 956{
959 if (likely(!atomic_read(&data->disabled))) 957 if (likely(!atomic_read(&data->disabled)))
960 trace_function(tr, data, ip, parent_ip, flags, pc); 958 trace_function(tr, ip, parent_ip, flags, pc);
961} 959}
962 960
963static void ftrace_trace_stack(struct trace_array *tr, 961static void __ftrace_trace_stack(struct trace_array *tr,
964 struct trace_array_cpu *data, 962 unsigned long flags,
965 unsigned long flags, 963 int skip, int pc)
966 int skip, int pc)
967{ 964{
968#ifdef CONFIG_STACKTRACE 965#ifdef CONFIG_STACKTRACE
969 struct ring_buffer_event *event; 966 struct ring_buffer_event *event;
970 struct stack_entry *entry; 967 struct stack_entry *entry;
971 struct stack_trace trace; 968 struct stack_trace trace;
972 unsigned long irq_flags;
973
974 if (!(trace_flags & TRACE_ITER_STACKTRACE))
975 return;
976 969
977 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 970 event = trace_buffer_lock_reserve(tr, TRACE_STACK,
978 &irq_flags); 971 sizeof(*entry), flags, pc);
979 if (!event) 972 if (!event)
980 return; 973 return;
981 entry = ring_buffer_event_data(event); 974 entry = ring_buffer_event_data(event);
982 tracing_generic_entry_update(&entry->ent, flags, pc);
983 entry->ent.type = TRACE_STACK;
984
985 memset(&entry->caller, 0, sizeof(entry->caller)); 975 memset(&entry->caller, 0, sizeof(entry->caller));
986 976
987 trace.nr_entries = 0; 977 trace.nr_entries = 0;
@@ -990,38 +980,43 @@ static void ftrace_trace_stack(struct trace_array *tr,
990 trace.entries = entry->caller; 980 trace.entries = entry->caller;
991 981
992 save_stack_trace(&trace); 982 save_stack_trace(&trace);
993 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 983 ring_buffer_unlock_commit(tr->buffer, event);
994#endif 984#endif
995} 985}
996 986
987static void ftrace_trace_stack(struct trace_array *tr,
988 unsigned long flags,
989 int skip, int pc)
990{
991 if (!(trace_flags & TRACE_ITER_STACKTRACE))
992 return;
993
994 __ftrace_trace_stack(tr, flags, skip, pc);
995}
996
997void __trace_stack(struct trace_array *tr, 997void __trace_stack(struct trace_array *tr,
998 struct trace_array_cpu *data,
999 unsigned long flags, 998 unsigned long flags,
1000 int skip) 999 int skip, int pc)
1001{ 1000{
1002 ftrace_trace_stack(tr, data, flags, skip, preempt_count()); 1001 __ftrace_trace_stack(tr, flags, skip, pc);
1003} 1002}
1004 1003
1005static void ftrace_trace_userstack(struct trace_array *tr, 1004static void ftrace_trace_userstack(struct trace_array *tr,
1006 struct trace_array_cpu *data, 1005 unsigned long flags, int pc)
1007 unsigned long flags, int pc)
1008{ 1006{
1009#ifdef CONFIG_STACKTRACE 1007#ifdef CONFIG_STACKTRACE
1010 struct ring_buffer_event *event; 1008 struct ring_buffer_event *event;
1011 struct userstack_entry *entry; 1009 struct userstack_entry *entry;
1012 struct stack_trace trace; 1010 struct stack_trace trace;
1013 unsigned long irq_flags;
1014 1011
1015 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 1012 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1016 return; 1013 return;
1017 1014
1018 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 1015 event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK,
1019 &irq_flags); 1016 sizeof(*entry), flags, pc);
1020 if (!event) 1017 if (!event)
1021 return; 1018 return;
1022 entry = ring_buffer_event_data(event); 1019 entry = ring_buffer_event_data(event);
1023 tracing_generic_entry_update(&entry->ent, flags, pc);
1024 entry->ent.type = TRACE_USER_STACK;
1025 1020
1026 memset(&entry->caller, 0, sizeof(entry->caller)); 1021 memset(&entry->caller, 0, sizeof(entry->caller));
1027 1022
@@ -1031,70 +1026,58 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1031 trace.entries = entry->caller; 1026 trace.entries = entry->caller;
1032 1027
1033 save_stack_trace_user(&trace); 1028 save_stack_trace_user(&trace);
1034 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1029 ring_buffer_unlock_commit(tr->buffer, event);
1035#endif 1030#endif
1036} 1031}
1037 1032
1038void __trace_userstack(struct trace_array *tr, 1033#ifdef UNUSED
1039 struct trace_array_cpu *data, 1034static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1040 unsigned long flags)
1041{ 1035{
1042 ftrace_trace_userstack(tr, data, flags, preempt_count()); 1036 ftrace_trace_userstack(tr, flags, preempt_count());
1043} 1037}
1038#endif /* UNUSED */
1044 1039
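
__ftrace_trace_stack() reserves a stack_entry, zeroes its fixed caller[] array, and lets save_stack_trace() fill it while skipping a few frames of tracing machinery. A rough user-space approximation with glibc's backtrace(3); the capacity and skip count are invented, and symbol names for the program's own functions may need linking with -rdynamic:

#include <execinfo.h>
#include <string.h>

#define STACK_ENTRIES 8              /* cf. FTRACE_STACK_ENTRIES */

struct stack_entry {
	void *caller[STACK_ENTRIES];
};

static void record_stack(struct stack_entry *e, int skip)
{
	void *frames[STACK_ENTRIES + 8];
	int n = backtrace(frames, STACK_ENTRIES + 8);
	int keep = n - skip;

	memset(e->caller, 0, sizeof(e->caller));
	if (keep <= 0)
		return;
	if (keep > STACK_ENTRIES)
		keep = STACK_ENTRIES;
	memcpy(e->caller, frames + skip, (size_t)keep * sizeof(void *));
}

int main(void)
{
	struct stack_entry e;
	int i, used = 0;

	record_stack(&e, 1);             /* skip record_stack() itself */
	for (i = 0; i < STACK_ENTRIES && e.caller[i]; i++)
		used++;
	backtrace_symbols_fd(e.caller, used, 1);   /* fd 1 = stdout */
	return 0;
}
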
1045static void 1040static void
1046ftrace_trace_special(void *__tr, void *__data, 1041ftrace_trace_special(void *__tr,
1047 unsigned long arg1, unsigned long arg2, unsigned long arg3, 1042 unsigned long arg1, unsigned long arg2, unsigned long arg3,
1048 int pc) 1043 int pc)
1049{ 1044{
1050 struct ring_buffer_event *event; 1045 struct ring_buffer_event *event;
1051 struct trace_array_cpu *data = __data;
1052 struct trace_array *tr = __tr; 1046 struct trace_array *tr = __tr;
1053 struct special_entry *entry; 1047 struct special_entry *entry;
1054 unsigned long irq_flags;
1055 1048
1056 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 1049 event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL,
1057 &irq_flags); 1050 sizeof(*entry), 0, pc);
1058 if (!event) 1051 if (!event)
1059 return; 1052 return;
1060 entry = ring_buffer_event_data(event); 1053 entry = ring_buffer_event_data(event);
1061 tracing_generic_entry_update(&entry->ent, 0, pc);
1062 entry->ent.type = TRACE_SPECIAL;
1063 entry->arg1 = arg1; 1054 entry->arg1 = arg1;
1064 entry->arg2 = arg2; 1055 entry->arg2 = arg2;
1065 entry->arg3 = arg3; 1056 entry->arg3 = arg3;
1066 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1057 trace_buffer_unlock_commit(tr, event, 0, pc);
1067 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
1068 ftrace_trace_userstack(tr, data, irq_flags, pc);
1069
1070 trace_wake_up();
1071} 1058}
1072 1059
1073void 1060void
1074__trace_special(void *__tr, void *__data, 1061__trace_special(void *__tr, void *__data,
1075 unsigned long arg1, unsigned long arg2, unsigned long arg3) 1062 unsigned long arg1, unsigned long arg2, unsigned long arg3)
1076{ 1063{
1077 ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count()); 1064 ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
1078} 1065}
1079 1066
1080void 1067void
1081tracing_sched_switch_trace(struct trace_array *tr, 1068tracing_sched_switch_trace(struct trace_array *tr,
1082 struct trace_array_cpu *data,
1083 struct task_struct *prev, 1069 struct task_struct *prev,
1084 struct task_struct *next, 1070 struct task_struct *next,
1085 unsigned long flags, int pc) 1071 unsigned long flags, int pc)
1086{ 1072{
1087 struct ring_buffer_event *event; 1073 struct ring_buffer_event *event;
1088 struct ctx_switch_entry *entry; 1074 struct ctx_switch_entry *entry;
1089 unsigned long irq_flags;
1090 1075
1091 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 1076 event = trace_buffer_lock_reserve(tr, TRACE_CTX,
1092 &irq_flags); 1077 sizeof(*entry), flags, pc);
1093 if (!event) 1078 if (!event)
1094 return; 1079 return;
1095 entry = ring_buffer_event_data(event); 1080 entry = ring_buffer_event_data(event);
1096 tracing_generic_entry_update(&entry->ent, flags, pc);
1097 entry->ent.type = TRACE_CTX;
1098 entry->prev_pid = prev->pid; 1081 entry->prev_pid = prev->pid;
1099 entry->prev_prio = prev->prio; 1082 entry->prev_prio = prev->prio;
1100 entry->prev_state = prev->state; 1083 entry->prev_state = prev->state;
@@ -1102,29 +1085,23 @@ tracing_sched_switch_trace(struct trace_array *tr,
1102 entry->next_prio = next->prio; 1085 entry->next_prio = next->prio;
1103 entry->next_state = next->state; 1086 entry->next_state = next->state;
1104 entry->next_cpu = task_cpu(next); 1087 entry->next_cpu = task_cpu(next);
1105 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1088 trace_buffer_unlock_commit(tr, event, flags, pc);
1106 ftrace_trace_stack(tr, data, flags, 5, pc);
1107 ftrace_trace_userstack(tr, data, flags, pc);
1108} 1089}
1109 1090
1110void 1091void
1111tracing_sched_wakeup_trace(struct trace_array *tr, 1092tracing_sched_wakeup_trace(struct trace_array *tr,
1112 struct trace_array_cpu *data,
1113 struct task_struct *wakee, 1093 struct task_struct *wakee,
1114 struct task_struct *curr, 1094 struct task_struct *curr,
1115 unsigned long flags, int pc) 1095 unsigned long flags, int pc)
1116{ 1096{
1117 struct ring_buffer_event *event; 1097 struct ring_buffer_event *event;
1118 struct ctx_switch_entry *entry; 1098 struct ctx_switch_entry *entry;
1119 unsigned long irq_flags;
1120 1099
1121 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 1100 event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
1122 &irq_flags); 1101 sizeof(*entry), flags, pc);
1123 if (!event) 1102 if (!event)
1124 return; 1103 return;
1125 entry = ring_buffer_event_data(event); 1104 entry = ring_buffer_event_data(event);
1126 tracing_generic_entry_update(&entry->ent, flags, pc);
1127 entry->ent.type = TRACE_WAKE;
1128 entry->prev_pid = curr->pid; 1105 entry->prev_pid = curr->pid;
1129 entry->prev_prio = curr->prio; 1106 entry->prev_prio = curr->prio;
1130 entry->prev_state = curr->state; 1107 entry->prev_state = curr->state;
@@ -1132,11 +1109,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
1132 entry->next_prio = wakee->prio; 1109 entry->next_prio = wakee->prio;
1133 entry->next_state = wakee->state; 1110 entry->next_state = wakee->state;
1134 entry->next_cpu = task_cpu(wakee); 1111 entry->next_cpu = task_cpu(wakee);
1135 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
1136 ftrace_trace_stack(tr, data, flags, 6, pc);
1137 ftrace_trace_userstack(tr, data, flags, pc);
1138 1112
1139 trace_wake_up(); 1113 ring_buffer_unlock_commit(tr->buffer, event);
1114 ftrace_trace_stack(tr, flags, 6, pc);
1115 ftrace_trace_userstack(tr, flags, pc);
1140} 1116}
1141 1117
1142void 1118void
@@ -1157,66 +1133,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1157 data = tr->data[cpu]; 1133 data = tr->data[cpu];
1158 1134
1159 if (likely(atomic_inc_return(&data->disabled) == 1)) 1135 if (likely(atomic_inc_return(&data->disabled) == 1))
1160 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); 1136 ftrace_trace_special(tr, arg1, arg2, arg3, pc);
1161
1162 atomic_dec(&data->disabled);
1163 local_irq_restore(flags);
1164}
1165
1166#ifdef CONFIG_FUNCTION_TRACER
1167static void
1168function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
1169{
1170 struct trace_array *tr = &global_trace;
1171 struct trace_array_cpu *data;
1172 unsigned long flags;
1173 long disabled;
1174 int cpu, resched;
1175 int pc;
1176
1177 if (unlikely(!ftrace_function_enabled))
1178 return;
1179
1180 pc = preempt_count();
1181 resched = ftrace_preempt_disable();
1182 local_save_flags(flags);
1183 cpu = raw_smp_processor_id();
1184 data = tr->data[cpu];
1185 disabled = atomic_inc_return(&data->disabled);
1186
1187 if (likely(disabled == 1))
1188 trace_function(tr, data, ip, parent_ip, flags, pc);
1189
1190 atomic_dec(&data->disabled);
1191 ftrace_preempt_enable(resched);
1192}
1193
1194static void
1195function_trace_call(unsigned long ip, unsigned long parent_ip)
1196{
1197 struct trace_array *tr = &global_trace;
1198 struct trace_array_cpu *data;
1199 unsigned long flags;
1200 long disabled;
1201 int cpu;
1202 int pc;
1203
1204 if (unlikely(!ftrace_function_enabled))
1205 return;
1206
1207 /*
1208 * Need to use raw, since this must be called before the
1209 * recursive protection is performed.
1210 */
1211 local_irq_save(flags);
1212 cpu = raw_smp_processor_id();
1213 data = tr->data[cpu];
1214 disabled = atomic_inc_return(&data->disabled);
1215
1216 if (likely(disabled == 1)) {
1217 pc = preempt_count();
1218 trace_function(tr, data, ip, parent_ip, flags, pc);
1219 }
1220 1137
1221 atomic_dec(&data->disabled); 1138 atomic_dec(&data->disabled);
1222 local_irq_restore(flags); 1139 local_irq_restore(flags);
@@ -1244,7 +1161,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
1244 disabled = atomic_inc_return(&data->disabled); 1161 disabled = atomic_inc_return(&data->disabled);
1245 if (likely(disabled == 1)) { 1162 if (likely(disabled == 1)) {
1246 pc = preempt_count(); 1163 pc = preempt_count();
1247 __trace_graph_entry(tr, data, trace, flags, pc); 1164 __trace_graph_entry(tr, trace, flags, pc);
1248 } 1165 }
1249 /* Only do the atomic if it is not already set */ 1166 /* Only do the atomic if it is not already set */
1250 if (!test_tsk_trace_graph(current)) 1167 if (!test_tsk_trace_graph(current))
@@ -1270,7 +1187,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
1270 disabled = atomic_inc_return(&data->disabled); 1187 disabled = atomic_inc_return(&data->disabled);
1271 if (likely(disabled == 1)) { 1188 if (likely(disabled == 1)) {
1272 pc = preempt_count(); 1189 pc = preempt_count();
1273 __trace_graph_return(tr, data, trace, flags, pc); 1190 __trace_graph_return(tr, trace, flags, pc);
1274 } 1191 }
1275 if (!trace->depth) 1192 if (!trace->depth)
1276 clear_tsk_trace_graph(current); 1193 clear_tsk_trace_graph(current);
@@ -1279,30 +1196,124 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
1279} 1196}
1280#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 1197#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1281 1198
1282static struct ftrace_ops trace_ops __read_mostly =
1283{
1284 .func = function_trace_call,
1285};
1286 1199
1287void tracing_start_function_trace(void) 1200/**
1201 * trace_vbprintk - write binary msg to tracing buffer
1202 *
1203 */
1204int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args)
1288{ 1205{
1289 ftrace_function_enabled = 0; 1206 static raw_spinlock_t trace_buf_lock =
1207 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
1208 static u32 trace_buf[TRACE_BUF_SIZE];
1290 1209
1291 if (trace_flags & TRACE_ITER_PREEMPTONLY) 1210 struct ring_buffer_event *event;
1292 trace_ops.func = function_trace_call_preempt_only; 1211 struct trace_array *tr = &global_trace;
1293 else 1212 struct trace_array_cpu *data;
1294 trace_ops.func = function_trace_call; 1213 struct bprint_entry *entry;
1214 unsigned long flags;
1215 int resched;
1216 int cpu, len = 0, size, pc;
1217
1218 if (unlikely(tracing_selftest_running || tracing_disabled))
1219 return 0;
1220
1221 /* Don't pollute graph traces with trace_vprintk internals */
1222 pause_graph_tracing();
1223
1224 pc = preempt_count();
1225 resched = ftrace_preempt_disable();
1226 cpu = raw_smp_processor_id();
1227 data = tr->data[cpu];
1228
1229 if (unlikely(atomic_read(&data->disabled)))
1230 goto out;
1295 1231
1296 register_ftrace_function(&trace_ops); 1232 /* Lockdep uses trace_printk for lock tracing */
1297 ftrace_function_enabled = 1; 1233 local_irq_save(flags);
1234 __raw_spin_lock(&trace_buf_lock);
1235 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1236
1237 if (len > TRACE_BUF_SIZE || len < 0)
1238 goto out_unlock;
1239
1240 size = sizeof(*entry) + sizeof(u32) * len;
1241 event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc);
1242 if (!event)
1243 goto out_unlock;
1244 entry = ring_buffer_event_data(event);
1245 entry->ip = ip;
1246 entry->depth = depth;
1247 entry->fmt = fmt;
1248
1249 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1250 ring_buffer_unlock_commit(tr->buffer, event);
1251
1252out_unlock:
1253 __raw_spin_unlock(&trace_buf_lock);
1254 local_irq_restore(flags);
1255
1256out:
1257 ftrace_preempt_enable(resched);
1258 unpause_graph_tracing();
1259
1260 return len;
1298} 1261}
1262EXPORT_SYMBOL_GPL(trace_vbprintk);
1299 1263
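
trace_vbprintk() stores only the format pointer plus the raw argument words (via vbin_printf) and defers the actual formatting until the trace is read, which is typically far cheaper at the probe site than a full vsnprintf. A simplified user-space model of that defer-the-formatting idea, restricted to unsigned long arguments so the record layout stays trivial; the real vbin_printf handles the full printf grammar:

#include <stdarg.h>
#include <stdio.h>

#define MAX_ARGS 4

struct bprint_rec {              /* cf. struct bprint_entry (simplified)     */
	const char *fmt;         /* a pointer suffices: kernel fmts are static */
	unsigned long args[MAX_ARGS];
	int nargs;
};

/* Record side: just stash the words, no formatting work at all. */
static void record_bprint(struct bprint_rec *rec, const char *fmt,
			  int nargs, ...)
{
	va_list ap;
	int i;

	rec->fmt = fmt;
	rec->nargs = nargs > MAX_ARGS ? MAX_ARGS : nargs;
	va_start(ap, nargs);
	for (i = 0; i < rec->nargs; i++)
		rec->args[i] = va_arg(ap, unsigned long);
	va_end(ap);
}

/* Read side: format lazily, long after the event happened. Extra
 * arguments beyond what fmt consumes are simply ignored by printf. */
static void print_bprint(const struct bprint_rec *rec)
{
	/* Only safe because this toy restricts fmt to %lu/%lx conversions. */
	printf(rec->fmt, rec->args[0], rec->args[1],
	       rec->args[2], rec->args[3]);
}

int main(void)
{
	struct bprint_rec rec = {0};

	record_bprint(&rec, "irq=%lu handled in %lu ns\n", 2, 19UL, 4200UL);
	print_bprint(&rec);
	return 0;
}
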
1300void tracing_stop_function_trace(void) 1264int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
1301{ 1265{
1302 ftrace_function_enabled = 0; 1266 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
1303 unregister_ftrace_function(&trace_ops); 1267 static char trace_buf[TRACE_BUF_SIZE];
1268
1269 struct ring_buffer_event *event;
1270 struct trace_array *tr = &global_trace;
1271 struct trace_array_cpu *data;
1272 int cpu, len = 0, size, pc;
1273 struct print_entry *entry;
1274 unsigned long irq_flags;
1275
1276 if (tracing_disabled || tracing_selftest_running)
1277 return 0;
1278
1279 pc = preempt_count();
1280 preempt_disable_notrace();
1281 cpu = raw_smp_processor_id();
1282 data = tr->data[cpu];
1283
1284 if (unlikely(atomic_read(&data->disabled)))
1285 goto out;
1286
1287 pause_graph_tracing();
1288 raw_local_irq_save(irq_flags);
1289 __raw_spin_lock(&trace_buf_lock);
1290 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1291
1292 len = min(len, TRACE_BUF_SIZE-1);
1293 trace_buf[len] = 0;
1294
1295 size = sizeof(*entry) + len + 1;
1296 event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc);
1297 if (!event)
1298 goto out_unlock;
1299 entry = ring_buffer_event_data(event);
1300 entry->ip = ip;
1301 entry->depth = depth;
1302
1303 memcpy(&entry->buf, trace_buf, len);
1304 entry->buf[len] = 0;
1305 ring_buffer_unlock_commit(tr->buffer, event);
1306
1307 out_unlock:
1308 __raw_spin_unlock(&trace_buf_lock);
1309 raw_local_irq_restore(irq_flags);
1310 unpause_graph_tracing();
1311 out:
1312 preempt_enable_notrace();
1313
1314 return len;
1304} 1315}
1305#endif 1316EXPORT_SYMBOL_GPL(trace_vprintk);
1306 1317
1307enum trace_file_type { 1318enum trace_file_type {
1308 TRACE_FILE_LAT_FMT = 1, 1319 TRACE_FILE_LAT_FMT = 1,
@@ -1345,10 +1356,25 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1345{ 1356{
1346 struct ring_buffer *buffer = iter->tr->buffer; 1357 struct ring_buffer *buffer = iter->tr->buffer;
1347 struct trace_entry *ent, *next = NULL; 1358 struct trace_entry *ent, *next = NULL;
1359 int cpu_file = iter->cpu_file;
1348 u64 next_ts = 0, ts; 1360 u64 next_ts = 0, ts;
1349 int next_cpu = -1; 1361 int next_cpu = -1;
1350 int cpu; 1362 int cpu;
1351 1363
1364 /*
 1365 * If we are in a per_cpu trace file, don't bother iterating over
 1366 * all CPUs; just peek at this one directly.
1367 */
1368 if (cpu_file > TRACE_PIPE_ALL_CPU) {
1369 if (ring_buffer_empty_cpu(buffer, cpu_file))
1370 return NULL;
1371 ent = peek_next_entry(iter, cpu_file, ent_ts);
1372 if (ent_cpu)
1373 *ent_cpu = cpu_file;
1374
1375 return ent;
1376 }
1377
1352 for_each_tracing_cpu(cpu) { 1378 for_each_tracing_cpu(cpu) {
1353 1379
1354 if (ring_buffer_empty_cpu(buffer, cpu)) 1380 if (ring_buffer_empty_cpu(buffer, cpu))
@@ -1376,8 +1402,8 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1376} 1402}
1377 1403
1378/* Find the next real entry, without updating the iterator itself */ 1404/* Find the next real entry, without updating the iterator itself */
1379static struct trace_entry * 1405struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1380find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) 1406 int *ent_cpu, u64 *ent_ts)
1381{ 1407{
1382 return __find_next_entry(iter, ent_cpu, ent_ts); 1408 return __find_next_entry(iter, ent_cpu, ent_ts);
1383} 1409}
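
With per-CPU trace_pipe files, __find_next_entry() either peeks one CPU directly or, for the all-CPUs file (TRACE_PIPE_ALL_CPU), scans every CPU and returns the entry with the smallest timestamp. The merge step is just a min-by-timestamp over N queues, as in this standalone sketch with invented data:

#include <stdint.h>
#include <stdio.h>

#define NCPUS 4

struct entry { uint64_t ts; const char *msg; };

/* One tiny pre-sorted queue per CPU plus a read cursor; ts == 0 ends a queue. */
static struct entry queue[NCPUS][3] = {
	{ {10, "cpu0 a"}, {40, "cpu0 b"}, {0} },
	{ {15, "cpu1 a"}, {0} },
	{ {0} },
	{ {12, "cpu3 a"}, {30, "cpu3 b"}, {0} },
};
static int pos[NCPUS];

static const struct entry *peek(int cpu)
{
	const struct entry *e = &queue[cpu][pos[cpu]];
	return e->ts ? e : NULL;
}

/* Return the oldest pending entry across all CPUs (or from one CPU if >= 0). */
static const struct entry *find_next_entry(int cpu_file, int *ent_cpu)
{
	const struct entry *next = NULL;
	int cpu, next_cpu = -1;

	if (cpu_file >= 0) {                  /* per-CPU pipe: peek directly */
		*ent_cpu = cpu_file;
		return peek(cpu_file);
	}
	for (cpu = 0; cpu < NCPUS; cpu++) {
		const struct entry *e = peek(cpu);
		if (e && (!next || e->ts < next->ts)) {
			next = e;
			next_cpu = cpu;
		}
	}
	*ent_cpu = next_cpu;
	return next;
}

int main(void)
{
	const struct entry *e;
	int cpu;

	while ((e = find_next_entry(-1, &cpu)) != NULL) {
		printf("[%llu] %s\n", (unsigned long long)e->ts, e->msg);
		pos[cpu]++;                   /* consume it */
	}
	return 0;
}
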
@@ -1426,19 +1452,32 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1426 return ent; 1452 return ent;
1427} 1453}
1428 1454
1455/*
 1456 * No locking is necessary here. The worst that can
 1457 * happen is losing events consumed at the same time
 1458 * by a trace_pipe reader.
 1459 * Other than that, we don't risk crashing the ring buffer
 1460 * because it serializes the readers.
 1461 *
 1462 * The current tracer is copied to avoid taking a global lock
 1463 * all around.
1464 */
1429static void *s_start(struct seq_file *m, loff_t *pos) 1465static void *s_start(struct seq_file *m, loff_t *pos)
1430{ 1466{
1431 struct trace_iterator *iter = m->private; 1467 struct trace_iterator *iter = m->private;
1468 static struct tracer *old_tracer;
1469 int cpu_file = iter->cpu_file;
1432 void *p = NULL; 1470 void *p = NULL;
1433 loff_t l = 0; 1471 loff_t l = 0;
1434 int cpu; 1472 int cpu;
1435 1473
1474 /* copy the tracer to avoid using a global lock all around */
1436 mutex_lock(&trace_types_lock); 1475 mutex_lock(&trace_types_lock);
1437 1476 if (unlikely(old_tracer != current_trace && current_trace)) {
1438 if (!current_trace || current_trace != iter->trace) { 1477 old_tracer = current_trace;
1439 mutex_unlock(&trace_types_lock); 1478 *iter->trace = *current_trace;
1440 return NULL;
1441 } 1479 }
1480 mutex_unlock(&trace_types_lock);
1442 1481
1443 atomic_inc(&trace_record_cmdline_disabled); 1482 atomic_inc(&trace_record_cmdline_disabled);
1444 1483
@@ -1449,9 +1488,12 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1449 1488
1450 ftrace_disable_cpu(); 1489 ftrace_disable_cpu();
1451 1490
1452 for_each_tracing_cpu(cpu) { 1491 if (cpu_file == TRACE_PIPE_ALL_CPU) {
1453 ring_buffer_iter_reset(iter->buffer_iter[cpu]); 1492 for_each_tracing_cpu(cpu)
1454 } 1493 ring_buffer_iter_reset(iter->buffer_iter[cpu]);
1494 } else
1495 ring_buffer_iter_reset(iter->buffer_iter[cpu_file]);
1496
1455 1497
1456 ftrace_enable_cpu(); 1498 ftrace_enable_cpu();
1457 1499
@@ -1469,155 +1511,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1469static void s_stop(struct seq_file *m, void *p) 1511static void s_stop(struct seq_file *m, void *p)
1470{ 1512{
1471 atomic_dec(&trace_record_cmdline_disabled); 1513 atomic_dec(&trace_record_cmdline_disabled);
1472 mutex_unlock(&trace_types_lock);
1473}
1474
1475#ifdef CONFIG_KRETPROBES
1476static inline const char *kretprobed(const char *name)
1477{
1478 static const char tramp_name[] = "kretprobe_trampoline";
1479 int size = sizeof(tramp_name);
1480
1481 if (strncmp(tramp_name, name, size) == 0)
1482 return "[unknown/kretprobe'd]";
1483 return name;
1484}
1485#else
1486static inline const char *kretprobed(const char *name)
1487{
1488 return name;
1489}
1490#endif /* CONFIG_KRETPROBES */
1491
1492static int
1493seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1494{
1495#ifdef CONFIG_KALLSYMS
1496 char str[KSYM_SYMBOL_LEN];
1497 const char *name;
1498
1499 kallsyms_lookup(address, NULL, NULL, NULL, str);
1500
1501 name = kretprobed(str);
1502
1503 return trace_seq_printf(s, fmt, name);
1504#endif
1505 return 1;
1506}
1507
1508static int
1509seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1510 unsigned long address)
1511{
1512#ifdef CONFIG_KALLSYMS
1513 char str[KSYM_SYMBOL_LEN];
1514 const char *name;
1515
1516 sprint_symbol(str, address);
1517 name = kretprobed(str);
1518
1519 return trace_seq_printf(s, fmt, name);
1520#endif
1521 return 1;
1522}
1523
1524#ifndef CONFIG_64BIT
1525# define IP_FMT "%08lx"
1526#else
1527# define IP_FMT "%016lx"
1528#endif
1529
1530int
1531seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1532{
1533 int ret;
1534
1535 if (!ip)
1536 return trace_seq_printf(s, "0");
1537
1538 if (sym_flags & TRACE_ITER_SYM_OFFSET)
1539 ret = seq_print_sym_offset(s, "%s", ip);
1540 else
1541 ret = seq_print_sym_short(s, "%s", ip);
1542
1543 if (!ret)
1544 return 0;
1545
1546 if (sym_flags & TRACE_ITER_SYM_ADDR)
1547 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1548 return ret;
1549}
1550
1551static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
1552 unsigned long ip, unsigned long sym_flags)
1553{
1554 struct file *file = NULL;
1555 unsigned long vmstart = 0;
1556 int ret = 1;
1557
1558 if (mm) {
1559 const struct vm_area_struct *vma;
1560
1561 down_read(&mm->mmap_sem);
1562 vma = find_vma(mm, ip);
1563 if (vma) {
1564 file = vma->vm_file;
1565 vmstart = vma->vm_start;
1566 }
1567 if (file) {
1568 ret = trace_seq_path(s, &file->f_path);
1569 if (ret)
1570 ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
1571 }
1572 up_read(&mm->mmap_sem);
1573 }
1574 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
1575 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1576 return ret;
1577}
1578
1579static int
1580seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
1581 unsigned long sym_flags)
1582{
1583 struct mm_struct *mm = NULL;
1584 int ret = 1;
1585 unsigned int i;
1586
1587 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
1588 struct task_struct *task;
1589 /*
1590 * we do the lookup on the thread group leader,
1591 * since individual threads might have already quit!
1592 */
1593 rcu_read_lock();
1594 task = find_task_by_vpid(entry->ent.tgid);
1595 if (task)
1596 mm = get_task_mm(task);
1597 rcu_read_unlock();
1598 }
1599
1600 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1601 unsigned long ip = entry->caller[i];
1602
1603 if (ip == ULONG_MAX || !ret)
1604 break;
1605 if (i && ret)
1606 ret = trace_seq_puts(s, " <- ");
1607 if (!ip) {
1608 if (ret)
1609 ret = trace_seq_puts(s, "??");
1610 continue;
1611 }
1612 if (!ret)
1613 break;
1614 if (ret)
1615 ret = seq_print_user_ip(s, mm, ip, sym_flags);
1616 }
1617
1618 if (mm)
1619 mmput(mm);
1620 return ret;
1621} 1514}
1622 1515
1623static void print_lat_help_header(struct seq_file *m) 1516static void print_lat_help_header(struct seq_file *m)
@@ -1658,11 +1551,11 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1658 total = entries + 1551 total = entries +
1659 ring_buffer_overruns(iter->tr->buffer); 1552 ring_buffer_overruns(iter->tr->buffer);
1660 1553
1661 seq_printf(m, "%s latency trace v1.1.5 on %s\n", 1554 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
1662 name, UTS_RELEASE); 1555 name, UTS_RELEASE);
1663 seq_puts(m, "-----------------------------------" 1556 seq_puts(m, "# -----------------------------------"
1664 "---------------------------------\n"); 1557 "---------------------------------\n");
1665 seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |" 1558 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
1666 " (M:%s VP:%d, KP:%d, SP:%d HP:%d", 1559 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1667 nsecs_to_usecs(data->saved_latency), 1560 nsecs_to_usecs(data->saved_latency),
1668 entries, 1561 entries,
@@ -1684,121 +1577,24 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1684#else 1577#else
1685 seq_puts(m, ")\n"); 1578 seq_puts(m, ")\n");
1686#endif 1579#endif
1687 seq_puts(m, " -----------------\n"); 1580 seq_puts(m, "# -----------------\n");
1688 seq_printf(m, " | task: %.16s-%d " 1581 seq_printf(m, "# | task: %.16s-%d "
1689 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", 1582 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1690 data->comm, data->pid, data->uid, data->nice, 1583 data->comm, data->pid, data->uid, data->nice,
1691 data->policy, data->rt_priority); 1584 data->policy, data->rt_priority);
1692 seq_puts(m, " -----------------\n"); 1585 seq_puts(m, "# -----------------\n");
1693 1586
1694 if (data->critical_start) { 1587 if (data->critical_start) {
1695 seq_puts(m, " => started at: "); 1588 seq_puts(m, "# => started at: ");
1696 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); 1589 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1697 trace_print_seq(m, &iter->seq); 1590 trace_print_seq(m, &iter->seq);
1698 seq_puts(m, "\n => ended at: "); 1591 seq_puts(m, "\n# => ended at: ");
1699 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); 1592 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1700 trace_print_seq(m, &iter->seq); 1593 trace_print_seq(m, &iter->seq);
1701 seq_puts(m, "\n"); 1594 seq_puts(m, "#\n");
1702 }
1703
1704 seq_puts(m, "\n");
1705}
1706
1707static void
1708lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1709{
1710 int hardirq, softirq;
1711 char *comm;
1712
1713 comm = trace_find_cmdline(entry->pid);
1714
1715 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1716 trace_seq_printf(s, "%3d", cpu);
1717 trace_seq_printf(s, "%c%c",
1718 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
1719 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
1720 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1721
1722 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1723 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1724 if (hardirq && softirq) {
1725 trace_seq_putc(s, 'H');
1726 } else {
1727 if (hardirq) {
1728 trace_seq_putc(s, 'h');
1729 } else {
1730 if (softirq)
1731 trace_seq_putc(s, 's');
1732 else
1733 trace_seq_putc(s, '.');
1734 }
1735 } 1595 }
1736 1596
1737 if (entry->preempt_count) 1597 seq_puts(m, "#\n");
1738 trace_seq_printf(s, "%x", entry->preempt_count);
1739 else
1740 trace_seq_puts(s, ".");
1741}
1742
1743unsigned long preempt_mark_thresh = 100;
1744
1745static void
1746lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1747 unsigned long rel_usecs)
1748{
1749 trace_seq_printf(s, " %4lldus", abs_usecs);
1750 if (rel_usecs > preempt_mark_thresh)
1751 trace_seq_puts(s, "!: ");
1752 else if (rel_usecs > 1)
1753 trace_seq_puts(s, "+: ");
1754 else
1755 trace_seq_puts(s, " : ");
1756}
1757
1758static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1759
1760static int task_state_char(unsigned long state)
1761{
1762 int bit = state ? __ffs(state) + 1 : 0;
1763
1764 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
1765}
1766
1767/*
1768 * The message is supposed to contain an ending newline.
1769 * If the printing stops prematurely, try to add a newline of our own.
1770 */
1771void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1772{
1773 struct trace_entry *ent;
1774 struct trace_field_cont *cont;
1775 bool ok = true;
1776
1777 ent = peek_next_entry(iter, iter->cpu, NULL);
1778 if (!ent || ent->type != TRACE_CONT) {
1779 trace_seq_putc(s, '\n');
1780 return;
1781 }
1782
1783 do {
1784 cont = (struct trace_field_cont *)ent;
1785 if (ok)
1786 ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
1787
1788 ftrace_disable_cpu();
1789
1790 if (iter->buffer_iter[iter->cpu])
1791 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1792 else
1793 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
1794
1795 ftrace_enable_cpu();
1796
1797 ent = peek_next_entry(iter, iter->cpu, NULL);
1798 } while (ent && ent->type == TRACE_CONT);
1799
1800 if (!ok)
1801 trace_seq_putc(s, '\n');
1802} 1598}
1803 1599
1804static void test_cpu_buff_start(struct trace_iterator *iter) 1600static void test_cpu_buff_start(struct trace_iterator *iter)
@@ -1818,453 +1614,105 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
1818 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); 1614 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1819} 1615}
1820 1616
1821static enum print_line_t
1822print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1823{
1824 struct trace_seq *s = &iter->seq;
1825 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1826 struct trace_entry *next_entry;
1827 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1828 struct trace_entry *entry = iter->ent;
1829 unsigned long abs_usecs;
1830 unsigned long rel_usecs;
1831 u64 next_ts;
1832 char *comm;
1833 int S, T;
1834 int i;
1835
1836 if (entry->type == TRACE_CONT)
1837 return TRACE_TYPE_HANDLED;
1838
1839 test_cpu_buff_start(iter);
1840
1841 next_entry = find_next_entry(iter, NULL, &next_ts);
1842 if (!next_entry)
1843 next_ts = iter->ts;
1844 rel_usecs = ns2usecs(next_ts - iter->ts);
1845 abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
1846
1847 if (verbose) {
1848 comm = trace_find_cmdline(entry->pid);
1849 trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
1850 " %ld.%03ldms (+%ld.%03ldms): ",
1851 comm,
1852 entry->pid, cpu, entry->flags,
1853 entry->preempt_count, trace_idx,
1854 ns2usecs(iter->ts),
1855 abs_usecs/1000,
1856 abs_usecs % 1000, rel_usecs/1000,
1857 rel_usecs % 1000);
1858 } else {
1859 lat_print_generic(s, entry, cpu);
1860 lat_print_timestamp(s, abs_usecs, rel_usecs);
1861 }
1862 switch (entry->type) {
1863 case TRACE_FN: {
1864 struct ftrace_entry *field;
1865
1866 trace_assign_type(field, entry);
1867
1868 seq_print_ip_sym(s, field->ip, sym_flags);
1869 trace_seq_puts(s, " (");
1870 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1871 trace_seq_puts(s, ")\n");
1872 break;
1873 }
1874 case TRACE_CTX:
1875 case TRACE_WAKE: {
1876 struct ctx_switch_entry *field;
1877
1878 trace_assign_type(field, entry);
1879
1880 T = task_state_char(field->next_state);
1881 S = task_state_char(field->prev_state);
1882 comm = trace_find_cmdline(field->next_pid);
1883 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
1884 field->prev_pid,
1885 field->prev_prio,
1886 S, entry->type == TRACE_CTX ? "==>" : " +",
1887 field->next_cpu,
1888 field->next_pid,
1889 field->next_prio,
1890 T, comm);
1891 break;
1892 }
1893 case TRACE_SPECIAL: {
1894 struct special_entry *field;
1895
1896 trace_assign_type(field, entry);
1897
1898 trace_seq_printf(s, "# %ld %ld %ld\n",
1899 field->arg1,
1900 field->arg2,
1901 field->arg3);
1902 break;
1903 }
1904 case TRACE_STACK: {
1905 struct stack_entry *field;
1906
1907 trace_assign_type(field, entry);
1908
1909 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1910 if (i)
1911 trace_seq_puts(s, " <= ");
1912 seq_print_ip_sym(s, field->caller[i], sym_flags);
1913 }
1914 trace_seq_puts(s, "\n");
1915 break;
1916 }
1917 case TRACE_PRINT: {
1918 struct print_entry *field;
1919
1920 trace_assign_type(field, entry);
1921
1922 seq_print_ip_sym(s, field->ip, sym_flags);
1923 trace_seq_printf(s, ": %s", field->buf);
1924 if (entry->flags & TRACE_FLAG_CONT)
1925 trace_seq_print_cont(s, iter);
1926 break;
1927 }
1928 case TRACE_BRANCH: {
1929 struct trace_branch *field;
1930
1931 trace_assign_type(field, entry);
1932
1933 trace_seq_printf(s, "[%s] %s:%s:%d\n",
1934 field->correct ? " ok " : " MISS ",
1935 field->func,
1936 field->file,
1937 field->line);
1938 break;
1939 }
1940 case TRACE_USER_STACK: {
1941 struct userstack_entry *field;
1942
1943 trace_assign_type(field, entry);
1944
1945 seq_print_userip_objs(field, s, sym_flags);
1946 trace_seq_putc(s, '\n');
1947 break;
1948 }
1949 default:
1950 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1951 }
1952 return TRACE_TYPE_HANDLED;
1953}
1954
1955static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 1617static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1956{ 1618{
1957 struct trace_seq *s = &iter->seq; 1619 struct trace_seq *s = &iter->seq;
1958 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1620 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1959 struct trace_entry *entry; 1621 struct trace_entry *entry;
1960 unsigned long usec_rem; 1622 struct trace_event *event;
1961 unsigned long long t;
1962 unsigned long secs;
1963 char *comm;
1964 int ret;
1965 int S, T;
1966 int i;
1967 1623
1968 entry = iter->ent; 1624 entry = iter->ent;
1969 1625
1970 if (entry->type == TRACE_CONT)
1971 return TRACE_TYPE_HANDLED;
1972
1973 test_cpu_buff_start(iter); 1626 test_cpu_buff_start(iter);
1974 1627
1975 comm = trace_find_cmdline(iter->ent->pid); 1628 event = ftrace_find_event(entry->type);
1976 1629
1977 t = ns2usecs(iter->ts); 1630 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1978 usec_rem = do_div(t, 1000000ULL); 1631 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1979 secs = (unsigned long)t; 1632 if (!trace_print_lat_context(iter))
1980 1633 goto partial;
1981 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); 1634 } else {
1982 if (!ret) 1635 if (!trace_print_context(iter))
1983 return TRACE_TYPE_PARTIAL_LINE; 1636 goto partial;
1984 ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
1985 if (!ret)
1986 return TRACE_TYPE_PARTIAL_LINE;
1987 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1988 if (!ret)
1989 return TRACE_TYPE_PARTIAL_LINE;
1990
1991 switch (entry->type) {
1992 case TRACE_FN: {
1993 struct ftrace_entry *field;
1994
1995 trace_assign_type(field, entry);
1996
1997 ret = seq_print_ip_sym(s, field->ip, sym_flags);
1998 if (!ret)
1999 return TRACE_TYPE_PARTIAL_LINE;
2000 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
2001 field->parent_ip) {
2002 ret = trace_seq_printf(s, " <-");
2003 if (!ret)
2004 return TRACE_TYPE_PARTIAL_LINE;
2005 ret = seq_print_ip_sym(s,
2006 field->parent_ip,
2007 sym_flags);
2008 if (!ret)
2009 return TRACE_TYPE_PARTIAL_LINE;
2010 }
2011 ret = trace_seq_printf(s, "\n");
2012 if (!ret)
2013 return TRACE_TYPE_PARTIAL_LINE;
2014 break;
2015 }
2016 case TRACE_CTX:
2017 case TRACE_WAKE: {
2018 struct ctx_switch_entry *field;
2019
2020 trace_assign_type(field, entry);
2021
2022 T = task_state_char(field->next_state);
2023 S = task_state_char(field->prev_state);
2024 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
2025 field->prev_pid,
2026 field->prev_prio,
2027 S,
2028 entry->type == TRACE_CTX ? "==>" : " +",
2029 field->next_cpu,
2030 field->next_pid,
2031 field->next_prio,
2032 T);
2033 if (!ret)
2034 return TRACE_TYPE_PARTIAL_LINE;
2035 break;
2036 }
2037 case TRACE_SPECIAL: {
2038 struct special_entry *field;
2039
2040 trace_assign_type(field, entry);
2041
2042 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
2043 field->arg1,
2044 field->arg2,
2045 field->arg3);
2046 if (!ret)
2047 return TRACE_TYPE_PARTIAL_LINE;
2048 break;
2049 }
2050 case TRACE_STACK: {
2051 struct stack_entry *field;
2052
2053 trace_assign_type(field, entry);
2054
2055 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
2056 if (i) {
2057 ret = trace_seq_puts(s, " <= ");
2058 if (!ret)
2059 return TRACE_TYPE_PARTIAL_LINE;
2060 }
2061 ret = seq_print_ip_sym(s, field->caller[i],
2062 sym_flags);
2063 if (!ret)
2064 return TRACE_TYPE_PARTIAL_LINE;
2065 } 1637 }
2066 ret = trace_seq_puts(s, "\n");
2067 if (!ret)
2068 return TRACE_TYPE_PARTIAL_LINE;
2069 break;
2070 } 1638 }
2071 case TRACE_PRINT: {
2072 struct print_entry *field;
2073 1639
2074 trace_assign_type(field, entry); 1640 if (event)
1641 return event->trace(iter, sym_flags);
2075 1642
2076 seq_print_ip_sym(s, field->ip, sym_flags); 1643 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2077 trace_seq_printf(s, ": %s", field->buf); 1644 goto partial;
2078 if (entry->flags & TRACE_FLAG_CONT)
2079 trace_seq_print_cont(s, iter);
2080 break;
2081 }
2082 case TRACE_GRAPH_RET: {
2083 return print_graph_function(iter);
2084 }
2085 case TRACE_GRAPH_ENT: {
2086 return print_graph_function(iter);
2087 }
2088 case TRACE_BRANCH: {
2089 struct trace_branch *field;
2090 1645
2091 trace_assign_type(field, entry);
2092
2093 trace_seq_printf(s, "[%s] %s:%s:%d\n",
2094 field->correct ? " ok " : " MISS ",
2095 field->func,
2096 field->file,
2097 field->line);
2098 break;
2099 }
2100 case TRACE_USER_STACK: {
2101 struct userstack_entry *field;
2102
2103 trace_assign_type(field, entry);
2104
2105 ret = seq_print_userip_objs(field, s, sym_flags);
2106 if (!ret)
2107 return TRACE_TYPE_PARTIAL_LINE;
2108 ret = trace_seq_putc(s, '\n');
2109 if (!ret)
2110 return TRACE_TYPE_PARTIAL_LINE;
2111 break;
2112 }
2113 }
2114 return TRACE_TYPE_HANDLED; 1646 return TRACE_TYPE_HANDLED;
1647partial:
1648 return TRACE_TYPE_PARTIAL_LINE;
2115} 1649}
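
The rewritten print_trace_fmt() above replaces the old per-type switch with a lookup: ftrace_find_event(entry->type) returns the handler registered for that entry type and its ->trace() callback does the formatting. The following stand-alone sketch shows the same dispatch pattern in plain user-space C; the names (demo_event, demo_find_event, print_fn_entry) are illustrative only and are not the kernel's struct trace_event API.

#include <stdio.h>
#include <stddef.h>

/* One output callback per entry type, looked up by numeric id. */
struct demo_event {
	int type;
	int (*print)(const void *entry);	/* returns 0 on "partial" output */
};

static int print_fn_entry(const void *entry)
{
	return printf("function entry: %s\n", (const char *)entry) > 0;
}

static const struct demo_event demo_events[] = {
	{ .type = 1, .print = print_fn_entry },
};

static const struct demo_event *demo_find_event(int type)
{
	size_t i;

	for (i = 0; i < sizeof(demo_events) / sizeof(demo_events[0]); i++)
		if (demo_events[i].type == type)
			return &demo_events[i];
	return NULL;		/* unknown type: caller prints "Unknown type %d" */
}

int main(void)
{
	const struct demo_event *ev = demo_find_event(1);

	if (ev)
		return !ev->print("do_sys_open");
	printf("Unknown type %d\n", 1);
	return 0;
}

Keeping the per-type formatting in per-event handlers means new entry types (kmemtrace, blktrace, syscalls elsewhere in this series) can register their own output routines without touching trace.c again.
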
2116 1650
2117static enum print_line_t print_raw_fmt(struct trace_iterator *iter) 1651static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2118{ 1652{
2119 struct trace_seq *s = &iter->seq; 1653 struct trace_seq *s = &iter->seq;
2120 struct trace_entry *entry; 1654 struct trace_entry *entry;
2121 int ret; 1655 struct trace_event *event;
2122 int S, T;
2123 1656
2124 entry = iter->ent; 1657 entry = iter->ent;
2125 1658
2126 if (entry->type == TRACE_CONT) 1659 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2127 return TRACE_TYPE_HANDLED; 1660 if (!trace_seq_printf(s, "%d %d %llu ",
2128 1661 entry->pid, iter->cpu, iter->ts))
2129 ret = trace_seq_printf(s, "%d %d %llu ", 1662 goto partial;
2130 entry->pid, iter->cpu, iter->ts);
2131 if (!ret)
2132 return TRACE_TYPE_PARTIAL_LINE;
2133
2134 switch (entry->type) {
2135 case TRACE_FN: {
2136 struct ftrace_entry *field;
2137
2138 trace_assign_type(field, entry);
2139
2140 ret = trace_seq_printf(s, "%x %x\n",
2141 field->ip,
2142 field->parent_ip);
2143 if (!ret)
2144 return TRACE_TYPE_PARTIAL_LINE;
2145 break;
2146 } 1663 }
2147 case TRACE_CTX:
2148 case TRACE_WAKE: {
2149 struct ctx_switch_entry *field;
2150
2151 trace_assign_type(field, entry);
2152
2153 T = task_state_char(field->next_state);
2154 S = entry->type == TRACE_WAKE ? '+' :
2155 task_state_char(field->prev_state);
2156 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
2157 field->prev_pid,
2158 field->prev_prio,
2159 S,
2160 field->next_cpu,
2161 field->next_pid,
2162 field->next_prio,
2163 T);
2164 if (!ret)
2165 return TRACE_TYPE_PARTIAL_LINE;
2166 break;
2167 }
2168 case TRACE_SPECIAL:
2169 case TRACE_USER_STACK:
2170 case TRACE_STACK: {
2171 struct special_entry *field;
2172
2173 trace_assign_type(field, entry);
2174 1664
2175 ret = trace_seq_printf(s, "# %ld %ld %ld\n", 1665 event = ftrace_find_event(entry->type);
2176 field->arg1, 1666 if (event)
2177 field->arg2, 1667 return event->raw(iter, 0);
2178 field->arg3);
2179 if (!ret)
2180 return TRACE_TYPE_PARTIAL_LINE;
2181 break;
2182 }
2183 case TRACE_PRINT: {
2184 struct print_entry *field;
2185 1668
2186 trace_assign_type(field, entry); 1669 if (!trace_seq_printf(s, "%d ?\n", entry->type))
1670 goto partial;
2187 1671
2188 trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
2189 if (entry->flags & TRACE_FLAG_CONT)
2190 trace_seq_print_cont(s, iter);
2191 break;
2192 }
2193 }
2194 return TRACE_TYPE_HANDLED; 1672 return TRACE_TYPE_HANDLED;
1673partial:
1674 return TRACE_TYPE_PARTIAL_LINE;
2195} 1675}
2196 1676
2197#define SEQ_PUT_FIELD_RET(s, x) \
2198do { \
2199 if (!trace_seq_putmem(s, &(x), sizeof(x))) \
2200 return 0; \
2201} while (0)
2202
2203#define SEQ_PUT_HEX_FIELD_RET(s, x) \
2204do { \
2205 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
2206 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
2207 return 0; \
2208} while (0)
2209
2210static enum print_line_t print_hex_fmt(struct trace_iterator *iter) 1677static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2211{ 1678{
2212 struct trace_seq *s = &iter->seq; 1679 struct trace_seq *s = &iter->seq;
2213 unsigned char newline = '\n'; 1680 unsigned char newline = '\n';
2214 struct trace_entry *entry; 1681 struct trace_entry *entry;
2215 int S, T; 1682 struct trace_event *event;
2216 1683
2217 entry = iter->ent; 1684 entry = iter->ent;
2218 1685
2219 if (entry->type == TRACE_CONT) 1686 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2220 return TRACE_TYPE_HANDLED; 1687 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1688 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1689 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
1690 }
2221 1691
2222 SEQ_PUT_HEX_FIELD_RET(s, entry->pid); 1692 event = ftrace_find_event(entry->type);
2223 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); 1693 if (event) {
2224 SEQ_PUT_HEX_FIELD_RET(s, iter->ts); 1694 enum print_line_t ret = event->hex(iter, 0);
1695 if (ret != TRACE_TYPE_HANDLED)
1696 return ret;
1697 }
2225 1698
2226 switch (entry->type) { 1699 SEQ_PUT_FIELD_RET(s, newline);
2227 case TRACE_FN: {
2228 struct ftrace_entry *field;
2229 1700
2230 trace_assign_type(field, entry); 1701 return TRACE_TYPE_HANDLED;
1702}
2231 1703
2232 SEQ_PUT_HEX_FIELD_RET(s, field->ip); 1704static enum print_line_t print_bprintk_msg_only(struct trace_iterator *iter)
2233 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip); 1705{
2234 break; 1706 struct trace_seq *s = &iter->seq;
2235 } 1707 struct trace_entry *entry = iter->ent;
2236 case TRACE_CTX: 1708 struct bprint_entry *field;
2237 case TRACE_WAKE: { 1709 int ret;
2238 struct ctx_switch_entry *field;
2239
2240 trace_assign_type(field, entry);
2241
2242 T = task_state_char(field->next_state);
2243 S = entry->type == TRACE_WAKE ? '+' :
2244 task_state_char(field->prev_state);
2245 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
2246 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
2247 SEQ_PUT_HEX_FIELD_RET(s, S);
2248 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
2249 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
2250 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
2251 SEQ_PUT_HEX_FIELD_RET(s, T);
2252 break;
2253 }
2254 case TRACE_SPECIAL:
2255 case TRACE_USER_STACK:
2256 case TRACE_STACK: {
2257 struct special_entry *field;
2258 1710
2259 trace_assign_type(field, entry); 1711 trace_assign_type(field, entry);
2260 1712
2261 SEQ_PUT_HEX_FIELD_RET(s, field->arg1); 1713 ret = trace_seq_bprintf(s, field->fmt, field->buf);
2262 SEQ_PUT_HEX_FIELD_RET(s, field->arg2); 1714 if (!ret)
2263 SEQ_PUT_HEX_FIELD_RET(s, field->arg3); 1715 return TRACE_TYPE_PARTIAL_LINE;
2264 break;
2265 }
2266 }
2267 SEQ_PUT_FIELD_RET(s, newline);
2268 1716
2269 return TRACE_TYPE_HANDLED; 1717 return TRACE_TYPE_HANDLED;
2270} 1718}
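
print_bprintk_msg_only() handles the new TRACE_BPRINT records, whose saved format string and argument block are rendered by trace_seq_bprintf(). Such records come from trace_printk() calls in kernel code (trace_printk.c is new in this series). A minimal module sketch that would emit them, assuming trace_printk() is available in the target tree:

#include <linux/module.h>
#include <linux/kernel.h>

/* Emit one binary-printk record into the ftrace ring buffer. */
static int __init bprint_demo_init(void)
{
	trace_printk("bprint demo loaded, HZ=%d\n", HZ);
	return 0;
}

static void __exit bprint_demo_exit(void)
{
	trace_printk("bprint demo unloaded\n");
}

module_init(bprint_demo_init);
module_exit(bprint_demo_exit);
MODULE_LICENSE("GPL");
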
@@ -2278,13 +1726,10 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
2278 1726
2279 trace_assign_type(field, entry); 1727 trace_assign_type(field, entry);
2280 1728
2281 ret = trace_seq_printf(s, field->buf); 1729 ret = trace_seq_printf(s, "%s", field->buf);
2282 if (!ret) 1730 if (!ret)
2283 return TRACE_TYPE_PARTIAL_LINE; 1731 return TRACE_TYPE_PARTIAL_LINE;
2284 1732
2285 if (entry->flags & TRACE_FLAG_CONT)
2286 trace_seq_print_cont(s, iter);
2287
2288 return TRACE_TYPE_HANDLED; 1733 return TRACE_TYPE_HANDLED;
2289} 1734}
2290 1735
@@ -2292,59 +1737,37 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2292{ 1737{
2293 struct trace_seq *s = &iter->seq; 1738 struct trace_seq *s = &iter->seq;
2294 struct trace_entry *entry; 1739 struct trace_entry *entry;
1740 struct trace_event *event;
2295 1741
2296 entry = iter->ent; 1742 entry = iter->ent;
2297 1743
2298 if (entry->type == TRACE_CONT) 1744 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2299 return TRACE_TYPE_HANDLED; 1745 SEQ_PUT_FIELD_RET(s, entry->pid);
2300 1746 SEQ_PUT_FIELD_RET(s, iter->cpu);
2301 SEQ_PUT_FIELD_RET(s, entry->pid); 1747 SEQ_PUT_FIELD_RET(s, iter->ts);
2302 SEQ_PUT_FIELD_RET(s, entry->cpu);
2303 SEQ_PUT_FIELD_RET(s, iter->ts);
2304
2305 switch (entry->type) {
2306 case TRACE_FN: {
2307 struct ftrace_entry *field;
2308
2309 trace_assign_type(field, entry);
2310
2311 SEQ_PUT_FIELD_RET(s, field->ip);
2312 SEQ_PUT_FIELD_RET(s, field->parent_ip);
2313 break;
2314 }
2315 case TRACE_CTX: {
2316 struct ctx_switch_entry *field;
2317
2318 trace_assign_type(field, entry);
2319
2320 SEQ_PUT_FIELD_RET(s, field->prev_pid);
2321 SEQ_PUT_FIELD_RET(s, field->prev_prio);
2322 SEQ_PUT_FIELD_RET(s, field->prev_state);
2323 SEQ_PUT_FIELD_RET(s, field->next_pid);
2324 SEQ_PUT_FIELD_RET(s, field->next_prio);
2325 SEQ_PUT_FIELD_RET(s, field->next_state);
2326 break;
2327 } 1748 }
2328 case TRACE_SPECIAL:
2329 case TRACE_USER_STACK:
2330 case TRACE_STACK: {
2331 struct special_entry *field;
2332
2333 trace_assign_type(field, entry);
2334 1749
2335 SEQ_PUT_FIELD_RET(s, field->arg1); 1750 event = ftrace_find_event(entry->type);
2336 SEQ_PUT_FIELD_RET(s, field->arg2); 1751 return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED;
2337 SEQ_PUT_FIELD_RET(s, field->arg3);
2338 break;
2339 }
2340 }
2341 return 1;
2342} 1752}
2343 1753
2344static int trace_empty(struct trace_iterator *iter) 1754static int trace_empty(struct trace_iterator *iter)
2345{ 1755{
2346 int cpu; 1756 int cpu;
2347 1757
1758 /* If we are looking at one CPU buffer, only check that one */
1759 if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {
1760 cpu = iter->cpu_file;
1761 if (iter->buffer_iter[cpu]) {
1762 if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1763 return 0;
1764 } else {
1765 if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1766 return 0;
1767 }
1768 return 1;
1769 }
1770
2348 for_each_tracing_cpu(cpu) { 1771 for_each_tracing_cpu(cpu) {
2349 if (iter->buffer_iter[cpu]) { 1772 if (iter->buffer_iter[cpu]) {
2350 if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) 1773 if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
@@ -2368,6 +1791,11 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
2368 return ret; 1791 return ret;
2369 } 1792 }
2370 1793
1794 if (iter->ent->type == TRACE_BPRINT &&
1795 trace_flags & TRACE_ITER_PRINTK &&
1796 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
1797 return print_bprintk_msg_only(iter);
1798
2371 if (iter->ent->type == TRACE_PRINT && 1799 if (iter->ent->type == TRACE_PRINT &&
2372 trace_flags & TRACE_ITER_PRINTK && 1800 trace_flags & TRACE_ITER_PRINTK &&
2373 trace_flags & TRACE_ITER_PRINTK_MSGONLY) 1801 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
@@ -2382,9 +1810,6 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
2382 if (trace_flags & TRACE_ITER_RAW) 1810 if (trace_flags & TRACE_ITER_RAW)
2383 return print_raw_fmt(iter); 1811 return print_raw_fmt(iter);
2384 1812
2385 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2386 return print_lat_fmt(iter, iter->idx, iter->cpu);
2387
2388 return print_trace_fmt(iter); 1813 return print_trace_fmt(iter);
2389} 1814}
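
print_trace_line() now checks the bin, hex and raw flags (and the new TRACE_BPRINT message-only case) before falling back to print_trace_fmt(); the latency layout is no longer a separate branch here. From user space these are ordinary toggles in the trace_options file. A sketch that flips one of them, assuming debugfs is mounted at /sys/kernel/debug and the option is spelled "raw" as in the long-standing trace_options list:

#include <stdio.h>

/* Path assumes the usual debugfs mount point. */
#define TRACE_OPTIONS "/sys/kernel/debug/tracing/trace_options"

static int set_option(const char *opt)
{
	FILE *f = fopen(TRACE_OPTIONS, "w");

	if (!f) {
		perror(TRACE_OPTIONS);
		return -1;
	}
	/* Writing "name" sets the flag, "noname" clears it. */
	fprintf(f, "%s\n", opt);
	return fclose(f);
}

int main(void)
{
	if (set_option("raw"))			/* switch to print_raw_fmt() output */
		return 1;
	return set_option("noraw") ? 1 : 0;	/* back to the default format */
}
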
2390 1815
@@ -2426,30 +1851,40 @@ static struct seq_operations tracer_seq_ops = {
2426}; 1851};
2427 1852
2428static struct trace_iterator * 1853static struct trace_iterator *
2429__tracing_open(struct inode *inode, struct file *file, int *ret) 1854__tracing_open(struct inode *inode, struct file *file)
2430{ 1855{
1856 long cpu_file = (long) inode->i_private;
1857 void *fail_ret = ERR_PTR(-ENOMEM);
2431 struct trace_iterator *iter; 1858 struct trace_iterator *iter;
2432 struct seq_file *m; 1859 struct seq_file *m;
2433 int cpu; 1860 int cpu, ret;
2434 1861
2435 if (tracing_disabled) { 1862 if (tracing_disabled)
2436 *ret = -ENODEV; 1863 return ERR_PTR(-ENODEV);
2437 return NULL;
2438 }
2439 1864
2440 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 1865 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2441 if (!iter) { 1866 if (!iter)
2442 *ret = -ENOMEM; 1867 return ERR_PTR(-ENOMEM);
2443 goto out;
2444 }
2445 1868
1869 /*
1870 * We make a copy of the current tracer to avoid concurrent
1871 * changes on it while we are reading.
1872 */
2446 mutex_lock(&trace_types_lock); 1873 mutex_lock(&trace_types_lock);
1874 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
1875 if (!iter->trace)
1876 goto fail;
1877
1878 if (current_trace)
1879 *iter->trace = *current_trace;
1880
2447 if (current_trace && current_trace->print_max) 1881 if (current_trace && current_trace->print_max)
2448 iter->tr = &max_tr; 1882 iter->tr = &max_tr;
2449 else 1883 else
2450 iter->tr = inode->i_private; 1884 iter->tr = &global_trace;
2451 iter->trace = current_trace;
2452 iter->pos = -1; 1885 iter->pos = -1;
1886 mutex_init(&iter->mutex);
1887 iter->cpu_file = cpu_file;
2453 1888
2454 /* Notify the tracer early; before we stop tracing. */ 1889 /* Notify the tracer early; before we stop tracing. */
2455 if (iter->trace && iter->trace->open) 1890 if (iter->trace && iter->trace->open)
@@ -2459,20 +1894,24 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
2459 if (ring_buffer_overruns(iter->tr->buffer)) 1894 if (ring_buffer_overruns(iter->tr->buffer))
2460 iter->iter_flags |= TRACE_FILE_ANNOTATE; 1895 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2461 1896
1897 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
1898 for_each_tracing_cpu(cpu) {
2462 1899
2463 for_each_tracing_cpu(cpu) { 1900 iter->buffer_iter[cpu] =
2464 1901 ring_buffer_read_start(iter->tr->buffer, cpu);
1902 }
1903 } else {
1904 cpu = iter->cpu_file;
2465 iter->buffer_iter[cpu] = 1905 iter->buffer_iter[cpu] =
2466 ring_buffer_read_start(iter->tr->buffer, cpu); 1906 ring_buffer_read_start(iter->tr->buffer, cpu);
2467
2468 if (!iter->buffer_iter[cpu])
2469 goto fail_buffer;
2470 } 1907 }
2471 1908
2472 /* TODO stop tracer */ 1909 /* TODO stop tracer */
2473 *ret = seq_open(file, &tracer_seq_ops); 1910 ret = seq_open(file, &tracer_seq_ops);
2474 if (*ret) 1911 if (ret < 0) {
1912 fail_ret = ERR_PTR(ret);
2475 goto fail_buffer; 1913 goto fail_buffer;
1914 }
2476 1915
2477 m = file->private_data; 1916 m = file->private_data;
2478 m->private = iter; 1917 m->private = iter;
@@ -2482,7 +1921,6 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
2482 1921
2483 mutex_unlock(&trace_types_lock); 1922 mutex_unlock(&trace_types_lock);
2484 1923
2485 out:
2486 return iter; 1924 return iter;
2487 1925
2488 fail_buffer: 1926 fail_buffer:
@@ -2490,10 +1928,12 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
2490 if (iter->buffer_iter[cpu]) 1928 if (iter->buffer_iter[cpu])
2491 ring_buffer_read_finish(iter->buffer_iter[cpu]); 1929 ring_buffer_read_finish(iter->buffer_iter[cpu]);
2492 } 1930 }
1931 fail:
2493 mutex_unlock(&trace_types_lock); 1932 mutex_unlock(&trace_types_lock);
1933 kfree(iter->trace);
2494 kfree(iter); 1934 kfree(iter);
2495 1935
2496 return ERR_PTR(-ENOMEM); 1936 return fail_ret;
2497} 1937}
2498 1938
2499int tracing_open_generic(struct inode *inode, struct file *filp) 1939int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -2505,7 +1945,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
2505 return 0; 1945 return 0;
2506} 1946}
2507 1947
2508int tracing_release(struct inode *inode, struct file *file) 1948static int tracing_release(struct inode *inode, struct file *file)
2509{ 1949{
2510 struct seq_file *m = (struct seq_file *)file->private_data; 1950 struct seq_file *m = (struct seq_file *)file->private_data;
2511 struct trace_iterator *iter = m->private; 1951 struct trace_iterator *iter = m->private;
@@ -2525,33 +1965,26 @@ int tracing_release(struct inode *inode, struct file *file)
2525 mutex_unlock(&trace_types_lock); 1965 mutex_unlock(&trace_types_lock);
2526 1966
2527 seq_release(inode, file); 1967 seq_release(inode, file);
1968 mutex_destroy(&iter->mutex);
1969 kfree(iter->trace);
2528 kfree(iter); 1970 kfree(iter);
2529 return 0; 1971 return 0;
2530} 1972}
2531 1973
2532static int tracing_open(struct inode *inode, struct file *file) 1974static int tracing_open(struct inode *inode, struct file *file)
2533{ 1975{
2534 int ret;
2535
2536 __tracing_open(inode, file, &ret);
2537
2538 return ret;
2539}
2540
2541static int tracing_lt_open(struct inode *inode, struct file *file)
2542{
2543 struct trace_iterator *iter; 1976 struct trace_iterator *iter;
2544 int ret; 1977 int ret = 0;
2545
2546 iter = __tracing_open(inode, file, &ret);
2547 1978
2548 if (!ret) 1979 iter = __tracing_open(inode, file);
1980 if (IS_ERR(iter))
1981 ret = PTR_ERR(iter);
1982 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
2549 iter->iter_flags |= TRACE_FILE_LAT_FMT; 1983 iter->iter_flags |= TRACE_FILE_LAT_FMT;
2550 1984
2551 return ret; 1985 return ret;
2552} 1986}
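
__tracing_open() now reports failure through the returned pointer (the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() convention) instead of an int *ret out-parameter, which is what lets tracing_open() absorb the old tracing_lt_open(): the latency layout is selected by the TRACE_ITER_LATENCY_FMT flag rather than a second open routine. A stand-alone illustration of the error-pointer idiom, using user-space stand-ins for the three kernel macros:

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

/* User-space stand-ins for the kernel's ERR_PTR()/IS_ERR()/PTR_ERR(). */
#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)
#define PTR_ERR(ptr)	((long)(ptr))

static void *open_thing(int fail)
{
	if (fail)
		return ERR_PTR(-ENOMEM);	/* error code encoded in the pointer */
	return malloc(16);			/* ordinary pointer on success */
}

int main(void)
{
	void *p = open_thing(1);

	if (IS_ERR(p)) {
		fprintf(stderr, "open_thing failed: %ld\n", PTR_ERR(p));
		return 1;
	}
	free(p);
	return 0;
}
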
2553 1987
2554
2555static void * 1988static void *
2556t_next(struct seq_file *m, void *v, loff_t *pos) 1989t_next(struct seq_file *m, void *v, loff_t *pos)
2557{ 1990{
@@ -2623,21 +2056,14 @@ static int show_traces_open(struct inode *inode, struct file *file)
2623 return ret; 2056 return ret;
2624} 2057}
2625 2058
2626static struct file_operations tracing_fops = { 2059static const struct file_operations tracing_fops = {
2627 .open = tracing_open, 2060 .open = tracing_open,
2628 .read = seq_read, 2061 .read = seq_read,
2629 .llseek = seq_lseek, 2062 .llseek = seq_lseek,
2630 .release = tracing_release, 2063 .release = tracing_release,
2631}; 2064};
2632 2065
2633static struct file_operations tracing_lt_fops = { 2066static const struct file_operations show_traces_fops = {
2634 .open = tracing_lt_open,
2635 .read = seq_read,
2636 .llseek = seq_lseek,
2637 .release = tracing_release,
2638};
2639
2640static struct file_operations show_traces_fops = {
2641 .open = show_traces_open, 2067 .open = show_traces_open,
2642 .read = seq_read, 2068 .read = seq_read,
2643 .release = seq_release, 2069 .release = seq_release,
@@ -2730,7 +2156,7 @@ err_unlock:
2730 return err; 2156 return err;
2731} 2157}
2732 2158
2733static struct file_operations tracing_cpumask_fops = { 2159static const struct file_operations tracing_cpumask_fops = {
2734 .open = tracing_open_generic, 2160 .open = tracing_open_generic,
2735 .read = tracing_cpumask_read, 2161 .read = tracing_cpumask_read,
2736 .write = tracing_cpumask_write, 2162 .write = tracing_cpumask_write,
@@ -2740,57 +2166,62 @@ static ssize_t
2740tracing_trace_options_read(struct file *filp, char __user *ubuf, 2166tracing_trace_options_read(struct file *filp, char __user *ubuf,
2741 size_t cnt, loff_t *ppos) 2167 size_t cnt, loff_t *ppos)
2742{ 2168{
2743 int i; 2169 struct tracer_opt *trace_opts;
2170 u32 tracer_flags;
2171 int len = 0;
2744 char *buf; 2172 char *buf;
2745 int r = 0; 2173 int r = 0;
2746 int len = 0; 2174 int i;
2747 u32 tracer_flags = current_trace->flags->val;
2748 struct tracer_opt *trace_opts = current_trace->flags->opts;
2749 2175
2750 2176
2751 /* calulate max size */ 2177 /* calculate max size */
2752 for (i = 0; trace_options[i]; i++) { 2178 for (i = 0; trace_options[i]; i++) {
2753 len += strlen(trace_options[i]); 2179 len += strlen(trace_options[i]);
2754 len += 3; /* "no" and space */ 2180 len += 3; /* "no" and newline */
2755 } 2181 }
2756 2182
2183 mutex_lock(&trace_types_lock);
2184 tracer_flags = current_trace->flags->val;
2185 trace_opts = current_trace->flags->opts;
2186
2757 /* 2187 /*
2758 * Increase the size with names of options specific 2188 * Increase the size with names of options specific
2759 * of the current tracer. 2189 * of the current tracer.
2760 */ 2190 */
2761 for (i = 0; trace_opts[i].name; i++) { 2191 for (i = 0; trace_opts[i].name; i++) {
2762 len += strlen(trace_opts[i].name); 2192 len += strlen(trace_opts[i].name);
2763 len += 3; /* "no" and space */ 2193 len += 3; /* "no" and newline */
2764 } 2194 }
2765 2195
2766 /* +2 for \n and \0 */ 2196 /* +2 for \n and \0 */
2767 buf = kmalloc(len + 2, GFP_KERNEL); 2197 buf = kmalloc(len + 2, GFP_KERNEL);
2768 if (!buf) 2198 if (!buf) {
2199 mutex_unlock(&trace_types_lock);
2769 return -ENOMEM; 2200 return -ENOMEM;
2201 }
2770 2202
2771 for (i = 0; trace_options[i]; i++) { 2203 for (i = 0; trace_options[i]; i++) {
2772 if (trace_flags & (1 << i)) 2204 if (trace_flags & (1 << i))
2773 r += sprintf(buf + r, "%s ", trace_options[i]); 2205 r += sprintf(buf + r, "%s\n", trace_options[i]);
2774 else 2206 else
2775 r += sprintf(buf + r, "no%s ", trace_options[i]); 2207 r += sprintf(buf + r, "no%s\n", trace_options[i]);
2776 } 2208 }
2777 2209
2778 for (i = 0; trace_opts[i].name; i++) { 2210 for (i = 0; trace_opts[i].name; i++) {
2779 if (tracer_flags & trace_opts[i].bit) 2211 if (tracer_flags & trace_opts[i].bit)
2780 r += sprintf(buf + r, "%s ", 2212 r += sprintf(buf + r, "%s\n",
2781 trace_opts[i].name); 2213 trace_opts[i].name);
2782 else 2214 else
2783 r += sprintf(buf + r, "no%s ", 2215 r += sprintf(buf + r, "no%s\n",
2784 trace_opts[i].name); 2216 trace_opts[i].name);
2785 } 2217 }
2218 mutex_unlock(&trace_types_lock);
2786 2219
2787 r += sprintf(buf + r, "\n");
2788 WARN_ON(r >= len + 2); 2220 WARN_ON(r >= len + 2);
2789 2221
2790 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2222 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2791 2223
2792 kfree(buf); 2224 kfree(buf);
2793
2794 return r; 2225 return r;
2795} 2226}
2796 2227
@@ -2828,6 +2259,34 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2828 return 0; 2259 return 0;
2829} 2260}
2830 2261
2262static void set_tracer_flags(unsigned int mask, int enabled)
2263{
2264 /* do nothing if flag is already set */
2265 if (!!(trace_flags & mask) == !!enabled)
2266 return;
2267
2268 if (enabled)
2269 trace_flags |= mask;
2270 else
2271 trace_flags &= ~mask;
2272
2273 if (mask == TRACE_ITER_GLOBAL_CLK) {
2274 u64 (*func)(void);
2275
2276 if (enabled)
2277 func = trace_clock_global;
2278 else
2279 func = trace_clock_local;
2280
2281 mutex_lock(&trace_types_lock);
2282 ring_buffer_set_clock(global_trace.buffer, func);
2283
2284 if (max_tr.buffer)
2285 ring_buffer_set_clock(max_tr.buffer, func);
2286 mutex_unlock(&trace_types_lock);
2287 }
2288}
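
set_tracer_flags() normalizes both sides with !! so that a multi-bit mask result and any non-zero 'enabled' value compare as plain booleans, and it only touches the ring-buffer clock when the global-clock bit actually changes. The normalization idiom in isolation, with an arbitrary demo bit:

#include <stdio.h>

#define FLAG_GLOBAL_CLK	0x40		/* arbitrary demo bit, not the kernel value */

static unsigned int flags;

static void set_flag(unsigned int mask, int enabled)
{
	/* !! collapses "some bits set" and "enabled != 0" to 0 or 1. */
	if (!!(flags & mask) == !!enabled)
		return;			/* no change, so no clock switch either */

	if (enabled)
		flags |= mask;
	else
		flags &= ~mask;
	printf("flag 0x%x -> %d\n", mask, !!enabled);
}

int main(void)
{
	set_flag(FLAG_GLOBAL_CLK, 5);	/* any non-zero value enables */
	set_flag(FLAG_GLOBAL_CLK, 1);	/* already set: early return */
	set_flag(FLAG_GLOBAL_CLK, 0);	/* cleared */
	return 0;
}
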
2289
2831static ssize_t 2290static ssize_t
2832tracing_trace_options_write(struct file *filp, const char __user *ubuf, 2291tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2833 size_t cnt, loff_t *ppos) 2292 size_t cnt, loff_t *ppos)
@@ -2855,17 +2314,16 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2855 int len = strlen(trace_options[i]); 2314 int len = strlen(trace_options[i]);
2856 2315
2857 if (strncmp(cmp, trace_options[i], len) == 0) { 2316 if (strncmp(cmp, trace_options[i], len) == 0) {
2858 if (neg) 2317 set_tracer_flags(1 << i, !neg);
2859 trace_flags &= ~(1 << i);
2860 else
2861 trace_flags |= (1 << i);
2862 break; 2318 break;
2863 } 2319 }
2864 } 2320 }
2865 2321
2866 /* If no option could be set, test the specific tracer options */ 2322 /* If no option could be set, test the specific tracer options */
2867 if (!trace_options[i]) { 2323 if (!trace_options[i]) {
2324 mutex_lock(&trace_types_lock);
2868 ret = set_tracer_option(current_trace, cmp, neg); 2325 ret = set_tracer_option(current_trace, cmp, neg);
2326 mutex_unlock(&trace_types_lock);
2869 if (ret) 2327 if (ret)
2870 return ret; 2328 return ret;
2871 } 2329 }
@@ -2875,7 +2333,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2875 return cnt; 2333 return cnt;
2876} 2334}
2877 2335
2878static struct file_operations tracing_iter_fops = { 2336static const struct file_operations tracing_iter_fops = {
2879 .open = tracing_open_generic, 2337 .open = tracing_open_generic,
2880 .read = tracing_trace_options_read, 2338 .read = tracing_trace_options_read,
2881 .write = tracing_trace_options_write, 2339 .write = tracing_trace_options_write,
@@ -2908,7 +2366,7 @@ tracing_readme_read(struct file *filp, char __user *ubuf,
2908 readme_msg, strlen(readme_msg)); 2366 readme_msg, strlen(readme_msg));
2909} 2367}
2910 2368
2911static struct file_operations tracing_readme_fops = { 2369static const struct file_operations tracing_readme_fops = {
2912 .open = tracing_open_generic, 2370 .open = tracing_open_generic,
2913 .read = tracing_readme_read, 2371 .read = tracing_readme_read,
2914}; 2372};
@@ -2930,7 +2388,7 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2930{ 2388{
2931 struct trace_array *tr = filp->private_data; 2389 struct trace_array *tr = filp->private_data;
2932 char buf[64]; 2390 char buf[64];
2933 long val; 2391 unsigned long val;
2934 int ret; 2392 int ret;
2935 2393
2936 if (cnt >= sizeof(buf)) 2394 if (cnt >= sizeof(buf))
@@ -2985,13 +2443,105 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
2985 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2443 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2986} 2444}
2987 2445
2988static int tracing_set_tracer(char *buf) 2446int tracer_init(struct tracer *t, struct trace_array *tr)
2447{
2448 tracing_reset_online_cpus(tr);
2449 return t->init(tr);
2450}
2451
2452static int tracing_resize_ring_buffer(unsigned long size)
2453{
2454 int ret;
2455
2456 /*
2457 * If kernel or user changes the size of the ring buffer
2458 * we use the size that was given, and we can forget about
2459 * expanding it later.
2460 */
2461 ring_buffer_expanded = 1;
2462
2463 ret = ring_buffer_resize(global_trace.buffer, size);
2464 if (ret < 0)
2465 return ret;
2466
2467 ret = ring_buffer_resize(max_tr.buffer, size);
2468 if (ret < 0) {
2469 int r;
2470
2471 r = ring_buffer_resize(global_trace.buffer,
2472 global_trace.entries);
2473 if (r < 0) {
2474 /*
2475 * AARGH! We are left with different
2476 * size max buffer!!!!
2477 * The max buffer is our "snapshot" buffer.
2478 * When a tracer needs a snapshot (one of the
2479 * latency tracers), it swaps the max buffer
2480 * with the saved snap shot. We succeeded to
2481 * update the size of the main buffer, but failed to
2482 * update the size of the max buffer. But when we tried
2483 * to reset the main buffer to the original size, we
2484 * failed there too. This is very unlikely to
2485 * happen, but if it does, warn and kill all
2486 * tracing.
2487 */
2488 WARN_ON(1);
2489 tracing_disabled = 1;
2490 }
2491 return ret;
2492 }
2493
2494 global_trace.entries = size;
2495
2496 return ret;
2497}
2498
2499/**
2500 * tracing_update_buffers - used by tracing facility to expand ring buffers
2501 *
2502 * To save on memory when the tracing is never used on a system with it
2503 * configured in. The ring buffers are set to a minimum size. But once
2504 * a user starts to use the tracing facility, then they need to grow
2505 * to their default size.
2506 *
2507 * This function is to be called when a tracer is about to be used.
2508 */
2509int tracing_update_buffers(void)
2510{
2511 int ret = 0;
2512
2513 mutex_lock(&trace_types_lock);
2514 if (!ring_buffer_expanded)
2515 ret = tracing_resize_ring_buffer(trace_buf_size);
2516 mutex_unlock(&trace_types_lock);
2517
2518 return ret;
2519}
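
tracing_update_buffers() and tracing_resize_ring_buffer() implement the deferred allocation: the ring buffer stays at a token size until a tracer is actually used, then grows to trace_buf_size (or to whatever the user wrote). The buffer_size_kb file shows the state as "current (expanded: target)" until that first expansion. A small reader/writer, assuming the conventional debugfs path:

#include <stdio.h>

#define BUF_SIZE_KB "/sys/kernel/debug/tracing/buffer_size_kb"

static void show(void)
{
	char line[96];
	FILE *f = fopen(BUF_SIZE_KB, "r");

	if (f && fgets(line, sizeof(line), f))
		/* reads back "<current> (expanded: <target>)" before first use */
		printf("buffer_size_kb: %s", line);
	if (f)
		fclose(f);
}

int main(void)
{
	FILE *f;

	show();
	f = fopen(BUF_SIZE_KB, "w");
	if (!f) {
		perror(BUF_SIZE_KB);
		return 1;
	}
	fprintf(f, "2048\n");	/* resizes both the main and the max buffer */
	fclose(f);
	show();
	return 0;
}
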
2520
2521struct trace_option_dentry;
2522
2523static struct trace_option_dentry *
2524create_trace_option_files(struct tracer *tracer);
2525
2526static void
2527destroy_trace_option_files(struct trace_option_dentry *topts);
2528
2529static int tracing_set_tracer(const char *buf)
2989{ 2530{
2531 static struct trace_option_dentry *topts;
2990 struct trace_array *tr = &global_trace; 2532 struct trace_array *tr = &global_trace;
2991 struct tracer *t; 2533 struct tracer *t;
2992 int ret = 0; 2534 int ret = 0;
2993 2535
2994 mutex_lock(&trace_types_lock); 2536 mutex_lock(&trace_types_lock);
2537
2538 if (!ring_buffer_expanded) {
2539 ret = tracing_resize_ring_buffer(trace_buf_size);
2540 if (ret < 0)
2541 goto out;
2542 ret = 0;
2543 }
2544
2995 for (t = trace_types; t; t = t->next) { 2545 for (t = trace_types; t; t = t->next) {
2996 if (strcmp(t->name, buf) == 0) 2546 if (strcmp(t->name, buf) == 0)
2997 break; 2547 break;
@@ -3007,9 +2557,14 @@ static int tracing_set_tracer(char *buf)
3007 if (current_trace && current_trace->reset) 2557 if (current_trace && current_trace->reset)
3008 current_trace->reset(tr); 2558 current_trace->reset(tr);
3009 2559
2560 destroy_trace_option_files(topts);
2561
3010 current_trace = t; 2562 current_trace = t;
2563
2564 topts = create_trace_option_files(current_trace);
2565
3011 if (t->init) { 2566 if (t->init) {
3012 ret = t->init(tr); 2567 ret = tracer_init(t, tr);
3013 if (ret) 2568 if (ret)
3014 goto out; 2569 goto out;
3015 } 2570 }
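
tracing_set_tracer() now expands the ring buffer on demand and rebuilds the per-tracer option files around the switch to the new tracer. From user space the switch is still just a write to current_tracer; "nop" is always registered, so the following sketch (conventional debugfs path assumed) is safe to run:

#include <stdio.h>

#define CURRENT_TRACER "/sys/kernel/debug/tracing/current_tracer"

int main(void)
{
	FILE *f = fopen(CURRENT_TRACER, "w");

	if (!f) {
		perror(CURRENT_TRACER);
		return 1;
	}
	/* The first tracer selected after boot also triggers the buffer expansion. */
	fprintf(f, "nop\n");
	return fclose(f) ? 1 : 0;
}
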
@@ -3072,9 +2627,9 @@ static ssize_t
3072tracing_max_lat_write(struct file *filp, const char __user *ubuf, 2627tracing_max_lat_write(struct file *filp, const char __user *ubuf,
3073 size_t cnt, loff_t *ppos) 2628 size_t cnt, loff_t *ppos)
3074{ 2629{
3075 long *ptr = filp->private_data; 2630 unsigned long *ptr = filp->private_data;
3076 char buf[64]; 2631 char buf[64];
3077 long val; 2632 unsigned long val;
3078 int ret; 2633 int ret;
3079 2634
3080 if (cnt >= sizeof(buf)) 2635 if (cnt >= sizeof(buf))
@@ -3094,54 +2649,96 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
3094 return cnt; 2649 return cnt;
3095} 2650}
3096 2651
3097static atomic_t tracing_reader;
3098
3099static int tracing_open_pipe(struct inode *inode, struct file *filp) 2652static int tracing_open_pipe(struct inode *inode, struct file *filp)
3100{ 2653{
2654 long cpu_file = (long) inode->i_private;
3101 struct trace_iterator *iter; 2655 struct trace_iterator *iter;
2656 int ret = 0;
3102 2657
3103 if (tracing_disabled) 2658 if (tracing_disabled)
3104 return -ENODEV; 2659 return -ENODEV;
3105 2660
3106 /* We only allow for reader of the pipe */ 2661 mutex_lock(&trace_types_lock);
3107 if (atomic_inc_return(&tracing_reader) != 1) { 2662
3108 atomic_dec(&tracing_reader); 2663 /* We only allow one reader per cpu */
3109 return -EBUSY; 2664 if (cpu_file == TRACE_PIPE_ALL_CPU) {
2665 if (!cpumask_empty(tracing_reader_cpumask)) {
2666 ret = -EBUSY;
2667 goto out;
2668 }
2669 cpumask_setall(tracing_reader_cpumask);
2670 } else {
2671 if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2672 cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2673 else {
2674 ret = -EBUSY;
2675 goto out;
2676 }
3110 } 2677 }
3111 2678
3112 /* create a buffer to store the information to pass to userspace */ 2679 /* create a buffer to store the information to pass to userspace */
3113 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 2680 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
3114 if (!iter) 2681 if (!iter) {
3115 return -ENOMEM; 2682 ret = -ENOMEM;
2683 goto out;
2684 }
3116 2685
3117 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { 2686 /*
3118 kfree(iter); 2687 * We make a copy of the current tracer to avoid concurrent
3119 return -ENOMEM; 2688 * changes on it while we are reading.
2689 */
2690 iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
2691 if (!iter->trace) {
2692 ret = -ENOMEM;
2693 goto fail;
3120 } 2694 }
2695 if (current_trace)
2696 *iter->trace = *current_trace;
3121 2697
3122 mutex_lock(&trace_types_lock); 2698 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
2699 ret = -ENOMEM;
2700 goto fail;
2701 }
3123 2702
3124 /* trace pipe does not show start of buffer */ 2703 /* trace pipe does not show start of buffer */
3125 cpumask_setall(iter->started); 2704 cpumask_setall(iter->started);
3126 2705
2706 iter->cpu_file = cpu_file;
3127 iter->tr = &global_trace; 2707 iter->tr = &global_trace;
3128 iter->trace = current_trace; 2708 mutex_init(&iter->mutex);
3129 filp->private_data = iter; 2709 filp->private_data = iter;
3130 2710
3131 if (iter->trace->pipe_open) 2711 if (iter->trace->pipe_open)
3132 iter->trace->pipe_open(iter); 2712 iter->trace->pipe_open(iter);
2713
2714out:
3133 mutex_unlock(&trace_types_lock); 2715 mutex_unlock(&trace_types_lock);
2716 return ret;
3134 2717
3135 return 0; 2718fail:
2719 kfree(iter->trace);
2720 kfree(iter);
2721 mutex_unlock(&trace_types_lock);
2722 return ret;
3136} 2723}
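
tracing_open_pipe() now takes the target CPU from inode->i_private and books it in tracing_reader_cpumask, so each per_cpu/cpuN/trace_pipe file gets its own single consumer instead of the old one-global-reader rule. A minimal consumer for one CPU's pipe, assuming debugfs at /sys/kernel/debug (the per_cpu files are created later in this patch by tracing_init_debugfs_percpu()):

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe";
	char buf[4096];
	ssize_t n;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror(path);
		return 1;
	}
	/* Consuming read: blocks until CPU 0 produces trace data. */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
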
3137 2724
3138static int tracing_release_pipe(struct inode *inode, struct file *file) 2725static int tracing_release_pipe(struct inode *inode, struct file *file)
3139{ 2726{
3140 struct trace_iterator *iter = file->private_data; 2727 struct trace_iterator *iter = file->private_data;
3141 2728
2729 mutex_lock(&trace_types_lock);
2730
2731 if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
2732 cpumask_clear(tracing_reader_cpumask);
2733 else
2734 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2735
2736 mutex_unlock(&trace_types_lock);
2737
3142 free_cpumask_var(iter->started); 2738 free_cpumask_var(iter->started);
2739 mutex_destroy(&iter->mutex);
2740 kfree(iter->trace);
3143 kfree(iter); 2741 kfree(iter);
3144 atomic_dec(&tracing_reader);
3145 2742
3146 return 0; 2743 return 0;
3147} 2744}
@@ -3167,67 +2764,57 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
3167 } 2764 }
3168} 2765}
3169 2766
3170/* 2767
3171 * Consumer reader. 2768void default_wait_pipe(struct trace_iterator *iter)
3172 */
3173static ssize_t
3174tracing_read_pipe(struct file *filp, char __user *ubuf,
3175 size_t cnt, loff_t *ppos)
3176{ 2769{
3177 struct trace_iterator *iter = filp->private_data; 2770 DEFINE_WAIT(wait);
3178 ssize_t sret;
3179 2771
3180 /* return any leftover data */ 2772 prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
3181 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3182 if (sret != -EBUSY)
3183 return sret;
3184 2773
3185 trace_seq_reset(&iter->seq); 2774 if (trace_empty(iter))
2775 schedule();
3186 2776
3187 mutex_lock(&trace_types_lock); 2777 finish_wait(&trace_wait, &wait);
3188 if (iter->trace->read) { 2778}
3189 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); 2779
3190 if (sret) 2780/*
3191 goto out; 2781 * This is a make-shift waitqueue.
3192 } 2782 * A tracer might use this callback on some rare cases:
2783 *
2784 * 1) the current tracer might hold the runqueue lock when it wakes up
2785 * a reader, hence a deadlock (sched, function, and function graph tracers)
2786 * 2) the function tracers, trace all functions, we don't want
2787 * the overhead of calling wake_up and friends
2788 * (and tracing them too)
2789 *
2790 * Anyway, this is really very primitive wakeup.
2791 */
2792void poll_wait_pipe(struct trace_iterator *iter)
2793{
2794 set_current_state(TASK_INTERRUPTIBLE);
2795 /* sleep for 100 msecs, and try again. */
2796 schedule_timeout(HZ / 10);
2797}
2798
2799/* Must be called with trace_types_lock mutex held. */
2800static int tracing_wait_pipe(struct file *filp)
2801{
2802 struct trace_iterator *iter = filp->private_data;
3193 2803
3194waitagain:
3195 sret = 0;
3196 while (trace_empty(iter)) { 2804 while (trace_empty(iter)) {
3197 2805
3198 if ((filp->f_flags & O_NONBLOCK)) { 2806 if ((filp->f_flags & O_NONBLOCK)) {
3199 sret = -EAGAIN; 2807 return -EAGAIN;
3200 goto out;
3201 } 2808 }
3202 2809
3203 /* 2810 mutex_unlock(&iter->mutex);
3204 * This is a make-shift waitqueue. The reason we don't use
3205 * an actual wait queue is because:
3206 * 1) we only ever have one waiter
3207 * 2) the tracing, traces all functions, we don't want
3208 * the overhead of calling wake_up and friends
3209 * (and tracing them too)
3210 * Anyway, this is really very primitive wakeup.
3211 */
3212 set_current_state(TASK_INTERRUPTIBLE);
3213 iter->tr->waiter = current;
3214 2811
3215 mutex_unlock(&trace_types_lock); 2812 iter->trace->wait_pipe(iter);
3216 2813
3217 /* sleep for 100 msecs, and try again. */ 2814 mutex_lock(&iter->mutex);
3218 schedule_timeout(HZ/10);
3219 2815
3220 mutex_lock(&trace_types_lock); 2816 if (signal_pending(current))
3221 2817 return -EINTR;
3222 iter->tr->waiter = NULL;
3223
3224 if (signal_pending(current)) {
3225 sret = -EINTR;
3226 goto out;
3227 }
3228
3229 if (iter->trace != current_trace)
3230 goto out;
3231 2818
3232 /* 2819 /*
3233 * We block until we read something and tracing is disabled. 2820 * We block until we read something and tracing is disabled.
@@ -3240,13 +2827,59 @@ waitagain:
3240 */ 2827 */
3241 if (!tracer_enabled && iter->pos) 2828 if (!tracer_enabled && iter->pos)
3242 break; 2829 break;
2830 }
2831
2832 return 1;
2833}
2834
2835/*
2836 * Consumer reader.
2837 */
2838static ssize_t
2839tracing_read_pipe(struct file *filp, char __user *ubuf,
2840 size_t cnt, loff_t *ppos)
2841{
2842 struct trace_iterator *iter = filp->private_data;
2843 static struct tracer *old_tracer;
2844 ssize_t sret;
2845
2846 /* return any leftover data */
2847 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2848 if (sret != -EBUSY)
2849 return sret;
2850
2851 trace_seq_init(&iter->seq);
2852
2853 /* copy the tracer to avoid using a global lock all around */
2854 mutex_lock(&trace_types_lock);
2855 if (unlikely(old_tracer != current_trace && current_trace)) {
2856 old_tracer = current_trace;
2857 *iter->trace = *current_trace;
2858 }
2859 mutex_unlock(&trace_types_lock);
3243 2860
3244 continue; 2861 /*
2862 * Avoid more than one consumer on a single file descriptor
2863 * This is just a matter of traces coherency, the ring buffer itself
2864 * is protected.
2865 */
2866 mutex_lock(&iter->mutex);
2867 if (iter->trace->read) {
2868 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2869 if (sret)
2870 goto out;
3245 } 2871 }
3246 2872
2873waitagain:
2874 sret = tracing_wait_pipe(filp);
2875 if (sret <= 0)
2876 goto out;
2877
3247 /* stop when tracing is finished */ 2878 /* stop when tracing is finished */
3248 if (trace_empty(iter)) 2879 if (trace_empty(iter)) {
2880 sret = 0;
3249 goto out; 2881 goto out;
2882 }
3250 2883
3251 if (cnt >= PAGE_SIZE) 2884 if (cnt >= PAGE_SIZE)
3252 cnt = PAGE_SIZE - 1; 2885 cnt = PAGE_SIZE - 1;
@@ -3267,8 +2900,8 @@ waitagain:
3267 iter->seq.len = len; 2900 iter->seq.len = len;
3268 break; 2901 break;
3269 } 2902 }
3270 2903 if (ret != TRACE_TYPE_NO_CONSUME)
3271 trace_consume(iter); 2904 trace_consume(iter);
3272 2905
3273 if (iter->seq.len >= cnt) 2906 if (iter->seq.len >= cnt)
3274 break; 2907 break;
@@ -3277,7 +2910,7 @@ waitagain:
3277 /* Now copy what we have to the user */ 2910 /* Now copy what we have to the user */
3278 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 2911 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3279 if (iter->seq.readpos >= iter->seq.len) 2912 if (iter->seq.readpos >= iter->seq.len)
3280 trace_seq_reset(&iter->seq); 2913 trace_seq_init(&iter->seq);
3281 2914
3282 /* 2915 /*
3283 * If there was nothing to send to user, inspite of consuming trace 2916 * If there was nothing to send to user, inspite of consuming trace
@@ -3287,20 +2920,165 @@ waitagain:
3287 goto waitagain; 2920 goto waitagain;
3288 2921
3289out: 2922out:
3290 mutex_unlock(&trace_types_lock); 2923 mutex_unlock(&iter->mutex);
3291 2924
3292 return sret; 2925 return sret;
3293} 2926}
3294 2927
2928static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
2929 struct pipe_buffer *buf)
2930{
2931 __free_page(buf->page);
2932}
2933
2934static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
2935 unsigned int idx)
2936{
2937 __free_page(spd->pages[idx]);
2938}
2939
2940static struct pipe_buf_operations tracing_pipe_buf_ops = {
2941 .can_merge = 0,
2942 .map = generic_pipe_buf_map,
2943 .unmap = generic_pipe_buf_unmap,
2944 .confirm = generic_pipe_buf_confirm,
2945 .release = tracing_pipe_buf_release,
2946 .steal = generic_pipe_buf_steal,
2947 .get = generic_pipe_buf_get,
2948};
2949
2950static size_t
2951tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
2952{
2953 size_t count;
2954 int ret;
2955
2956 /* Seq buffer is page-sized, exactly what we need. */
2957 for (;;) {
2958 count = iter->seq.len;
2959 ret = print_trace_line(iter);
2960 count = iter->seq.len - count;
2961 if (rem < count) {
2962 rem = 0;
2963 iter->seq.len -= count;
2964 break;
2965 }
2966 if (ret == TRACE_TYPE_PARTIAL_LINE) {
2967 iter->seq.len -= count;
2968 break;
2969 }
2970
2971 trace_consume(iter);
2972 rem -= count;
2973 if (!find_next_entry_inc(iter)) {
2974 rem = 0;
2975 iter->ent = NULL;
2976 break;
2977 }
2978 }
2979
2980 return rem;
2981}
2982
2983static ssize_t tracing_splice_read_pipe(struct file *filp,
2984 loff_t *ppos,
2985 struct pipe_inode_info *pipe,
2986 size_t len,
2987 unsigned int flags)
2988{
2989 struct page *pages[PIPE_BUFFERS];
2990 struct partial_page partial[PIPE_BUFFERS];
2991 struct trace_iterator *iter = filp->private_data;
2992 struct splice_pipe_desc spd = {
2993 .pages = pages,
2994 .partial = partial,
2995 .nr_pages = 0, /* This gets updated below. */
2996 .flags = flags,
2997 .ops = &tracing_pipe_buf_ops,
2998 .spd_release = tracing_spd_release_pipe,
2999 };
3000 static struct tracer *old_tracer;
3001 ssize_t ret;
3002 size_t rem;
3003 unsigned int i;
3004
3005 /* copy the tracer to avoid using a global lock all around */
3006 mutex_lock(&trace_types_lock);
3007 if (unlikely(old_tracer != current_trace && current_trace)) {
3008 old_tracer = current_trace;
3009 *iter->trace = *current_trace;
3010 }
3011 mutex_unlock(&trace_types_lock);
3012
3013 mutex_lock(&iter->mutex);
3014
3015 if (iter->trace->splice_read) {
3016 ret = iter->trace->splice_read(iter, filp,
3017 ppos, pipe, len, flags);
3018 if (ret)
3019 goto out_err;
3020 }
3021
3022 ret = tracing_wait_pipe(filp);
3023 if (ret <= 0)
3024 goto out_err;
3025
3026 if (!iter->ent && !find_next_entry_inc(iter)) {
3027 ret = -EFAULT;
3028 goto out_err;
3029 }
3030
3031 /* Fill as many pages as possible. */
3032 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
3033 pages[i] = alloc_page(GFP_KERNEL);
3034 if (!pages[i])
3035 break;
3036
3037 rem = tracing_fill_pipe_page(rem, iter);
3038
3039 /* Copy the data into the page, so we can start over. */
3040 ret = trace_seq_to_buffer(&iter->seq,
3041 page_address(pages[i]),
3042 iter->seq.len);
3043 if (ret < 0) {
3044 __free_page(pages[i]);
3045 break;
3046 }
3047 partial[i].offset = 0;
3048 partial[i].len = iter->seq.len;
3049
3050 trace_seq_init(&iter->seq);
3051 }
3052
3053 mutex_unlock(&iter->mutex);
3054
3055 spd.nr_pages = i;
3056
3057 return splice_to_pipe(pipe, &spd);
3058
3059out_err:
3060 mutex_unlock(&iter->mutex);
3061
3062 return ret;
3063}
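
tracing_splice_read_pipe() renders trace lines into freshly allocated pages and hands them to splice_to_pipe(), so a reader can move formatted trace data without an intermediate copy through its own buffer. The user-space counterpart is splice(2); since one end of a splice must be a pipe, the sketch below pulls from trace_pipe into a pipe and drains that into a log file (conventional debugfs path assumed):

#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	const char *src = "/sys/kernel/debug/tracing/trace_pipe";
	int in, out, p[2];
	ssize_t n;

	in = open(src, O_RDONLY);
	out = open("trace.log", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (in < 0 || out < 0 || pipe(p) < 0) {
		perror("setup");
		return 1;
	}
	for (;;) {
		/* trace_pipe -> pipe: served by tracing_splice_read_pipe() */
		n = splice(in, NULL, p[1], NULL, 65536, SPLICE_F_MOVE);
		if (n <= 0)
			break;
		/* pipe -> file: drain exactly what was just pulled in */
		if (splice(p[0], NULL, out, NULL, n, SPLICE_F_MOVE) < 0)
			break;
	}
	return 0;
}
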
3064
3295static ssize_t 3065static ssize_t
3296tracing_entries_read(struct file *filp, char __user *ubuf, 3066tracing_entries_read(struct file *filp, char __user *ubuf,
3297 size_t cnt, loff_t *ppos) 3067 size_t cnt, loff_t *ppos)
3298{ 3068{
3299 struct trace_array *tr = filp->private_data; 3069 struct trace_array *tr = filp->private_data;
3300 char buf[64]; 3070 char buf[96];
3301 int r; 3071 int r;
3302 3072
3303 r = sprintf(buf, "%lu\n", tr->entries >> 10); 3073 mutex_lock(&trace_types_lock);
3074 if (!ring_buffer_expanded)
3075 r = sprintf(buf, "%lu (expanded: %lu)\n",
3076 tr->entries >> 10,
3077 trace_buf_size >> 10);
3078 else
3079 r = sprintf(buf, "%lu\n", tr->entries >> 10);
3080 mutex_unlock(&trace_types_lock);
3081
3304 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3082 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3305} 3083}
3306 3084
@@ -3344,28 +3122,11 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3344 val <<= 10; 3122 val <<= 10;
3345 3123
3346 if (val != global_trace.entries) { 3124 if (val != global_trace.entries) {
3347 ret = ring_buffer_resize(global_trace.buffer, val); 3125 ret = tracing_resize_ring_buffer(val);
3348 if (ret < 0) {
3349 cnt = ret;
3350 goto out;
3351 }
3352
3353 ret = ring_buffer_resize(max_tr.buffer, val);
3354 if (ret < 0) { 3126 if (ret < 0) {
3355 int r;
3356 cnt = ret; 3127 cnt = ret;
3357 r = ring_buffer_resize(global_trace.buffer,
3358 global_trace.entries);
3359 if (r < 0) {
3360 /* AARGH! We are left with different
3361 * size max buffer!!!! */
3362 WARN_ON(1);
3363 tracing_disabled = 1;
3364 }
3365 goto out; 3128 goto out;
3366 } 3129 }
3367
3368 global_trace.entries = val;
3369 } 3130 }
3370 3131
3371 filp->f_pos += cnt; 3132 filp->f_pos += cnt;
@@ -3433,42 +3194,288 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3433 return cnt; 3194 return cnt;
3434} 3195}
3435 3196
3436static struct file_operations tracing_max_lat_fops = { 3197static const struct file_operations tracing_max_lat_fops = {
3437 .open = tracing_open_generic, 3198 .open = tracing_open_generic,
3438 .read = tracing_max_lat_read, 3199 .read = tracing_max_lat_read,
3439 .write = tracing_max_lat_write, 3200 .write = tracing_max_lat_write,
3440}; 3201};
3441 3202
3442static struct file_operations tracing_ctrl_fops = { 3203static const struct file_operations tracing_ctrl_fops = {
3443 .open = tracing_open_generic, 3204 .open = tracing_open_generic,
3444 .read = tracing_ctrl_read, 3205 .read = tracing_ctrl_read,
3445 .write = tracing_ctrl_write, 3206 .write = tracing_ctrl_write,
3446}; 3207};
3447 3208
3448static struct file_operations set_tracer_fops = { 3209static const struct file_operations set_tracer_fops = {
3449 .open = tracing_open_generic, 3210 .open = tracing_open_generic,
3450 .read = tracing_set_trace_read, 3211 .read = tracing_set_trace_read,
3451 .write = tracing_set_trace_write, 3212 .write = tracing_set_trace_write,
3452}; 3213};
3453 3214
3454static struct file_operations tracing_pipe_fops = { 3215static const struct file_operations tracing_pipe_fops = {
3455 .open = tracing_open_pipe, 3216 .open = tracing_open_pipe,
3456 .poll = tracing_poll_pipe, 3217 .poll = tracing_poll_pipe,
3457 .read = tracing_read_pipe, 3218 .read = tracing_read_pipe,
3219 .splice_read = tracing_splice_read_pipe,
3458 .release = tracing_release_pipe, 3220 .release = tracing_release_pipe,
3459}; 3221};
3460 3222
3461static struct file_operations tracing_entries_fops = { 3223static const struct file_operations tracing_entries_fops = {
3462 .open = tracing_open_generic, 3224 .open = tracing_open_generic,
3463 .read = tracing_entries_read, 3225 .read = tracing_entries_read,
3464 .write = tracing_entries_write, 3226 .write = tracing_entries_write,
3465}; 3227};
3466 3228
3467static struct file_operations tracing_mark_fops = { 3229static const struct file_operations tracing_mark_fops = {
3468 .open = tracing_open_generic, 3230 .open = tracing_open_generic,
3469 .write = tracing_mark_write, 3231 .write = tracing_mark_write,
3470}; 3232};
3471 3233
3234struct ftrace_buffer_info {
3235 struct trace_array *tr;
3236 void *spare;
3237 int cpu;
3238 unsigned int read;
3239};
3240
3241static int tracing_buffers_open(struct inode *inode, struct file *filp)
3242{
3243 int cpu = (int)(long)inode->i_private;
3244 struct ftrace_buffer_info *info;
3245
3246 if (tracing_disabled)
3247 return -ENODEV;
3248
3249 info = kzalloc(sizeof(*info), GFP_KERNEL);
3250 if (!info)
3251 return -ENOMEM;
3252
3253 info->tr = &global_trace;
3254 info->cpu = cpu;
3255 info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
3256 /* Force reading ring buffer for first read */
3257 info->read = (unsigned int)-1;
3258 if (!info->spare)
3259 goto out;
3260
3261 filp->private_data = info;
3262
3263 return 0;
3264
3265 out:
3266 kfree(info);
3267 return -ENOMEM;
3268}
3269
3270static ssize_t
3271tracing_buffers_read(struct file *filp, char __user *ubuf,
3272 size_t count, loff_t *ppos)
3273{
3274 struct ftrace_buffer_info *info = filp->private_data;
3275 unsigned int pos;
3276 ssize_t ret;
3277 size_t size;
3278
3279 if (!count)
3280 return 0;
3281
3282 /* Do we have previous read data to read? */
3283 if (info->read < PAGE_SIZE)
3284 goto read;
3285
3286 info->read = 0;
3287
3288 ret = ring_buffer_read_page(info->tr->buffer,
3289 &info->spare,
3290 count,
3291 info->cpu, 0);
3292 if (ret < 0)
3293 return 0;
3294
3295 pos = ring_buffer_page_len(info->spare);
3296
3297 if (pos < PAGE_SIZE)
3298 memset(info->spare + pos, 0, PAGE_SIZE - pos);
3299
3300read:
3301 size = PAGE_SIZE - info->read;
3302 if (size > count)
3303 size = count;
3304
3305 ret = copy_to_user(ubuf, info->spare + info->read, size);
3306 if (ret == size)
3307 return -EFAULT;
3308 size -= ret;
3309
3310 *ppos += size;
3311 info->read += size;
3312
3313 return size;
3314}
3315
3316static int tracing_buffers_release(struct inode *inode, struct file *file)
3317{
3318 struct ftrace_buffer_info *info = file->private_data;
3319
3320 ring_buffer_free_read_page(info->tr->buffer, info->spare);
3321 kfree(info);
3322
3323 return 0;
3324}
3325
3326struct buffer_ref {
3327 struct ring_buffer *buffer;
3328 void *page;
3329 int ref;
3330};
3331
3332static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
3333 struct pipe_buffer *buf)
3334{
3335 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3336
3337 if (--ref->ref)
3338 return;
3339
3340 ring_buffer_free_read_page(ref->buffer, ref->page);
3341 kfree(ref);
3342 buf->private = 0;
3343}
3344
3345static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe,
3346 struct pipe_buffer *buf)
3347{
3348 return 1;
3349}
3350
3351static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3352 struct pipe_buffer *buf)
3353{
3354 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3355
3356 ref->ref++;
3357}
3358
3359/* Pipe buffer operations for a buffer. */
3360static struct pipe_buf_operations buffer_pipe_buf_ops = {
3361 .can_merge = 0,
3362 .map = generic_pipe_buf_map,
3363 .unmap = generic_pipe_buf_unmap,
3364 .confirm = generic_pipe_buf_confirm,
3365 .release = buffer_pipe_buf_release,
3366 .steal = buffer_pipe_buf_steal,
3367 .get = buffer_pipe_buf_get,
3368};
3369
3370/*
3371 * Callback from splice_to_pipe(), if we need to release some pages
3372 * at the end of the spd in case we error'ed out in filling the pipe.
3373 */
3374static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
3375{
3376 struct buffer_ref *ref =
3377 (struct buffer_ref *)spd->partial[i].private;
3378
3379 if (--ref->ref)
3380 return;
3381
3382 ring_buffer_free_read_page(ref->buffer, ref->page);
3383 kfree(ref);
3384 spd->partial[i].private = 0;
3385}
3386
3387static ssize_t
3388tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3389 struct pipe_inode_info *pipe, size_t len,
3390 unsigned int flags)
3391{
3392 struct ftrace_buffer_info *info = file->private_data;
3393 struct partial_page partial[PIPE_BUFFERS];
3394 struct page *pages[PIPE_BUFFERS];
3395 struct splice_pipe_desc spd = {
3396 .pages = pages,
3397 .partial = partial,
3398 .flags = flags,
3399 .ops = &buffer_pipe_buf_ops,
3400 .spd_release = buffer_spd_release,
3401 };
3402 struct buffer_ref *ref;
3403 int size, i;
3404 size_t ret;
3405
3406 /*
3407 * We can't seek on a buffer input
3408 */
3409 if (unlikely(*ppos))
3410 return -ESPIPE;
3411
3412
3413 for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) {
3414 struct page *page;
3415 int r;
3416
3417 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
3418 if (!ref)
3419 break;
3420
3421 ref->buffer = info->tr->buffer;
3422 ref->page = ring_buffer_alloc_read_page(ref->buffer);
3423 if (!ref->page) {
3424 kfree(ref);
3425 break;
3426 }
3427
3428 r = ring_buffer_read_page(ref->buffer, &ref->page,
3429 len, info->cpu, 0);
3430 if (r < 0) {
3431 ring_buffer_free_read_page(ref->buffer,
3432 ref->page);
3433 kfree(ref);
3434 break;
3435 }
3436
3437 /*
3438 * zero out any left over data, this is going to
3439 * user land.
3440 */
3441 size = ring_buffer_page_len(ref->page);
3442 if (size < PAGE_SIZE)
3443 memset(ref->page + size, 0, PAGE_SIZE - size);
3444
3445 page = virt_to_page(ref->page);
3446
3447 spd.pages[i] = page;
3448 spd.partial[i].len = PAGE_SIZE;
3449 spd.partial[i].offset = 0;
3450 spd.partial[i].private = (unsigned long)ref;
3451 spd.nr_pages++;
3452 }
3453
3454 spd.nr_pages = i;
3455
3456 /* did we read anything? */
3457 if (!spd.nr_pages) {
3458 if (flags & SPLICE_F_NONBLOCK)
3459 ret = -EAGAIN;
3460 else
3461 ret = 0;
3462 /* TODO: block */
3463 return ret;
3464 }
3465
3466 ret = splice_to_pipe(pipe, &spd);
3467
3468 return ret;
3469}
3470
3471static const struct file_operations tracing_buffers_fops = {
3472 .open = tracing_buffers_open,
3473 .read = tracing_buffers_read,
3474 .release = tracing_buffers_release,
3475 .splice_read = tracing_buffers_splice_read,
3476 .llseek = no_llseek,
3477};
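
The trace_pipe_raw path above (tracing_buffers_read() for plain reads, tracing_buffers_splice_read() for zero-copy) hands out whole ring-buffer pages, zero-padded past the last record, rather than formatted text. A reader sketch for one CPU's raw stream; the binary page layout is ring-buffer specific and is not decoded here (the debugfs path and the 4096-byte page size are assumptions):

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>

#define PAGE_SZ 4096	/* matches PAGE_SIZE on most configurations */

int main(void)
{
	const char *path =
		"/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw";
	unsigned char page[PAGE_SZ];
	ssize_t n;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror(path);
		return 1;
	}
	/* Each successful read returns (part of) one zero-padded buffer page. */
	while ((n = read(fd, page, sizeof(page))) > 0)
		fwrite(page, 1, n, stdout);	/* pipe into a file for later decoding */
	close(fd);
	return 0;
}
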
3478
3472#ifdef CONFIG_DYNAMIC_FTRACE 3479#ifdef CONFIG_DYNAMIC_FTRACE
3473 3480
3474int __weak ftrace_arch_read_dyn_info(char *buf, int size) 3481int __weak ftrace_arch_read_dyn_info(char *buf, int size)
@@ -3500,7 +3507,7 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf,
3500 return r; 3507 return r;
3501} 3508}
3502 3509
3503static struct file_operations tracing_dyn_info_fops = { 3510static const struct file_operations tracing_dyn_info_fops = {
3504 .open = tracing_open_generic, 3511 .open = tracing_open_generic,
3505 .read = tracing_read_dyn_info, 3512 .read = tracing_read_dyn_info,
3506}; 3513};
@@ -3526,15 +3533,350 @@ struct dentry *tracing_init_dentry(void)
3526 return d_tracer; 3533 return d_tracer;
3527} 3534}
3528 3535
3536static struct dentry *d_percpu;
3537
3538struct dentry *tracing_dentry_percpu(void)
3539{
3540 static int once;
3541 struct dentry *d_tracer;
3542
3543 if (d_percpu)
3544 return d_percpu;
3545
3546 d_tracer = tracing_init_dentry();
3547
3548 if (!d_tracer)
3549 return NULL;
3550
3551 d_percpu = debugfs_create_dir("per_cpu", d_tracer);
3552
3553 if (!d_percpu && !once) {
3554 once = 1;
3555 pr_warning("Could not create debugfs directory 'per_cpu'\n");
3556 return NULL;
3557 }
3558
3559 return d_percpu;
3560}
3561
3562static void tracing_init_debugfs_percpu(long cpu)
3563{
3564 struct dentry *d_percpu = tracing_dentry_percpu();
3565 struct dentry *entry, *d_cpu;
3566 /* strlen("cpu") + MAX(log10(cpu)) + '\0' */
3567 char cpu_dir[7];
3568
3569 if (cpu > 999 || cpu < 0)
3570 return;
3571
3572 sprintf(cpu_dir, "cpu%ld", cpu);
3573 d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
3574 if (!d_cpu) {
3575 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
3576 return;
3577 }
3578
3579 /* per cpu trace_pipe */
3580 entry = debugfs_create_file("trace_pipe", 0444, d_cpu,
3581 (void *) cpu, &tracing_pipe_fops);
3582 if (!entry)
3583 pr_warning("Could not create debugfs 'trace_pipe' entry\n");
3584
3585 /* per cpu trace */
3586 entry = debugfs_create_file("trace", 0444, d_cpu,
3587 (void *) cpu, &tracing_fops);
3588 if (!entry)
3589 pr_warning("Could not create debugfs 'trace' entry\n");
3590
3591 entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu,
3592 (void *) cpu, &tracing_buffers_fops);
3593 if (!entry)
3594 pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n");
3595}
3596
3529#ifdef CONFIG_FTRACE_SELFTEST 3597#ifdef CONFIG_FTRACE_SELFTEST
3530/* Let selftest have access to static functions in this file */ 3598/* Let selftest have access to static functions in this file */
3531#include "trace_selftest.c" 3599#include "trace_selftest.c"
3532#endif 3600#endif
3533 3601
3602struct trace_option_dentry {
3603 struct tracer_opt *opt;
3604 struct tracer_flags *flags;
3605 struct dentry *entry;
3606};
3607
3608static ssize_t
3609trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
3610 loff_t *ppos)
3611{
3612 struct trace_option_dentry *topt = filp->private_data;
3613 char *buf;
3614
3615 if (topt->flags->val & topt->opt->bit)
3616 buf = "1\n";
3617 else
3618 buf = "0\n";
3619
3620 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
3621}
3622
3623static ssize_t
3624trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3625 loff_t *ppos)
3626{
3627 struct trace_option_dentry *topt = filp->private_data;
3628 unsigned long val;
3629 char buf[64];
3630 int ret;
3631
3632 if (cnt >= sizeof(buf))
3633 return -EINVAL;
3634
3635 if (copy_from_user(&buf, ubuf, cnt))
3636 return -EFAULT;
3637
3638 buf[cnt] = 0;
3639
3640 ret = strict_strtoul(buf, 10, &val);
3641 if (ret < 0)
3642 return ret;
3643
3644 ret = 0;
3645 switch (val) {
3646 case 0:
3647 /* do nothing if already cleared */
3648 if (!(topt->flags->val & topt->opt->bit))
3649 break;
3650
3651 mutex_lock(&trace_types_lock);
3652 if (current_trace->set_flag)
3653 ret = current_trace->set_flag(topt->flags->val,
3654 topt->opt->bit, 0);
3655 mutex_unlock(&trace_types_lock);
3656 if (ret)
3657 return ret;
3658 topt->flags->val &= ~topt->opt->bit;
3659 break;
3660 case 1:
3661 /* do nothing if already set */
3662 if (topt->flags->val & topt->opt->bit)
3663 break;
3664
3665 mutex_lock(&trace_types_lock);
3666 if (current_trace->set_flag)
3667 ret = current_trace->set_flag(topt->flags->val,
3668 topt->opt->bit, 1);
3669 mutex_unlock(&trace_types_lock);
3670 if (ret)
3671 return ret;
3672 topt->flags->val |= topt->opt->bit;
3673 break;
3674
3675 default:
3676 return -EINVAL;
3677 }
3678
3679 *ppos += cnt;
3680
3681 return cnt;
3682}
3683
3684
3685static const struct file_operations trace_options_fops = {
3686 .open = tracing_open_generic,
3687 .read = trace_options_read,
3688 .write = trace_options_write,
3689};
3690
3691static ssize_t
3692trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
3693 loff_t *ppos)
3694{
3695 long index = (long)filp->private_data;
3696 char *buf;
3697
3698 if (trace_flags & (1 << index))
3699 buf = "1\n";
3700 else
3701 buf = "0\n";
3702
3703 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
3704}
3705
3706static ssize_t
3707trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
3708 loff_t *ppos)
3709{
3710 long index = (long)filp->private_data;
3711 char buf[64];
3712 unsigned long val;
3713 int ret;
3714
3715 if (cnt >= sizeof(buf))
3716 return -EINVAL;
3717
3718 if (copy_from_user(&buf, ubuf, cnt))
3719 return -EFAULT;
3720
3721 buf[cnt] = 0;
3722
3723 ret = strict_strtoul(buf, 10, &val);
3724 if (ret < 0)
3725 return ret;
3726
3727 switch (val) {
3728 case 0:
3729 trace_flags &= ~(1 << index);
3730 break;
3731 case 1:
3732 trace_flags |= 1 << index;
3733 break;
3734
3735 default:
3736 return -EINVAL;
3737 }
3738
3739 *ppos += cnt;
3740
3741 return cnt;
3742}
3743
3744static const struct file_operations trace_options_core_fops = {
3745 .open = tracing_open_generic,
3746 .read = trace_options_core_read,
3747 .write = trace_options_core_write,
3748};
3749
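Since trace_options_core_write() only parses a single 0 or 1, toggling one of the core flags from userspace is a one-byte write to the matching file under options/. A hypothetical sketch (the option name and the debugfs path are assumptions):

```c
/* Hypothetical helper: set or clear a core trace option from userspace.
 * Anything that does not parse to 0 or 1 is rejected with -EINVAL.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int set_trace_option(const char *name, int on)
{
	char path[256];
	int fd, ret;

	snprintf(path, sizeof(path),
		 "/sys/kernel/debug/tracing/options/%s", name);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	ret = (write(fd, on ? "1" : "0", 1) == 1) ? 0 : -1;
	close(fd);
	return ret;
}

/* e.g. set_trace_option("sym-addr", 1); */
```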
3750static struct dentry *trace_options_init_dentry(void)
3751{
3752 struct dentry *d_tracer;
3753 static struct dentry *t_options;
3754
3755 if (t_options)
3756 return t_options;
3757
3758 d_tracer = tracing_init_dentry();
3759 if (!d_tracer)
3760 return NULL;
3761
3762 t_options = debugfs_create_dir("options", d_tracer);
3763 if (!t_options) {
3764 pr_warning("Could not create debugfs directory 'options'\n");
3765 return NULL;
3766 }
3767
3768 return t_options;
3769}
3770
3771static void
3772create_trace_option_file(struct trace_option_dentry *topt,
3773 struct tracer_flags *flags,
3774 struct tracer_opt *opt)
3775{
3776 struct dentry *t_options;
3777 struct dentry *entry;
3778
3779 t_options = trace_options_init_dentry();
3780 if (!t_options)
3781 return;
3782
3783 topt->flags = flags;
3784 topt->opt = opt;
3785
3786 entry = debugfs_create_file(opt->name, 0644, t_options, topt,
3787 &trace_options_fops);
3788
3789 topt->entry = entry;
3790
3791}
3792
3793static struct trace_option_dentry *
3794create_trace_option_files(struct tracer *tracer)
3795{
3796 struct trace_option_dentry *topts;
3797 struct tracer_flags *flags;
3798 struct tracer_opt *opts;
3799 int cnt;
3800
3801 if (!tracer)
3802 return NULL;
3803
3804 flags = tracer->flags;
3805
3806 if (!flags || !flags->opts)
3807 return NULL;
3808
3809 opts = flags->opts;
3810
3811 for (cnt = 0; opts[cnt].name; cnt++)
3812 ;
3813
3814 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
3815 if (!topts)
3816 return NULL;
3817
3818 for (cnt = 0; opts[cnt].name; cnt++)
3819 create_trace_option_file(&topts[cnt], flags,
3820 &opts[cnt]);
3821
3822 return topts;
3823}
3824
3825static void
3826destroy_trace_option_files(struct trace_option_dentry *topts)
3827{
3828 int cnt;
3829
3830 if (!topts)
3831 return;
3832
3833 for (cnt = 0; topts[cnt].opt; cnt++) {
3834 if (topts[cnt].entry)
3835 debugfs_remove(topts[cnt].entry);
3836 }
3837
3838 kfree(topts);
3839}
3840
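create_trace_option_files() above walks tracer->flags->opts until it hits the NULL name, so a tracer exposes private options simply by filling in a tracer_flags table. A hypothetical sketch of such a definition (the option names and bit values are invented for illustration):

```c
/* Hypothetical tracer-private options; create_trace_option_files() would
 * turn each entry into an options/<name> file and route writes to the
 * tracer's set_flag() callback.
 */
#define MY_OPT_VERBOSE	0x1
#define MY_OPT_RAWADDR	0x2

static struct tracer_opt my_opts[] = {
	{ TRACER_OPT(verbose, MY_OPT_VERBOSE) },
	{ TRACER_OPT(rawaddr, MY_OPT_RAWADDR) },
	{ }	/* terminator: opts[cnt].name == NULL stops the walk */
};

static struct tracer_flags my_flags = {
	.val	= MY_OPT_VERBOSE,	/* default: verbose on */
	.opts	= my_opts,
};

static int my_set_flag(u32 old_flags, u32 bit, int set)
{
	/* react to the change; returning non-zero rejects the write */
	return 0;
}
```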
3841static struct dentry *
3842create_trace_option_core_file(const char *option, long index)
3843{
3844 struct dentry *t_options;
3845 struct dentry *entry;
3846
3847 t_options = trace_options_init_dentry();
3848 if (!t_options)
3849 return NULL;
3850
3851 entry = debugfs_create_file(option, 0644, t_options, (void *)index,
3852 &trace_options_core_fops);
3853
3854 return entry;
3855}
3856
3857static __init void create_trace_options_dir(void)
3858{
3859 struct dentry *t_options;
3860 struct dentry *entry;
3861 int i;
3862
3863 t_options = trace_options_init_dentry();
3864 if (!t_options)
3865 return;
3866
3867 for (i = 0; trace_options[i]; i++) {
3868 entry = create_trace_option_core_file(trace_options[i], i);
3869 if (!entry)
3870 pr_warning("Could not create debugfs %s entry\n",
3871 trace_options[i]);
3872 }
3873}
3874
3534static __init int tracer_init_debugfs(void) 3875static __init int tracer_init_debugfs(void)
3535{ 3876{
3536 struct dentry *d_tracer; 3877 struct dentry *d_tracer;
3537 struct dentry *entry; 3878 struct dentry *entry;
3879 int cpu;
3538 3880
3539 d_tracer = tracing_init_dentry(); 3881 d_tracer = tracing_init_dentry();
3540 3882
@@ -3548,18 +3890,15 @@ static __init int tracer_init_debugfs(void)
3548 if (!entry) 3890 if (!entry)
3549 pr_warning("Could not create debugfs 'trace_options' entry\n"); 3891 pr_warning("Could not create debugfs 'trace_options' entry\n");
3550 3892
3893 create_trace_options_dir();
3894
3551 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, 3895 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
3552 NULL, &tracing_cpumask_fops); 3896 NULL, &tracing_cpumask_fops);
3553 if (!entry) 3897 if (!entry)
3554 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); 3898 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
3555 3899
3556 entry = debugfs_create_file("latency_trace", 0444, d_tracer,
3557 &global_trace, &tracing_lt_fops);
3558 if (!entry)
3559 pr_warning("Could not create debugfs 'latency_trace' entry\n");
3560
3561 entry = debugfs_create_file("trace", 0444, d_tracer, 3900 entry = debugfs_create_file("trace", 0444, d_tracer,
3562 &global_trace, &tracing_fops); 3901 (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
3563 if (!entry) 3902 if (!entry)
3564 pr_warning("Could not create debugfs 'trace' entry\n"); 3903 pr_warning("Could not create debugfs 'trace' entry\n");
3565 3904
@@ -3590,8 +3929,8 @@ static __init int tracer_init_debugfs(void)
3590 if (!entry) 3929 if (!entry)
3591 pr_warning("Could not create debugfs 'README' entry\n"); 3930 pr_warning("Could not create debugfs 'README' entry\n");
3592 3931
3593 entry = debugfs_create_file("trace_pipe", 0644, d_tracer, 3932 entry = debugfs_create_file("trace_pipe", 0444, d_tracer,
3594 NULL, &tracing_pipe_fops); 3933 (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
3595 if (!entry) 3934 if (!entry)
3596 pr_warning("Could not create debugfs " 3935 pr_warning("Could not create debugfs "
3597 "'trace_pipe' entry\n"); 3936 "'trace_pipe' entry\n");
@@ -3619,77 +3958,12 @@ static __init int tracer_init_debugfs(void)
3619#ifdef CONFIG_SYSPROF_TRACER 3958#ifdef CONFIG_SYSPROF_TRACER
3620 init_tracer_sysprof_debugfs(d_tracer); 3959 init_tracer_sysprof_debugfs(d_tracer);
3621#endif 3960#endif
3622 return 0;
3623}
3624
3625int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
3626{
3627 static DEFINE_SPINLOCK(trace_buf_lock);
3628 static char trace_buf[TRACE_BUF_SIZE];
3629
3630 struct ring_buffer_event *event;
3631 struct trace_array *tr = &global_trace;
3632 struct trace_array_cpu *data;
3633 int cpu, len = 0, size, pc;
3634 struct print_entry *entry;
3635 unsigned long irq_flags;
3636
3637 if (tracing_disabled || tracing_selftest_running)
3638 return 0;
3639
3640 pc = preempt_count();
3641 preempt_disable_notrace();
3642 cpu = raw_smp_processor_id();
3643 data = tr->data[cpu];
3644
3645 if (unlikely(atomic_read(&data->disabled)))
3646 goto out;
3647
3648 pause_graph_tracing();
3649 spin_lock_irqsave(&trace_buf_lock, irq_flags);
3650 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
3651
3652 len = min(len, TRACE_BUF_SIZE-1);
3653 trace_buf[len] = 0;
3654
3655 size = sizeof(*entry) + len + 1;
3656 event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
3657 if (!event)
3658 goto out_unlock;
3659 entry = ring_buffer_event_data(event);
3660 tracing_generic_entry_update(&entry->ent, irq_flags, pc);
3661 entry->ent.type = TRACE_PRINT;
3662 entry->ip = ip;
3663 entry->depth = depth;
3664
3665 memcpy(&entry->buf, trace_buf, len);
3666 entry->buf[len] = 0;
3667 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
3668
3669 out_unlock:
3670 spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
3671 unpause_graph_tracing();
3672 out:
3673 preempt_enable_notrace();
3674
3675 return len;
3676}
3677EXPORT_SYMBOL_GPL(trace_vprintk);
3678
3679int __ftrace_printk(unsigned long ip, const char *fmt, ...)
3680{
3681 int ret;
3682 va_list ap;
3683 3961
3684 if (!(trace_flags & TRACE_ITER_PRINTK)) 3962 for_each_tracing_cpu(cpu)
3685 return 0; 3963 tracing_init_debugfs_percpu(cpu);
3686 3964
3687 va_start(ap, fmt); 3965 return 0;
3688 ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
3689 va_end(ap);
3690 return ret;
3691} 3966}
3692EXPORT_SYMBOL_GPL(__ftrace_printk);
3693 3967
3694static int trace_panic_handler(struct notifier_block *this, 3968static int trace_panic_handler(struct notifier_block *this,
3695 unsigned long event, void *unused) 3969 unsigned long event, void *unused)
@@ -3750,7 +4024,7 @@ trace_printk_seq(struct trace_seq *s)
3750 4024
3751 printk(KERN_TRACE "%s", s->buffer); 4025 printk(KERN_TRACE "%s", s->buffer);
3752 4026
3753 trace_seq_reset(s); 4027 trace_seq_init(s);
3754} 4028}
3755 4029
3756void ftrace_dump(void) 4030void ftrace_dump(void)
@@ -3782,8 +4056,10 @@ void ftrace_dump(void)
3782 4056
3783 printk(KERN_TRACE "Dumping ftrace buffer:\n"); 4057 printk(KERN_TRACE "Dumping ftrace buffer:\n");
3784 4058
4059 /* Simulate the iterator */
3785 iter.tr = &global_trace; 4060 iter.tr = &global_trace;
3786 iter.trace = current_trace; 4061 iter.trace = current_trace;
4062 iter.cpu_file = TRACE_PIPE_ALL_CPU;
3787 4063
3788 /* 4064 /*
3789 * We need to stop all tracing on all CPUS to read the 4065 * We need to stop all tracing on all CPUS to read the
@@ -3826,6 +4102,7 @@ void ftrace_dump(void)
3826__init static int tracer_alloc_buffers(void) 4102__init static int tracer_alloc_buffers(void)
3827{ 4103{
3828 struct trace_array_cpu *data; 4104 struct trace_array_cpu *data;
4105 int ring_buf_size;
3829 int i; 4106 int i;
3830 int ret = -ENOMEM; 4107 int ret = -ENOMEM;
3831 4108
@@ -3835,11 +4112,21 @@ __init static int tracer_alloc_buffers(void)
3835 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) 4112 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
3836 goto out_free_buffer_mask; 4113 goto out_free_buffer_mask;
3837 4114
4115 if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
4116 goto out_free_tracing_cpumask;
4117
4118 /* To save memory, keep the ring buffer size to its minimum */
4119 if (ring_buffer_expanded)
4120 ring_buf_size = trace_buf_size;
4121 else
4122 ring_buf_size = 1;
4123
3838 cpumask_copy(tracing_buffer_mask, cpu_possible_mask); 4124 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
3839 cpumask_copy(tracing_cpumask, cpu_all_mask); 4125 cpumask_copy(tracing_cpumask, cpu_all_mask);
4126 cpumask_clear(tracing_reader_cpumask);
3840 4127
3841 /* TODO: make the number of buffers hot pluggable with CPUS */ 4128 /* TODO: make the number of buffers hot pluggable with CPUS */
3842 global_trace.buffer = ring_buffer_alloc(trace_buf_size, 4129 global_trace.buffer = ring_buffer_alloc(ring_buf_size,
3843 TRACE_BUFFER_FLAGS); 4130 TRACE_BUFFER_FLAGS);
3844 if (!global_trace.buffer) { 4131 if (!global_trace.buffer) {
3845 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); 4132 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
@@ -3850,7 +4137,7 @@ __init static int tracer_alloc_buffers(void)
3850 4137
3851 4138
3852#ifdef CONFIG_TRACER_MAX_TRACE 4139#ifdef CONFIG_TRACER_MAX_TRACE
3853 max_tr.buffer = ring_buffer_alloc(trace_buf_size, 4140 max_tr.buffer = ring_buffer_alloc(ring_buf_size,
3854 TRACE_BUFFER_FLAGS); 4141 TRACE_BUFFER_FLAGS);
3855 if (!max_tr.buffer) { 4142 if (!max_tr.buffer) {
3856 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); 4143 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
@@ -3871,14 +4158,10 @@ __init static int tracer_alloc_buffers(void)
3871 trace_init_cmdlines(); 4158 trace_init_cmdlines();
3872 4159
3873 register_tracer(&nop_trace); 4160 register_tracer(&nop_trace);
4161 current_trace = &nop_trace;
3874#ifdef CONFIG_BOOT_TRACER 4162#ifdef CONFIG_BOOT_TRACER
3875 register_tracer(&boot_tracer); 4163 register_tracer(&boot_tracer);
3876 current_trace = &boot_tracer;
3877 current_trace->init(&global_trace);
3878#else
3879 current_trace = &nop_trace;
3880#endif 4164#endif
3881
3882 /* All seems OK, enable tracing */ 4165 /* All seems OK, enable tracing */
3883 tracing_disabled = 0; 4166 tracing_disabled = 0;
3884 4167
@@ -3886,14 +4169,38 @@ __init static int tracer_alloc_buffers(void)
3886 &trace_panic_notifier); 4169 &trace_panic_notifier);
3887 4170
3888 register_die_notifier(&trace_die_notifier); 4171 register_die_notifier(&trace_die_notifier);
3889 ret = 0; 4172
4173 return 0;
3890 4174
3891out_free_cpumask: 4175out_free_cpumask:
4176 free_cpumask_var(tracing_reader_cpumask);
4177out_free_tracing_cpumask:
3892 free_cpumask_var(tracing_cpumask); 4178 free_cpumask_var(tracing_cpumask);
3893out_free_buffer_mask: 4179out_free_buffer_mask:
3894 free_cpumask_var(tracing_buffer_mask); 4180 free_cpumask_var(tracing_buffer_mask);
3895out: 4181out:
3896 return ret; 4182 return ret;
3897} 4183}
4184
4185__init static int clear_boot_tracer(void)
4186{
4187 /*
4188 * The default bootup tracer name is stored in an init section.
4189 * This function is called at late_initcall time. If the requested
4190 * boot tracer has not been registered by then, clear it out, to
4191 * prevent a later registration from accessing the buffer that is
4192 * about to be freed.
4193 */
4194 if (!default_bootup_tracer)
4195 return 0;
4196
4197 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
4198 default_bootup_tracer);
4199 default_bootup_tracer = NULL;
4200
4201 return 0;
4202}
4203
3898early_initcall(tracer_alloc_buffers); 4204early_initcall(tracer_alloc_buffers);
3899fs_initcall(tracer_init_debugfs); 4205fs_initcall(tracer_init_debugfs);
4206late_initcall(clear_boot_tracer);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4d3d381bfd95..38276d1638e3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,6 +9,8 @@
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <trace/boot.h> 11#include <trace/boot.h>
12#include <trace/kmemtrace.h>
13#include <trace/power.h>
12 14
13enum trace_type { 15enum trace_type {
14 __TRACE_FIRST_TYPE = 0, 16 __TRACE_FIRST_TYPE = 0,
@@ -16,9 +18,9 @@ enum trace_type {
16 TRACE_FN, 18 TRACE_FN,
17 TRACE_CTX, 19 TRACE_CTX,
18 TRACE_WAKE, 20 TRACE_WAKE,
19 TRACE_CONT,
20 TRACE_STACK, 21 TRACE_STACK,
21 TRACE_PRINT, 22 TRACE_PRINT,
23 TRACE_BPRINT,
22 TRACE_SPECIAL, 24 TRACE_SPECIAL,
23 TRACE_MMIO_RW, 25 TRACE_MMIO_RW,
24 TRACE_MMIO_MAP, 26 TRACE_MMIO_MAP,
@@ -29,9 +31,14 @@ enum trace_type {
29 TRACE_GRAPH_ENT, 31 TRACE_GRAPH_ENT,
30 TRACE_USER_STACK, 32 TRACE_USER_STACK,
31 TRACE_HW_BRANCHES, 33 TRACE_HW_BRANCHES,
34 TRACE_SYSCALL_ENTER,
35 TRACE_SYSCALL_EXIT,
36 TRACE_KMEM_ALLOC,
37 TRACE_KMEM_FREE,
32 TRACE_POWER, 38 TRACE_POWER,
39 TRACE_BLK,
33 40
34 __TRACE_LAST_TYPE 41 __TRACE_LAST_TYPE,
35}; 42};
36 43
37/* 44/*
@@ -42,7 +49,6 @@ enum trace_type {
42 */ 49 */
43struct trace_entry { 50struct trace_entry {
44 unsigned char type; 51 unsigned char type;
45 unsigned char cpu;
46 unsigned char flags; 52 unsigned char flags;
47 unsigned char preempt_count; 53 unsigned char preempt_count;
48 int pid; 54 int pid;
@@ -60,13 +66,13 @@ struct ftrace_entry {
60 66
61/* Function call entry */ 67/* Function call entry */
62struct ftrace_graph_ent_entry { 68struct ftrace_graph_ent_entry {
63 struct trace_entry ent; 69 struct trace_entry ent;
64 struct ftrace_graph_ent graph_ent; 70 struct ftrace_graph_ent graph_ent;
65}; 71};
66 72
67/* Function return entry */ 73/* Function return entry */
68struct ftrace_graph_ret_entry { 74struct ftrace_graph_ret_entry {
69 struct trace_entry ent; 75 struct trace_entry ent;
70 struct ftrace_graph_ret ret; 76 struct ftrace_graph_ret ret;
71}; 77};
72extern struct tracer boot_tracer; 78extern struct tracer boot_tracer;
@@ -112,8 +118,16 @@ struct userstack_entry {
112}; 118};
113 119
114/* 120/*
115 * ftrace_printk entry: 121 * trace_printk entry:
116 */ 122 */
123struct bprint_entry {
124 struct trace_entry ent;
125 unsigned long ip;
126 int depth;
127 const char *fmt;
128 u32 buf[];
129};
130
117struct print_entry { 131struct print_entry {
118 struct trace_entry ent; 132 struct trace_entry ent;
119 unsigned long ip; 133 unsigned long ip;
@@ -170,15 +184,45 @@ struct trace_power {
170 struct power_trace state_data; 184 struct power_trace state_data;
171}; 185};
172 186
187struct kmemtrace_alloc_entry {
188 struct trace_entry ent;
189 enum kmemtrace_type_id type_id;
190 unsigned long call_site;
191 const void *ptr;
192 size_t bytes_req;
193 size_t bytes_alloc;
194 gfp_t gfp_flags;
195 int node;
196};
197
198struct kmemtrace_free_entry {
199 struct trace_entry ent;
200 enum kmemtrace_type_id type_id;
201 unsigned long call_site;
202 const void *ptr;
203};
204
205struct syscall_trace_enter {
206 struct trace_entry ent;
207 int nr;
208 unsigned long args[];
209};
210
211struct syscall_trace_exit {
212 struct trace_entry ent;
213 int nr;
214 unsigned long ret;
215};
216
217
173/* 218/*
174 * trace_flag_type is an enumeration that holds different 219 * trace_flag_type is an enumeration that holds different
175 * states when a trace occurs. These are: 220 * states when a trace occurs. These are:
176 * IRQS_OFF - interrupts were disabled 221 * IRQS_OFF - interrupts were disabled
177 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags 222 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
178 * NEED_RESCED - reschedule is requested 223 * NEED_RESCED - reschedule is requested
179 * HARDIRQ - inside an interrupt handler 224 * HARDIRQ - inside an interrupt handler
180 * SOFTIRQ - inside a softirq handler 225 * SOFTIRQ - inside a softirq handler
181 * CONT - multiple entries hold the trace item
182 */ 226 */
183enum trace_flag_type { 227enum trace_flag_type {
184 TRACE_FLAG_IRQS_OFF = 0x01, 228 TRACE_FLAG_IRQS_OFF = 0x01,
@@ -186,7 +230,6 @@ enum trace_flag_type {
186 TRACE_FLAG_NEED_RESCHED = 0x04, 230 TRACE_FLAG_NEED_RESCHED = 0x04,
187 TRACE_FLAG_HARDIRQ = 0x08, 231 TRACE_FLAG_HARDIRQ = 0x08,
188 TRACE_FLAG_SOFTIRQ = 0x10, 232 TRACE_FLAG_SOFTIRQ = 0x10,
189 TRACE_FLAG_CONT = 0x20,
190}; 233};
191 234
192#define TRACE_BUF_SIZE 1024 235#define TRACE_BUF_SIZE 1024
@@ -198,6 +241,7 @@ enum trace_flag_type {
198 */ 241 */
199struct trace_array_cpu { 242struct trace_array_cpu {
200 atomic_t disabled; 243 atomic_t disabled;
244 void *buffer_page; /* ring buffer spare */
201 245
202 /* these fields get copied into max-trace: */ 246 /* these fields get copied into max-trace: */
203 unsigned long trace_idx; 247 unsigned long trace_idx;
@@ -262,10 +306,10 @@ extern void __ftrace_bad_type(void);
262 do { \ 306 do { \
263 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ 307 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \
264 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ 308 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
265 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
266 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ 309 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
267 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ 310 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
268 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ 311 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
312 IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
269 IF_ASSIGN(var, ent, struct special_entry, 0); \ 313 IF_ASSIGN(var, ent, struct special_entry, 0); \
270 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ 314 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
271 TRACE_MMIO_RW); \ 315 TRACE_MMIO_RW); \
@@ -279,7 +323,15 @@ extern void __ftrace_bad_type(void);
279 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ 323 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
280 TRACE_GRAPH_RET); \ 324 TRACE_GRAPH_RET); \
281 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ 325 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
282 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ 326 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
327 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
328 TRACE_KMEM_ALLOC); \
329 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
330 TRACE_KMEM_FREE); \
331 IF_ASSIGN(var, ent, struct syscall_trace_enter, \
332 TRACE_SYSCALL_ENTER); \
333 IF_ASSIGN(var, ent, struct syscall_trace_exit, \
334 TRACE_SYSCALL_EXIT); \
283 __ftrace_bad_type(); \ 335 __ftrace_bad_type(); \
284 } while (0) 336 } while (0)
285 337
@@ -287,7 +339,8 @@ extern void __ftrace_bad_type(void);
287enum print_line_t { 339enum print_line_t {
288 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ 340 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
289 TRACE_TYPE_HANDLED = 1, 341 TRACE_TYPE_HANDLED = 1,
290 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ 342 TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */
343 TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */
291}; 344};
292 345
293 346
@@ -297,8 +350,8 @@ enum print_line_t {
297 * flags value in struct tracer_flags. 350 * flags value in struct tracer_flags.
298 */ 351 */
299struct tracer_opt { 352struct tracer_opt {
300 const char *name; /* Will appear on the trace_options file */ 353 const char *name; /* Will appear on the trace_options file */
301 u32 bit; /* Mask assigned in val field in tracer_flags */ 354 u32 bit; /* Mask assigned in val field in tracer_flags */
302}; 355};
303 356
304/* 357/*
@@ -307,28 +360,51 @@ struct tracer_opt {
307 */ 360 */
308struct tracer_flags { 361struct tracer_flags {
309 u32 val; 362 u32 val;
310 struct tracer_opt *opts; 363 struct tracer_opt *opts;
311}; 364};
312 365
313/* Makes it easier to define a tracer opt */ 366/* Makes it easier to define a tracer opt */
314#define TRACER_OPT(s, b) .name = #s, .bit = b 367#define TRACER_OPT(s, b) .name = #s, .bit = b
315 368
316/* 369
317 * A specific tracer, represented by methods that operate on a trace array: 370/**
371 * struct tracer - a specific tracer and its callbacks to interact with debugfs
372 * @name: the name chosen to select it on the available_tracers file
373 * @init: called when one switches to this tracer (echo name > current_tracer)
374 * @reset: called when one switches to another tracer
375 * @start: called when tracing is unpaused (echo 1 > tracing_enabled)
376 * @stop: called when tracing is paused (echo 0 > tracing_enabled)
377 * @open: called when the trace file is opened
378 * @pipe_open: called when the trace_pipe file is opened
379 * @wait_pipe: override how the user waits for traces on trace_pipe
380 * @close: called when the trace file is released
381 * @read: override the default read callback on trace_pipe
382 * @splice_read: override the default splice_read callback on trace_pipe
383 * @selftest: selftest to run on boot (see trace_selftest.c)
384 * @print_headers: override the first lines that describe your columns
385 * @print_line: callback that prints a trace
386 * @set_flag: signals one of your private flags changed (trace_options file)
387 * @flags: your private flags
318 */ 388 */
319struct tracer { 389struct tracer {
320 const char *name; 390 const char *name;
321 /* Your tracer should raise a warning if init fails */
322 int (*init)(struct trace_array *tr); 391 int (*init)(struct trace_array *tr);
323 void (*reset)(struct trace_array *tr); 392 void (*reset)(struct trace_array *tr);
324 void (*start)(struct trace_array *tr); 393 void (*start)(struct trace_array *tr);
325 void (*stop)(struct trace_array *tr); 394 void (*stop)(struct trace_array *tr);
326 void (*open)(struct trace_iterator *iter); 395 void (*open)(struct trace_iterator *iter);
327 void (*pipe_open)(struct trace_iterator *iter); 396 void (*pipe_open)(struct trace_iterator *iter);
397 void (*wait_pipe)(struct trace_iterator *iter);
328 void (*close)(struct trace_iterator *iter); 398 void (*close)(struct trace_iterator *iter);
329 ssize_t (*read)(struct trace_iterator *iter, 399 ssize_t (*read)(struct trace_iterator *iter,
330 struct file *filp, char __user *ubuf, 400 struct file *filp, char __user *ubuf,
331 size_t cnt, loff_t *ppos); 401 size_t cnt, loff_t *ppos);
402 ssize_t (*splice_read)(struct trace_iterator *iter,
403 struct file *filp,
404 loff_t *ppos,
405 struct pipe_inode_info *pipe,
406 size_t len,
407 unsigned int flags);
332#ifdef CONFIG_FTRACE_STARTUP_TEST 408#ifdef CONFIG_FTRACE_STARTUP_TEST
333 int (*selftest)(struct tracer *trace, 409 int (*selftest)(struct tracer *trace,
334 struct trace_array *tr); 410 struct trace_array *tr);
@@ -339,7 +415,8 @@ struct tracer {
339 int (*set_flag)(u32 old_flags, u32 bit, int set); 415 int (*set_flag)(u32 old_flags, u32 bit, int set);
340 struct tracer *next; 416 struct tracer *next;
341 int print_max; 417 int print_max;
342 struct tracer_flags *flags; 418 struct tracer_flags *flags;
419 struct tracer_stat *stats;
343}; 420};
344 421
345struct trace_seq { 422struct trace_seq {
@@ -348,6 +425,16 @@ struct trace_seq {
348 unsigned int readpos; 425 unsigned int readpos;
349}; 426};
350 427
428static inline void
429trace_seq_init(struct trace_seq *s)
430{
431 s->len = 0;
432 s->readpos = 0;
433}
434
435
436#define TRACE_PIPE_ALL_CPU -1
437
351/* 438/*
352 * Trace iterator - used by printout routines who present trace 439 * Trace iterator - used by printout routines who present trace
353 * results to users and which routines might sleep, etc: 440 * results to users and which routines might sleep, etc:
@@ -356,6 +443,8 @@ struct trace_iterator {
356 struct trace_array *tr; 443 struct trace_array *tr;
357 struct tracer *trace; 444 struct tracer *trace;
358 void *private; 445 void *private;
446 int cpu_file;
447 struct mutex mutex;
359 struct ring_buffer_iter *buffer_iter[NR_CPUS]; 448 struct ring_buffer_iter *buffer_iter[NR_CPUS];
360 449
361 /* The below is zeroed out in pipe_read */ 450 /* The below is zeroed out in pipe_read */
@@ -371,6 +460,7 @@ struct trace_iterator {
371 cpumask_var_t started; 460 cpumask_var_t started;
372}; 461};
373 462
463int tracer_init(struct tracer *t, struct trace_array *tr);
374int tracing_is_enabled(void); 464int tracing_is_enabled(void);
375void trace_wake_up(void); 465void trace_wake_up(void);
376void tracing_reset(struct trace_array *tr, int cpu); 466void tracing_reset(struct trace_array *tr, int cpu);
@@ -379,26 +469,48 @@ int tracing_open_generic(struct inode *inode, struct file *filp);
379struct dentry *tracing_init_dentry(void); 469struct dentry *tracing_init_dentry(void);
380void init_tracer_sysprof_debugfs(struct dentry *d_tracer); 470void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
381 471
472struct ring_buffer_event;
473
474struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
475 unsigned char type,
476 unsigned long len,
477 unsigned long flags,
478 int pc);
479void trace_buffer_unlock_commit(struct trace_array *tr,
480 struct ring_buffer_event *event,
481 unsigned long flags, int pc);
482
483struct ring_buffer_event *
484trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
485 unsigned long flags, int pc);
486void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
487 unsigned long flags, int pc);
488
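These two helpers replace the open-coded ring_buffer_lock_reserve() / tracing_generic_entry_update() / ring_buffer_unlock_commit() sequence that the trace_boot.c and trace_branch.c hunks further down convert. A hedged sketch of the usage pattern, with an invented entry type and payload for illustration:

```c
/* Hypothetical event emission with the new helpers: the common header
 * (type, flags, pid, preempt count) is filled in by the helpers, the
 * caller only writes its payload. TRACE_MY_TYPE and struct my_entry
 * are invented for this sketch.
 */
static void my_trace_something(struct trace_array *tr, unsigned long val)
{
	struct ring_buffer_event *event;
	struct my_entry *entry;		/* assumed: ent + one payload field */
	unsigned long flags;
	int pc = preempt_count();

	local_save_flags(flags);
	event = trace_buffer_lock_reserve(tr, TRACE_MY_TYPE,
					  sizeof(*entry), flags, pc);
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->val = val;
	trace_buffer_unlock_commit(tr, event, flags, pc);
}
```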
382struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, 489struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
383 struct trace_array_cpu *data); 490 struct trace_array_cpu *data);
491
492struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
493 int *ent_cpu, u64 *ent_ts);
494
384void tracing_generic_entry_update(struct trace_entry *entry, 495void tracing_generic_entry_update(struct trace_entry *entry,
385 unsigned long flags, 496 unsigned long flags,
386 int pc); 497 int pc);
387 498
499void default_wait_pipe(struct trace_iterator *iter);
500void poll_wait_pipe(struct trace_iterator *iter);
501
388void ftrace(struct trace_array *tr, 502void ftrace(struct trace_array *tr,
389 struct trace_array_cpu *data, 503 struct trace_array_cpu *data,
390 unsigned long ip, 504 unsigned long ip,
391 unsigned long parent_ip, 505 unsigned long parent_ip,
392 unsigned long flags, int pc); 506 unsigned long flags, int pc);
393void tracing_sched_switch_trace(struct trace_array *tr, 507void tracing_sched_switch_trace(struct trace_array *tr,
394 struct trace_array_cpu *data,
395 struct task_struct *prev, 508 struct task_struct *prev,
396 struct task_struct *next, 509 struct task_struct *next,
397 unsigned long flags, int pc); 510 unsigned long flags, int pc);
398void tracing_record_cmdline(struct task_struct *tsk); 511void tracing_record_cmdline(struct task_struct *tsk);
399 512
400void tracing_sched_wakeup_trace(struct trace_array *tr, 513void tracing_sched_wakeup_trace(struct trace_array *tr,
401 struct trace_array_cpu *data,
402 struct task_struct *wakee, 514 struct task_struct *wakee,
403 struct task_struct *cur, 515 struct task_struct *cur,
404 unsigned long flags, int pc); 516 unsigned long flags, int pc);
@@ -408,14 +520,12 @@ void trace_special(struct trace_array *tr,
408 unsigned long arg2, 520 unsigned long arg2,
409 unsigned long arg3, int pc); 521 unsigned long arg3, int pc);
410void trace_function(struct trace_array *tr, 522void trace_function(struct trace_array *tr,
411 struct trace_array_cpu *data,
412 unsigned long ip, 523 unsigned long ip,
413 unsigned long parent_ip, 524 unsigned long parent_ip,
414 unsigned long flags, int pc); 525 unsigned long flags, int pc);
415 526
416void trace_graph_return(struct ftrace_graph_ret *trace); 527void trace_graph_return(struct ftrace_graph_ret *trace);
417int trace_graph_entry(struct ftrace_graph_ent *trace); 528int trace_graph_entry(struct ftrace_graph_ent *trace);
418void trace_hw_branch(struct trace_array *tr, u64 from, u64 to);
419 529
420void tracing_start_cmdline_record(void); 530void tracing_start_cmdline_record(void);
421void tracing_stop_cmdline_record(void); 531void tracing_stop_cmdline_record(void);
@@ -434,15 +544,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
434void update_max_tr_single(struct trace_array *tr, 544void update_max_tr_single(struct trace_array *tr,
435 struct task_struct *tsk, int cpu); 545 struct task_struct *tsk, int cpu);
436 546
437extern cycle_t ftrace_now(int cpu); 547void __trace_stack(struct trace_array *tr,
548 unsigned long flags,
549 int skip, int pc);
438 550
439#ifdef CONFIG_FUNCTION_TRACER 551extern cycle_t ftrace_now(int cpu);
440void tracing_start_function_trace(void);
441void tracing_stop_function_trace(void);
442#else
443# define tracing_start_function_trace() do { } while (0)
444# define tracing_stop_function_trace() do { } while (0)
445#endif
446 552
447#ifdef CONFIG_CONTEXT_SWITCH_TRACER 553#ifdef CONFIG_CONTEXT_SWITCH_TRACER
448typedef void 554typedef void
@@ -456,10 +562,10 @@ struct tracer_switch_ops {
456 void *private; 562 void *private;
457 struct tracer_switch_ops *next; 563 struct tracer_switch_ops *next;
458}; 564};
459
460char *trace_find_cmdline(int pid);
461#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 565#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
462 566
567extern void trace_find_cmdline(int pid, char comm[]);
568
463#ifdef CONFIG_DYNAMIC_FTRACE 569#ifdef CONFIG_DYNAMIC_FTRACE
464extern unsigned long ftrace_update_tot_cnt; 570extern unsigned long ftrace_update_tot_cnt;
465#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func 571#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
@@ -469,6 +575,8 @@ extern int DYN_FTRACE_TEST_NAME(void);
469#ifdef CONFIG_FTRACE_STARTUP_TEST 575#ifdef CONFIG_FTRACE_STARTUP_TEST
470extern int trace_selftest_startup_function(struct tracer *trace, 576extern int trace_selftest_startup_function(struct tracer *trace,
471 struct trace_array *tr); 577 struct trace_array *tr);
578extern int trace_selftest_startup_function_graph(struct tracer *trace,
579 struct trace_array *tr);
472extern int trace_selftest_startup_irqsoff(struct tracer *trace, 580extern int trace_selftest_startup_irqsoff(struct tracer *trace,
473 struct trace_array *tr); 581 struct trace_array *tr);
474extern int trace_selftest_startup_preemptoff(struct tracer *trace, 582extern int trace_selftest_startup_preemptoff(struct tracer *trace,
@@ -488,17 +596,10 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
488#endif /* CONFIG_FTRACE_STARTUP_TEST */ 596#endif /* CONFIG_FTRACE_STARTUP_TEST */
489 597
490extern void *head_page(struct trace_array_cpu *data); 598extern void *head_page(struct trace_array_cpu *data);
491extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
492extern void trace_seq_print_cont(struct trace_seq *s,
493 struct trace_iterator *iter);
494
495extern int
496seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
497 unsigned long sym_flags);
498extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
499 size_t cnt);
500extern long ns2usecs(cycle_t nsec); 599extern long ns2usecs(cycle_t nsec);
501extern int 600extern int
601trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args);
602extern int
502trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); 603trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
503 604
504extern unsigned long trace_flags; 605extern unsigned long trace_flags;
@@ -580,7 +681,10 @@ enum trace_iterator_flags {
580 TRACE_ITER_ANNOTATE = 0x2000, 681 TRACE_ITER_ANNOTATE = 0x2000,
581 TRACE_ITER_USERSTACKTRACE = 0x4000, 682 TRACE_ITER_USERSTACKTRACE = 0x4000,
582 TRACE_ITER_SYM_USEROBJ = 0x8000, 683 TRACE_ITER_SYM_USEROBJ = 0x8000,
583 TRACE_ITER_PRINTK_MSGONLY = 0x10000 684 TRACE_ITER_PRINTK_MSGONLY = 0x10000,
685 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */
686 TRACE_ITER_LATENCY_FMT = 0x40000,
687 TRACE_ITER_GLOBAL_CLK = 0x80000,
584}; 688};
585 689
586/* 690/*
@@ -601,12 +705,12 @@ extern struct tracer nop_trace;
601 * preempt_enable (after a disable), a schedule might take place 705 * preempt_enable (after a disable), a schedule might take place
602 * causing an infinite recursion. 706 * causing an infinite recursion.
603 * 707 *
604 * To prevent this, we read the need_recshed flag before 708 * To prevent this, we read the need_resched flag before
605 * disabling preemption. When we want to enable preemption we 709 * disabling preemption. When we want to enable preemption we
606 * check the flag, if it is set, then we call preempt_enable_no_resched. 710 * check the flag, if it is set, then we call preempt_enable_no_resched.
607 * Otherwise, we call preempt_enable. 711 * Otherwise, we call preempt_enable.
608 * 712 *
609 * The rational for doing the above is that if need resched is set 713 * The rational for doing the above is that if need_resched is set
610 * and we have yet to reschedule, we are either in an atomic location 714 * and we have yet to reschedule, we are either in an atomic location
611 * (where we do not need to check for scheduling) or we are inside 715 * (where we do not need to check for scheduling) or we are inside
612 * the scheduler and do not want to resched. 716 * the scheduler and do not want to resched.
@@ -627,7 +731,7 @@ static inline int ftrace_preempt_disable(void)
627 * 731 *
628 * This is a scheduler safe way to enable preemption and not miss 732 * This is a scheduler safe way to enable preemption and not miss
629 * any preemption checks. The disabled saved the state of preemption. 733 * any preemption checks. The disabled saved the state of preemption.
630 * If resched is set, then we were either inside an atomic or 734 * If resched is set, then we are either inside an atomic or
631 * are inside the scheduler (we would have already scheduled 735 * are inside the scheduler (we would have already scheduled
632 * otherwise). In this case, we do not want to call normal 736 * otherwise). In this case, we do not want to call normal
633 * preempt_enable, but preempt_enable_no_resched instead. 737 * preempt_enable, but preempt_enable_no_resched instead.
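As a sketch of the pattern this comment describes (the in-tree helpers sit next to the comment and may differ slightly in which notrace variants they use): need_resched is sampled before preemption is disabled, and the matching enable path uses that sample to pick the right flavour of preempt_enable.

```c
/* Sketch of the pattern described above. */
static inline int ftrace_preempt_disable(void)
{
	int resched = need_resched();

	preempt_disable_notrace();
	return resched;
}

static inline void ftrace_preempt_enable(int resched)
{
	if (resched)
		preempt_enable_no_resched_notrace();
	else
		preempt_enable_notrace();
}
```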
@@ -664,4 +768,51 @@ static inline void trace_branch_disable(void)
664} 768}
665#endif /* CONFIG_BRANCH_TRACER */ 769#endif /* CONFIG_BRANCH_TRACER */
666 770
771/* set the ring buffers to their default size if not already done */
772int tracing_update_buffers(void);
773
774/* trace event type bit fields, not numeric */
775enum {
776 TRACE_EVENT_TYPE_PRINTF = 1,
777 TRACE_EVENT_TYPE_RAW = 2,
778};
779
780struct ftrace_event_call {
781 char *name;
782 char *system;
783 struct dentry *dir;
784 int enabled;
785 int (*regfunc)(void);
786 void (*unregfunc)(void);
787 int id;
788 int (*raw_init)(void);
789 int (*show_format)(struct trace_seq *s);
790};
791
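For orientation only, a hand-written, heavily hedged sketch of what one descriptor in the __start_ftrace_events[]..__stop_ftrace_events[] range carries; in the real tree these descriptors are generated by macros elsewhere in this series and placed into that section, so the names and callbacks below are pure illustration:

```c
/* Hypothetical event descriptor; all names are invented. */
static int my_event_reg(void)
{
	/* attach the tracepoint probe */
	return 0;
}

static void my_event_unreg(void)
{
	/* detach the tracepoint probe */
}

static struct ftrace_event_call my_event_call = {
	.name		= "my_event",
	.system		= "my_subsys",
	.regfunc	= my_event_reg,
	.unregfunc	= my_event_unreg,
};
```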
792void event_trace_printk(unsigned long ip, const char *fmt, ...);
793extern struct ftrace_event_call __start_ftrace_events[];
794extern struct ftrace_event_call __stop_ftrace_events[];
795
796extern const char *__start___trace_bprintk_fmt[];
797extern const char *__stop___trace_bprintk_fmt[];
798
799/*
800 * The double __builtin_constant_p is because gcc will give us an error
801 * if we try to initialize the static variable with fmt when fmt is
802 * not a constant, even though the outer if statement optimizes out.
803 */
804#define event_trace_printk(ip, fmt, args...) \
805do { \
806 __trace_printk_check_format(fmt, ##args); \
807 tracing_record_cmdline(current); \
808 if (__builtin_constant_p(fmt)) { \
809 static const char *trace_printk_fmt \
810 __attribute__((section("__trace_printk_fmt"))) = \
811 __builtin_constant_p(fmt) ? fmt : NULL; \
812 \
813 __trace_bprintk(ip, trace_printk_fmt, ##args); \
814 } else \
815 __trace_printk(ip, fmt, ##args); \
816} while (0)
817
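A hypothetical call site, to show which branch of the macro is taken: with a string-literal format the static trace_printk_fmt pointer is emitted and __trace_bprintk() records only the binary arguments, while a non-constant fmt falls back to __trace_printk(). The locals nr and fmt_from_user are invented.

```c
/* Hypothetical call sites; 'nr' and 'fmt_from_user' are invented locals. */
event_trace_printk(_THIS_IP_, "queued %d requests\n", nr); /* binary __trace_bprintk() path */
event_trace_printk(_THIS_IP_, fmt_from_user, nr);          /* plain __trace_printk() path   */
```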
667#endif /* _LINUX_KERNEL_TRACE_H */ 818#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 366c8c333e13..7a30fc4c3642 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -11,6 +11,7 @@
11#include <linux/kallsyms.h> 11#include <linux/kallsyms.h>
12 12
13#include "trace.h" 13#include "trace.h"
14#include "trace_output.h"
14 15
15static struct trace_array *boot_trace; 16static struct trace_array *boot_trace;
16static bool pre_initcalls_finished; 17static bool pre_initcalls_finished;
@@ -27,13 +28,13 @@ void start_boot_trace(void)
27 28
28void enable_boot_trace(void) 29void enable_boot_trace(void)
29{ 30{
30 if (pre_initcalls_finished) 31 if (boot_trace && pre_initcalls_finished)
31 tracing_start_sched_switch_record(); 32 tracing_start_sched_switch_record();
32} 33}
33 34
34void disable_boot_trace(void) 35void disable_boot_trace(void)
35{ 36{
36 if (pre_initcalls_finished) 37 if (boot_trace && pre_initcalls_finished)
37 tracing_stop_sched_switch_record(); 38 tracing_stop_sched_switch_record();
38} 39}
39 40
@@ -42,6 +43,9 @@ static int boot_trace_init(struct trace_array *tr)
42 int cpu; 43 int cpu;
43 boot_trace = tr; 44 boot_trace = tr;
44 45
46 if (!tr)
47 return 0;
48
45 for_each_cpu(cpu, cpu_possible_mask) 49 for_each_cpu(cpu, cpu_possible_mask)
46 tracing_reset(tr, cpu); 50 tracing_reset(tr, cpu);
47 51
@@ -128,10 +132,9 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
128{ 132{
129 struct ring_buffer_event *event; 133 struct ring_buffer_event *event;
130 struct trace_boot_call *entry; 134 struct trace_boot_call *entry;
131 unsigned long irq_flags;
132 struct trace_array *tr = boot_trace; 135 struct trace_array *tr = boot_trace;
133 136
134 if (!pre_initcalls_finished) 137 if (!tr || !pre_initcalls_finished)
135 return; 138 return;
136 139
137 /* Get its name now since this function could 140 /* Get its name now since this function could
@@ -140,18 +143,13 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
140 sprint_symbol(bt->func, (unsigned long)fn); 143 sprint_symbol(bt->func, (unsigned long)fn);
141 preempt_disable(); 144 preempt_disable();
142 145
143 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 146 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL,
144 &irq_flags); 147 sizeof(*entry), 0, 0);
145 if (!event) 148 if (!event)
146 goto out; 149 goto out;
147 entry = ring_buffer_event_data(event); 150 entry = ring_buffer_event_data(event);
148 tracing_generic_entry_update(&entry->ent, 0, 0);
149 entry->ent.type = TRACE_BOOT_CALL;
150 entry->boot_call = *bt; 151 entry->boot_call = *bt;
151 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 152 trace_buffer_unlock_commit(tr, event, 0, 0);
152
153 trace_wake_up();
154
155 out: 153 out:
156 preempt_enable(); 154 preempt_enable();
157} 155}
@@ -160,27 +158,21 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
160{ 158{
161 struct ring_buffer_event *event; 159 struct ring_buffer_event *event;
162 struct trace_boot_ret *entry; 160 struct trace_boot_ret *entry;
163 unsigned long irq_flags;
164 struct trace_array *tr = boot_trace; 161 struct trace_array *tr = boot_trace;
165 162
166 if (!pre_initcalls_finished) 163 if (!tr || !pre_initcalls_finished)
167 return; 164 return;
168 165
169 sprint_symbol(bt->func, (unsigned long)fn); 166 sprint_symbol(bt->func, (unsigned long)fn);
170 preempt_disable(); 167 preempt_disable();
171 168
172 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 169 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET,
173 &irq_flags); 170 sizeof(*entry), 0, 0);
174 if (!event) 171 if (!event)
175 goto out; 172 goto out;
176 entry = ring_buffer_event_data(event); 173 entry = ring_buffer_event_data(event);
177 tracing_generic_entry_update(&entry->ent, 0, 0);
178 entry->ent.type = TRACE_BOOT_RET;
179 entry->boot_ret = *bt; 174 entry->boot_ret = *bt;
180 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 175 trace_buffer_unlock_commit(tr, event, 0, 0);
181
182 trace_wake_up();
183
184 out: 176 out:
185 preempt_enable(); 177 preempt_enable();
186} 178}
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 6c00feb3bac7..ad8c22efff41 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -14,12 +14,17 @@
14#include <linux/hash.h> 14#include <linux/hash.h>
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <asm/local.h> 16#include <asm/local.h>
17
17#include "trace.h" 18#include "trace.h"
19#include "trace_stat.h"
20#include "trace_output.h"
18 21
19#ifdef CONFIG_BRANCH_TRACER 22#ifdef CONFIG_BRANCH_TRACER
20 23
24static struct tracer branch_trace;
21static int branch_tracing_enabled __read_mostly; 25static int branch_tracing_enabled __read_mostly;
22static DEFINE_MUTEX(branch_tracing_mutex); 26static DEFINE_MUTEX(branch_tracing_mutex);
27
23static struct trace_array *branch_tracer; 28static struct trace_array *branch_tracer;
24 29
25static void 30static void
@@ -28,7 +33,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
28 struct trace_array *tr = branch_tracer; 33 struct trace_array *tr = branch_tracer;
29 struct ring_buffer_event *event; 34 struct ring_buffer_event *event;
30 struct trace_branch *entry; 35 struct trace_branch *entry;
31 unsigned long flags, irq_flags; 36 unsigned long flags;
32 int cpu, pc; 37 int cpu, pc;
33 const char *p; 38 const char *p;
34 39
@@ -47,15 +52,13 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
47 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) 52 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
48 goto out; 53 goto out;
49 54
50 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 55 pc = preempt_count();
51 &irq_flags); 56 event = trace_buffer_lock_reserve(tr, TRACE_BRANCH,
57 sizeof(*entry), flags, pc);
52 if (!event) 58 if (!event)
53 goto out; 59 goto out;
54 60
55 pc = preempt_count();
56 entry = ring_buffer_event_data(event); 61 entry = ring_buffer_event_data(event);
57 tracing_generic_entry_update(&entry->ent, flags, pc);
58 entry->ent.type = TRACE_BRANCH;
59 62
60 /* Strip off the path, only save the file */ 63 /* Strip off the path, only save the file */
61 p = f->file + strlen(f->file); 64 p = f->file + strlen(f->file);
@@ -70,7 +73,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
70 entry->line = f->line; 73 entry->line = f->line;
71 entry->correct = val == expect; 74 entry->correct = val == expect;
72 75
73 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 76 ring_buffer_unlock_commit(tr->buffer, event);
74 77
75 out: 78 out:
76 atomic_dec(&tr->data[cpu]->disabled); 79 atomic_dec(&tr->data[cpu]->disabled);
@@ -88,8 +91,6 @@ void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
88 91
89int enable_branch_tracing(struct trace_array *tr) 92int enable_branch_tracing(struct trace_array *tr)
90{ 93{
91 int ret = 0;
92
93 mutex_lock(&branch_tracing_mutex); 94 mutex_lock(&branch_tracing_mutex);
94 branch_tracer = tr; 95 branch_tracer = tr;
95 /* 96 /*
@@ -100,7 +101,7 @@ int enable_branch_tracing(struct trace_array *tr)
100 branch_tracing_enabled++; 101 branch_tracing_enabled++;
101 mutex_unlock(&branch_tracing_mutex); 102 mutex_unlock(&branch_tracing_mutex);
102 103
103 return ret; 104 return 0;
104} 105}
105 106
106void disable_branch_tracing(void) 107void disable_branch_tracing(void)
@@ -128,11 +129,6 @@ static void stop_branch_trace(struct trace_array *tr)
128 129
129static int branch_trace_init(struct trace_array *tr) 130static int branch_trace_init(struct trace_array *tr)
130{ 131{
131 int cpu;
132
133 for_each_online_cpu(cpu)
134 tracing_reset(tr, cpu);
135
136 start_branch_trace(tr); 132 start_branch_trace(tr);
137 return 0; 133 return 0;
138} 134}
@@ -142,22 +138,53 @@ static void branch_trace_reset(struct trace_array *tr)
142 stop_branch_trace(tr); 138 stop_branch_trace(tr);
143} 139}
144 140
145struct tracer branch_trace __read_mostly = 141static enum print_line_t trace_branch_print(struct trace_iterator *iter,
142 int flags)
143{
144 struct trace_branch *field;
145
146 trace_assign_type(field, iter->ent);
147
148 if (trace_seq_printf(&iter->seq, "[%s] %s:%s:%d\n",
149 field->correct ? " ok " : " MISS ",
150 field->func,
151 field->file,
152 field->line))
153 return TRACE_TYPE_PARTIAL_LINE;
154
155 return TRACE_TYPE_HANDLED;
156}
157
158
159static struct trace_event trace_branch_event = {
160 .type = TRACE_BRANCH,
161 .trace = trace_branch_print,
162};
163
164static struct tracer branch_trace __read_mostly =
146{ 165{
147 .name = "branch", 166 .name = "branch",
148 .init = branch_trace_init, 167 .init = branch_trace_init,
149 .reset = branch_trace_reset, 168 .reset = branch_trace_reset,
150#ifdef CONFIG_FTRACE_SELFTEST 169#ifdef CONFIG_FTRACE_SELFTEST
151 .selftest = trace_selftest_startup_branch, 170 .selftest = trace_selftest_startup_branch,
152#endif 171#endif /* CONFIG_FTRACE_SELFTEST */
153}; 172};
154 173
155__init static int init_branch_trace(void) 174__init static int init_branch_tracer(void)
156{ 175{
176 int ret;
177
178 ret = register_ftrace_event(&trace_branch_event);
179 if (!ret) {
180 printk(KERN_WARNING "Warning: could not register "
181 "branch events\n");
182 return 1;
183 }
157 return register_tracer(&branch_trace); 184 return register_tracer(&branch_trace);
158} 185}
186device_initcall(init_branch_tracer);
159 187
160device_initcall(init_branch_trace);
161#else 188#else
162static inline 189static inline
163void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) 190void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
@@ -183,66 +210,39 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
183} 210}
184EXPORT_SYMBOL(ftrace_likely_update); 211EXPORT_SYMBOL(ftrace_likely_update);
185 212
186struct ftrace_pointer { 213extern unsigned long __start_annotated_branch_profile[];
187 void *start; 214extern unsigned long __stop_annotated_branch_profile[];
188 void *stop;
189 int hit;
190};
191 215
192static void * 216static int annotated_branch_stat_headers(struct seq_file *m)
193t_next(struct seq_file *m, void *v, loff_t *pos)
194{ 217{
195 const struct ftrace_pointer *f = m->private; 218 seq_printf(m, " correct incorrect %% ");
196 struct ftrace_branch_data *p = v; 219 seq_printf(m, " Function "
197 220 " File Line\n"
198 (*pos)++; 221 " ------- --------- - "
199 222 " -------- "
200 if (v == (void *)1) 223 " ---- ----\n");
201 return f->start; 224 return 0;
202
203 ++p;
204
205 if ((void *)p >= (void *)f->stop)
206 return NULL;
207
208 return p;
209} 225}
210 226
211static void *t_start(struct seq_file *m, loff_t *pos) 227static inline long get_incorrect_percent(struct ftrace_branch_data *p)
212{ 228{
213 void *t = (void *)1; 229 long percent;
214 loff_t l = 0;
215
216 for (; t && l < *pos; t = t_next(m, t, &l))
217 ;
218 230
219 return t; 231 if (p->correct) {
220} 232 percent = p->incorrect * 100;
233 percent /= p->correct + p->incorrect;
234 } else
235 percent = p->incorrect ? 100 : -1;
221 236
222static void t_stop(struct seq_file *m, void *p) 237 return percent;
223{
224} 238}
225 239
226static int t_show(struct seq_file *m, void *v) 240static int branch_stat_show(struct seq_file *m, void *v)
227{ 241{
228 const struct ftrace_pointer *fp = m->private;
229 struct ftrace_branch_data *p = v; 242 struct ftrace_branch_data *p = v;
230 const char *f; 243 const char *f;
231 long percent; 244 long percent;
232 245
233 if (v == (void *)1) {
234 if (fp->hit)
235 seq_printf(m, " miss hit %% ");
236 else
237 seq_printf(m, " correct incorrect %% ");
238 seq_printf(m, " Function "
239 " File Line\n"
240 " ------- --------- - "
241 " -------- "
242 " ---- ----\n");
243 return 0;
244 }
245
246 /* Only print the file, not the path */ 246 /* Only print the file, not the path */
247 f = p->file + strlen(p->file); 247 f = p->file + strlen(p->file);
248 while (f >= p->file && *f != '/') 248 while (f >= p->file && *f != '/')
@@ -252,11 +252,7 @@ static int t_show(struct seq_file *m, void *v)
252 /* 252 /*
253 * The miss is overlaid on correct, and hit on incorrect. 253 * The miss is overlaid on correct, and hit on incorrect.
254 */ 254 */
255 if (p->correct) { 255 percent = get_incorrect_percent(p);
256 percent = p->incorrect * 100;
257 percent /= p->correct + p->incorrect;
258 } else
259 percent = p->incorrect ? 100 : -1;
260 256
261 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect); 257 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect);
262 if (percent < 0) 258 if (percent < 0)
@@ -267,76 +263,118 @@ static int t_show(struct seq_file *m, void *v)
267 return 0; 263 return 0;
268} 264}
269 265
270static struct seq_operations tracing_likely_seq_ops = { 266static void *annotated_branch_stat_start(void)
271 .start = t_start, 267{
272 .next = t_next, 268 return __start_annotated_branch_profile;
273 .stop = t_stop, 269}
274 .show = t_show, 270
271static void *
272annotated_branch_stat_next(void *v, int idx)
273{
274 struct ftrace_branch_data *p = v;
275
276 ++p;
277
278 if ((void *)p >= (void *)__stop_annotated_branch_profile)
279 return NULL;
280
281 return p;
282}
283
284static int annotated_branch_stat_cmp(void *p1, void *p2)
285{
286 struct ftrace_branch_data *a = p1;
287 struct ftrace_branch_data *b = p2;
288
289 long percent_a, percent_b;
290
291 percent_a = get_incorrect_percent(a);
292 percent_b = get_incorrect_percent(b);
293
294 if (percent_a < percent_b)
295 return -1;
296 if (percent_a > percent_b)
297 return 1;
298 else
299 return 0;
300}
301
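A quick worked example of the sort key computed by get_incorrect_percent(), with invented counters:

```c
/* Invented counters, showing the key used by the comparison above:
 *   correct = 3, incorrect = 1  ->  1 * 100 / (3 + 1) = 25
 *   correct = 0, incorrect = 7  ->  100
 *   correct = 0, incorrect = 0  ->  -1  (sentinel: no data recorded yet)
 */
```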
302static struct tracer_stat annotated_branch_stats = {
303 .name = "branch_annotated",
304 .stat_start = annotated_branch_stat_start,
305 .stat_next = annotated_branch_stat_next,
306 .stat_cmp = annotated_branch_stat_cmp,
307 .stat_headers = annotated_branch_stat_headers,
308 .stat_show = branch_stat_show
275}; 309};
276 310
277static int tracing_branch_open(struct inode *inode, struct file *file) 311__init static int init_annotated_branch_stats(void)
278{ 312{
279 int ret; 313 int ret;
280 314
281 ret = seq_open(file, &tracing_likely_seq_ops); 315 ret = register_stat_tracer(&annotated_branch_stats);
282 if (!ret) { 316 if (!ret) {
283 struct seq_file *m = file->private_data; 317 printk(KERN_WARNING "Warning: could not register "
284 m->private = (void *)inode->i_private; 318 "annotated branches stats\n");
319 return 1;
285 } 320 }
286 321 return 0;
287 return ret;
288} 322}
289 323fs_initcall(init_annotated_branch_stats);
290static const struct file_operations tracing_branch_fops = {
291 .open = tracing_branch_open,
292 .read = seq_read,
293 .llseek = seq_lseek,
294};
295 324
296#ifdef CONFIG_PROFILE_ALL_BRANCHES 325#ifdef CONFIG_PROFILE_ALL_BRANCHES
326
297extern unsigned long __start_branch_profile[]; 327extern unsigned long __start_branch_profile[];
298extern unsigned long __stop_branch_profile[]; 328extern unsigned long __stop_branch_profile[];
299 329
300static const struct ftrace_pointer ftrace_branch_pos = { 330static int all_branch_stat_headers(struct seq_file *m)
301 .start = __start_branch_profile, 331{
302 .stop = __stop_branch_profile, 332 seq_printf(m, " miss hit %% ");
303 .hit = 1, 333 seq_printf(m, " Function "
304}; 334 " File Line\n"
335 " ------- --------- - "
336 " -------- "
337 " ---- ----\n");
338 return 0;
339}
305 340
306#endif /* CONFIG_PROFILE_ALL_BRANCHES */ 341static void *all_branch_stat_start(void)
342{
343 return __start_branch_profile;
344}
307 345
308extern unsigned long __start_annotated_branch_profile[]; 346static void *
309extern unsigned long __stop_annotated_branch_profile[]; 347all_branch_stat_next(void *v, int idx)
348{
349 struct ftrace_branch_data *p = v;
310 350
311static const struct ftrace_pointer ftrace_annotated_branch_pos = { 351 ++p;
312 .start = __start_annotated_branch_profile,
313 .stop = __stop_annotated_branch_profile,
314};
315 352
316static __init int ftrace_branch_init(void) 353 if ((void *)p >= (void *)__stop_branch_profile)
317{ 354 return NULL;
318 struct dentry *d_tracer;
319 struct dentry *entry;
320 355
321 d_tracer = tracing_init_dentry(); 356 return p;
357}
322 358
323 entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer, 359static struct tracer_stat all_branch_stats = {
324 (void *)&ftrace_annotated_branch_pos, 360 .name = "branch_all",
325 &tracing_branch_fops); 361 .stat_start = all_branch_stat_start,
326 if (!entry) 362 .stat_next = all_branch_stat_next,
327 pr_warning("Could not create debugfs " 363 .stat_headers = all_branch_stat_headers,
328 "'profile_annotatet_branch' entry\n"); 364 .stat_show = branch_stat_show
365};
329 366
330#ifdef CONFIG_PROFILE_ALL_BRANCHES 367__init static int all_annotated_branch_stats(void)
331 entry = debugfs_create_file("profile_branch", 0444, d_tracer, 368{
332 (void *)&ftrace_branch_pos, 369 int ret;
333 &tracing_branch_fops);
334 if (!entry)
335 pr_warning("Could not create debugfs"
336 " 'profile_branch' entry\n");
337#endif
338 370
371 ret = register_stat_tracer(&all_branch_stats);
372 if (!ret) {
373 printk(KERN_WARNING "Warning: could not register "
374 "all branches stats\n");
375 return 1;
376 }
339 return 0; 377 return 0;
340} 378}
341 379fs_initcall(all_annotated_branch_stats);
342device_initcall(ftrace_branch_init); 380#endif /* CONFIG_PROFILE_ALL_BRANCHES */
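
The hunks above drop the branch profiler's private seq_file/debugfs code and register the annotated and all-branch tables with the generic tracer_stat framework as "branch_annotated" and "branch_all". A minimal user-space sketch of reading one of the resulting tables follows; it assumes debugfs is mounted at /sys/kernel/debug and that the stat framework publishes each registered tracer under tracing/trace_stat/<name>, neither of which is shown in this hunk.

/* Sketch: dump the "branch_annotated" stat table registered above.
 * The path layout is an assumption about the tracer_stat framework. */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/tracing/trace_stat/branch_annotated";
	char line[512];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), f))	/* header + one row per annotated branch */
		fputs(line, stdout);
	fclose(f);
	return 0;
}
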
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
new file mode 100644
index 000000000000..05b176abfd30
--- /dev/null
+++ b/kernel/trace/trace_clock.c
@@ -0,0 +1,108 @@
1/*
2 * tracing clocks
3 *
4 * Copyright (C) 2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
5 *
6 * Implements 3 trace clock variants, with differing scalability/precision
7 * tradeoffs:
8 *
9 * - local: CPU-local trace clock
10 * - medium: scalable global clock with some jitter
11 * - global: globally monotonic, serialized clock
12 *
13 * Tracer plugins will choose a default from these clocks.
14 */
15#include <linux/spinlock.h>
16#include <linux/hardirq.h>
17#include <linux/module.h>
18#include <linux/percpu.h>
19#include <linux/sched.h>
20#include <linux/ktime.h>
21
22/*
23 * trace_clock_local(): the simplest and least coherent tracing clock.
24 *
25 * Useful for tracing that does not cross to other CPUs nor
26 * does it go through idle events.
27 */
28u64 notrace trace_clock_local(void)
29{
30 unsigned long flags;
31 u64 clock;
32
33 /*
34 * sched_clock() is an architecture implemented, fast, scalable,
35 * lockless clock. It is not guaranteed to be coherent across
36 * CPUs, nor across CPU idle events.
37 */
38 raw_local_irq_save(flags);
39 clock = sched_clock();
40 raw_local_irq_restore(flags);
41
42 return clock;
43}
44
45/*
46 * trace_clock(): 'in-between' trace clock. Not completely serialized,
47 * but not completely incorrect when crossing CPUs either.
48 *
49 * This is based on cpu_clock(), which will allow at most ~1 jiffy of
50 * jitter between CPUs. So it's a pretty scalable clock, but there
51 * can be offsets in the trace data.
52 */
53u64 notrace trace_clock(void)
54{
55 return cpu_clock(raw_smp_processor_id());
56}
57
58
59/*
60 * trace_clock_global(): special globally coherent trace clock
61 *
62 * It has higher overhead than the other trace clocks but is still
63 * an order of magnitude faster than GTOD derived hardware clocks.
64 *
65 * Used by plugins that need globally coherent timestamps.
66 */
67
68static u64 prev_trace_clock_time;
69
70static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp =
71 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
72
73u64 notrace trace_clock_global(void)
74{
75 unsigned long flags;
76 int this_cpu;
77 u64 now;
78
79 raw_local_irq_save(flags);
80
81 this_cpu = raw_smp_processor_id();
82 now = cpu_clock(this_cpu);
83 /*
84 * If in an NMI context then don't risk lockups and return the
85 * cpu_clock() time:
86 */
87 if (unlikely(in_nmi()))
88 goto out;
89
90 __raw_spin_lock(&trace_clock_lock);
91
92 /*
93 * TODO: if this happens often then maybe we should reset
94 * my_scd->clock to prev_trace_clock_time+1, to make sure
95 * we start ticking with the local clock from now on?
96 */
97 if ((s64)(now - prev_trace_clock_time) < 0)
98 now = prev_trace_clock_time + 1;
99
100 prev_trace_clock_time = now;
101
102 __raw_spin_unlock(&trace_clock_lock);
103
104 out:
105 raw_local_irq_restore(flags);
106
107 return now;
108}
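
trace_clock_global() above serializes readers with a raw spinlock and clamps any cpu_clock() value that would move backwards to one tick past the last timestamp handed out. The sketch below is a user-space analogue of that clamp, built on clock_gettime() and a pthread mutex purely to illustrate the pattern; it is not the kernel implementation.

/* User-space analogue of the trace_clock_global() monotonic clamp:
 * serialize readers and never return a value behind the previous one. */
#include <pthread.h>
#include <stdint.h>
#include <time.h>

static pthread_mutex_t clock_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t prev_time;

static uint64_t global_monotonic_ns(void)
{
	struct timespec ts;
	uint64_t now;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	now = (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;

	pthread_mutex_lock(&clock_lock);
	if ((int64_t)(now - prev_time) < 0)	/* went backwards: clamp forward */
		now = prev_time + 1;
	prev_time = now;
	pthread_mutex_unlock(&clock_lock);

	return now;
}
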
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
new file mode 100644
index 000000000000..019915063fe6
--- /dev/null
+++ b/kernel/trace/trace_event_types.h
@@ -0,0 +1,175 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM ftrace
3
4/*
5 * We cheat and use the proto type field as the ID
6 * and args as the entry type (minus 'struct')
7 */
8TRACE_EVENT_FORMAT(function, TRACE_FN, ftrace_entry, ignore,
9 TRACE_STRUCT(
10 TRACE_FIELD(unsigned long, ip, ip)
11 TRACE_FIELD(unsigned long, parent_ip, parent_ip)
12 ),
13 TP_RAW_FMT(" %lx <-- %lx")
14);
15
16TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT,
17 ftrace_graph_ent_entry, ignore,
18 TRACE_STRUCT(
19 TRACE_FIELD(unsigned long, graph_ent.func, func)
20 TRACE_FIELD(int, graph_ent.depth, depth)
21 ),
22 TP_RAW_FMT("--> %lx (%d)")
23);
24
25TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
26 ftrace_graph_ret_entry, ignore,
27 TRACE_STRUCT(
28 TRACE_FIELD(unsigned long, ret.func, func)
29 TRACE_FIELD(int, ret.depth, depth)
30 ),
31 TP_RAW_FMT("<-- %lx (%d)")
32);
33
34TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore,
35 TRACE_STRUCT(
36 TRACE_FIELD(unsigned int, prev_pid, prev_pid)
37 TRACE_FIELD(unsigned char, prev_prio, prev_prio)
38 TRACE_FIELD(unsigned char, prev_state, prev_state)
39 TRACE_FIELD(unsigned int, next_pid, next_pid)
40 TRACE_FIELD(unsigned char, next_prio, next_prio)
41 TRACE_FIELD(unsigned char, next_state, next_state)
42 TRACE_FIELD(unsigned int, next_cpu, next_cpu)
43 ),
44 TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]")
45);
46
47TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore,
48 TRACE_STRUCT(
49 TRACE_FIELD(unsigned int, prev_pid, prev_pid)
50 TRACE_FIELD(unsigned char, prev_prio, prev_prio)
51 TRACE_FIELD(unsigned char, prev_state, prev_state)
52 TRACE_FIELD(unsigned int, next_pid, next_pid)
53 TRACE_FIELD(unsigned char, next_prio, next_prio)
54 TRACE_FIELD(unsigned char, next_state, next_state)
55 TRACE_FIELD(unsigned int, next_cpu, next_cpu)
56 ),
57 TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]")
58);
59
60TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore,
61 TRACE_STRUCT(
62 TRACE_FIELD(unsigned long, arg1, arg1)
63 TRACE_FIELD(unsigned long, arg2, arg2)
64 TRACE_FIELD(unsigned long, arg3, arg3)
65 ),
66 TP_RAW_FMT("(%08lx) (%08lx) (%08lx)")
67);
68
69/*
70 * Stack-trace entry:
71 */
72
73/* #define FTRACE_STACK_ENTRIES 8 */
74
75TRACE_EVENT_FORMAT(kernel_stack, TRACE_STACK, stack_entry, ignore,
76 TRACE_STRUCT(
77 TRACE_FIELD(unsigned long, caller[0], stack0)
78 TRACE_FIELD(unsigned long, caller[1], stack1)
79 TRACE_FIELD(unsigned long, caller[2], stack2)
80 TRACE_FIELD(unsigned long, caller[3], stack3)
81 TRACE_FIELD(unsigned long, caller[4], stack4)
82 TRACE_FIELD(unsigned long, caller[5], stack5)
83 TRACE_FIELD(unsigned long, caller[6], stack6)
84 TRACE_FIELD(unsigned long, caller[7], stack7)
85 ),
86 TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
87 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n")
88);
89
90TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore,
91 TRACE_STRUCT(
92 TRACE_FIELD(unsigned long, caller[0], stack0)
93 TRACE_FIELD(unsigned long, caller[1], stack1)
94 TRACE_FIELD(unsigned long, caller[2], stack2)
95 TRACE_FIELD(unsigned long, caller[3], stack3)
96 TRACE_FIELD(unsigned long, caller[4], stack4)
97 TRACE_FIELD(unsigned long, caller[5], stack5)
98 TRACE_FIELD(unsigned long, caller[6], stack6)
99 TRACE_FIELD(unsigned long, caller[7], stack7)
100 ),
101 TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
102 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n")
103);
104
105TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore,
106 TRACE_STRUCT(
107 TRACE_FIELD(unsigned long, ip, ip)
108 TRACE_FIELD(unsigned int, depth, depth)
109 TRACE_FIELD(char *, fmt, fmt)
110 TRACE_FIELD_ZERO_CHAR(buf)
111 ),
112 TP_RAW_FMT("%08lx (%d) fmt:%p %s")
113);
114
115TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore,
116 TRACE_STRUCT(
117 TRACE_FIELD(unsigned long, ip, ip)
118 TRACE_FIELD(unsigned int, depth, depth)
119 TRACE_FIELD_ZERO_CHAR(buf)
120 ),
121 TP_RAW_FMT("%08lx (%d) fmt:%p %s")
122);
123
124TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore,
125 TRACE_STRUCT(
126 TRACE_FIELD(unsigned int, line, line)
127 TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func)
128 TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file)
129 TRACE_FIELD(char, correct, correct)
130 ),
131 TP_RAW_FMT("%u:%s:%s (%u)")
132);
133
134TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore,
135 TRACE_STRUCT(
136 TRACE_FIELD(u64, from, from)
137 TRACE_FIELD(u64, to, to)
138 ),
139 TP_RAW_FMT("from: %llx to: %llx")
140);
141
142TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore,
143 TRACE_STRUCT(
144 TRACE_FIELD(ktime_t, state_data.stamp, stamp)
145 TRACE_FIELD(ktime_t, state_data.end, end)
146 TRACE_FIELD(int, state_data.type, type)
147 TRACE_FIELD(int, state_data.state, state)
148 ),
149 TP_RAW_FMT("%llx->%llx type:%u state:%u")
150);
151
152TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore,
153 TRACE_STRUCT(
154 TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id)
155 TRACE_FIELD(unsigned long, call_site, call_site)
156 TRACE_FIELD(const void *, ptr, ptr)
157 TRACE_FIELD(size_t, bytes_req, bytes_req)
158 TRACE_FIELD(size_t, bytes_alloc, bytes_alloc)
159 TRACE_FIELD(gfp_t, gfp_flags, gfp_flags)
160 TRACE_FIELD(int, node, node)
161 ),
162 TP_RAW_FMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu"
163 " flags:%x node:%d")
164);
165
166TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore,
167 TRACE_STRUCT(
168 TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id)
169 TRACE_FIELD(unsigned long, call_site, call_site)
170 TRACE_FIELD(const void *, ptr, ptr)
171 ),
172 TP_RAW_FMT("type:%u call_site:%lx ptr:%p")
173);
174
175#undef TRACE_SYSTEM
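
Each TRACE_EVENT_FORMAT() entry above describes a record as the common trace_entry header followed by the listed TRACE_FIELDs. As an illustration only, the layout implied by the "function" entry is sketched below; the header fields mirror what trace_write_header() prints later in this series, the struct names are invented here, and actual offsets and padding depend on the compiler.

/* Illustrative layout only -- not copied from kernel headers. */
struct trace_entry_hdr {		/* common header, see trace_write_header() */
	unsigned char	type;
	unsigned char	flags;
	unsigned char	preempt_count;
	int		pid;
	int		tgid;
};

struct ftrace_entry_sketch {		/* TRACE_EVENT_FORMAT(function, ...) */
	struct trace_entry_hdr	ent;
	unsigned long		ip;		/* TRACE_FIELD(unsigned long, ip, ip) */
	unsigned long		parent_ip;	/* TRACE_FIELD(unsigned long, parent_ip, parent_ip) */
};
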
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
new file mode 100644
index 000000000000..c88227b3b9db
--- /dev/null
+++ b/kernel/trace/trace_events.c
@@ -0,0 +1,604 @@
1/*
2 * event tracer
3 *
4 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5 *
6 * - Added format output of fields of the trace point.
7 * This was based on work by Tom Zanussi <tzanussi@gmail.com>.
8 *
9 */
10
11#include <linux/debugfs.h>
12#include <linux/uaccess.h>
13#include <linux/module.h>
14#include <linux/ctype.h>
15
16#include "trace_output.h"
17
18#define TRACE_SYSTEM "TRACE_SYSTEM"
19
20static DEFINE_MUTEX(event_mutex);
21
22#define events_for_each(event) \
23 for (event = __start_ftrace_events; \
24 (unsigned long)event < (unsigned long)__stop_ftrace_events; \
25 event++)
26
27static void ftrace_clear_events(void)
28{
29 struct ftrace_event_call *call = (void *)__start_ftrace_events;
30
31
32 while ((unsigned long)call < (unsigned long)__stop_ftrace_events) {
33
34 if (call->enabled) {
35 call->enabled = 0;
36 call->unregfunc();
37 }
38 call++;
39 }
40}
41
42static void ftrace_event_enable_disable(struct ftrace_event_call *call,
43 int enable)
44{
45
46 switch (enable) {
47 case 0:
48 if (call->enabled) {
49 call->enabled = 0;
50 call->unregfunc();
51 }
52 break;
53 case 1:
54 if (!call->enabled) {
55 call->enabled = 1;
56 call->regfunc();
57 }
58 break;
59 }
60}
61
62static int ftrace_set_clr_event(char *buf, int set)
63{
64 struct ftrace_event_call *call = __start_ftrace_events;
65 char *event = NULL, *sub = NULL, *match;
66 int ret = -EINVAL;
67
68 /*
69 * The buf format can be <subsystem>:<event-name>
70 * *:<event-name> means any event by that name.
71 * :<event-name> is the same.
72 *
73 * <subsystem>:* means all events in that subsystem
74 * <subsystem>: means the same.
75 *
76 * <name> (no ':') means all events in a subsystem with
77 * the name <name> or any event that matches <name>
78 */
79
80 match = strsep(&buf, ":");
81 if (buf) {
82 sub = match;
83 event = buf;
84 match = NULL;
85
86 if (!strlen(sub) || strcmp(sub, "*") == 0)
87 sub = NULL;
88 if (!strlen(event) || strcmp(event, "*") == 0)
89 event = NULL;
90 }
91
92 mutex_lock(&event_mutex);
93 events_for_each(call) {
94
95 if (!call->name || !call->regfunc)
96 continue;
97
98 if (match &&
99 strcmp(match, call->name) != 0 &&
100 strcmp(match, call->system) != 0)
101 continue;
102
103 if (sub && strcmp(sub, call->system) != 0)
104 continue;
105
106 if (event && strcmp(event, call->name) != 0)
107 continue;
108
109 ftrace_event_enable_disable(call, set);
110
111 ret = 0;
112 }
113 mutex_unlock(&event_mutex);
114
115 return ret;
116}
117
118/* 128 should be much more than enough */
119#define EVENT_BUF_SIZE 127
120
121static ssize_t
122ftrace_event_write(struct file *file, const char __user *ubuf,
123 size_t cnt, loff_t *ppos)
124{
125 size_t read = 0;
126 int i, set = 1;
127 ssize_t ret;
128 char *buf;
129 char ch;
130
131 if (!cnt || cnt < 0)
132 return 0;
133
134 ret = tracing_update_buffers();
135 if (ret < 0)
136 return ret;
137
138 ret = get_user(ch, ubuf++);
139 if (ret)
140 return ret;
141 read++;
142 cnt--;
143
144 /* skip white space */
145 while (cnt && isspace(ch)) {
146 ret = get_user(ch, ubuf++);
147 if (ret)
148 return ret;
149 read++;
150 cnt--;
151 }
152
153 /* Only white space found? */
154 if (isspace(ch)) {
155 file->f_pos += read;
156 ret = read;
157 return ret;
158 }
159
160 buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL);
161 if (!buf)
162 return -ENOMEM;
163
164 if (cnt > EVENT_BUF_SIZE)
165 cnt = EVENT_BUF_SIZE;
166
167 i = 0;
168 while (cnt && !isspace(ch)) {
169 if (!i && ch == '!')
170 set = 0;
171 else
172 buf[i++] = ch;
173
174 ret = get_user(ch, ubuf++);
175 if (ret)
176 goto out_free;
177 read++;
178 cnt--;
179 }
180 buf[i] = 0;
181
182 file->f_pos += read;
183
184 ret = ftrace_set_clr_event(buf, set);
185 if (ret)
186 goto out_free;
187
188 ret = read;
189
190 out_free:
191 kfree(buf);
192
193 return ret;
194}
195
196static void *
197t_next(struct seq_file *m, void *v, loff_t *pos)
198{
199 struct ftrace_event_call *call = m->private;
200 struct ftrace_event_call *next = call;
201
202 (*pos)++;
203
204 for (;;) {
205 if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
206 return NULL;
207
208 /*
209 * The ftrace subsystem is for showing formats only.
210 * They cannot be enabled or disabled via the event files.
211 */
212 if (call->regfunc)
213 break;
214
215 call++;
216 next = call;
217 }
218
219 m->private = ++next;
220
221 return call;
222}
223
224static void *t_start(struct seq_file *m, loff_t *pos)
225{
226 return t_next(m, NULL, pos);
227}
228
229static void *
230s_next(struct seq_file *m, void *v, loff_t *pos)
231{
232 struct ftrace_event_call *call = m->private;
233 struct ftrace_event_call *next;
234
235 (*pos)++;
236
237 retry:
238 if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
239 return NULL;
240
241 if (!call->enabled) {
242 call++;
243 goto retry;
244 }
245
246 next = call;
247 m->private = ++next;
248
249 return call;
250}
251
252static void *s_start(struct seq_file *m, loff_t *pos)
253{
254 return s_next(m, NULL, pos);
255}
256
257static int t_show(struct seq_file *m, void *v)
258{
259 struct ftrace_event_call *call = v;
260
261 if (strcmp(call->system, TRACE_SYSTEM) != 0)
262 seq_printf(m, "%s:", call->system);
263 seq_printf(m, "%s\n", call->name);
264
265 return 0;
266}
267
268static void t_stop(struct seq_file *m, void *p)
269{
270}
271
272static int
273ftrace_event_seq_open(struct inode *inode, struct file *file)
274{
275 int ret;
276 const struct seq_operations *seq_ops;
277
278 if ((file->f_mode & FMODE_WRITE) &&
279 !(file->f_flags & O_APPEND))
280 ftrace_clear_events();
281
282 seq_ops = inode->i_private;
283 ret = seq_open(file, seq_ops);
284 if (!ret) {
285 struct seq_file *m = file->private_data;
286
287 m->private = __start_ftrace_events;
288 }
289 return ret;
290}
291
292static ssize_t
293event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
294 loff_t *ppos)
295{
296 struct ftrace_event_call *call = filp->private_data;
297 char *buf;
298
299 if (call->enabled)
300 buf = "1\n";
301 else
302 buf = "0\n";
303
304 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
305}
306
307static ssize_t
308event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
309 loff_t *ppos)
310{
311 struct ftrace_event_call *call = filp->private_data;
312 char buf[64];
313 unsigned long val;
314 int ret;
315
316 if (cnt >= sizeof(buf))
317 return -EINVAL;
318
319 if (copy_from_user(&buf, ubuf, cnt))
320 return -EFAULT;
321
322 buf[cnt] = 0;
323
324 ret = strict_strtoul(buf, 10, &val);
325 if (ret < 0)
326 return ret;
327
328 ret = tracing_update_buffers();
329 if (ret < 0)
330 return ret;
331
332 switch (val) {
333 case 0:
334 case 1:
335 mutex_lock(&event_mutex);
336 ftrace_event_enable_disable(call, val);
337 mutex_unlock(&event_mutex);
338 break;
339
340 default:
341 return -EINVAL;
342 }
343
344 *ppos += cnt;
345
346 return cnt;
347}
348
349#undef FIELD
350#define FIELD(type, name) \
351 #type, #name, offsetof(typeof(field), name), sizeof(field.name)
352
353static int trace_write_header(struct trace_seq *s)
354{
355 struct trace_entry field;
356
357 /* struct trace_entry */
358 return trace_seq_printf(s,
359 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
360 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
361 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
362 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
363 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
364 "\n",
365 FIELD(unsigned char, type),
366 FIELD(unsigned char, flags),
367 FIELD(unsigned char, preempt_count),
368 FIELD(int, pid),
369 FIELD(int, tgid));
370}
371
372static ssize_t
373event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
374 loff_t *ppos)
375{
376 struct ftrace_event_call *call = filp->private_data;
377 struct trace_seq *s;
378 char *buf;
379 int r;
380
381 if (*ppos)
382 return 0;
383
384 s = kmalloc(sizeof(*s), GFP_KERNEL);
385 if (!s)
386 return -ENOMEM;
387
388 trace_seq_init(s);
389
390 /* If any of the first writes fail, so will the show_format. */
391
392 trace_seq_printf(s, "name: %s\n", call->name);
393 trace_seq_printf(s, "ID: %d\n", call->id);
394 trace_seq_printf(s, "format:\n");
395 trace_write_header(s);
396
397 r = call->show_format(s);
398 if (!r) {
399 /*
400 * ug! The format output is bigger than a PAGE!!
401 */
402 buf = "FORMAT TOO BIG\n";
403 r = simple_read_from_buffer(ubuf, cnt, ppos,
404 buf, strlen(buf));
405 goto out;
406 }
407
408 r = simple_read_from_buffer(ubuf, cnt, ppos,
409 s->buffer, s->len);
410 out:
411 kfree(s);
412 return r;
413}
414
415static const struct seq_operations show_event_seq_ops = {
416 .start = t_start,
417 .next = t_next,
418 .show = t_show,
419 .stop = t_stop,
420};
421
422static const struct seq_operations show_set_event_seq_ops = {
423 .start = s_start,
424 .next = s_next,
425 .show = t_show,
426 .stop = t_stop,
427};
428
429static const struct file_operations ftrace_avail_fops = {
430 .open = ftrace_event_seq_open,
431 .read = seq_read,
432 .llseek = seq_lseek,
433 .release = seq_release,
434};
435
436static const struct file_operations ftrace_set_event_fops = {
437 .open = ftrace_event_seq_open,
438 .read = seq_read,
439 .write = ftrace_event_write,
440 .llseek = seq_lseek,
441 .release = seq_release,
442};
443
444static const struct file_operations ftrace_enable_fops = {
445 .open = tracing_open_generic,
446 .read = event_enable_read,
447 .write = event_enable_write,
448};
449
450static const struct file_operations ftrace_event_format_fops = {
451 .open = tracing_open_generic,
452 .read = event_format_read,
453};
454
455static struct dentry *event_trace_events_dir(void)
456{
457 static struct dentry *d_tracer;
458 static struct dentry *d_events;
459
460 if (d_events)
461 return d_events;
462
463 d_tracer = tracing_init_dentry();
464 if (!d_tracer)
465 return NULL;
466
467 d_events = debugfs_create_dir("events", d_tracer);
468 if (!d_events)
469 pr_warning("Could not create debugfs "
470 "'events' directory\n");
471
472 return d_events;
473}
474
475struct event_subsystem {
476 struct list_head list;
477 const char *name;
478 struct dentry *entry;
479};
480
481static LIST_HEAD(event_subsystems);
482
483static struct dentry *
484event_subsystem_dir(const char *name, struct dentry *d_events)
485{
486 struct event_subsystem *system;
487
488 /* First see if we already created this dir */
489 list_for_each_entry(system, &event_subsystems, list) {
490 if (strcmp(system->name, name) == 0)
491 return system->entry;
492 }
493
494 /* need to create new entry */
495 system = kmalloc(sizeof(*system), GFP_KERNEL);
496 if (!system) {
497 pr_warning("No memory to create event subsystem %s\n",
498 name);
499 return d_events;
500 }
501
502 system->entry = debugfs_create_dir(name, d_events);
503 if (!system->entry) {
504 pr_warning("Could not create event subsystem %s\n",
505 name);
506 kfree(system);
507 return d_events;
508 }
509
510 system->name = name;
511 list_add(&system->list, &event_subsystems);
512
513 return system->entry;
514}
515
516static int
517event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
518{
519 struct dentry *entry;
520 int ret;
521
522 /*
523 * If the trace point header did not define TRACE_SYSTEM
524 * then the system would be called "TRACE_SYSTEM".
525 */
526 if (strcmp(call->system, "TRACE_SYSTEM") != 0)
527 d_events = event_subsystem_dir(call->system, d_events);
528
529 if (call->raw_init) {
530 ret = call->raw_init();
531 if (ret < 0) {
532 pr_warning("Could not initialize trace point"
533 " events/%s\n", call->name);
534 return ret;
535 }
536 }
537
538 call->dir = debugfs_create_dir(call->name, d_events);
539 if (!call->dir) {
540 pr_warning("Could not create debugfs "
541 "'%s' directory\n", call->name);
542 return -1;
543 }
544
545 if (call->regfunc) {
546 entry = debugfs_create_file("enable", 0644, call->dir, call,
547 &ftrace_enable_fops);
548 if (!entry)
549 pr_warning("Could not create debugfs "
550 "'%s/enable' entry\n", call->name);
551 }
552
553 /* A trace may not want to export its format */
554 if (!call->show_format)
555 return 0;
556
557 entry = debugfs_create_file("format", 0444, call->dir, call,
558 &ftrace_event_format_fops);
559 if (!entry)
560 pr_warning("Could not create debugfs "
561 "'%s/format' entry\n", call->name);
562
563 return 0;
564}
565
566static __init int event_trace_init(void)
567{
568 struct ftrace_event_call *call = __start_ftrace_events;
569 struct dentry *d_tracer;
570 struct dentry *entry;
571 struct dentry *d_events;
572
573 d_tracer = tracing_init_dentry();
574 if (!d_tracer)
575 return 0;
576
577 entry = debugfs_create_file("available_events", 0444, d_tracer,
578 (void *)&show_event_seq_ops,
579 &ftrace_avail_fops);
580 if (!entry)
581 pr_warning("Could not create debugfs "
582 "'available_events' entry\n");
583
584 entry = debugfs_create_file("set_event", 0644, d_tracer,
585 (void *)&show_set_event_seq_ops,
586 &ftrace_set_event_fops);
587 if (!entry)
588 pr_warning("Could not create debugfs "
589 "'set_event' entry\n");
590
591 d_events = event_trace_events_dir();
592 if (!d_events)
593 return 0;
594
595 events_for_each(call) {
596 /* The linker may leave blanks */
597 if (!call->name)
598 continue;
599 event_create_dir(call, d_events);
600 }
601
602 return 0;
603}
604fs_initcall(event_trace_init);
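
ftrace_set_clr_event() above accepts "<subsystem>:<event>", "<subsystem>:", ":<event>" or a bare name, ftrace_event_write() treats a leading '!' as "disable", and event_create_dir() gives every registered event a 0/1 "enable" file. A small user-space sketch driving both interfaces follows; the debugfs mount point and the sched:sched_switch event name are assumptions about the running kernel, not guaranteed by this patch.

/* Sketch: enable one event via set_event, then turn it off again through
 * its per-event "enable" file.  Paths and the event name are assumptions. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define TRACING "/sys/kernel/debug/tracing/"

static int write_str(const char *path, const char *s)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, s, strlen(s)) < 0) {
		perror(path);
		if (fd >= 0)
			close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

int main(void)
{
	/* "<subsystem>:<event>" form parsed by ftrace_set_clr_event() */
	write_str(TRACING "set_event", "sched:sched_switch");

	/* ... read TRACING "trace" here to collect the events ... */

	/* per-event toggle created by event_create_dir() */
	write_str(TRACING "events/sched/sched_switch/enable", "0");
	return 0;
}
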
diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h
new file mode 100644
index 000000000000..38985f9b379c
--- /dev/null
+++ b/kernel/trace/trace_events_stage_1.h
@@ -0,0 +1,39 @@
1/*
2 * Stage 1 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * struct ftrace_raw_<call> {
7 * struct trace_entry ent;
8 * <type> <item>;
9 * <type2> <item2>[<len>];
10 * [...]
11 * };
12 *
13 * The <type> <item> is created by the __field(type, item) macro or
14 * the __array(type2, item2, len) macro.
15 * We simply do "type item;", and that will create the fields
16 * in the structure.
17 */
18
19#undef TRACE_FORMAT
20#define TRACE_FORMAT(call, proto, args, fmt)
21
22#undef __array
23#define __array(type, item, len) type item[len];
24
25#undef __field
26#define __field(type, item) type item;
27
28#undef TP_STRUCT__entry
29#define TP_STRUCT__entry(args...) args
30
31#undef TRACE_EVENT
32#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
33 struct ftrace_raw_##name { \
34 struct trace_entry ent; \
35 tstruct \
36 }; \
37 static struct ftrace_event_call event_##name
38
39#include <trace/trace_event_types.h>
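
To make stage 1 concrete: for a hypothetical TRACE_EVENT using __field() and __array(), the #defines above reduce the definition to a plain struct plus a forward declaration. The event name and fields below are invented for illustration; TP_PROTO()/TP_ARGS() come from the wider tracepoint headers, and struct trace_entry / struct ftrace_event_call come from kernel headers rather than this sketch.

/* Hypothetical input, fields invented for illustration:
 *
 *	TRACE_EVENT(foo_bar,
 *		TP_PROTO(int req, const char *name),
 *		TP_ARGS(req, name),
 *		TP_STRUCT__entry(
 *			__field(int, req)
 *			__array(char, name, 16)
 *		),
 *		TP_fast_assign(...),
 *		TP_printk(...));
 *
 * What stage 1 emits for it, per the macros above: */
struct ftrace_raw_foo_bar {
	struct trace_entry	ent;
	int			req;
	char			name[16];
};
static struct ftrace_event_call event_foo_bar;
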
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
new file mode 100644
index 000000000000..5117c43f5c67
--- /dev/null
+++ b/kernel/trace/trace_events_stage_2.h
@@ -0,0 +1,131 @@
1/*
2 * Stage 2 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * enum print_line_t
7 * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
8 * {
9 * struct trace_seq *s = &iter->seq;
10 * struct ftrace_raw_<call> *field; <-- defined in stage 1
11 * struct trace_entry *entry;
12 * int ret;
13 *
14 * entry = iter->ent;
15 *
16 * if (entry->type != event_<call>.id) {
17 * WARN_ON_ONCE(1);
18 * return TRACE_TYPE_UNHANDLED;
19 * }
20 *
21 * field = (typeof(field))entry;
22 *
23 * ret = trace_seq_printf(s, <TP_printk> "\n");
24 * if (!ret)
25 * return TRACE_TYPE_PARTIAL_LINE;
26 *
27 * return TRACE_TYPE_HANDLED;
28 * }
29 *
30 * This is the method used to print the raw event to the trace
31 * output format. Note, this is not needed if the data is read
32 * in binary.
33 */
34
35#undef __entry
36#define __entry field
37
38#undef TP_printk
39#define TP_printk(fmt, args...) fmt "\n", args
40
41#undef TRACE_EVENT
42#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
43enum print_line_t \
44ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
45{ \
46 struct trace_seq *s = &iter->seq; \
47 struct ftrace_raw_##call *field; \
48 struct trace_entry *entry; \
49 int ret; \
50 \
51 entry = iter->ent; \
52 \
53 if (entry->type != event_##call.id) { \
54 WARN_ON_ONCE(1); \
55 return TRACE_TYPE_UNHANDLED; \
56 } \
57 \
58 field = (typeof(field))entry; \
59 \
60 ret = trace_seq_printf(s, #call ": " print); \
61 if (!ret) \
62 return TRACE_TYPE_PARTIAL_LINE; \
63 \
64 return TRACE_TYPE_HANDLED; \
65}
66
67#include <trace/trace_event_types.h>
68
69/*
70 * Setup the showing format of trace point.
71 *
72 * int
73 * ftrace_format_##call(struct trace_seq *s)
74 * {
75 * struct ftrace_raw_##call field;
76 * int ret;
77 *
78 * ret = trace_seq_printf(s, #type " " #item ";"
79 * " offset:%u; size:%u;\n",
80 * offsetof(struct ftrace_raw_##call, item),
81 * sizeof(field.type));
82 *
83 * }
84 */
85
86#undef TP_STRUCT__entry
87#define TP_STRUCT__entry(args...) args
88
89#undef __field
90#define __field(type, item) \
91 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
92 "offset:%u;\tsize:%u;\n", \
93 (unsigned int)offsetof(typeof(field), item), \
94 (unsigned int)sizeof(field.item)); \
95 if (!ret) \
96 return 0;
97
98#undef __array
99#define __array(type, item, len) \
100 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
101 "offset:%u;\tsize:%u;\n", \
102 (unsigned int)offsetof(typeof(field), item), \
103 (unsigned int)sizeof(field.item)); \
104 if (!ret) \
105 return 0;
106
107#undef __entry
108#define __entry "REC"
109
110#undef TP_printk
111#define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args
112
113#undef TP_fast_assign
114#define TP_fast_assign(args...) args
115
116#undef TRACE_EVENT
117#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
118static int \
119ftrace_format_##call(struct trace_seq *s) \
120{ \
121 struct ftrace_raw_##call field; \
122 int ret; \
123 \
124 tstruct; \
125 \
126 trace_seq_printf(s, "\nprint fmt: " print); \
127 \
128 return ret; \
129}
130
131#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
new file mode 100644
index 000000000000..ae2e323df0c7
--- /dev/null
+++ b/kernel/trace/trace_events_stage_3.h
@@ -0,0 +1,217 @@
1/*
2 * Stage 3 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * static void ftrace_event_<call>(proto)
7 * {
8 * event_trace_printk(_RET_IP_, "<call>: " <fmt>);
9 * }
10 *
11 * static int ftrace_reg_event_<call>(void)
12 * {
13 * int ret;
14 *
15 * ret = register_trace_<call>(ftrace_event_<call>);
16 * if (ret)
17 * pr_info("event trace: Could not activate trace point "
18 * "probe to <call>");
19 * return ret;
20 * }
21 *
22 * static void ftrace_unreg_event_<call>(void)
23 * {
24 * unregister_trace_<call>(ftrace_event_<call>);
25 * }
26 *
27 * For those macros defined with TRACE_FORMAT:
28 *
29 * static struct ftrace_event_call __used
30 * __attribute__((__aligned__(4)))
31 * __attribute__((section("_ftrace_events"))) event_<call> = {
32 * .name = "<call>",
33 * .regfunc = ftrace_reg_event_<call>,
34 * .unregfunc = ftrace_unreg_event_<call>,
35 * }
36 *
37 *
38 * For those macros defined with TRACE_EVENT:
39 *
40 * static struct ftrace_event_call event_<call>;
41 *
42 * static void ftrace_raw_event_<call>(proto)
43 * {
44 * struct ring_buffer_event *event;
45 * struct ftrace_raw_<call> *entry; <-- defined in stage 1
46 * unsigned long irq_flags;
47 * int pc;
48 *
49 * local_save_flags(irq_flags);
50 * pc = preempt_count();
51 *
52 * event = trace_current_buffer_lock_reserve(event_<call>.id,
53 * sizeof(struct ftrace_raw_<call>),
54 * irq_flags, pc);
55 * if (!event)
56 * return;
57 * entry = ring_buffer_event_data(event);
58 *
59 * <assign>; <-- Here we assign the entries by the __field and
60 * __array macros.
61 *
62 * trace_current_buffer_unlock_commit(event, irq_flags, pc);
63 * }
64 *
65 * static int ftrace_raw_reg_event_<call>(void)
66 * {
67 * int ret;
68 *
69 * ret = register_trace_<call>(ftrace_raw_event_<call>);
70 * if (ret)
71 * pr_info("event trace: Could not activate trace point "
72 * "probe to <call>");
73 * return ret;
74 * }
75 *
76 * static void ftrace_unreg_event_<call>(void)
77 * {
78 * unregister_trace_<call>(ftrace_raw_event_<call>);
79 * }
80 *
81 * static struct trace_event ftrace_event_type_<call> = {
82 * .trace = ftrace_raw_output_<call>, <-- stage 2
83 * };
84 *
85 * static int ftrace_raw_init_event_<call>(void)
86 * {
87 * int id;
88 *
89 * id = register_ftrace_event(&ftrace_event_type_<call>);
90 * if (!id)
91 * return -ENODEV;
92 * event_<call>.id = id;
93 * return 0;
94 * }
95 *
96 * static struct ftrace_event_call __used
97 * __attribute__((__aligned__(4)))
98 * __attribute__((section("_ftrace_events"))) event_<call> = {
99 * .name = "<call>",
100 * .system = "<system>",
101 * .raw_init = ftrace_raw_init_event_<call>,
102 * .regfunc = ftrace_reg_event_<call>,
103 * .unregfunc = ftrace_unreg_event_<call>,
104 * .show_format = ftrace_format_<call>,
105 * }
106 *
107 */
108
109#undef TP_FMT
110#define TP_FMT(fmt, args...) fmt "\n", ##args
111
112#define _TRACE_FORMAT(call, proto, args, fmt) \
113static void ftrace_event_##call(proto) \
114{ \
115 event_trace_printk(_RET_IP_, #call ": " fmt); \
116} \
117 \
118static int ftrace_reg_event_##call(void) \
119{ \
120 int ret; \
121 \
122 ret = register_trace_##call(ftrace_event_##call); \
123 if (ret) \
124 pr_info("event trace: Could not activate trace point " \
125 "probe to " #call "\n"); \
126 return ret; \
127} \
128 \
129static void ftrace_unreg_event_##call(void) \
130{ \
131 unregister_trace_##call(ftrace_event_##call); \
132} \
133
134
135#undef TRACE_FORMAT
136#define TRACE_FORMAT(call, proto, args, fmt) \
137_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \
138static struct ftrace_event_call __used \
139__attribute__((__aligned__(4))) \
140__attribute__((section("_ftrace_events"))) event_##call = { \
141 .name = #call, \
142 .system = __stringify(TRACE_SYSTEM), \
143 .regfunc = ftrace_reg_event_##call, \
144 .unregfunc = ftrace_unreg_event_##call, \
145}
146
147#undef __entry
148#define __entry entry
149
150#undef TRACE_EVENT
151#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
152 \
153static struct ftrace_event_call event_##call; \
154 \
155static void ftrace_raw_event_##call(proto) \
156{ \
157 struct ring_buffer_event *event; \
158 struct ftrace_raw_##call *entry; \
159 unsigned long irq_flags; \
160 int pc; \
161 \
162 local_save_flags(irq_flags); \
163 pc = preempt_count(); \
164 \
165 event = trace_current_buffer_lock_reserve(event_##call.id, \
166 sizeof(struct ftrace_raw_##call), \
167 irq_flags, pc); \
168 if (!event) \
169 return; \
170 entry = ring_buffer_event_data(event); \
171 \
172 assign; \
173 \
174 trace_current_buffer_unlock_commit(event, irq_flags, pc); \
175} \
176 \
177static int ftrace_raw_reg_event_##call(void) \
178{ \
179 int ret; \
180 \
181 ret = register_trace_##call(ftrace_raw_event_##call); \
182 if (ret) \
183 pr_info("event trace: Could not activate trace point " \
184 "probe to " #call "\n"); \
185 return ret; \
186} \
187 \
188static void ftrace_raw_unreg_event_##call(void) \
189{ \
190 unregister_trace_##call(ftrace_raw_event_##call); \
191} \
192 \
193static struct trace_event ftrace_event_type_##call = { \
194 .trace = ftrace_raw_output_##call, \
195}; \
196 \
197static int ftrace_raw_init_event_##call(void) \
198{ \
199 int id; \
200 \
201 id = register_ftrace_event(&ftrace_event_type_##call); \
202 if (!id) \
203 return -ENODEV; \
204 event_##call.id = id; \
205 return 0; \
206} \
207 \
208static struct ftrace_event_call __used \
209__attribute__((__aligned__(4))) \
210__attribute__((section("_ftrace_events"))) event_##call = { \
211 .name = #call, \
212 .system = __stringify(TRACE_SYSTEM), \
213 .raw_init = ftrace_raw_init_event_##call, \
214 .regfunc = ftrace_raw_reg_event_##call, \
215 .unregfunc = ftrace_raw_unreg_event_##call, \
216 .show_format = ftrace_format_##call, \
217}
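
Putting the three stages together: the machinery above consumes ordinary TRACE_EVENT() definitions from tracepoint headers. A hypothetical definition completing the foo_bar example from the stage-1 sketch is shown below; the whole event is invented for illustration, and TP_PROTO()/TP_ARGS() belong to the wider tracepoint infrastructure rather than this patch.

/* Hypothetical tracepoint definition, for illustration only. */
TRACE_EVENT(foo_bar,

	TP_PROTO(int req, const char *name),

	TP_ARGS(req, name),

	TP_STRUCT__entry(
		__field(int, req)
		__array(char, name, 16)
	),

	TP_fast_assign(
		__entry->req = req;
		memcpy(__entry->name, name, sizeof(__entry->name));
	),

	TP_printk("req=%d name=%s", __entry->req, __entry->name)
);

Stage 3 then expands this into ftrace_raw_event_foo_bar(), the register/unregister helpers and the event_foo_bar descriptor documented in the header comment above.
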
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
new file mode 100644
index 000000000000..4d9952d3df50
--- /dev/null
+++ b/kernel/trace/trace_export.c
@@ -0,0 +1,102 @@
1/*
2 * trace_export.c - export basic ftrace utilities to user space
3 *
4 * Copyright (C) 2009 Steven Rostedt <srostedt@redhat.com>
5 */
6#include <linux/stringify.h>
7#include <linux/kallsyms.h>
8#include <linux/seq_file.h>
9#include <linux/debugfs.h>
10#include <linux/uaccess.h>
11#include <linux/ftrace.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/fs.h>
15
16#include "trace_output.h"
17
18
19#undef TRACE_STRUCT
20#define TRACE_STRUCT(args...) args
21
22#undef TRACE_FIELD
23#define TRACE_FIELD(type, item, assign) \
24 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
25 "offset:%u;\tsize:%u;\n", \
26 (unsigned int)offsetof(typeof(field), item), \
27 (unsigned int)sizeof(field.item)); \
28 if (!ret) \
29 return 0;
30
31
32#undef TRACE_FIELD_SPECIAL
33#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
34 ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \
35 "offset:%u;\tsize:%u;\n", \
36 (unsigned int)offsetof(typeof(field), item), \
37 (unsigned int)sizeof(field.item)); \
38 if (!ret) \
39 return 0;
40
41#undef TRACE_FIELD_ZERO_CHAR
42#define TRACE_FIELD_ZERO_CHAR(item) \
43 ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \
44 "offset:%u;\tsize:0;\n", \
45 (unsigned int)offsetof(typeof(field), item)); \
46 if (!ret) \
47 return 0;
48
49
50#undef TP_RAW_FMT
51#define TP_RAW_FMT(args...) args
52
53#undef TRACE_EVENT_FORMAT
54#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
55static int \
56ftrace_format_##call(struct trace_seq *s) \
57{ \
58 struct args field; \
59 int ret; \
60 \
61 tstruct; \
62 \
63 trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \
64 \
65 return ret; \
66}
67
68#include "trace_event_types.h"
69
70#undef TRACE_ZERO_CHAR
71#define TRACE_ZERO_CHAR(arg)
72
73#undef TRACE_FIELD
74#define TRACE_FIELD(type, item, assign)\
75 entry->item = assign;
76
77#undef TRACE_FIELD
78#define TRACE_FIELD(type, item, assign)\
79 entry->item = assign;
80
81#undef TP_CMD
82#define TP_CMD(cmd...) cmd
83
84#undef TRACE_ENTRY
85#define TRACE_ENTRY entry
86
87#undef TRACE_FIELD_SPECIAL
88#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
89 cmd;
90
91#undef TRACE_EVENT_FORMAT
92#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
93 \
94static struct ftrace_event_call __used \
95__attribute__((__aligned__(4))) \
96__attribute__((section("_ftrace_events"))) event_##call = { \
97 .name = #call, \
98 .id = proto, \
99 .system = __stringify(TRACE_SYSTEM), \
100 .show_format = ftrace_format_##call, \
101}
102#include "trace_event_types.h"
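
Both trace_export.c above and the per-event show_format callbacks emit the same "field:<type> <name>; offset:<n>; size:<n>;" lines into an event's format file. A minimal user-space parser for those lines is sketched below; the path assumes debugfs at /sys/kernel/debug and an "ftrace/function" event directory, which depends on the kernel configuration.

/* Sketch: parse field description lines from an event format file. */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/tracing/events/ftrace/function/format";
	char line[256], decl[128];
	unsigned int offset, size;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		/* lines look like: field:<type> <name>; offset:<n>; size:<n>; */
		if (sscanf(line, " field:%127[^;]; offset:%u; size:%u;",
			   decl, &offset, &size) == 3)
			printf("%-40s offset=%u size=%u\n", decl, offset, size);
	}
	fclose(f);
	return 0;
}
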
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 9236d7e25a16..c9a0b7df44ff 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -9,6 +9,7 @@
9 * Copyright (C) 2004-2006 Ingo Molnar 9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III 10 * Copyright (C) 2004 William Lee Irwin III
11 */ 11 */
12#include <linux/ring_buffer.h>
12#include <linux/debugfs.h> 13#include <linux/debugfs.h>
13#include <linux/uaccess.h> 14#include <linux/uaccess.h>
14#include <linux/ftrace.h> 15#include <linux/ftrace.h>
@@ -16,52 +17,388 @@
16 17
17#include "trace.h" 18#include "trace.h"
18 19
19static void start_function_trace(struct trace_array *tr) 20/* function tracing enabled */
21static int ftrace_function_enabled;
22
23static struct trace_array *func_trace;
24
25static void tracing_start_function_trace(void);
26static void tracing_stop_function_trace(void);
27
28static int function_trace_init(struct trace_array *tr)
20{ 29{
30 func_trace = tr;
21 tr->cpu = get_cpu(); 31 tr->cpu = get_cpu();
22 tracing_reset_online_cpus(tr);
23 put_cpu(); 32 put_cpu();
24 33
25 tracing_start_cmdline_record(); 34 tracing_start_cmdline_record();
26 tracing_start_function_trace(); 35 tracing_start_function_trace();
36 return 0;
27} 37}
28 38
29static void stop_function_trace(struct trace_array *tr) 39static void function_trace_reset(struct trace_array *tr)
30{ 40{
31 tracing_stop_function_trace(); 41 tracing_stop_function_trace();
32 tracing_stop_cmdline_record(); 42 tracing_stop_cmdline_record();
33} 43}
34 44
35static int function_trace_init(struct trace_array *tr) 45static void function_trace_start(struct trace_array *tr)
36{ 46{
37 start_function_trace(tr); 47 tracing_reset_online_cpus(tr);
38 return 0;
39} 48}
40 49
41static void function_trace_reset(struct trace_array *tr) 50static void
51function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
52{
53 struct trace_array *tr = func_trace;
54 struct trace_array_cpu *data;
55 unsigned long flags;
56 long disabled;
57 int cpu, resched;
58 int pc;
59
60 if (unlikely(!ftrace_function_enabled))
61 return;
62
63 pc = preempt_count();
64 resched = ftrace_preempt_disable();
65 local_save_flags(flags);
66 cpu = raw_smp_processor_id();
67 data = tr->data[cpu];
68 disabled = atomic_inc_return(&data->disabled);
69
70 if (likely(disabled == 1))
71 trace_function(tr, ip, parent_ip, flags, pc);
72
73 atomic_dec(&data->disabled);
74 ftrace_preempt_enable(resched);
75}
76
77static void
78function_trace_call(unsigned long ip, unsigned long parent_ip)
42{ 79{
43 stop_function_trace(tr); 80 struct trace_array *tr = func_trace;
81 struct trace_array_cpu *data;
82 unsigned long flags;
83 long disabled;
84 int cpu;
85 int pc;
86
87 if (unlikely(!ftrace_function_enabled))
88 return;
89
90 /*
91 * Need to use raw, since this must be called before the
92 * recursive protection is performed.
93 */
94 local_irq_save(flags);
95 cpu = raw_smp_processor_id();
96 data = tr->data[cpu];
97 disabled = atomic_inc_return(&data->disabled);
98
99 if (likely(disabled == 1)) {
100 pc = preempt_count();
101 trace_function(tr, ip, parent_ip, flags, pc);
102 }
103
104 atomic_dec(&data->disabled);
105 local_irq_restore(flags);
44} 106}
45 107
46static void function_trace_start(struct trace_array *tr) 108static void
109function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
47{ 110{
48 tracing_reset_online_cpus(tr); 111 struct trace_array *tr = func_trace;
112 struct trace_array_cpu *data;
113 unsigned long flags;
114 long disabled;
115 int cpu;
116 int pc;
117
118 if (unlikely(!ftrace_function_enabled))
119 return;
120
121 /*
122 * Need to use raw, since this must be called before the
123 * recursive protection is performed.
124 */
125 local_irq_save(flags);
126 cpu = raw_smp_processor_id();
127 data = tr->data[cpu];
128 disabled = atomic_inc_return(&data->disabled);
129
130 if (likely(disabled == 1)) {
131 pc = preempt_count();
132 trace_function(tr, ip, parent_ip, flags, pc);
133 /*
134 * skip over 5 funcs:
135 * __ftrace_trace_stack,
136 * __trace_stack,
137 * function_stack_trace_call
138 * ftrace_list_func
139 * ftrace_call
140 */
141 __trace_stack(tr, flags, 5, pc);
142 }
143
144 atomic_dec(&data->disabled);
145 local_irq_restore(flags);
146}
147
148
149static struct ftrace_ops trace_ops __read_mostly =
150{
151 .func = function_trace_call,
152};
153
154static struct ftrace_ops trace_stack_ops __read_mostly =
155{
156 .func = function_stack_trace_call,
157};
158
159/* Our two options */
160enum {
161 TRACE_FUNC_OPT_STACK = 0x1,
162};
163
164static struct tracer_opt func_opts[] = {
165#ifdef CONFIG_STACKTRACE
166 { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
167#endif
168 { } /* Always set a last empty entry */
169};
170
171static struct tracer_flags func_flags = {
172 .val = 0, /* By default: all flags disabled */
173 .opts = func_opts
174};
175
176static void tracing_start_function_trace(void)
177{
178 ftrace_function_enabled = 0;
179
180 if (trace_flags & TRACE_ITER_PREEMPTONLY)
181 trace_ops.func = function_trace_call_preempt_only;
182 else
183 trace_ops.func = function_trace_call;
184
185 if (func_flags.val & TRACE_FUNC_OPT_STACK)
186 register_ftrace_function(&trace_stack_ops);
187 else
188 register_ftrace_function(&trace_ops);
189
190 ftrace_function_enabled = 1;
191}
192
193static void tracing_stop_function_trace(void)
194{
195 ftrace_function_enabled = 0;
196 /* OK if they are not registered */
197 unregister_ftrace_function(&trace_stack_ops);
198 unregister_ftrace_function(&trace_ops);
199}
200
201static int func_set_flag(u32 old_flags, u32 bit, int set)
202{
203 if (bit == TRACE_FUNC_OPT_STACK) {
204 /* do nothing if already set */
205 if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
206 return 0;
207
208 if (set) {
209 unregister_ftrace_function(&trace_ops);
210 register_ftrace_function(&trace_stack_ops);
211 } else {
212 unregister_ftrace_function(&trace_stack_ops);
213 register_ftrace_function(&trace_ops);
214 }
215
216 return 0;
217 }
218
219 return -EINVAL;
49} 220}
50 221
51static struct tracer function_trace __read_mostly = 222static struct tracer function_trace __read_mostly =
52{ 223{
53 .name = "function", 224 .name = "function",
54 .init = function_trace_init, 225 .init = function_trace_init,
55 .reset = function_trace_reset, 226 .reset = function_trace_reset,
56 .start = function_trace_start, 227 .start = function_trace_start,
228 .wait_pipe = poll_wait_pipe,
229 .flags = &func_flags,
230 .set_flag = func_set_flag,
57#ifdef CONFIG_FTRACE_SELFTEST 231#ifdef CONFIG_FTRACE_SELFTEST
58 .selftest = trace_selftest_startup_function, 232 .selftest = trace_selftest_startup_function,
59#endif 233#endif
60}; 234};
61 235
236#ifdef CONFIG_DYNAMIC_FTRACE
237static void
238ftrace_traceon(unsigned long ip, unsigned long parent_ip, void **data)
239{
240 long *count = (long *)data;
241
242 if (tracing_is_on())
243 return;
244
245 if (!*count)
246 return;
247
248 if (*count != -1)
249 (*count)--;
250
251 tracing_on();
252}
253
254static void
255ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data)
256{
257 long *count = (long *)data;
258
259 if (!tracing_is_on())
260 return;
261
262 if (!*count)
263 return;
264
265 if (*count != -1)
266 (*count)--;
267
268 tracing_off();
269}
270
271static int
272ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
273 struct ftrace_probe_ops *ops, void *data);
274
275static struct ftrace_probe_ops traceon_probe_ops = {
276 .func = ftrace_traceon,
277 .print = ftrace_trace_onoff_print,
278};
279
280static struct ftrace_probe_ops traceoff_probe_ops = {
281 .func = ftrace_traceoff,
282 .print = ftrace_trace_onoff_print,
283};
284
285static int
286ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
287 struct ftrace_probe_ops *ops, void *data)
288{
289 char str[KSYM_SYMBOL_LEN];
290 long count = (long)data;
291
292 kallsyms_lookup(ip, NULL, NULL, NULL, str);
293 seq_printf(m, "%s:", str);
294
295 if (ops == &traceon_probe_ops)
296 seq_printf(m, "traceon");
297 else
298 seq_printf(m, "traceoff");
299
300 if (count == -1)
301 seq_printf(m, ":unlimited\n");
302 else
303 seq_printf(m, ":count=%ld", count);
304 seq_putc(m, '\n');
305
306 return 0;
307}
308
309static int
310ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param)
311{
312 struct ftrace_probe_ops *ops;
313
314 /* we register both traceon and traceoff to this callback */
315 if (strcmp(cmd, "traceon") == 0)
316 ops = &traceon_probe_ops;
317 else
318 ops = &traceoff_probe_ops;
319
320 unregister_ftrace_function_probe_func(glob, ops);
321
322 return 0;
323}
324
325static int
326ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
327{
328 struct ftrace_probe_ops *ops;
329 void *count = (void *)-1;
330 char *number;
331 int ret;
332
333 /* hash funcs only work with set_ftrace_filter */
334 if (!enable)
335 return -EINVAL;
336
337 if (glob[0] == '!')
338 return ftrace_trace_onoff_unreg(glob+1, cmd, param);
339
340 /* we register both traceon and traceoff to this callback */
341 if (strcmp(cmd, "traceon") == 0)
342 ops = &traceon_probe_ops;
343 else
344 ops = &traceoff_probe_ops;
345
346 if (!param)
347 goto out_reg;
348
349 number = strsep(&param, ":");
350
351 if (!strlen(number))
352 goto out_reg;
353
354 /*
355 * We use the callback data field (which is a pointer)
356 * as our counter.
357 */
358 ret = strict_strtoul(number, 0, (unsigned long *)&count);
359 if (ret)
360 return ret;
361
362 out_reg:
363 ret = register_ftrace_function_probe(glob, ops, count);
364
365 return ret;
366}
367
368static struct ftrace_func_command ftrace_traceon_cmd = {
369 .name = "traceon",
370 .func = ftrace_trace_onoff_callback,
371};
372
373static struct ftrace_func_command ftrace_traceoff_cmd = {
374 .name = "traceoff",
375 .func = ftrace_trace_onoff_callback,
376};
377
378static int __init init_func_cmd_traceon(void)
379{
380 int ret;
381
382 ret = register_ftrace_command(&ftrace_traceoff_cmd);
383 if (ret)
384 return ret;
385
386 ret = register_ftrace_command(&ftrace_traceon_cmd);
387 if (ret)
388 unregister_ftrace_command(&ftrace_traceoff_cmd);
389 return ret;
390}
391#else
392static inline int init_func_cmd_traceon(void)
393{
394 return 0;
395}
396#endif /* CONFIG_DYNAMIC_FTRACE */
397
62static __init int init_function_trace(void) 398static __init int init_function_trace(void)
63{ 399{
400 init_func_cmd_traceon();
64 return register_tracer(&function_trace); 401 return register_tracer(&function_trace);
65} 402}
66
67device_initcall(init_function_trace); 403device_initcall(init_function_trace);
404
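
With CONFIG_DYNAMIC_FTRACE, the hunk above registers "traceon" and "traceoff" as set_ftrace_filter commands of the form <func>:<cmd>[:<count>]. The sketch below arms a traceoff probe from user space; the debugfs path, the choice of schedule() as the trigger, and opening with O_APPEND to preserve an existing filter are assumptions about the surrounding ftrace interface rather than part of this patch.

/* Sketch: stop tracing after schedule() has fired five times, using the
 * traceoff probe command registered above. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/tracing/set_ftrace_filter";
	const char *cmd = "schedule:traceoff:5";	/* <func>:<cmd>[:<count>] */
	int fd = open(path, O_WRONLY | O_APPEND);

	if (fd < 0) {
		perror(path);
		return 1;
	}
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("write");
	close(fd);
	return 0;
}
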
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 930c08e5b38e..6004ccac2dd7 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * 2 *
3 * Function graph tracer. 3 * Function graph tracer.
4 * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com> 4 * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
5 * Mostly borrowed from function tracer which 5 * Mostly borrowed from function tracer which
6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com> 6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
7 * 7 *
@@ -12,6 +12,7 @@
12#include <linux/fs.h> 12#include <linux/fs.h>
13 13
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h"
15 16
16#define TRACE_GRAPH_INDENT 2 17#define TRACE_GRAPH_INDENT 2
17 18
@@ -20,9 +21,11 @@
20#define TRACE_GRAPH_PRINT_CPU 0x2 21#define TRACE_GRAPH_PRINT_CPU 0x2
21#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 22#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
22#define TRACE_GRAPH_PRINT_PROC 0x8 23#define TRACE_GRAPH_PRINT_PROC 0x8
24#define TRACE_GRAPH_PRINT_DURATION 0x10
25#define TRACE_GRAPH_PRINT_ABS_TIME 0X20
23 26
24static struct tracer_opt trace_opts[] = { 27static struct tracer_opt trace_opts[] = {
25 /* Display overruns ? */ 28 /* Display overruns? (for self-debug purpose) */
26 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) }, 29 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
27 /* Display CPU ? */ 30 /* Display CPU ? */
28 { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) }, 31 { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
@@ -30,26 +33,101 @@ static struct tracer_opt trace_opts[] = {
30 { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) }, 33 { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
31 /* Display proc name/pid */ 34 /* Display proc name/pid */
32 { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) }, 35 { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
36 /* Display duration of execution */
37 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
38 /* Display absolute time of an entry */
39 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
33 { } /* Empty entry */ 40 { } /* Empty entry */
34}; 41};
35 42
36static struct tracer_flags tracer_flags = { 43static struct tracer_flags tracer_flags = {
37 /* Don't display overruns and proc by default */ 44 /* Don't display overruns and proc by default */
38 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD, 45 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
46 TRACE_GRAPH_PRINT_DURATION,
39 .opts = trace_opts 47 .opts = trace_opts
40}; 48};
41 49
42/* pid on the last trace processed */ 50/* pid on the last trace processed */
43static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 };
44 51
45static int graph_trace_init(struct trace_array *tr) 52
53/* Add a function return address to the trace stack on thread info.*/
54int
55ftrace_push_return_trace(unsigned long ret, unsigned long long time,
56 unsigned long func, int *depth)
46{ 57{
47 int cpu, ret; 58 int index;
48 59
49 for_each_online_cpu(cpu) 60 if (!current->ret_stack)
50 tracing_reset(tr, cpu); 61 return -EBUSY;
62
63 /* The return trace stack is full */
64 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
65 atomic_inc(&current->trace_overrun);
66 return -EBUSY;
67 }
68
69 index = ++current->curr_ret_stack;
70 barrier();
71 current->ret_stack[index].ret = ret;
72 current->ret_stack[index].func = func;
73 current->ret_stack[index].calltime = time;
74 *depth = index;
75
76 return 0;
77}
78
79/* Retrieve a function return address to the trace stack on thread info.*/
80void
81ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
82{
83 int index;
84
85 index = current->curr_ret_stack;
86
87 if (unlikely(index < 0)) {
88 ftrace_graph_stop();
89 WARN_ON(1);
90 /* Might as well panic, otherwise we have nowhere to go */
91 *ret = (unsigned long)panic;
92 return;
93 }
51 94
52 ret = register_ftrace_graph(&trace_graph_return, 95 *ret = current->ret_stack[index].ret;
96 trace->func = current->ret_stack[index].func;
97 trace->calltime = current->ret_stack[index].calltime;
98 trace->overrun = atomic_read(&current->trace_overrun);
99 trace->depth = index;
100 barrier();
101 current->curr_ret_stack--;
102
103}
104
105/*
106 * Send the trace to the ring-buffer.
107 * @return the original return address.
108 */
109unsigned long ftrace_return_to_handler(void)
110{
111 struct ftrace_graph_ret trace;
112 unsigned long ret;
113
114 ftrace_pop_return_trace(&trace, &ret);
115 trace.rettime = trace_clock_local();
116 ftrace_graph_return(&trace);
117
118 if (unlikely(!ret)) {
119 ftrace_graph_stop();
120 WARN_ON(1);
121 /* Might as well panic. What else to do? */
122 ret = (unsigned long)panic;
123 }
124
125 return ret;
126}
127
128static int graph_trace_init(struct trace_array *tr)
129{
130 int ret = register_ftrace_graph(&trace_graph_return,
53 &trace_graph_entry); 131 &trace_graph_entry);
54 if (ret) 132 if (ret)
55 return ret; 133 return ret;
@@ -112,15 +190,15 @@ print_graph_cpu(struct trace_seq *s, int cpu)
112static enum print_line_t 190static enum print_line_t
113print_graph_proc(struct trace_seq *s, pid_t pid) 191print_graph_proc(struct trace_seq *s, pid_t pid)
114{ 192{
115 int i; 193 char comm[TASK_COMM_LEN];
116 int ret;
117 int len;
118 char comm[8];
119 int spaces = 0;
120 /* sign + log10(MAX_INT) + '\0' */ 194 /* sign + log10(MAX_INT) + '\0' */
121 char pid_str[11]; 195 char pid_str[11];
196 int spaces = 0;
197 int ret;
198 int len;
199 int i;
122 200
123 strncpy(comm, trace_find_cmdline(pid), 7); 201 trace_find_cmdline(pid, comm);
124 comm[7] = '\0'; 202 comm[7] = '\0';
125 sprintf(pid_str, "%d", pid); 203 sprintf(pid_str, "%d", pid);
126 204
@@ -153,17 +231,25 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
153 231
154/* If the pid changed since the last trace, output this event */ 232/* If the pid changed since the last trace, output this event */
155static enum print_line_t 233static enum print_line_t
156verif_pid(struct trace_seq *s, pid_t pid, int cpu) 234verif_pid(struct trace_seq *s, pid_t pid, int cpu, pid_t *last_pids_cpu)
157{ 235{
158 pid_t prev_pid; 236 pid_t prev_pid;
237 pid_t *last_pid;
159 int ret; 238 int ret;
160 239
161 if (last_pid[cpu] != -1 && last_pid[cpu] == pid) 240 if (!last_pids_cpu)
241 return TRACE_TYPE_HANDLED;
242
243 last_pid = per_cpu_ptr(last_pids_cpu, cpu);
244
245 if (*last_pid == pid)
162 return TRACE_TYPE_HANDLED; 246 return TRACE_TYPE_HANDLED;
163 247
164 prev_pid = last_pid[cpu]; 248 prev_pid = *last_pid;
165 last_pid[cpu] = pid; 249 *last_pid = pid;
166 250
251 if (prev_pid == -1)
252 return TRACE_TYPE_HANDLED;
167/* 253/*
168 * Context-switch trace line: 254 * Context-switch trace line:
169 255
@@ -175,34 +261,34 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu)
175 ret = trace_seq_printf(s, 261 ret = trace_seq_printf(s,
176 " ------------------------------------------\n"); 262 " ------------------------------------------\n");
177 if (!ret) 263 if (!ret)
178 TRACE_TYPE_PARTIAL_LINE; 264 return TRACE_TYPE_PARTIAL_LINE;
179 265
180 ret = print_graph_cpu(s, cpu); 266 ret = print_graph_cpu(s, cpu);
181 if (ret == TRACE_TYPE_PARTIAL_LINE) 267 if (ret == TRACE_TYPE_PARTIAL_LINE)
182 TRACE_TYPE_PARTIAL_LINE; 268 return TRACE_TYPE_PARTIAL_LINE;
183 269
184 ret = print_graph_proc(s, prev_pid); 270 ret = print_graph_proc(s, prev_pid);
185 if (ret == TRACE_TYPE_PARTIAL_LINE) 271 if (ret == TRACE_TYPE_PARTIAL_LINE)
186 TRACE_TYPE_PARTIAL_LINE; 272 return TRACE_TYPE_PARTIAL_LINE;
187 273
188 ret = trace_seq_printf(s, " => "); 274 ret = trace_seq_printf(s, " => ");
189 if (!ret) 275 if (!ret)
190 TRACE_TYPE_PARTIAL_LINE; 276 return TRACE_TYPE_PARTIAL_LINE;
191 277
192 ret = print_graph_proc(s, pid); 278 ret = print_graph_proc(s, pid);
193 if (ret == TRACE_TYPE_PARTIAL_LINE) 279 if (ret == TRACE_TYPE_PARTIAL_LINE)
194 TRACE_TYPE_PARTIAL_LINE; 280 return TRACE_TYPE_PARTIAL_LINE;
195 281
196 ret = trace_seq_printf(s, 282 ret = trace_seq_printf(s,
197 "\n ------------------------------------------\n\n"); 283 "\n ------------------------------------------\n\n");
198 if (!ret) 284 if (!ret)
199 TRACE_TYPE_PARTIAL_LINE; 285 return TRACE_TYPE_PARTIAL_LINE;
200 286
201 return ret; 287 return TRACE_TYPE_HANDLED;
202} 288}
203 289
204static bool 290static struct ftrace_graph_ret_entry *
205trace_branch_is_leaf(struct trace_iterator *iter, 291get_return_for_leaf(struct trace_iterator *iter,
206 struct ftrace_graph_ent_entry *curr) 292 struct ftrace_graph_ent_entry *curr)
207{ 293{
208 struct ring_buffer_iter *ring_iter; 294 struct ring_buffer_iter *ring_iter;
@@ -211,65 +297,123 @@ trace_branch_is_leaf(struct trace_iterator *iter,
211 297
212 ring_iter = iter->buffer_iter[iter->cpu]; 298 ring_iter = iter->buffer_iter[iter->cpu];
213 299
214 if (!ring_iter) 300 /* First peek to compare current entry and the next one */
215 return false; 301 if (ring_iter)
216 302 event = ring_buffer_iter_peek(ring_iter, NULL);
217 event = ring_buffer_iter_peek(ring_iter, NULL); 303 else {
304 /* We need to consume the current entry to see the next one */
305 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
306 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
307 NULL);
308 }
218 309
219 if (!event) 310 if (!event)
220 return false; 311 return NULL;
221 312
222 next = ring_buffer_event_data(event); 313 next = ring_buffer_event_data(event);
223 314
224 if (next->ent.type != TRACE_GRAPH_RET) 315 if (next->ent.type != TRACE_GRAPH_RET)
225 return false; 316 return NULL;
226 317
227 if (curr->ent.pid != next->ent.pid || 318 if (curr->ent.pid != next->ent.pid ||
228 curr->graph_ent.func != next->ret.func) 319 curr->graph_ent.func != next->ret.func)
229 return false; 320 return NULL;
321
322 /* this is a leaf, now advance the iterator */
323 if (ring_iter)
324 ring_buffer_read(ring_iter, NULL);
230 325
231 return true; 326 return next;
327}
328
329/* Signal a overhead of time execution to the output */
330static int
331print_graph_overhead(unsigned long long duration, struct trace_seq *s)
332{
333	/* If duration disappears, we don't need anything */
334 if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION))
335 return 1;
336
337	/* Non-nested entry or return */
338 if (duration == -1)
339 return trace_seq_printf(s, " ");
340
341 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
342 /* Duration exceeded 100 msecs */
343 if (duration > 100000ULL)
344 return trace_seq_printf(s, "! ");
345
346 /* Duration exceeded 10 msecs */
347 if (duration > 10000ULL)
348 return trace_seq_printf(s, "+ ");
349 }
350
351 return trace_seq_printf(s, " ");
352}
353
354static int print_graph_abs_time(u64 t, struct trace_seq *s)
355{
356 unsigned long usecs_rem;
357
358 usecs_rem = do_div(t, NSEC_PER_SEC);
359 usecs_rem /= 1000;
360
361 return trace_seq_printf(s, "%5lu.%06lu | ",
362 (unsigned long)t, usecs_rem);
232} 363}
233 364
234static enum print_line_t 365static enum print_line_t
235print_graph_irq(struct trace_seq *s, unsigned long addr, 366print_graph_irq(struct trace_iterator *iter, unsigned long addr,
236 enum trace_type type, int cpu, pid_t pid) 367 enum trace_type type, int cpu, pid_t pid)
237{ 368{
238 int ret; 369 int ret;
370 struct trace_seq *s = &iter->seq;
239 371
240 if (addr < (unsigned long)__irqentry_text_start || 372 if (addr < (unsigned long)__irqentry_text_start ||
241 addr >= (unsigned long)__irqentry_text_end) 373 addr >= (unsigned long)__irqentry_text_end)
242 return TRACE_TYPE_UNHANDLED; 374 return TRACE_TYPE_UNHANDLED;
243 375
244 if (type == TRACE_GRAPH_ENT) { 376 /* Absolute time */
245 ret = trace_seq_printf(s, "==========> | "); 377 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
246 } else { 378 ret = print_graph_abs_time(iter->ts, s);
247 /* Cpu */ 379 if (!ret)
248 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 380 return TRACE_TYPE_PARTIAL_LINE;
249 ret = print_graph_cpu(s, cpu); 381 }
250 if (ret == TRACE_TYPE_PARTIAL_LINE)
251 return TRACE_TYPE_PARTIAL_LINE;
252 }
253 /* Proc */
254 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
255 ret = print_graph_proc(s, pid);
256 if (ret == TRACE_TYPE_PARTIAL_LINE)
257 return TRACE_TYPE_PARTIAL_LINE;
258 382
259 ret = trace_seq_printf(s, " | "); 383 /* Cpu */
260 if (!ret) 384 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
261 return TRACE_TYPE_PARTIAL_LINE; 385 ret = print_graph_cpu(s, cpu);
262 } 386 if (ret == TRACE_TYPE_PARTIAL_LINE)
387 return TRACE_TYPE_PARTIAL_LINE;
388 }
389 /* Proc */
390 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
391 ret = print_graph_proc(s, pid);
392 if (ret == TRACE_TYPE_PARTIAL_LINE)
393 return TRACE_TYPE_PARTIAL_LINE;
394 ret = trace_seq_printf(s, " | ");
395 if (!ret)
396 return TRACE_TYPE_PARTIAL_LINE;
397 }
263 398
264 /* No overhead */ 399 /* No overhead */
265 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 400 ret = print_graph_overhead(-1, s);
266 ret = trace_seq_printf(s, " "); 401 if (!ret)
267 if (!ret) 402 return TRACE_TYPE_PARTIAL_LINE;
268 return TRACE_TYPE_PARTIAL_LINE; 403
269 } 404 if (type == TRACE_GRAPH_ENT)
405 ret = trace_seq_printf(s, "==========>");
406 else
407 ret = trace_seq_printf(s, "<==========");
408
409 if (!ret)
410 return TRACE_TYPE_PARTIAL_LINE;
411
412	/* Don't close the duration column if we don't have one */
413 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
414 trace_seq_printf(s, " |");
415 ret = trace_seq_printf(s, "\n");
270 416
271 ret = trace_seq_printf(s, "<========== |\n");
272 }
273 if (!ret) 417 if (!ret)
274 return TRACE_TYPE_PARTIAL_LINE; 418 return TRACE_TYPE_PARTIAL_LINE;
275 return TRACE_TYPE_HANDLED; 419 return TRACE_TYPE_HANDLED;
@@ -288,7 +432,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
288 sprintf(msecs_str, "%lu", (unsigned long) duration); 432 sprintf(msecs_str, "%lu", (unsigned long) duration);
289 433
290 /* Print msecs */ 434 /* Print msecs */
291 ret = trace_seq_printf(s, msecs_str); 435 ret = trace_seq_printf(s, "%s", msecs_str);
292 if (!ret) 436 if (!ret)
293 return TRACE_TYPE_PARTIAL_LINE; 437 return TRACE_TYPE_PARTIAL_LINE;
294 438
@@ -321,51 +465,33 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
321 465
322} 466}
323 467
324/* Signal a overhead of time execution to the output */
325static int
326print_graph_overhead(unsigned long long duration, struct trace_seq *s)
327{
328 /* Duration exceeded 100 msecs */
329 if (duration > 100000ULL)
330 return trace_seq_printf(s, "! ");
331
332 /* Duration exceeded 10 msecs */
333 if (duration > 10000ULL)
334 return trace_seq_printf(s, "+ ");
335
336 return trace_seq_printf(s, " ");
337}
338
339/* Case of a leaf function on its call entry */ 468/* Case of a leaf function on its call entry */
340static enum print_line_t 469static enum print_line_t
341print_graph_entry_leaf(struct trace_iterator *iter, 470print_graph_entry_leaf(struct trace_iterator *iter,
342 struct ftrace_graph_ent_entry *entry, struct trace_seq *s) 471 struct ftrace_graph_ent_entry *entry,
472 struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s)
343{ 473{
344 struct ftrace_graph_ret_entry *ret_entry;
345 struct ftrace_graph_ret *graph_ret; 474 struct ftrace_graph_ret *graph_ret;
346 struct ring_buffer_event *event;
347 struct ftrace_graph_ent *call; 475 struct ftrace_graph_ent *call;
348 unsigned long long duration; 476 unsigned long long duration;
349 int ret; 477 int ret;
350 int i; 478 int i;
351 479
352 event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
353 ret_entry = ring_buffer_event_data(event);
354 graph_ret = &ret_entry->ret; 480 graph_ret = &ret_entry->ret;
355 call = &entry->graph_ent; 481 call = &entry->graph_ent;
356 duration = graph_ret->rettime - graph_ret->calltime; 482 duration = graph_ret->rettime - graph_ret->calltime;
357 483
358 /* Overhead */ 484 /* Overhead */
359 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 485 ret = print_graph_overhead(duration, s);
360 ret = print_graph_overhead(duration, s); 486 if (!ret)
361 if (!ret) 487 return TRACE_TYPE_PARTIAL_LINE;
362 return TRACE_TYPE_PARTIAL_LINE;
363 }
364 488
365 /* Duration */ 489 /* Duration */
366 ret = print_graph_duration(duration, s); 490 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
367 if (ret == TRACE_TYPE_PARTIAL_LINE) 491 ret = print_graph_duration(duration, s);
368 return TRACE_TYPE_PARTIAL_LINE; 492 if (ret == TRACE_TYPE_PARTIAL_LINE)
493 return TRACE_TYPE_PARTIAL_LINE;
494 }
369 495
370 /* Function */ 496 /* Function */
371 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 497 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
@@ -394,25 +520,17 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
394 struct ftrace_graph_ent *call = &entry->graph_ent; 520 struct ftrace_graph_ent *call = &entry->graph_ent;
395 521
396 /* No overhead */ 522 /* No overhead */
397 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 523 ret = print_graph_overhead(-1, s);
398 ret = trace_seq_printf(s, " "); 524 if (!ret)
399 if (!ret) 525 return TRACE_TYPE_PARTIAL_LINE;
400 return TRACE_TYPE_PARTIAL_LINE;
401 }
402 526
403 /* Interrupt */ 527 /* No time */
404 ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, pid); 528 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
405 if (ret == TRACE_TYPE_UNHANDLED) {
406 /* No time */
407 ret = trace_seq_printf(s, " | "); 529 ret = trace_seq_printf(s, " | ");
408 if (!ret) 530 if (!ret)
409 return TRACE_TYPE_PARTIAL_LINE; 531 return TRACE_TYPE_PARTIAL_LINE;
410 } else {
411 if (ret == TRACE_TYPE_PARTIAL_LINE)
412 return TRACE_TYPE_PARTIAL_LINE;
413 } 532 }
414 533
415
416 /* Function */ 534 /* Function */
417 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 535 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
418 ret = trace_seq_printf(s, " "); 536 ret = trace_seq_printf(s, " ");
@@ -428,20 +546,40 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
428 if (!ret) 546 if (!ret)
429 return TRACE_TYPE_PARTIAL_LINE; 547 return TRACE_TYPE_PARTIAL_LINE;
430 548
431 return TRACE_TYPE_HANDLED; 549 /*
550 * we already consumed the current entry to check the next one
551 * and see if this is a leaf.
552 */
553 return TRACE_TYPE_NO_CONSUME;
432} 554}
433 555
434static enum print_line_t 556static enum print_line_t
435print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 557print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
436 struct trace_iterator *iter, int cpu) 558 struct trace_iterator *iter)
437{ 559{
438 int ret; 560 int ret;
561 int cpu = iter->cpu;
562 pid_t *last_entry = iter->private;
439 struct trace_entry *ent = iter->ent; 563 struct trace_entry *ent = iter->ent;
564 struct ftrace_graph_ent *call = &field->graph_ent;
565 struct ftrace_graph_ret_entry *leaf_ret;
440 566
441 /* Pid */ 567 /* Pid */
442 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) 568 if (verif_pid(s, ent->pid, cpu, last_entry) == TRACE_TYPE_PARTIAL_LINE)
569 return TRACE_TYPE_PARTIAL_LINE;
570
571 /* Interrupt */
572 ret = print_graph_irq(iter, call->func, TRACE_GRAPH_ENT, cpu, ent->pid);
573 if (ret == TRACE_TYPE_PARTIAL_LINE)
443 return TRACE_TYPE_PARTIAL_LINE; 574 return TRACE_TYPE_PARTIAL_LINE;
444 575
576 /* Absolute time */
577 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
578 ret = print_graph_abs_time(iter->ts, s);
579 if (!ret)
580 return TRACE_TYPE_PARTIAL_LINE;
581 }
582
445 /* Cpu */ 583 /* Cpu */
446 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 584 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
447 ret = print_graph_cpu(s, cpu); 585 ret = print_graph_cpu(s, cpu);
@@ -460,8 +598,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
460 return TRACE_TYPE_PARTIAL_LINE; 598 return TRACE_TYPE_PARTIAL_LINE;
461 } 599 }
462 600
463 if (trace_branch_is_leaf(iter, field)) 601 leaf_ret = get_return_for_leaf(iter, field);
464 return print_graph_entry_leaf(iter, field, s); 602 if (leaf_ret)
603 return print_graph_entry_leaf(iter, field, leaf_ret, s);
465 else 604 else
466 return print_graph_entry_nested(field, s, iter->ent->pid, cpu); 605 return print_graph_entry_nested(field, s, iter->ent->pid, cpu);
467 606
@@ -469,16 +608,25 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
469 608
470static enum print_line_t 609static enum print_line_t
471print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, 610print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
472 struct trace_entry *ent, int cpu) 611 struct trace_entry *ent, struct trace_iterator *iter)
473{ 612{
474 int i; 613 int i;
475 int ret; 614 int ret;
615 int cpu = iter->cpu;
616 pid_t *last_pid = iter->private, pid = ent->pid;
476 unsigned long long duration = trace->rettime - trace->calltime; 617 unsigned long long duration = trace->rettime - trace->calltime;
477 618
478 /* Pid */ 619 /* Pid */
479 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) 620 if (verif_pid(s, pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
480 return TRACE_TYPE_PARTIAL_LINE; 621 return TRACE_TYPE_PARTIAL_LINE;
481 622
623 /* Absolute time */
624 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
625 ret = print_graph_abs_time(iter->ts, s);
626 if (!ret)
627 return TRACE_TYPE_PARTIAL_LINE;
628 }
629
482 /* Cpu */ 630 /* Cpu */
483 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 631 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
484 ret = print_graph_cpu(s, cpu); 632 ret = print_graph_cpu(s, cpu);
@@ -498,16 +646,16 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
498 } 646 }
499 647
500 /* Overhead */ 648 /* Overhead */
501 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 649 ret = print_graph_overhead(duration, s);
502 ret = print_graph_overhead(duration, s); 650 if (!ret)
503 if (!ret) 651 return TRACE_TYPE_PARTIAL_LINE;
504 return TRACE_TYPE_PARTIAL_LINE;
505 }
506 652
507 /* Duration */ 653 /* Duration */
508 ret = print_graph_duration(duration, s); 654 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
509 if (ret == TRACE_TYPE_PARTIAL_LINE) 655 ret = print_graph_duration(duration, s);
510 return TRACE_TYPE_PARTIAL_LINE; 656 if (ret == TRACE_TYPE_PARTIAL_LINE)
657 return TRACE_TYPE_PARTIAL_LINE;
658 }
511 659
512 /* Closing brace */ 660 /* Closing brace */
513 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { 661 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
@@ -528,7 +676,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
528 return TRACE_TYPE_PARTIAL_LINE; 676 return TRACE_TYPE_PARTIAL_LINE;
529 } 677 }
530 678
531 ret = print_graph_irq(s, trace->func, TRACE_GRAPH_RET, cpu, ent->pid); 679 ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, cpu, pid);
532 if (ret == TRACE_TYPE_PARTIAL_LINE) 680 if (ret == TRACE_TYPE_PARTIAL_LINE)
533 return TRACE_TYPE_PARTIAL_LINE; 681 return TRACE_TYPE_PARTIAL_LINE;
534 682
@@ -536,19 +684,28 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
536} 684}
537 685
538static enum print_line_t 686static enum print_line_t
539print_graph_comment(struct print_entry *trace, struct trace_seq *s, 687print_graph_comment(struct bprint_entry *trace, struct trace_seq *s,
540 struct trace_entry *ent, struct trace_iterator *iter) 688 struct trace_entry *ent, struct trace_iterator *iter)
541{ 689{
542 int i; 690 int i;
543 int ret; 691 int ret;
692 int cpu = iter->cpu;
693 pid_t *last_pid = iter->private;
544 694
545 /* Pid */ 695 /* Pid */
546 if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE) 696 if (verif_pid(s, ent->pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
547 return TRACE_TYPE_PARTIAL_LINE; 697 return TRACE_TYPE_PARTIAL_LINE;
548 698
699 /* Absolute time */
700 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
701 ret = print_graph_abs_time(iter->ts, s);
702 if (!ret)
703 return TRACE_TYPE_PARTIAL_LINE;
704 }
705
549 /* Cpu */ 706 /* Cpu */
550 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 707 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
551 ret = print_graph_cpu(s, iter->cpu); 708 ret = print_graph_cpu(s, cpu);
552 if (ret == TRACE_TYPE_PARTIAL_LINE) 709 if (ret == TRACE_TYPE_PARTIAL_LINE)
553 return TRACE_TYPE_PARTIAL_LINE; 710 return TRACE_TYPE_PARTIAL_LINE;
554 } 711 }
@@ -565,17 +722,17 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
565 } 722 }
566 723
567 /* No overhead */ 724 /* No overhead */
568 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 725 ret = print_graph_overhead(-1, s);
569 ret = trace_seq_printf(s, " "); 726 if (!ret)
727 return TRACE_TYPE_PARTIAL_LINE;
728
729 /* No time */
730 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
731 ret = trace_seq_printf(s, " | ");
570 if (!ret) 732 if (!ret)
571 return TRACE_TYPE_PARTIAL_LINE; 733 return TRACE_TYPE_PARTIAL_LINE;
572 } 734 }
573 735
574 /* No time */
575 ret = trace_seq_printf(s, " | ");
576 if (!ret)
577 return TRACE_TYPE_PARTIAL_LINE;
578
579 /* Indentation */ 736 /* Indentation */
580 if (trace->depth > 0) 737 if (trace->depth > 0)
581 for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) { 738 for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) {
@@ -585,12 +742,19 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
585 } 742 }
586 743
587 /* The comment */ 744 /* The comment */
588 ret = trace_seq_printf(s, "/* %s", trace->buf); 745 ret = trace_seq_printf(s, "/* ");
746 if (!ret)
747 return TRACE_TYPE_PARTIAL_LINE;
748
749 ret = trace_seq_bprintf(s, trace->fmt, trace->buf);
589 if (!ret) 750 if (!ret)
590 return TRACE_TYPE_PARTIAL_LINE; 751 return TRACE_TYPE_PARTIAL_LINE;
591 752
592 if (ent->flags & TRACE_FLAG_CONT) 753 /* Strip ending newline */
593 trace_seq_print_cont(s, iter); 754 if (s->buffer[s->len - 1] == '\n') {
755 s->buffer[s->len - 1] = '\0';
756 s->len--;
757 }
594 758
595 ret = trace_seq_printf(s, " */\n"); 759 ret = trace_seq_printf(s, " */\n");
596 if (!ret) 760 if (!ret)
@@ -610,16 +774,15 @@ print_graph_function(struct trace_iterator *iter)
610 case TRACE_GRAPH_ENT: { 774 case TRACE_GRAPH_ENT: {
611 struct ftrace_graph_ent_entry *field; 775 struct ftrace_graph_ent_entry *field;
612 trace_assign_type(field, entry); 776 trace_assign_type(field, entry);
613 return print_graph_entry(field, s, iter, 777 return print_graph_entry(field, s, iter);
614 iter->cpu);
615 } 778 }
616 case TRACE_GRAPH_RET: { 779 case TRACE_GRAPH_RET: {
617 struct ftrace_graph_ret_entry *field; 780 struct ftrace_graph_ret_entry *field;
618 trace_assign_type(field, entry); 781 trace_assign_type(field, entry);
619 return print_graph_return(&field->ret, s, entry, iter->cpu); 782 return print_graph_return(&field->ret, s, entry, iter);
620 } 783 }
621 case TRACE_PRINT: { 784 case TRACE_BPRINT: {
622 struct print_entry *field; 785 struct bprint_entry *field;
623 trace_assign_type(field, entry); 786 trace_assign_type(field, entry);
624 return print_graph_comment(field, s, entry, iter); 787 return print_graph_comment(field, s, entry, iter);
625 } 788 }
@@ -632,33 +795,64 @@ static void print_graph_headers(struct seq_file *s)
632{ 795{
633 /* 1st line */ 796 /* 1st line */
634 seq_printf(s, "# "); 797 seq_printf(s, "# ");
798 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
799 seq_printf(s, " TIME ");
635 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 800 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
636 seq_printf(s, "CPU "); 801 seq_printf(s, "CPU");
637 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 802 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
638 seq_printf(s, "TASK/PID "); 803 seq_printf(s, " TASK/PID ");
639 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) 804 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
640 seq_printf(s, "OVERHEAD/"); 805 seq_printf(s, " DURATION ");
641 seq_printf(s, "DURATION FUNCTION CALLS\n"); 806 seq_printf(s, " FUNCTION CALLS\n");
642 807
643 /* 2nd line */ 808 /* 2nd line */
644 seq_printf(s, "# "); 809 seq_printf(s, "# ");
810 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
811 seq_printf(s, " | ");
645 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 812 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
646 seq_printf(s, "| "); 813 seq_printf(s, "| ");
647 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 814 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
648 seq_printf(s, "| | "); 815 seq_printf(s, " | | ");
649 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 816 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
650 seq_printf(s, "| "); 817 seq_printf(s, " | | ");
651 seq_printf(s, "| | | | |\n"); 818 seq_printf(s, " | | | |\n");
652 } else 819}
653 seq_printf(s, " | | | | |\n"); 820
821static void graph_trace_open(struct trace_iterator *iter)
822{
823 /* pid on the last trace processed */
824 pid_t *last_pid = alloc_percpu(pid_t);
825 int cpu;
826
827 if (!last_pid)
828 pr_warning("function graph tracer: not enough memory\n");
829 else
830 for_each_possible_cpu(cpu) {
831 pid_t *pid = per_cpu_ptr(last_pid, cpu);
832 *pid = -1;
833 }
834
835 iter->private = last_pid;
836}
837
838static void graph_trace_close(struct trace_iterator *iter)
839{
840 free_percpu(iter->private);
654} 841}
842
655static struct tracer graph_trace __read_mostly = { 843static struct tracer graph_trace __read_mostly = {
656 .name = "function_graph", 844 .name = "function_graph",
657 .init = graph_trace_init, 845 .open = graph_trace_open,
658 .reset = graph_trace_reset, 846 .close = graph_trace_close,
847 .wait_pipe = poll_wait_pipe,
848 .init = graph_trace_init,
849 .reset = graph_trace_reset,
659 .print_line = print_graph_function, 850 .print_line = print_graph_function,
660 .print_header = print_graph_headers, 851 .print_header = print_graph_headers,
661 .flags = &tracer_flags, 852 .flags = &tracer_flags,
853#ifdef CONFIG_FTRACE_SELFTEST
854 .selftest = trace_selftest_startup_function_graph,
855#endif
662}; 856};
663 857
664static __init int init_graph_trace(void) 858static __init int init_graph_trace(void)
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 649df22d435f..7bfdf4c2347f 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -1,30 +1,53 @@
1/* 1/*
2 * h/w branch tracer for x86 based on bts 2 * h/w branch tracer for x86 based on bts
3 * 3 *
4 * Copyright (C) 2008 Markus Metzger <markus.t.metzger@gmail.com> 4 * Copyright (C) 2008-2009 Intel Corporation.
5 * 5 * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
6 */ 6 */
7 7#include <linux/spinlock.h>
8#include <linux/module.h> 8#include <linux/kallsyms.h>
9#include <linux/fs.h>
10#include <linux/debugfs.h> 9#include <linux/debugfs.h>
11#include <linux/ftrace.h> 10#include <linux/ftrace.h>
12#include <linux/kallsyms.h> 11#include <linux/module.h>
12#include <linux/cpu.h>
13#include <linux/smp.h>
14#include <linux/fs.h>
13 15
14#include <asm/ds.h> 16#include <asm/ds.h>
15 17
16#include "trace.h" 18#include "trace.h"
19#include "trace_output.h"
17 20
18 21
19#define SIZEOF_BTS (1 << 13) 22#define SIZEOF_BTS (1 << 13)
20 23
24/*
25 * The tracer lock protects the below per-cpu tracer array.
26 * It needs to be held to:
27 * - start tracing on all cpus
28 * - stop tracing on all cpus
29 * - start tracing on a single hotplug cpu
30 * - stop tracing on a single hotplug cpu
31 * - read the trace from all cpus
32 * - read the trace from a single cpu
33 */
34static DEFINE_SPINLOCK(bts_tracer_lock);
21static DEFINE_PER_CPU(struct bts_tracer *, tracer); 35static DEFINE_PER_CPU(struct bts_tracer *, tracer);
22static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); 36static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
23 37
24#define this_tracer per_cpu(tracer, smp_processor_id()) 38#define this_tracer per_cpu(tracer, smp_processor_id())
25#define this_buffer per_cpu(buffer, smp_processor_id()) 39#define this_buffer per_cpu(buffer, smp_processor_id())
26 40
41static int __read_mostly trace_hw_branches_enabled;
42static struct trace_array *hw_branch_trace __read_mostly;
43
27 44
45/*
46 * Start tracing on the current cpu.
47 * The argument is ignored.
48 *
49 * pre: bts_tracer_lock must be locked.
50 */
28static void bts_trace_start_cpu(void *arg) 51static void bts_trace_start_cpu(void *arg)
29{ 52{
30 if (this_tracer) 53 if (this_tracer)
@@ -42,14 +65,20 @@ static void bts_trace_start_cpu(void *arg)
42 65
43static void bts_trace_start(struct trace_array *tr) 66static void bts_trace_start(struct trace_array *tr)
44{ 67{
45 int cpu; 68 spin_lock(&bts_tracer_lock);
46 69
47 tracing_reset_online_cpus(tr); 70 on_each_cpu(bts_trace_start_cpu, NULL, 1);
71 trace_hw_branches_enabled = 1;
48 72
49 for_each_cpu(cpu, cpu_possible_mask) 73 spin_unlock(&bts_tracer_lock);
50 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
51} 74}
52 75
76/*
77 * Stop tracing on the current cpu.
78 * The argument is ignored.
79 *
80 * pre: bts_tracer_lock must be locked.
81 */
53static void bts_trace_stop_cpu(void *arg) 82static void bts_trace_stop_cpu(void *arg)
54{ 83{
55 if (this_tracer) { 84 if (this_tracer) {
@@ -60,26 +89,60 @@ static void bts_trace_stop_cpu(void *arg)
60 89
61static void bts_trace_stop(struct trace_array *tr) 90static void bts_trace_stop(struct trace_array *tr)
62{ 91{
63 int cpu; 92 spin_lock(&bts_tracer_lock);
93
94 trace_hw_branches_enabled = 0;
95 on_each_cpu(bts_trace_stop_cpu, NULL, 1);
96
97 spin_unlock(&bts_tracer_lock);
98}
99
100static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
101 unsigned long action, void *hcpu)
102{
103 unsigned int cpu = (unsigned long)hcpu;
64 104
65 for_each_cpu(cpu, cpu_possible_mask) 105 spin_lock(&bts_tracer_lock);
106
107 if (!trace_hw_branches_enabled)
108 goto out;
109
110 switch (action) {
111 case CPU_ONLINE:
112 case CPU_DOWN_FAILED:
113 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
114 break;
115 case CPU_DOWN_PREPARE:
66 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); 116 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
117 break;
118 }
119
120 out:
121 spin_unlock(&bts_tracer_lock);
122 return NOTIFY_DONE;
67} 123}
68 124
125static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
126 .notifier_call = bts_hotcpu_handler
127};
128
69static int bts_trace_init(struct trace_array *tr) 129static int bts_trace_init(struct trace_array *tr)
70{ 130{
71 tracing_reset_online_cpus(tr); 131 hw_branch_trace = tr;
132
72 bts_trace_start(tr); 133 bts_trace_start(tr);
73 134
74 return 0; 135 return 0;
75} 136}
76 137
138static void bts_trace_reset(struct trace_array *tr)
139{
140 bts_trace_stop(tr);
141}
142
77static void bts_trace_print_header(struct seq_file *m) 143static void bts_trace_print_header(struct seq_file *m)
78{ 144{
79 seq_puts(m, 145 seq_puts(m, "# CPU# TO <- FROM\n");
80 "# CPU# FROM TO FUNCTION\n");
81 seq_puts(m,
82 "# | | | |\n");
83} 146}
84 147
85static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) 148static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
@@ -87,15 +150,15 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
87 struct trace_entry *entry = iter->ent; 150 struct trace_entry *entry = iter->ent;
88 struct trace_seq *seq = &iter->seq; 151 struct trace_seq *seq = &iter->seq;
89 struct hw_branch_entry *it; 152 struct hw_branch_entry *it;
153 unsigned long symflags = TRACE_ITER_SYM_OFFSET;
90 154
91 trace_assign_type(it, entry); 155 trace_assign_type(it, entry);
92 156
93 if (entry->type == TRACE_HW_BRANCHES) { 157 if (entry->type == TRACE_HW_BRANCHES) {
94 if (trace_seq_printf(seq, "%4d ", entry->cpu) && 158 if (trace_seq_printf(seq, "%4d ", iter->cpu) &&
95 trace_seq_printf(seq, "0x%016llx -> 0x%016llx ", 159 seq_print_ip_sym(seq, it->to, symflags) &&
96 it->from, it->to) && 160 trace_seq_printf(seq, "\t <- ") &&
97 (!it->from || 161 seq_print_ip_sym(seq, it->from, symflags) &&
98 seq_print_ip_sym(seq, it->from, /* sym_flags = */ 0)) &&
99 trace_seq_printf(seq, "\n")) 162 trace_seq_printf(seq, "\n"))
100 return TRACE_TYPE_HANDLED; 163 return TRACE_TYPE_HANDLED;
101 return TRACE_TYPE_PARTIAL_LINE;; 164 return TRACE_TYPE_PARTIAL_LINE;;
@@ -103,26 +166,42 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
103 return TRACE_TYPE_UNHANDLED; 166 return TRACE_TYPE_UNHANDLED;
104} 167}
105 168
106void trace_hw_branch(struct trace_array *tr, u64 from, u64 to) 169void trace_hw_branch(u64 from, u64 to)
107{ 170{
171 struct trace_array *tr = hw_branch_trace;
108 struct ring_buffer_event *event; 172 struct ring_buffer_event *event;
109 struct hw_branch_entry *entry; 173 struct hw_branch_entry *entry;
110 unsigned long irq; 174 unsigned long irq1;
175 int cpu;
111 176
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq); 177 if (unlikely(!tr))
113 if (!event)
114 return; 178 return;
179
180 if (unlikely(!trace_hw_branches_enabled))
181 return;
182
183 local_irq_save(irq1);
184 cpu = raw_smp_processor_id();
185 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
186 goto out;
187
188 event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES,
189 sizeof(*entry), 0, 0);
190 if (!event)
191 goto out;
115 entry = ring_buffer_event_data(event); 192 entry = ring_buffer_event_data(event);
116 tracing_generic_entry_update(&entry->ent, 0, from); 193 tracing_generic_entry_update(&entry->ent, 0, from);
117 entry->ent.type = TRACE_HW_BRANCHES; 194 entry->ent.type = TRACE_HW_BRANCHES;
118 entry->ent.cpu = smp_processor_id();
119 entry->from = from; 195 entry->from = from;
120 entry->to = to; 196 entry->to = to;
121 ring_buffer_unlock_commit(tr->buffer, event, irq); 197 trace_buffer_unlock_commit(tr, event, 0, 0);
198
199 out:
200 atomic_dec(&tr->data[cpu]->disabled);
201 local_irq_restore(irq1);
122} 202}
123 203
124static void trace_bts_at(struct trace_array *tr, 204static void trace_bts_at(const struct bts_trace *trace, void *at)
125 const struct bts_trace *trace, void *at)
126{ 205{
127 struct bts_struct bts; 206 struct bts_struct bts;
128 int err = 0; 207 int err = 0;
@@ -137,18 +216,29 @@ static void trace_bts_at(struct trace_array *tr,
137 216
138 switch (bts.qualifier) { 217 switch (bts.qualifier) {
139 case BTS_BRANCH: 218 case BTS_BRANCH:
140 trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to); 219 trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to);
141 break; 220 break;
142 } 221 }
143} 222}
144 223
224/*
225 * Collect the trace on the current cpu and write it into the ftrace buffer.
226 *
227 * pre: bts_tracer_lock must be locked
228 */
145static void trace_bts_cpu(void *arg) 229static void trace_bts_cpu(void *arg)
146{ 230{
147 struct trace_array *tr = (struct trace_array *) arg; 231 struct trace_array *tr = (struct trace_array *) arg;
148 const struct bts_trace *trace; 232 const struct bts_trace *trace;
149 unsigned char *at; 233 unsigned char *at;
150 234
151 if (!this_tracer) 235 if (unlikely(!tr))
236 return;
237
238 if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled)))
239 return;
240
241 if (unlikely(!this_tracer))
152 return; 242 return;
153 243
154 ds_suspend_bts(this_tracer); 244 ds_suspend_bts(this_tracer);
@@ -158,11 +248,11 @@ static void trace_bts_cpu(void *arg)
158 248
159 for (at = trace->ds.top; (void *)at < trace->ds.end; 249 for (at = trace->ds.top; (void *)at < trace->ds.end;
160 at += trace->ds.size) 250 at += trace->ds.size)
161 trace_bts_at(tr, trace, at); 251 trace_bts_at(trace, at);
162 252
163 for (at = trace->ds.begin; (void *)at < trace->ds.top; 253 for (at = trace->ds.begin; (void *)at < trace->ds.top;
164 at += trace->ds.size) 254 at += trace->ds.size)
165 trace_bts_at(tr, trace, at); 255 trace_bts_at(trace, at);
166 256
167out: 257out:
168 ds_resume_bts(this_tracer); 258 ds_resume_bts(this_tracer);
@@ -170,26 +260,43 @@ out:
170 260
171static void trace_bts_prepare(struct trace_iterator *iter) 261static void trace_bts_prepare(struct trace_iterator *iter)
172{ 262{
173 int cpu; 263 spin_lock(&bts_tracer_lock);
264
265 on_each_cpu(trace_bts_cpu, iter->tr, 1);
266
267 spin_unlock(&bts_tracer_lock);
268}
269
270static void trace_bts_close(struct trace_iterator *iter)
271{
272 tracing_reset_online_cpus(iter->tr);
273}
274
275void trace_hw_branch_oops(void)
276{
277 spin_lock(&bts_tracer_lock);
278
279 trace_bts_cpu(hw_branch_trace);
174 280
175 for_each_cpu(cpu, cpu_possible_mask) 281 spin_unlock(&bts_tracer_lock);
176 smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
177} 282}
178 283
179struct tracer bts_tracer __read_mostly = 284struct tracer bts_tracer __read_mostly =
180{ 285{
181 .name = "hw-branch-tracer", 286 .name = "hw-branch-tracer",
182 .init = bts_trace_init, 287 .init = bts_trace_init,
183 .reset = bts_trace_stop, 288 .reset = bts_trace_reset,
184 .print_header = bts_trace_print_header, 289 .print_header = bts_trace_print_header,
185 .print_line = bts_trace_print_line, 290 .print_line = bts_trace_print_line,
186 .start = bts_trace_start, 291 .start = bts_trace_start,
187 .stop = bts_trace_stop, 292 .stop = bts_trace_stop,
188 .open = trace_bts_prepare 293 .open = trace_bts_prepare,
294 .close = trace_bts_close
189}; 295};
190 296
191__init static int init_bts_trace(void) 297__init static int init_bts_trace(void)
192{ 298{
299 register_hotcpu_notifier(&bts_hotcpu_notifier);
193 return register_tracer(&bts_tracer); 300 return register_tracer(&bts_tracer);
194} 301}
195device_initcall(init_bts_trace); 302device_initcall(init_bts_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 62a78d943534..b923d13e2fad 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * trace irqs off criticall timings 2 * trace irqs off critical timings
3 * 3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> 4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> 5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
@@ -32,6 +32,8 @@ enum {
32 32
33static int trace_type __read_mostly; 33static int trace_type __read_mostly;
34 34
35static int save_lat_flag;
36
35#ifdef CONFIG_PREEMPT_TRACER 37#ifdef CONFIG_PREEMPT_TRACER
36static inline int 38static inline int
37preempt_trace(void) 39preempt_trace(void)
@@ -95,7 +97,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
95 disabled = atomic_inc_return(&data->disabled); 97 disabled = atomic_inc_return(&data->disabled);
96 98
97 if (likely(disabled == 1)) 99 if (likely(disabled == 1))
98 trace_function(tr, data, ip, parent_ip, flags, preempt_count()); 100 trace_function(tr, ip, parent_ip, flags, preempt_count());
99 101
100 atomic_dec(&data->disabled); 102 atomic_dec(&data->disabled);
101} 103}
@@ -153,7 +155,7 @@ check_critical_timing(struct trace_array *tr,
153 if (!report_latency(delta)) 155 if (!report_latency(delta))
154 goto out_unlock; 156 goto out_unlock;
155 157
156 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); 158 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
157 159
158 latency = nsecs_to_usecs(delta); 160 latency = nsecs_to_usecs(delta);
159 161
@@ -177,7 +179,7 @@ out:
177 data->critical_sequence = max_sequence; 179 data->critical_sequence = max_sequence;
178 data->preempt_timestamp = ftrace_now(cpu); 180 data->preempt_timestamp = ftrace_now(cpu);
179 tracing_reset(tr, cpu); 181 tracing_reset(tr, cpu);
180 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); 182 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
181} 183}
182 184
183static inline void 185static inline void
@@ -210,7 +212,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
210 212
211 local_save_flags(flags); 213 local_save_flags(flags);
212 214
213 trace_function(tr, data, ip, parent_ip, flags, preempt_count()); 215 trace_function(tr, ip, parent_ip, flags, preempt_count());
214 216
215 per_cpu(tracing_cpu, cpu) = 1; 217 per_cpu(tracing_cpu, cpu) = 1;
216 218
@@ -244,7 +246,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
244 atomic_inc(&data->disabled); 246 atomic_inc(&data->disabled);
245 247
246 local_save_flags(flags); 248 local_save_flags(flags);
247 trace_function(tr, data, ip, parent_ip, flags, preempt_count()); 249 trace_function(tr, ip, parent_ip, flags, preempt_count());
248 check_critical_timing(tr, data, parent_ip ? : ip, cpu); 250 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
249 data->critical_start = 0; 251 data->critical_start = 0;
250 atomic_dec(&data->disabled); 252 atomic_dec(&data->disabled);
@@ -353,33 +355,26 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
353} 355}
354#endif /* CONFIG_PREEMPT_TRACER */ 356#endif /* CONFIG_PREEMPT_TRACER */
355 357
356/*
357 * save_tracer_enabled is used to save the state of the tracer_enabled
358 * variable when we disable it when we open a trace output file.
359 */
360static int save_tracer_enabled;
361
362static void start_irqsoff_tracer(struct trace_array *tr) 358static void start_irqsoff_tracer(struct trace_array *tr)
363{ 359{
364 register_ftrace_function(&trace_ops); 360 register_ftrace_function(&trace_ops);
365 if (tracing_is_enabled()) { 361 if (tracing_is_enabled())
366 tracer_enabled = 1; 362 tracer_enabled = 1;
367 save_tracer_enabled = 1; 363 else
368 } else {
369 tracer_enabled = 0; 364 tracer_enabled = 0;
370 save_tracer_enabled = 0;
371 }
372} 365}
373 366
374static void stop_irqsoff_tracer(struct trace_array *tr) 367static void stop_irqsoff_tracer(struct trace_array *tr)
375{ 368{
376 tracer_enabled = 0; 369 tracer_enabled = 0;
377 save_tracer_enabled = 0;
378 unregister_ftrace_function(&trace_ops); 370 unregister_ftrace_function(&trace_ops);
379} 371}
380 372
381static void __irqsoff_tracer_init(struct trace_array *tr) 373static void __irqsoff_tracer_init(struct trace_array *tr)
382{ 374{
375 save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
376 trace_flags |= TRACE_ITER_LATENCY_FMT;
377
383 tracing_max_latency = 0; 378 tracing_max_latency = 0;
384 irqsoff_trace = tr; 379 irqsoff_trace = tr;
385 /* make sure that the tracer is visible */ 380 /* make sure that the tracer is visible */
@@ -390,30 +385,19 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
390static void irqsoff_tracer_reset(struct trace_array *tr) 385static void irqsoff_tracer_reset(struct trace_array *tr)
391{ 386{
392 stop_irqsoff_tracer(tr); 387 stop_irqsoff_tracer(tr);
388
389 if (!save_lat_flag)
390 trace_flags &= ~TRACE_ITER_LATENCY_FMT;
393} 391}
394 392
395static void irqsoff_tracer_start(struct trace_array *tr) 393static void irqsoff_tracer_start(struct trace_array *tr)
396{ 394{
397 tracer_enabled = 1; 395 tracer_enabled = 1;
398 save_tracer_enabled = 1;
399} 396}
400 397
401static void irqsoff_tracer_stop(struct trace_array *tr) 398static void irqsoff_tracer_stop(struct trace_array *tr)
402{ 399{
403 tracer_enabled = 0; 400 tracer_enabled = 0;
404 save_tracer_enabled = 0;
405}
406
407static void irqsoff_tracer_open(struct trace_iterator *iter)
408{
409 /* stop the trace while dumping */
410 tracer_enabled = 0;
411}
412
413static void irqsoff_tracer_close(struct trace_iterator *iter)
414{
415 /* restart tracing */
416 tracer_enabled = save_tracer_enabled;
417} 401}
418 402
419#ifdef CONFIG_IRQSOFF_TRACER 403#ifdef CONFIG_IRQSOFF_TRACER
@@ -431,8 +415,6 @@ static struct tracer irqsoff_tracer __read_mostly =
431 .reset = irqsoff_tracer_reset, 415 .reset = irqsoff_tracer_reset,
432 .start = irqsoff_tracer_start, 416 .start = irqsoff_tracer_start,
433 .stop = irqsoff_tracer_stop, 417 .stop = irqsoff_tracer_stop,
434 .open = irqsoff_tracer_open,
435 .close = irqsoff_tracer_close,
436 .print_max = 1, 418 .print_max = 1,
437#ifdef CONFIG_FTRACE_SELFTEST 419#ifdef CONFIG_FTRACE_SELFTEST
438 .selftest = trace_selftest_startup_irqsoff, 420 .selftest = trace_selftest_startup_irqsoff,
@@ -459,8 +441,6 @@ static struct tracer preemptoff_tracer __read_mostly =
459 .reset = irqsoff_tracer_reset, 441 .reset = irqsoff_tracer_reset,
460 .start = irqsoff_tracer_start, 442 .start = irqsoff_tracer_start,
461 .stop = irqsoff_tracer_stop, 443 .stop = irqsoff_tracer_stop,
462 .open = irqsoff_tracer_open,
463 .close = irqsoff_tracer_close,
464 .print_max = 1, 444 .print_max = 1,
465#ifdef CONFIG_FTRACE_SELFTEST 445#ifdef CONFIG_FTRACE_SELFTEST
466 .selftest = trace_selftest_startup_preemptoff, 446 .selftest = trace_selftest_startup_preemptoff,
@@ -489,8 +469,6 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
489 .reset = irqsoff_tracer_reset, 469 .reset = irqsoff_tracer_reset,
490 .start = irqsoff_tracer_start, 470 .start = irqsoff_tracer_start,
491 .stop = irqsoff_tracer_stop, 471 .stop = irqsoff_tracer_stop,
492 .open = irqsoff_tracer_open,
493 .close = irqsoff_tracer_close,
494 .print_max = 1, 472 .print_max = 1,
495#ifdef CONFIG_FTRACE_SELFTEST 473#ifdef CONFIG_FTRACE_SELFTEST
496 .selftest = trace_selftest_startup_preemptirqsoff, 474 .selftest = trace_selftest_startup_preemptirqsoff,
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 80e503ef6136..f095916e477f 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -12,6 +12,7 @@
12#include <asm/atomic.h> 12#include <asm/atomic.h>
13 13
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h"
15 16
16struct header_iter { 17struct header_iter {
17 struct pci_dev *dev; 18 struct pci_dev *dev;
@@ -183,21 +184,22 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
183 switch (rw->opcode) { 184 switch (rw->opcode) {
184 case MMIO_READ: 185 case MMIO_READ:
185 ret = trace_seq_printf(s, 186 ret = trace_seq_printf(s,
186 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 187 "R %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
187 rw->width, secs, usec_rem, rw->map_id, 188 rw->width, secs, usec_rem, rw->map_id,
188 (unsigned long long)rw->phys, 189 (unsigned long long)rw->phys,
189 rw->value, rw->pc, 0); 190 rw->value, rw->pc, 0);
190 break; 191 break;
191 case MMIO_WRITE: 192 case MMIO_WRITE:
192 ret = trace_seq_printf(s, 193 ret = trace_seq_printf(s,
193 "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 194 "W %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
194 rw->width, secs, usec_rem, rw->map_id, 195 rw->width, secs, usec_rem, rw->map_id,
195 (unsigned long long)rw->phys, 196 (unsigned long long)rw->phys,
196 rw->value, rw->pc, 0); 197 rw->value, rw->pc, 0);
197 break; 198 break;
198 case MMIO_UNKNOWN_OP: 199 case MMIO_UNKNOWN_OP:
199 ret = trace_seq_printf(s, 200 ret = trace_seq_printf(s,
200 "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n", 201 "UNKNOWN %u.%06lu %d 0x%llx %02lx,%02lx,"
202 "%02lx 0x%lx %d\n",
201 secs, usec_rem, rw->map_id, 203 secs, usec_rem, rw->map_id,
202 (unsigned long long)rw->phys, 204 (unsigned long long)rw->phys,
203 (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, 205 (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff,
@@ -229,14 +231,14 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
229 switch (m->opcode) { 231 switch (m->opcode) {
230 case MMIO_PROBE: 232 case MMIO_PROBE:
231 ret = trace_seq_printf(s, 233 ret = trace_seq_printf(s,
232 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", 234 "MAP %u.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
233 secs, usec_rem, m->map_id, 235 secs, usec_rem, m->map_id,
234 (unsigned long long)m->phys, m->virt, m->len, 236 (unsigned long long)m->phys, m->virt, m->len,
235 0UL, 0); 237 0UL, 0);
236 break; 238 break;
237 case MMIO_UNPROBE: 239 case MMIO_UNPROBE:
238 ret = trace_seq_printf(s, 240 ret = trace_seq_printf(s,
239 "UNMAP %lu.%06lu %d 0x%lx %d\n", 241 "UNMAP %u.%06lu %d 0x%lx %d\n",
240 secs, usec_rem, m->map_id, 0UL, 0); 242 secs, usec_rem, m->map_id, 0UL, 0);
241 break; 243 break;
242 default: 244 default:
@@ -255,18 +257,15 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
255 const char *msg = print->buf; 257 const char *msg = print->buf;
256 struct trace_seq *s = &iter->seq; 258 struct trace_seq *s = &iter->seq;
257 unsigned long long t = ns2usecs(iter->ts); 259 unsigned long long t = ns2usecs(iter->ts);
258 unsigned long usec_rem = do_div(t, 1000000ULL); 260 unsigned long usec_rem = do_div(t, USEC_PER_SEC);
259 unsigned secs = (unsigned long)t; 261 unsigned secs = (unsigned long)t;
260 int ret; 262 int ret;
261 263
262 /* The trailing newline must be in the message. */ 264 /* The trailing newline must be in the message. */
263 ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg); 265 ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg);
264 if (!ret) 266 if (!ret)
265 return TRACE_TYPE_PARTIAL_LINE; 267 return TRACE_TYPE_PARTIAL_LINE;
266 268
267 if (entry->flags & TRACE_FLAG_CONT)
268 trace_seq_print_cont(s, iter);
269
270 return TRACE_TYPE_HANDLED; 269 return TRACE_TYPE_HANDLED;
271} 270}
272 271
@@ -308,21 +307,17 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
308{ 307{
309 struct ring_buffer_event *event; 308 struct ring_buffer_event *event;
310 struct trace_mmiotrace_rw *entry; 309 struct trace_mmiotrace_rw *entry;
311 unsigned long irq_flags; 310 int pc = preempt_count();
312 311
313 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 312 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW,
314 &irq_flags); 313 sizeof(*entry), 0, pc);
315 if (!event) { 314 if (!event) {
316 atomic_inc(&dropped_count); 315 atomic_inc(&dropped_count);
317 return; 316 return;
318 } 317 }
319 entry = ring_buffer_event_data(event); 318 entry = ring_buffer_event_data(event);
320 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
321 entry->ent.type = TRACE_MMIO_RW;
322 entry->rw = *rw; 319 entry->rw = *rw;
323 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 320 trace_buffer_unlock_commit(tr, event, 0, pc);
324
325 trace_wake_up();
326} 321}
327 322
328void mmio_trace_rw(struct mmiotrace_rw *rw) 323void mmio_trace_rw(struct mmiotrace_rw *rw)
@@ -338,21 +333,17 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
338{ 333{
339 struct ring_buffer_event *event; 334 struct ring_buffer_event *event;
340 struct trace_mmiotrace_map *entry; 335 struct trace_mmiotrace_map *entry;
341 unsigned long irq_flags; 336 int pc = preempt_count();
342 337
343 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 338 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP,
344 &irq_flags); 339 sizeof(*entry), 0, pc);
345 if (!event) { 340 if (!event) {
346 atomic_inc(&dropped_count); 341 atomic_inc(&dropped_count);
347 return; 342 return;
348 } 343 }
349 entry = ring_buffer_event_data(event); 344 entry = ring_buffer_event_data(event);
350 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
351 entry->ent.type = TRACE_MMIO_MAP;
352 entry->map = *map; 345 entry->map = *map;
353 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 346 trace_buffer_unlock_commit(tr, event, 0, pc);
354
355 trace_wake_up();
356} 347}
357 348
358void mmio_trace_mapping(struct mmiotrace_map *map) 349void mmio_trace_mapping(struct mmiotrace_map *map)
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index b9767acd30ac..9aa84bde23cd 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -47,12 +47,7 @@ static void stop_nop_trace(struct trace_array *tr)
47 47
48static int nop_trace_init(struct trace_array *tr) 48static int nop_trace_init(struct trace_array *tr)
49{ 49{
50 int cpu;
51 ctx_trace = tr; 50 ctx_trace = tr;
52
53 for_each_online_cpu(cpu)
54 tracing_reset(tr, cpu);
55
56 start_nop_trace(tr); 51 start_nop_trace(tr);
57 return 0; 52 return 0;
58} 53}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
new file mode 100644
index 000000000000..6a4c9dea191e
--- /dev/null
+++ b/kernel/trace/trace_output.c
@@ -0,0 +1,967 @@
1/*
2 * trace_output.c
3 *
4 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/mutex.h>
10#include <linux/ftrace.h>
11
12#include "trace_output.h"
13
14/* must be a power of 2 */
15#define EVENT_HASHSIZE 128
16
17static DEFINE_MUTEX(trace_event_mutex);
18static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
19
20static int next_event_type = __TRACE_LAST_TYPE + 1;
21
22/**
23 * trace_seq_printf - sequence printing of trace information
24 * @s: trace sequence descriptor
25 * @fmt: printf format string
26 *
27 * The tracer may use either sequence operations or its own
28 * copy to user routines. To simplify formatting of a trace,
29 * trace_seq_printf is used to store strings into a special
30 * buffer (@s). Then the output may be either used by
31 * the sequencer or pulled into another buffer.
32 */
33int
34trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
35{
36 int len = (PAGE_SIZE - 1) - s->len;
37 va_list ap;
38 int ret;
39
40 if (!len)
41 return 0;
42
43 va_start(ap, fmt);
44 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
45 va_end(ap);
46
47 /* If we can't write it all, don't bother writing anything */
48 if (ret >= len)
49 return 0;
50
51 s->len += ret;
52
53 return len;
54}
55
56int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
57{
58 int len = (PAGE_SIZE - 1) - s->len;
59 int ret;
60
61 if (!len)
62 return 0;
63
64 ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
65
66 /* If we can't write it all, don't bother writing anything */
67 if (ret >= len)
68 return 0;
69
70 s->len += ret;
71
72 return len;
73}
74
75/**
76 * trace_seq_puts - trace sequence printing of simple string
77 * @s: trace sequence descriptor
78 * @str: simple string to record
79 *
80 * The tracer may use either the sequence operations or its own
81 * copy to user routines. This function records a simple string
82 * into a special buffer (@s) for later retrieval by a sequencer
83 * or other mechanism.
84 */
85int trace_seq_puts(struct trace_seq *s, const char *str)
86{
87 int len = strlen(str);
88
89 if (len > ((PAGE_SIZE - 1) - s->len))
90 return 0;
91
92 memcpy(s->buffer + s->len, str, len);
93 s->len += len;
94
95 return len;
96}
97
98int trace_seq_putc(struct trace_seq *s, unsigned char c)
99{
100 if (s->len >= (PAGE_SIZE - 1))
101 return 0;
102
103 s->buffer[s->len++] = c;
104
105 return 1;
106}
107
108int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
109{
110 if (len > ((PAGE_SIZE - 1) - s->len))
111 return 0;
112
113 memcpy(s->buffer + s->len, mem, len);
114 s->len += len;
115
116 return len;
117}
118
119int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
120{
121 unsigned char hex[HEX_CHARS];
122 unsigned char *data = mem;
123 int i, j;
124
125#ifdef __BIG_ENDIAN
126 for (i = 0, j = 0; i < len; i++) {
127#else
128 for (i = len-1, j = 0; i >= 0; i--) {
129#endif
130 hex[j++] = hex_asc_hi(data[i]);
131 hex[j++] = hex_asc_lo(data[i]);
132 }
133 hex[j++] = ' ';
134
135 return trace_seq_putmem(s, hex, j);
136}
137
138int trace_seq_path(struct trace_seq *s, struct path *path)
139{
140 unsigned char *p;
141
142 if (s->len >= (PAGE_SIZE - 1))
143 return 0;
144 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
145 if (!IS_ERR(p)) {
146 p = mangle_path(s->buffer + s->len, p, "\n");
147 if (p) {
148 s->len = p - s->buffer;
149 return 1;
150 }
151 } else {
152 s->buffer[s->len++] = '?';
153 return 1;
154 }
155
156 return 0;
157}
158
159#ifdef CONFIG_KRETPROBES
160static inline const char *kretprobed(const char *name)
161{
162 static const char tramp_name[] = "kretprobe_trampoline";
163 int size = sizeof(tramp_name);
164
165 if (strncmp(tramp_name, name, size) == 0)
166 return "[unknown/kretprobe'd]";
167 return name;
168}
169#else
170static inline const char *kretprobed(const char *name)
171{
172 return name;
173}
174#endif /* CONFIG_KRETPROBES */
175
176static int
177seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
178{
179#ifdef CONFIG_KALLSYMS
180 char str[KSYM_SYMBOL_LEN];
181 const char *name;
182
183 kallsyms_lookup(address, NULL, NULL, NULL, str);
184
185 name = kretprobed(str);
186
187 return trace_seq_printf(s, fmt, name);
188#endif
189 return 1;
190}
191
192static int
193seq_print_sym_offset(struct trace_seq *s, const char *fmt,
194 unsigned long address)
195{
196#ifdef CONFIG_KALLSYMS
197 char str[KSYM_SYMBOL_LEN];
198 const char *name;
199
200 sprint_symbol(str, address);
201 name = kretprobed(str);
202
203 return trace_seq_printf(s, fmt, name);
204#endif
205 return 1;
206}
207
208#ifndef CONFIG_64BIT
209# define IP_FMT "%08lx"
210#else
211# define IP_FMT "%016lx"
212#endif
213
214int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
215 unsigned long ip, unsigned long sym_flags)
216{
217 struct file *file = NULL;
218 unsigned long vmstart = 0;
219 int ret = 1;
220
221 if (mm) {
222 const struct vm_area_struct *vma;
223
224 down_read(&mm->mmap_sem);
225 vma = find_vma(mm, ip);
226 if (vma) {
227 file = vma->vm_file;
228 vmstart = vma->vm_start;
229 }
230 if (file) {
231 ret = trace_seq_path(s, &file->f_path);
232 if (ret)
233 ret = trace_seq_printf(s, "[+0x%lx]",
234 ip - vmstart);
235 }
236 up_read(&mm->mmap_sem);
237 }
238 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
239 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
240 return ret;
241}
242
243int
244seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
245 unsigned long sym_flags)
246{
247 struct mm_struct *mm = NULL;
248 int ret = 1;
249 unsigned int i;
250
251 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
252 struct task_struct *task;
253 /*
254 * we do the lookup on the thread group leader,
255 * since individual threads might have already quit!
256 */
257 rcu_read_lock();
258 task = find_task_by_vpid(entry->ent.tgid);
259 if (task)
260 mm = get_task_mm(task);
261 rcu_read_unlock();
262 }
263
264 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
265 unsigned long ip = entry->caller[i];
266
267 if (ip == ULONG_MAX || !ret)
268 break;
269 if (i && ret)
270 ret = trace_seq_puts(s, " <- ");
271 if (!ip) {
272 if (ret)
273 ret = trace_seq_puts(s, "??");
274 continue;
275 }
276 if (!ret)
277 break;
278 if (ret)
279 ret = seq_print_user_ip(s, mm, ip, sym_flags);
280 }
281
282 if (mm)
283 mmput(mm);
284 return ret;
285}
286
287int
288seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
289{
290 int ret;
291
292 if (!ip)
293 return trace_seq_printf(s, "0");
294
295 if (sym_flags & TRACE_ITER_SYM_OFFSET)
296 ret = seq_print_sym_offset(s, "%s", ip);
297 else
298 ret = seq_print_sym_short(s, "%s", ip);
299
300 if (!ret)
301 return 0;
302
303 if (sym_flags & TRACE_ITER_SYM_ADDR)
304 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
305 return ret;
306}
307
308static int
309lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
310{
311 int hardirq, softirq;
312 char comm[TASK_COMM_LEN];
313
314 trace_find_cmdline(entry->pid, comm);
315 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
316 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
317
318 if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c",
319 comm, entry->pid, cpu,
320 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
321 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
322 'X' : '.',
323 (entry->flags & TRACE_FLAG_NEED_RESCHED) ?
324 'N' : '.',
325 (hardirq && softirq) ? 'H' :
326 hardirq ? 'h' : softirq ? 's' : '.'))
327 return 0;
328
329 if (entry->preempt_count)
330 return trace_seq_printf(s, "%x", entry->preempt_count);
331 return trace_seq_puts(s, ".");
332}
333
334static unsigned long preempt_mark_thresh = 100;
335
336static int
337lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
338 unsigned long rel_usecs)
339{
340 return trace_seq_printf(s, " %4lldus%c: ", abs_usecs,
341 rel_usecs > preempt_mark_thresh ? '!' :
342 rel_usecs > 1 ? '+' : ' ');
343}
344
345int trace_print_context(struct trace_iterator *iter)
346{
347 struct trace_seq *s = &iter->seq;
348 struct trace_entry *entry = iter->ent;
349 unsigned long long t = ns2usecs(iter->ts);
350 unsigned long usec_rem = do_div(t, USEC_PER_SEC);
351 unsigned long secs = (unsigned long)t;
352 char comm[TASK_COMM_LEN];
353
354 trace_find_cmdline(entry->pid, comm);
355
356 return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ",
357 comm, entry->pid, iter->cpu, secs, usec_rem);
358}
359
360int trace_print_lat_context(struct trace_iterator *iter)
361{
362 u64 next_ts;
363 int ret;
364 struct trace_seq *s = &iter->seq;
365 struct trace_entry *entry = iter->ent,
366 *next_entry = trace_find_next_entry(iter, NULL,
367 &next_ts);
368 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
369 unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
370 unsigned long rel_usecs;
371
372 if (!next_entry)
373 next_ts = iter->ts;
374 rel_usecs = ns2usecs(next_ts - iter->ts);
375
376 if (verbose) {
377 char comm[TASK_COMM_LEN];
378
379 trace_find_cmdline(entry->pid, comm);
380
381 ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
382 " %ld.%03ldms (+%ld.%03ldms): ", comm,
383 entry->pid, iter->cpu, entry->flags,
384 entry->preempt_count, iter->idx,
385 ns2usecs(iter->ts),
386 abs_usecs / USEC_PER_MSEC,
387 abs_usecs % USEC_PER_MSEC,
388 rel_usecs / USEC_PER_MSEC,
389 rel_usecs % USEC_PER_MSEC);
390 } else {
391 ret = lat_print_generic(s, entry, iter->cpu);
392 if (ret)
393 ret = lat_print_timestamp(s, abs_usecs, rel_usecs);
394 }
395
396 return ret;
397}
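In the non-verbose case the two helpers above combine into the latency-format prefix. As a rough, hypothetical example: "bash"/2389 on CPU 0 with IRQs disabled ('d'), no resched pending ('.'), a softirq in progress ('s'), preempt_count 1, 12345us since the trace started and more than 100us to the next entry (hence the '!') would come out approximately as:

	    bash-2389    0d.s1 12345us!: <event text>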
398
399static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
400
401static int task_state_char(unsigned long state)
402{
403 int bit = state ? __ffs(state) + 1 : 0;
404
405 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
406}
407
408/**
409 * ftrace_find_event - find a registered event
410 * @type: the type of event to look for
411 *
412 * Returns the registered event of type @type, or NULL if there is none.
413 */
414struct trace_event *ftrace_find_event(int type)
415{
416 struct trace_event *event;
417 struct hlist_node *n;
418 unsigned key;
419
420 key = type & (EVENT_HASHSIZE - 1);
421
422 hlist_for_each_entry_rcu(event, n, &event_hash[key], node) {
423 if (event->type == type)
424 return event;
425 }
426
427 return NULL;
428}
429
430/**
431 * register_ftrace_event - register output for an event type
432 * @event: the event type to register
433 *
434 * Event types are stored in a hash table that is used to look up
435 * how to print an event. If @event->type is already set, that
436 * type is used; otherwise a free type number is assigned
437 * automatically.
438 *
439 * If you assign your own type, please make sure it is added
440 * to the trace_type enum in trace.h, to avoid collisions
441 * with the dynamic types.
442 *
443 * Returns the event type number or zero on error.
444 */
445int register_ftrace_event(struct trace_event *event)
446{
447 unsigned key;
448 int ret = 0;
449
450 mutex_lock(&trace_event_mutex);
451
452 if (!event->type)
453 event->type = next_event_type++;
454 else if (event->type > __TRACE_LAST_TYPE) {
455 printk(KERN_WARNING "Need to add type to trace.h\n");
456 WARN_ON(1);
457 }
458
459 if (ftrace_find_event(event->type))
460 goto out;
461
462 if (event->trace == NULL)
463 event->trace = trace_nop_print;
464 if (event->raw == NULL)
465 event->raw = trace_nop_print;
466 if (event->hex == NULL)
467 event->hex = trace_nop_print;
468 if (event->binary == NULL)
469 event->binary = trace_nop_print;
470
471 key = event->type & (EVENT_HASHSIZE - 1);
472
473 hlist_add_head_rcu(&event->node, &event_hash[key]);
474
475 ret = event->type;
476 out:
477 mutex_unlock(&trace_event_mutex);
478
479 return ret;
480}
481
482/**
483 * unregister_ftrace_event - remove a no longer used event
484 * @event: the event to remove
485 */
486int unregister_ftrace_event(struct trace_event *event)
487{
488 mutex_lock(&trace_event_mutex);
489 hlist_del(&event->node);
490 mutex_unlock(&trace_event_mutex);
491
492 return 0;
493}
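As a usage sketch (hypothetical, not part of this patch): a tracer that emits its own entry type supplies one or more trace_print_func handlers and registers a struct trace_event for them. Leaving .type at zero asks register_ftrace_event() to hand out a dynamic type number, and any output callback left NULL falls back to trace_nop_print(). All names below are made up for illustration.

	static enum print_line_t my_event_print(struct trace_iterator *iter, int flags)
	{
		struct trace_seq *s = &iter->seq;

		/*
		 * A real handler would trace_assign_type() its private entry
		 * struct; only the common trace_entry header is touched here.
		 */
		if (!trace_seq_printf(s, "my event from pid %d\n", iter->ent->pid))
			return TRACE_TYPE_PARTIAL_LINE;

		return TRACE_TYPE_HANDLED;
	}

	static struct trace_event my_trace_event = {
		.type	= 0,			/* let register_ftrace_event() pick one */
		.trace	= my_event_print,	/* .raw/.hex/.binary default to nop */
	};

	/* in the tracer's init path: */
	if (!register_ftrace_event(&my_trace_event))
		pr_warning("my tracer: could not register output event\n");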
494
495/*
496 * Standard events
497 */
498
499enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags)
500{
501 return TRACE_TYPE_HANDLED;
502}
503
504/* TRACE_FN */
505static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags)
506{
507 struct ftrace_entry *field;
508 struct trace_seq *s = &iter->seq;
509
510 trace_assign_type(field, iter->ent);
511
512 if (!seq_print_ip_sym(s, field->ip, flags))
513 goto partial;
514
515 if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) {
516 if (!trace_seq_printf(s, " <-"))
517 goto partial;
518 if (!seq_print_ip_sym(s,
519 field->parent_ip,
520 flags))
521 goto partial;
522 }
523 if (!trace_seq_printf(s, "\n"))
524 goto partial;
525
526 return TRACE_TYPE_HANDLED;
527
528 partial:
529 return TRACE_TYPE_PARTIAL_LINE;
530}
531
532static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags)
533{
534 struct ftrace_entry *field;
535
536 trace_assign_type(field, iter->ent);
537
538 if (!trace_seq_printf(&iter->seq, "%lx %lx\n",
539 field->ip,
540 field->parent_ip))
541 return TRACE_TYPE_PARTIAL_LINE;
542
543 return TRACE_TYPE_HANDLED;
544}
545
546static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags)
547{
548 struct ftrace_entry *field;
549 struct trace_seq *s = &iter->seq;
550
551 trace_assign_type(field, iter->ent);
552
553 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
554 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
555
556 return TRACE_TYPE_HANDLED;
557}
558
559static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags)
560{
561 struct ftrace_entry *field;
562 struct trace_seq *s = &iter->seq;
563
564 trace_assign_type(field, iter->ent);
565
566 SEQ_PUT_FIELD_RET(s, field->ip);
567 SEQ_PUT_FIELD_RET(s, field->parent_ip);
568
569 return TRACE_TYPE_HANDLED;
570}
571
572static struct trace_event trace_fn_event = {
573 .type = TRACE_FN,
574 .trace = trace_fn_trace,
575 .raw = trace_fn_raw,
576 .hex = trace_fn_hex,
577 .binary = trace_fn_bin,
578};
579
580/* TRACE_CTX and TRACE_WAKE */
581static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
582 char *delim)
583{
584 struct ctx_switch_entry *field;
585 char comm[TASK_COMM_LEN];
586 int S, T;
587
588
589 trace_assign_type(field, iter->ent);
590
591 T = task_state_char(field->next_state);
592 S = task_state_char(field->prev_state);
593 trace_find_cmdline(field->next_pid, comm);
594 if (!trace_seq_printf(&iter->seq,
595 " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
596 field->prev_pid,
597 field->prev_prio,
598 S, delim,
599 field->next_cpu,
600 field->next_pid,
601 field->next_prio,
602 T, comm))
603 return TRACE_TYPE_PARTIAL_LINE;
604
605 return TRACE_TYPE_HANDLED;
606}
607
608static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags)
609{
610 return trace_ctxwake_print(iter, "==>");
611}
612
613static enum print_line_t trace_wake_print(struct trace_iterator *iter,
614 int flags)
615{
616 return trace_ctxwake_print(iter, " +");
617}
618
619static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
620{
621 struct ctx_switch_entry *field;
622 int T;
623
624 trace_assign_type(field, iter->ent);
625
626 if (!S)
627 S = task_state_char(field->prev_state);
628 T = task_state_char(field->next_state);
629 if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
630 field->prev_pid,
631 field->prev_prio,
632 S,
633 field->next_cpu,
634 field->next_pid,
635 field->next_prio,
636 T))
637 return TRACE_TYPE_PARTIAL_LINE;
638
639 return TRACE_TYPE_HANDLED;
640}
641
642static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags)
643{
644 return trace_ctxwake_raw(iter, 0);
645}
646
647static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags)
648{
649 return trace_ctxwake_raw(iter, '+');
650}
651
652
653static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
654{
655 struct ctx_switch_entry *field;
656 struct trace_seq *s = &iter->seq;
657 int T;
658
659 trace_assign_type(field, iter->ent);
660
661 if (!S)
662 S = task_state_char(field->prev_state);
663 T = task_state_char(field->next_state);
664
665 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
666 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
667 SEQ_PUT_HEX_FIELD_RET(s, S);
668 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
669 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
670 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
671 SEQ_PUT_HEX_FIELD_RET(s, T);
672
673 return TRACE_TYPE_HANDLED;
674}
675
676static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags)
677{
678 return trace_ctxwake_hex(iter, 0);
679}
680
681static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags)
682{
683 return trace_ctxwake_hex(iter, '+');
684}
685
686static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
687 int flags)
688{
689 struct ctx_switch_entry *field;
690 struct trace_seq *s = &iter->seq;
691
692 trace_assign_type(field, iter->ent);
693
694 SEQ_PUT_FIELD_RET(s, field->prev_pid);
695 SEQ_PUT_FIELD_RET(s, field->prev_prio);
696 SEQ_PUT_FIELD_RET(s, field->prev_state);
697 SEQ_PUT_FIELD_RET(s, field->next_pid);
698 SEQ_PUT_FIELD_RET(s, field->next_prio);
699 SEQ_PUT_FIELD_RET(s, field->next_state);
700
701 return TRACE_TYPE_HANDLED;
702}
703
704static struct trace_event trace_ctx_event = {
705 .type = TRACE_CTX,
706 .trace = trace_ctx_print,
707 .raw = trace_ctx_raw,
708 .hex = trace_ctx_hex,
709 .binary = trace_ctxwake_bin,
710};
711
712static struct trace_event trace_wake_event = {
713 .type = TRACE_WAKE,
714 .trace = trace_wake_print,
715 .raw = trace_wake_raw,
716 .hex = trace_wake_hex,
717 .binary = trace_ctxwake_bin,
718};
719
720/* TRACE_SPECIAL */
721static enum print_line_t trace_special_print(struct trace_iterator *iter,
722 int flags)
723{
724 struct special_entry *field;
725
726 trace_assign_type(field, iter->ent);
727
728 if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n",
729 field->arg1,
730 field->arg2,
731 field->arg3))
732 return TRACE_TYPE_PARTIAL_LINE;
733
734 return TRACE_TYPE_HANDLED;
735}
736
737static enum print_line_t trace_special_hex(struct trace_iterator *iter,
738 int flags)
739{
740 struct special_entry *field;
741 struct trace_seq *s = &iter->seq;
742
743 trace_assign_type(field, iter->ent);
744
745 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
746 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
747 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
748
749 return TRACE_TYPE_HANDLED;
750}
751
752static enum print_line_t trace_special_bin(struct trace_iterator *iter,
753 int flags)
754{
755 struct special_entry *field;
756 struct trace_seq *s = &iter->seq;
757
758 trace_assign_type(field, iter->ent);
759
760 SEQ_PUT_FIELD_RET(s, field->arg1);
761 SEQ_PUT_FIELD_RET(s, field->arg2);
762 SEQ_PUT_FIELD_RET(s, field->arg3);
763
764 return TRACE_TYPE_HANDLED;
765}
766
767static struct trace_event trace_special_event = {
768 .type = TRACE_SPECIAL,
769 .trace = trace_special_print,
770 .raw = trace_special_print,
771 .hex = trace_special_hex,
772 .binary = trace_special_bin,
773};
774
775/* TRACE_STACK */
776
777static enum print_line_t trace_stack_print(struct trace_iterator *iter,
778 int flags)
779{
780 struct stack_entry *field;
781 struct trace_seq *s = &iter->seq;
782 int i;
783
784 trace_assign_type(field, iter->ent);
785
786 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
787 if (i) {
788 if (!trace_seq_puts(s, " <= "))
789 goto partial;
790 }
791 if (!seq_print_ip_sym(s, field->caller[i], flags))
792 goto partial;
793
794 if (!trace_seq_puts(s, "\n"))
795 goto partial;
796 }
797
798 return TRACE_TYPE_HANDLED;
799
800 partial:
801 return TRACE_TYPE_PARTIAL_LINE;
802}
803
804static struct trace_event trace_stack_event = {
805 .type = TRACE_STACK,
806 .trace = trace_stack_print,
807 .raw = trace_special_print,
808 .hex = trace_special_hex,
809 .binary = trace_special_bin,
810};
811
812/* TRACE_USER_STACK */
813static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
814 int flags)
815{
816 struct userstack_entry *field;
817 struct trace_seq *s = &iter->seq;
818
819 trace_assign_type(field, iter->ent);
820
821 if (!seq_print_userip_objs(field, s, flags))
822 goto partial;
823
824 if (!trace_seq_putc(s, '\n'))
825 goto partial;
826
827 return TRACE_TYPE_HANDLED;
828
829 partial:
830 return TRACE_TYPE_PARTIAL_LINE;
831}
832
833static struct trace_event trace_user_stack_event = {
834 .type = TRACE_USER_STACK,
835 .trace = trace_user_stack_print,
836 .raw = trace_special_print,
837 .hex = trace_special_hex,
838 .binary = trace_special_bin,
839};
840
841/* TRACE_BPRINT */
842static enum print_line_t
843trace_bprint_print(struct trace_iterator *iter, int flags)
844{
845 struct trace_entry *entry = iter->ent;
846 struct trace_seq *s = &iter->seq;
847 struct bprint_entry *field;
848
849 trace_assign_type(field, entry);
850
851 if (!seq_print_ip_sym(s, field->ip, flags))
852 goto partial;
853
854 if (!trace_seq_puts(s, ": "))
855 goto partial;
856
857 if (!trace_seq_bprintf(s, field->fmt, field->buf))
858 goto partial;
859
860 return TRACE_TYPE_HANDLED;
861
862 partial:
863 return TRACE_TYPE_PARTIAL_LINE;
864}
865
866
867static enum print_line_t
868trace_bprint_raw(struct trace_iterator *iter, int flags)
869{
870 struct bprint_entry *field;
871 struct trace_seq *s = &iter->seq;
872
873 trace_assign_type(field, iter->ent);
874
875 if (!trace_seq_printf(s, ": %lx : ", field->ip))
876 goto partial;
877
878 if (!trace_seq_bprintf(s, field->fmt, field->buf))
879 goto partial;
880
881 return TRACE_TYPE_HANDLED;
882
883 partial:
884 return TRACE_TYPE_PARTIAL_LINE;
885}
886
887
888static struct trace_event trace_bprint_event = {
889 .type = TRACE_BPRINT,
890 .trace = trace_bprint_print,
891 .raw = trace_bprint_raw,
892};
893
894/* TRACE_PRINT */
895static enum print_line_t trace_print_print(struct trace_iterator *iter,
896 int flags)
897{
898 struct print_entry *field;
899 struct trace_seq *s = &iter->seq;
900
901 trace_assign_type(field, iter->ent);
902
903 if (!seq_print_ip_sym(s, field->ip, flags))
904 goto partial;
905
906 if (!trace_seq_printf(s, ": %s", field->buf))
907 goto partial;
908
909 return TRACE_TYPE_HANDLED;
910
911 partial:
912 return TRACE_TYPE_PARTIAL_LINE;
913}
914
915static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
916{
917 struct print_entry *field;
918
919 trace_assign_type(field, iter->ent);
920
921 if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf))
922 goto partial;
923
924 return TRACE_TYPE_HANDLED;
925
926 partial:
927 return TRACE_TYPE_PARTIAL_LINE;
928}
929
930static struct trace_event trace_print_event = {
931 .type = TRACE_PRINT,
932 .trace = trace_print_print,
933 .raw = trace_print_raw,
934};
935
936
937static struct trace_event *events[] __initdata = {
938 &trace_fn_event,
939 &trace_ctx_event,
940 &trace_wake_event,
941 &trace_special_event,
942 &trace_stack_event,
943 &trace_user_stack_event,
944 &trace_bprint_event,
945 &trace_print_event,
946 NULL
947};
948
949__init static int init_events(void)
950{
951 struct trace_event *event;
952 int i, ret;
953
954 for (i = 0; events[i]; i++) {
955 event = events[i];
956
957 ret = register_ftrace_event(event);
958 if (!ret) {
959 printk(KERN_WARNING "event %d failed to register\n",
960 event->type);
961 WARN_ON_ONCE(1);
962 }
963 }
964
965 return 0;
966}
967device_initcall(init_events);
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
new file mode 100644
index 000000000000..3b90e6ade1aa
--- /dev/null
+++ b/kernel/trace/trace_output.h
@@ -0,0 +1,63 @@
1#ifndef __TRACE_EVENTS_H
2#define __TRACE_EVENTS_H
3
4#include "trace.h"
5
6typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
7 int flags);
8
9struct trace_event {
10 struct hlist_node node;
11 int type;
12 trace_print_func trace;
13 trace_print_func raw;
14 trace_print_func hex;
15 trace_print_func binary;
16};
17
18extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
19 __attribute__ ((format (printf, 2, 3)));
20extern int
21trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
22extern int
23seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
24 unsigned long sym_flags);
25extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
26 size_t cnt);
27int trace_seq_puts(struct trace_seq *s, const char *str);
28int trace_seq_putc(struct trace_seq *s, unsigned char c);
29int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len);
30int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len);
31int trace_seq_path(struct trace_seq *s, struct path *path);
32int seq_print_userip_objs(const struct userstack_entry *entry,
33 struct trace_seq *s, unsigned long sym_flags);
34int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
35 unsigned long ip, unsigned long sym_flags);
36
37int trace_print_context(struct trace_iterator *iter);
38int trace_print_lat_context(struct trace_iterator *iter);
39
40struct trace_event *ftrace_find_event(int type);
41int register_ftrace_event(struct trace_event *event);
42int unregister_ftrace_event(struct trace_event *event);
43
44enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags);
45
46#define MAX_MEMHEX_BYTES 8
47#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
48
49#define SEQ_PUT_FIELD_RET(s, x) \
50do { \
51 if (!trace_seq_putmem(s, &(x), sizeof(x))) \
52 return TRACE_TYPE_PARTIAL_LINE; \
53} while (0)
54
55#define SEQ_PUT_HEX_FIELD_RET(s, x) \
56do { \
57 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
58 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
59 return TRACE_TYPE_PARTIAL_LINE; \
60} while (0)
61
62#endif
63
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 7bda248daf55..bae791ebcc51 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -11,15 +11,113 @@
11 11
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/debugfs.h> 13#include <linux/debugfs.h>
14#include <linux/ftrace.h> 14#include <trace/power.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/module.h> 16#include <linux/module.h>
17 17
18#include "trace.h" 18#include "trace.h"
19#include "trace_output.h"
19 20
20static struct trace_array *power_trace; 21static struct trace_array *power_trace;
21static int __read_mostly trace_power_enabled; 22static int __read_mostly trace_power_enabled;
22 23
24static void probe_power_start(struct power_trace *it, unsigned int type,
25 unsigned int level)
26{
27 if (!trace_power_enabled)
28 return;
29
30 memset(it, 0, sizeof(struct power_trace));
31 it->state = level;
32 it->type = type;
33 it->stamp = ktime_get();
34}
35
36
37static void probe_power_end(struct power_trace *it)
38{
39 struct ring_buffer_event *event;
40 struct trace_power *entry;
41 struct trace_array_cpu *data;
42 struct trace_array *tr = power_trace;
43
44 if (!trace_power_enabled)
45 return;
46
47 preempt_disable();
48 it->end = ktime_get();
49 data = tr->data[smp_processor_id()];
50
51 event = trace_buffer_lock_reserve(tr, TRACE_POWER,
52 sizeof(*entry), 0, 0);
53 if (!event)
54 goto out;
55 entry = ring_buffer_event_data(event);
56 entry->state_data = *it;
57 trace_buffer_unlock_commit(tr, event, 0, 0);
58 out:
59 preempt_enable();
60}
61
62static void probe_power_mark(struct power_trace *it, unsigned int type,
63 unsigned int level)
64{
65 struct ring_buffer_event *event;
66 struct trace_power *entry;
67 struct trace_array_cpu *data;
68 struct trace_array *tr = power_trace;
69
70 if (!trace_power_enabled)
71 return;
72
73 memset(it, 0, sizeof(struct power_trace));
74 it->state = level;
75 it->type = type;
76 it->stamp = ktime_get();
77 preempt_disable();
78 it->end = it->stamp;
79 data = tr->data[smp_processor_id()];
80
81 event = trace_buffer_lock_reserve(tr, TRACE_POWER,
82 sizeof(*entry), 0, 0);
83 if (!event)
84 goto out;
85 entry = ring_buffer_event_data(event);
86 entry->state_data = *it;
87 trace_buffer_unlock_commit(tr, event, 0, 0);
88 out:
89 preempt_enable();
90}
91
92static int tracing_power_register(void)
93{
94 int ret;
95
96 ret = register_trace_power_start(probe_power_start);
97 if (ret) {
98 pr_info("power trace: Couldn't activate tracepoint"
99 " probe to trace_power_start\n");
100 return ret;
101 }
102 ret = register_trace_power_end(probe_power_end);
103 if (ret) {
104 pr_info("power trace: Couldn't activate tracepoint"
105 " probe to trace_power_end\n");
106 goto fail_start;
107 }
108 ret = register_trace_power_mark(probe_power_mark);
109 if (ret) {
110 pr_info("power trace: Couldn't activate tracepoint"
111 " probe to trace_power_mark\n");
112 goto fail_end;
113 }
114 return ret;
115fail_end:
116 unregister_trace_power_end(probe_power_end);
117fail_start:
118 unregister_trace_power_start(probe_power_start);
119 return ret;
120}
23 121
24static void start_power_trace(struct trace_array *tr) 122static void start_power_trace(struct trace_array *tr)
25{ 123{
@@ -31,6 +129,14 @@ static void stop_power_trace(struct trace_array *tr)
31 trace_power_enabled = 0; 129 trace_power_enabled = 0;
32} 130}
33 131
132static void power_trace_reset(struct trace_array *tr)
133{
134 trace_power_enabled = 0;
135 unregister_trace_power_start(probe_power_start);
136 unregister_trace_power_end(probe_power_end);
137 unregister_trace_power_mark(probe_power_mark);
138}
139
34 140
35static int power_trace_init(struct trace_array *tr) 141static int power_trace_init(struct trace_array *tr)
36{ 142{
@@ -38,6 +144,7 @@ static int power_trace_init(struct trace_array *tr)
38 power_trace = tr; 144 power_trace = tr;
39 145
40 trace_power_enabled = 1; 146 trace_power_enabled = 1;
147 tracing_power_register();
41 148
42 for_each_cpu(cpu, cpu_possible_mask) 149 for_each_cpu(cpu, cpu_possible_mask)
43 tracing_reset(tr, cpu); 150 tracing_reset(tr, cpu);
@@ -85,7 +192,7 @@ static struct tracer power_tracer __read_mostly =
85 .init = power_trace_init, 192 .init = power_trace_init,
86 .start = start_power_trace, 193 .start = start_power_trace,
87 .stop = stop_power_trace, 194 .stop = stop_power_trace,
88 .reset = stop_power_trace, 195 .reset = power_trace_reset,
89 .print_line = power_print_line, 196 .print_line = power_print_line,
90}; 197};
91 198
@@ -94,86 +201,3 @@ static int init_power_trace(void)
94 return register_tracer(&power_tracer); 201 return register_tracer(&power_tracer);
95} 202}
96device_initcall(init_power_trace); 203device_initcall(init_power_trace);
97
98void trace_power_start(struct power_trace *it, unsigned int type,
99 unsigned int level)
100{
101 if (!trace_power_enabled)
102 return;
103
104 memset(it, 0, sizeof(struct power_trace));
105 it->state = level;
106 it->type = type;
107 it->stamp = ktime_get();
108}
109EXPORT_SYMBOL_GPL(trace_power_start);
110
111
112void trace_power_end(struct power_trace *it)
113{
114 struct ring_buffer_event *event;
115 struct trace_power *entry;
116 struct trace_array_cpu *data;
117 unsigned long irq_flags;
118 struct trace_array *tr = power_trace;
119
120 if (!trace_power_enabled)
121 return;
122
123 preempt_disable();
124 it->end = ktime_get();
125 data = tr->data[smp_processor_id()];
126
127 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
128 &irq_flags);
129 if (!event)
130 goto out;
131 entry = ring_buffer_event_data(event);
132 tracing_generic_entry_update(&entry->ent, 0, 0);
133 entry->ent.type = TRACE_POWER;
134 entry->state_data = *it;
135 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
136
137 trace_wake_up();
138
139 out:
140 preempt_enable();
141}
142EXPORT_SYMBOL_GPL(trace_power_end);
143
144void trace_power_mark(struct power_trace *it, unsigned int type,
145 unsigned int level)
146{
147 struct ring_buffer_event *event;
148 struct trace_power *entry;
149 struct trace_array_cpu *data;
150 unsigned long irq_flags;
151 struct trace_array *tr = power_trace;
152
153 if (!trace_power_enabled)
154 return;
155
156 memset(it, 0, sizeof(struct power_trace));
157 it->state = level;
158 it->type = type;
159 it->stamp = ktime_get();
160 preempt_disable();
161 it->end = it->stamp;
162 data = tr->data[smp_processor_id()];
163
164 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
165 &irq_flags);
166 if (!event)
167 goto out;
168 entry = ring_buffer_event_data(event);
169 tracing_generic_entry_update(&entry->ent, 0, 0);
170 entry->ent.type = TRACE_POWER;
171 entry->state_data = *it;
172 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
173
174 trace_wake_up();
175
176 out:
177 preempt_enable();
178}
179EXPORT_SYMBOL_GPL(trace_power_mark);
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
new file mode 100644
index 000000000000..486785214e3e
--- /dev/null
+++ b/kernel/trace/trace_printk.c
@@ -0,0 +1,270 @@
1/*
2 * trace binary printk
3 *
4 * Copyright (C) 2008 Lai Jiangshan <laijs@cn.fujitsu.com>
5 *
6 */
7#include <linux/seq_file.h>
8#include <linux/debugfs.h>
9#include <linux/uaccess.h>
10#include <linux/kernel.h>
11#include <linux/ftrace.h>
12#include <linux/string.h>
13#include <linux/module.h>
14#include <linux/marker.h>
15#include <linux/mutex.h>
16#include <linux/ctype.h>
17#include <linux/list.h>
18#include <linux/slab.h>
19#include <linux/fs.h>
20
21#include "trace.h"
22
23#ifdef CONFIG_MODULES
24
25/*
26 * A module's trace_printk() format strings are saved in struct trace_bprintk_fmt
27 * entries, which are queued on trace_bprintk_fmt_list.
28 */
29static LIST_HEAD(trace_bprintk_fmt_list);
30
31/* serialize accesses to trace_bprintk_fmt_list */
32static DEFINE_MUTEX(btrace_mutex);
33
34struct trace_bprintk_fmt {
35 struct list_head list;
36 char fmt[0];
37};
38
39static inline struct trace_bprintk_fmt *lookup_format(const char *fmt)
40{
41 struct trace_bprintk_fmt *pos;
42 list_for_each_entry(pos, &trace_bprintk_fmt_list, list) {
43 if (!strcmp(pos->fmt, fmt))
44 return pos;
45 }
46 return NULL;
47}
48
49static
50void hold_module_trace_bprintk_format(const char **start, const char **end)
51{
52 const char **iter;
53
54 mutex_lock(&btrace_mutex);
55 for (iter = start; iter < end; iter++) {
56 struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
57 if (tb_fmt) {
58 *iter = tb_fmt->fmt;
59 continue;
60 }
61
62 tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt)
63 + strlen(*iter) + 1, GFP_KERNEL);
64 if (tb_fmt) {
65 list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
66 strcpy(tb_fmt->fmt, *iter);
67 *iter = tb_fmt->fmt;
68 } else
69 *iter = NULL;
70 }
71 mutex_unlock(&btrace_mutex);
72}
73
74static int module_trace_bprintk_format_notify(struct notifier_block *self,
75 unsigned long val, void *data)
76{
77 struct module *mod = data;
78 if (mod->num_trace_bprintk_fmt) {
79 const char **start = mod->trace_bprintk_fmt_start;
80 const char **end = start + mod->num_trace_bprintk_fmt;
81
82 if (val == MODULE_STATE_COMING)
83 hold_module_trace_bprintk_format(start, end);
84 }
85 return 0;
86}
87
88#else /* !CONFIG_MODULES */
89__init static int
90module_trace_bprintk_format_notify(struct notifier_block *self,
91 unsigned long val, void *data)
92{
93 return 0;
94}
95#endif /* CONFIG_MODULES */
96
97
98__initdata_or_module static
99struct notifier_block module_trace_bprintk_format_nb = {
100 .notifier_call = module_trace_bprintk_format_notify,
101};
102
103int __trace_bprintk(unsigned long ip, const char *fmt, ...)
104{
105 int ret;
106 va_list ap;
107
108 if (unlikely(!fmt))
109 return 0;
110
111 if (!(trace_flags & TRACE_ITER_PRINTK))
112 return 0;
113
114 va_start(ap, fmt);
115 ret = trace_vbprintk(ip, task_curr_ret_stack(current), fmt, ap);
116 va_end(ap);
117 return ret;
118}
119EXPORT_SYMBOL_GPL(__trace_bprintk);
120
121int __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap)
122{
123 if (unlikely(!fmt))
124 return 0;
125
126 if (!(trace_flags & TRACE_ITER_PRINTK))
127 return 0;
128
129 return trace_vbprintk(ip, task_curr_ret_stack(current), fmt, ap);
130}
131EXPORT_SYMBOL_GPL(__ftrace_vbprintk);
132
133int __trace_printk(unsigned long ip, const char *fmt, ...)
134{
135 int ret;
136 va_list ap;
137
138 if (!(trace_flags & TRACE_ITER_PRINTK))
139 return 0;
140
141 va_start(ap, fmt);
142 ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
143 va_end(ap);
144 return ret;
145}
146EXPORT_SYMBOL_GPL(__trace_printk);
147
148int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
149{
150 if (!(trace_flags & TRACE_ITER_PRINTK))
151 return 0;
152
153 return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
154}
155EXPORT_SYMBOL_GPL(__ftrace_vprintk);
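These exported helpers are the slow-path back ends; normal kernel code is expected to reach them through the trace_printk() macro (which, for constant format strings, ends up in __trace_bprintk()) rather than call them directly. A hypothetical call site, just to show the intended use; nr_requests is an invented variable:

	/* records into the ftrace ring buffer instead of the printk log */
	trace_printk("submitted %d requests on cpu %d\n",
		     nr_requests, smp_processor_id());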
156
157static void *
158t_next(struct seq_file *m, void *v, loff_t *pos)
159{
160 const char **fmt = m->private;
161 const char **next = fmt;
162
163 (*pos)++;
164
165 if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt)
166 return NULL;
167
168 next = fmt;
169 m->private = ++next;
170
171 return fmt;
172}
173
174static void *t_start(struct seq_file *m, loff_t *pos)
175{
176 return t_next(m, NULL, pos);
177}
178
179static int t_show(struct seq_file *m, void *v)
180{
181 const char **fmt = v;
182 const char *str = *fmt;
183 int i;
184
185 seq_printf(m, "0x%lx : \"", (unsigned long)fmt);
186
187 /*
188 * Tabs and new lines need to be converted.
189 */
190 for (i = 0; str[i]; i++) {
191 switch (str[i]) {
192 case '\n':
193 seq_puts(m, "\\n");
194 break;
195 case '\t':
196 seq_puts(m, "\\t");
197 break;
198 case '\\':
199 seq_puts(m, "\\\\");
200 break;
201 case '"':
202 seq_puts(m, "\\\"");
203 break;
204 default:
205 seq_putc(m, str[i]);
206 }
207 }
208 seq_puts(m, "\"\n");
209
210 return 0;
211}
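With that escaping, reading the debugfs file created below yields one quoted format per line, keyed by the address of the format string, so user-space tools can map binary ring-buffer records back to text. Assuming debugfs is mounted in its usual place, the output looks roughly like this (address and format are purely illustrative):

	# cat /sys/kernel/debug/tracing/printk_formats
	0xffffffff8123abcd : "submitted %d requests on cpu %d\n"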
212
213static void t_stop(struct seq_file *m, void *p)
214{
215}
216
217static const struct seq_operations show_format_seq_ops = {
218 .start = t_start,
219 .next = t_next,
220 .show = t_show,
221 .stop = t_stop,
222};
223
224static int
225ftrace_formats_open(struct inode *inode, struct file *file)
226{
227 int ret;
228
229 ret = seq_open(file, &show_format_seq_ops);
230 if (!ret) {
231 struct seq_file *m = file->private_data;
232
233 m->private = __start___trace_bprintk_fmt;
234 }
235 return ret;
236}
237
238static const struct file_operations ftrace_formats_fops = {
239 .open = ftrace_formats_open,
240 .read = seq_read,
241 .llseek = seq_lseek,
242 .release = seq_release,
243};
244
245static __init int init_trace_printk_function_export(void)
246{
247 struct dentry *d_tracer;
248 struct dentry *entry;
249
250 d_tracer = tracing_init_dentry();
251 if (!d_tracer)
252 return 0;
253
254 entry = debugfs_create_file("printk_formats", 0444, d_tracer,
255 NULL, &ftrace_formats_fops);
256 if (!entry)
257 pr_warning("Could not create debugfs "
258 "'printk_formats' entry\n");
259
260 return 0;
261}
262
263fs_initcall(init_trace_printk_function_export);
264
265static __init int init_trace_printk(void)
266{
267 return register_module_notifier(&module_trace_bprintk_format_nb);
268}
269
270early_initcall(init_trace_printk);
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index df175cb4564f..de35f200abd3 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -18,6 +18,7 @@ static struct trace_array *ctx_trace;
18static int __read_mostly tracer_enabled; 18static int __read_mostly tracer_enabled;
19static int sched_ref; 19static int sched_ref;
20static DEFINE_MUTEX(sched_register_mutex); 20static DEFINE_MUTEX(sched_register_mutex);
21static int sched_stopped;
21 22
22static void 23static void
23probe_sched_switch(struct rq *__rq, struct task_struct *prev, 24probe_sched_switch(struct rq *__rq, struct task_struct *prev,
@@ -28,7 +29,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
28 int cpu; 29 int cpu;
29 int pc; 30 int pc;
30 31
31 if (!sched_ref) 32 if (!sched_ref || sched_stopped)
32 return; 33 return;
33 34
34 tracing_record_cmdline(prev); 35 tracing_record_cmdline(prev);
@@ -43,7 +44,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
43 data = ctx_trace->data[cpu]; 44 data = ctx_trace->data[cpu];
44 45
45 if (likely(!atomic_read(&data->disabled))) 46 if (likely(!atomic_read(&data->disabled)))
46 tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc); 47 tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc);
47 48
48 local_irq_restore(flags); 49 local_irq_restore(flags);
49} 50}
@@ -66,7 +67,7 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
66 data = ctx_trace->data[cpu]; 67 data = ctx_trace->data[cpu];
67 68
68 if (likely(!atomic_read(&data->disabled))) 69 if (likely(!atomic_read(&data->disabled)))
69 tracing_sched_wakeup_trace(ctx_trace, data, wakee, current, 70 tracing_sched_wakeup_trace(ctx_trace, wakee, current,
70 flags, pc); 71 flags, pc);
71 72
72 local_irq_restore(flags); 73 local_irq_restore(flags);
@@ -93,7 +94,7 @@ static int tracing_sched_register(void)
93 ret = register_trace_sched_switch(probe_sched_switch); 94 ret = register_trace_sched_switch(probe_sched_switch);
94 if (ret) { 95 if (ret) {
95 pr_info("sched trace: Couldn't activate tracepoint" 96 pr_info("sched trace: Couldn't activate tracepoint"
96 " probe to kernel_sched_schedule\n"); 97 " probe to kernel_sched_switch\n");
97 goto fail_deprobe_wake_new; 98 goto fail_deprobe_wake_new;
98 } 99 }
99 100
@@ -185,12 +186,6 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr)
185 ctx_trace = tr; 186 ctx_trace = tr;
186} 187}
187 188
188static void start_sched_trace(struct trace_array *tr)
189{
190 tracing_reset_online_cpus(tr);
191 tracing_start_sched_switch_record();
192}
193
194static void stop_sched_trace(struct trace_array *tr) 189static void stop_sched_trace(struct trace_array *tr)
195{ 190{
196 tracing_stop_sched_switch_record(); 191 tracing_stop_sched_switch_record();
@@ -199,7 +194,8 @@ static void stop_sched_trace(struct trace_array *tr)
199static int sched_switch_trace_init(struct trace_array *tr) 194static int sched_switch_trace_init(struct trace_array *tr)
200{ 195{
201 ctx_trace = tr; 196 ctx_trace = tr;
202 start_sched_trace(tr); 197 tracing_reset_online_cpus(tr);
198 tracing_start_sched_switch_record();
203 return 0; 199 return 0;
204} 200}
205 201
@@ -211,13 +207,12 @@ static void sched_switch_trace_reset(struct trace_array *tr)
211 207
212static void sched_switch_trace_start(struct trace_array *tr) 208static void sched_switch_trace_start(struct trace_array *tr)
213{ 209{
214 tracing_reset_online_cpus(tr); 210 sched_stopped = 0;
215 tracing_start_sched_switch();
216} 211}
217 212
218static void sched_switch_trace_stop(struct trace_array *tr) 213static void sched_switch_trace_stop(struct trace_array *tr)
219{ 214{
220 tracing_stop_sched_switch(); 215 sched_stopped = 1;
221} 216}
222 217
223static struct tracer sched_switch_trace __read_mostly = 218static struct tracer sched_switch_trace __read_mostly =
@@ -227,6 +222,7 @@ static struct tracer sched_switch_trace __read_mostly =
227 .reset = sched_switch_trace_reset, 222 .reset = sched_switch_trace_reset,
228 .start = sched_switch_trace_start, 223 .start = sched_switch_trace_start,
229 .stop = sched_switch_trace_stop, 224 .stop = sched_switch_trace_stop,
225 .wait_pipe = poll_wait_pipe,
230#ifdef CONFIG_FTRACE_SELFTEST 226#ifdef CONFIG_FTRACE_SELFTEST
231 .selftest = trace_selftest_startup_sched_switch, 227 .selftest = trace_selftest_startup_sched_switch,
232#endif 228#endif
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 42ae1e77b6b3..3c5ad6b2ec84 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -25,12 +25,15 @@ static int __read_mostly tracer_enabled;
25static struct task_struct *wakeup_task; 25static struct task_struct *wakeup_task;
26static int wakeup_cpu; 26static int wakeup_cpu;
27static unsigned wakeup_prio = -1; 27static unsigned wakeup_prio = -1;
28static int wakeup_rt;
28 29
29static raw_spinlock_t wakeup_lock = 30static raw_spinlock_t wakeup_lock =
30 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
31 32
32static void __wakeup_reset(struct trace_array *tr); 33static void __wakeup_reset(struct trace_array *tr);
33 34
35static int save_lat_flag;
36
34#ifdef CONFIG_FUNCTION_TRACER 37#ifdef CONFIG_FUNCTION_TRACER
35/* 38/*
36 * irqsoff uses its own tracer function to keep the overhead down: 39 * irqsoff uses its own tracer function to keep the overhead down:
@@ -71,7 +74,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
71 if (task_cpu(wakeup_task) != cpu) 74 if (task_cpu(wakeup_task) != cpu)
72 goto unlock; 75 goto unlock;
73 76
74 trace_function(tr, data, ip, parent_ip, flags, pc); 77 trace_function(tr, ip, parent_ip, flags, pc);
75 78
76 unlock: 79 unlock:
77 __raw_spin_unlock(&wakeup_lock); 80 __raw_spin_unlock(&wakeup_lock);
@@ -151,7 +154,8 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
151 if (unlikely(!tracer_enabled || next != wakeup_task)) 154 if (unlikely(!tracer_enabled || next != wakeup_task))
152 goto out_unlock; 155 goto out_unlock;
153 156
154 trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 157 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
158 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
155 159
156 /* 160 /*
157 * usecs conversion is slow so we try to delay the conversion 161 * usecs conversion is slow so we try to delay the conversion
@@ -182,13 +186,10 @@ out:
182 186
183static void __wakeup_reset(struct trace_array *tr) 187static void __wakeup_reset(struct trace_array *tr)
184{ 188{
185 struct trace_array_cpu *data;
186 int cpu; 189 int cpu;
187 190
188 for_each_possible_cpu(cpu) { 191 for_each_possible_cpu(cpu)
189 data = tr->data[cpu];
190 tracing_reset(tr, cpu); 192 tracing_reset(tr, cpu);
191 }
192 193
193 wakeup_cpu = -1; 194 wakeup_cpu = -1;
194 wakeup_prio = -1; 195 wakeup_prio = -1;
@@ -213,6 +214,7 @@ static void wakeup_reset(struct trace_array *tr)
213static void 214static void
214probe_wakeup(struct rq *rq, struct task_struct *p, int success) 215probe_wakeup(struct rq *rq, struct task_struct *p, int success)
215{ 216{
217 struct trace_array_cpu *data;
216 int cpu = smp_processor_id(); 218 int cpu = smp_processor_id();
217 unsigned long flags; 219 unsigned long flags;
218 long disabled; 220 long disabled;
@@ -224,7 +226,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
224 tracing_record_cmdline(p); 226 tracing_record_cmdline(p);
225 tracing_record_cmdline(current); 227 tracing_record_cmdline(current);
226 228
227 if (likely(!rt_task(p)) || 229 if ((wakeup_rt && !rt_task(p)) ||
228 p->prio >= wakeup_prio || 230 p->prio >= wakeup_prio ||
229 p->prio >= current->prio) 231 p->prio >= current->prio)
230 return; 232 return;
@@ -252,9 +254,10 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
252 254
253 local_save_flags(flags); 255 local_save_flags(flags);
254 256
255 wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); 257 data = wakeup_trace->data[wakeup_cpu];
256 trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu], 258 data->preempt_timestamp = ftrace_now(cpu);
257 CALLER_ADDR1, CALLER_ADDR2, flags, pc); 259 tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
260 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
258 261
259out_locked: 262out_locked:
260 __raw_spin_unlock(&wakeup_lock); 263 __raw_spin_unlock(&wakeup_lock);
@@ -262,12 +265,6 @@ out:
262 atomic_dec(&wakeup_trace->data[cpu]->disabled); 265 atomic_dec(&wakeup_trace->data[cpu]->disabled);
263} 266}
264 267
265/*
266 * save_tracer_enabled is used to save the state of the tracer_enabled
267 * variable when we disable it when we open a trace output file.
268 */
269static int save_tracer_enabled;
270
271static void start_wakeup_tracer(struct trace_array *tr) 268static void start_wakeup_tracer(struct trace_array *tr)
272{ 269{
273 int ret; 270 int ret;
@@ -289,7 +286,7 @@ static void start_wakeup_tracer(struct trace_array *tr)
289 ret = register_trace_sched_switch(probe_wakeup_sched_switch); 286 ret = register_trace_sched_switch(probe_wakeup_sched_switch);
290 if (ret) { 287 if (ret) {
291 pr_info("sched trace: Couldn't activate tracepoint" 288 pr_info("sched trace: Couldn't activate tracepoint"
292 " probe to kernel_sched_schedule\n"); 289 " probe to kernel_sched_switch\n");
293 goto fail_deprobe_wake_new; 290 goto fail_deprobe_wake_new;
294 } 291 }
295 292
@@ -306,13 +303,10 @@ static void start_wakeup_tracer(struct trace_array *tr)
306 303
307 register_ftrace_function(&trace_ops); 304 register_ftrace_function(&trace_ops);
308 305
309 if (tracing_is_enabled()) { 306 if (tracing_is_enabled())
310 tracer_enabled = 1; 307 tracer_enabled = 1;
311 save_tracer_enabled = 1; 308 else
312 } else {
313 tracer_enabled = 0; 309 tracer_enabled = 0;
314 save_tracer_enabled = 0;
315 }
316 310
317 return; 311 return;
318fail_deprobe_wake_new: 312fail_deprobe_wake_new:
@@ -324,54 +318,54 @@ fail_deprobe:
324static void stop_wakeup_tracer(struct trace_array *tr) 318static void stop_wakeup_tracer(struct trace_array *tr)
325{ 319{
326 tracer_enabled = 0; 320 tracer_enabled = 0;
327 save_tracer_enabled = 0;
328 unregister_ftrace_function(&trace_ops); 321 unregister_ftrace_function(&trace_ops);
329 unregister_trace_sched_switch(probe_wakeup_sched_switch); 322 unregister_trace_sched_switch(probe_wakeup_sched_switch);
330 unregister_trace_sched_wakeup_new(probe_wakeup); 323 unregister_trace_sched_wakeup_new(probe_wakeup);
331 unregister_trace_sched_wakeup(probe_wakeup); 324 unregister_trace_sched_wakeup(probe_wakeup);
332} 325}
333 326
334static int wakeup_tracer_init(struct trace_array *tr) 327static int __wakeup_tracer_init(struct trace_array *tr)
335{ 328{
329 save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
330 trace_flags |= TRACE_ITER_LATENCY_FMT;
331
336 tracing_max_latency = 0; 332 tracing_max_latency = 0;
337 wakeup_trace = tr; 333 wakeup_trace = tr;
338 start_wakeup_tracer(tr); 334 start_wakeup_tracer(tr);
339 return 0; 335 return 0;
340} 336}
341 337
338static int wakeup_tracer_init(struct trace_array *tr)
339{
340 wakeup_rt = 0;
341 return __wakeup_tracer_init(tr);
342}
343
344static int wakeup_rt_tracer_init(struct trace_array *tr)
345{
346 wakeup_rt = 1;
347 return __wakeup_tracer_init(tr);
348}
349
342static void wakeup_tracer_reset(struct trace_array *tr) 350static void wakeup_tracer_reset(struct trace_array *tr)
343{ 351{
344 stop_wakeup_tracer(tr); 352 stop_wakeup_tracer(tr);
345 /* make sure we put back any tasks we are tracing */ 353 /* make sure we put back any tasks we are tracing */
346 wakeup_reset(tr); 354 wakeup_reset(tr);
355
356 if (!save_lat_flag)
357 trace_flags &= ~TRACE_ITER_LATENCY_FMT;
347} 358}
348 359
349static void wakeup_tracer_start(struct trace_array *tr) 360static void wakeup_tracer_start(struct trace_array *tr)
350{ 361{
351 wakeup_reset(tr); 362 wakeup_reset(tr);
352 tracer_enabled = 1; 363 tracer_enabled = 1;
353 save_tracer_enabled = 1;
354} 364}
355 365
356static void wakeup_tracer_stop(struct trace_array *tr) 366static void wakeup_tracer_stop(struct trace_array *tr)
357{ 367{
358 tracer_enabled = 0; 368 tracer_enabled = 0;
359 save_tracer_enabled = 0;
360}
361
362static void wakeup_tracer_open(struct trace_iterator *iter)
363{
364 /* stop the trace while dumping */
365 tracer_enabled = 0;
366}
367
368static void wakeup_tracer_close(struct trace_iterator *iter)
369{
370 /* forget about any processes we were recording */
371 if (save_tracer_enabled) {
372 wakeup_reset(iter->tr);
373 tracer_enabled = 1;
374 }
375} 369}
376 370
377static struct tracer wakeup_tracer __read_mostly = 371static struct tracer wakeup_tracer __read_mostly =
@@ -381,8 +375,20 @@ static struct tracer wakeup_tracer __read_mostly =
381 .reset = wakeup_tracer_reset, 375 .reset = wakeup_tracer_reset,
382 .start = wakeup_tracer_start, 376 .start = wakeup_tracer_start,
383 .stop = wakeup_tracer_stop, 377 .stop = wakeup_tracer_stop,
384 .open = wakeup_tracer_open, 378 .print_max = 1,
385 .close = wakeup_tracer_close, 379#ifdef CONFIG_FTRACE_SELFTEST
380 .selftest = trace_selftest_startup_wakeup,
381#endif
382};
383
384static struct tracer wakeup_rt_tracer __read_mostly =
385{
386 .name = "wakeup_rt",
387 .init = wakeup_rt_tracer_init,
388 .reset = wakeup_tracer_reset,
389 .start = wakeup_tracer_start,
390 .stop = wakeup_tracer_stop,
391 .wait_pipe = poll_wait_pipe,
386 .print_max = 1, 392 .print_max = 1,
387#ifdef CONFIG_FTRACE_SELFTEST 393#ifdef CONFIG_FTRACE_SELFTEST
388 .selftest = trace_selftest_startup_wakeup, 394 .selftest = trace_selftest_startup_wakeup,
@@ -397,6 +403,10 @@ __init static int init_wakeup_tracer(void)
397 if (ret) 403 if (ret)
398 return ret; 404 return ret;
399 405
406 ret = register_tracer(&wakeup_rt_tracer);
407 if (ret)
408 return ret;
409
400 return 0; 410 return 0;
401} 411}
402device_initcall(init_wakeup_tracer); 412device_initcall(init_wakeup_tracer);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index bc8e80a86bca..38856ba78a92 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -1,5 +1,6 @@
1/* Include in trace.c */ 1/* Include in trace.c */
2 2
3#include <linux/stringify.h>
3#include <linux/kthread.h> 4#include <linux/kthread.h>
4#include <linux/delay.h> 5#include <linux/delay.h>
5 6
@@ -9,11 +10,12 @@ static inline int trace_valid_entry(struct trace_entry *entry)
9 case TRACE_FN: 10 case TRACE_FN:
10 case TRACE_CTX: 11 case TRACE_CTX:
11 case TRACE_WAKE: 12 case TRACE_WAKE:
12 case TRACE_CONT:
13 case TRACE_STACK: 13 case TRACE_STACK:
14 case TRACE_PRINT: 14 case TRACE_PRINT:
15 case TRACE_SPECIAL: 15 case TRACE_SPECIAL:
16 case TRACE_BRANCH: 16 case TRACE_BRANCH:
17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET:
17 return 1; 19 return 1;
18 } 20 }
19 return 0; 21 return 0;
@@ -99,9 +101,6 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
99 101
100#ifdef CONFIG_DYNAMIC_FTRACE 102#ifdef CONFIG_DYNAMIC_FTRACE
101 103
102#define __STR(x) #x
103#define STR(x) __STR(x)
104
105/* Test dynamic code modification and ftrace filters */ 104/* Test dynamic code modification and ftrace filters */
106int trace_selftest_startup_dynamic_tracing(struct tracer *trace, 105int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
107 struct trace_array *tr, 106 struct trace_array *tr,
@@ -125,17 +124,17 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
125 func(); 124 func();
126 125
127 /* 126 /*
128 * Some archs *cough*PowerPC*cough* add charachters to the 127 * Some archs *cough*PowerPC*cough* add characters to the
129 * start of the function names. We simply put a '*' to 128 * start of the function names. We simply put a '*' to
130 * accomodate them. 129 * accommodate them.
131 */ 130 */
132 func_name = "*" STR(DYN_FTRACE_TEST_NAME); 131 func_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
133 132
134 /* filter only on our function */ 133 /* filter only on our function */
135 ftrace_set_filter(func_name, strlen(func_name), 1); 134 ftrace_set_filter(func_name, strlen(func_name), 1);
136 135
137 /* enable tracing */ 136 /* enable tracing */
138 ret = trace->init(tr); 137 ret = tracer_init(trace, tr);
139 if (ret) { 138 if (ret) {
140 warn_failed_init_tracer(trace, ret); 139 warn_failed_init_tracer(trace, ret);
141 goto out; 140 goto out;
@@ -209,7 +208,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
209 ftrace_enabled = 1; 208 ftrace_enabled = 1;
210 tracer_enabled = 1; 209 tracer_enabled = 1;
211 210
212 ret = trace->init(tr); 211 ret = tracer_init(trace, tr);
213 if (ret) { 212 if (ret) {
214 warn_failed_init_tracer(trace, ret); 213 warn_failed_init_tracer(trace, ret);
215 goto out; 214 goto out;
@@ -247,6 +246,54 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
247} 246}
248#endif /* CONFIG_FUNCTION_TRACER */ 247#endif /* CONFIG_FUNCTION_TRACER */
249 248
249
250#ifdef CONFIG_FUNCTION_GRAPH_TRACER
251/*
252 * Pretty much the same as the function tracer selftest, from which this
253 * one has been borrowed.
254 */
255int
256trace_selftest_startup_function_graph(struct tracer *trace,
257 struct trace_array *tr)
258{
259 int ret;
260 unsigned long count;
261
262 ret = tracer_init(trace, tr);
263 if (ret) {
264 warn_failed_init_tracer(trace, ret);
265 goto out;
266 }
267
268 /* Sleep for a 1/10 of a second */
269 msleep(100);
270
271 tracing_stop();
272
273 /* check the trace buffer */
274 ret = trace_test_buffer(tr, &count);
275
276 trace->reset(tr);
277 tracing_start();
278
279 if (!ret && !count) {
280 printk(KERN_CONT ".. no entries found ..");
281 ret = -1;
282 goto out;
283 }
284
285 /* Don't test dynamic tracing, the function tracer already did */
286
287out:
288 /* Stop it if we failed */
289 if (ret)
290 ftrace_graph_stop();
291
292 return ret;
293}
294#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
295
296
250#ifdef CONFIG_IRQSOFF_TRACER 297#ifdef CONFIG_IRQSOFF_TRACER
251int 298int
252trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) 299trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
@@ -256,7 +303,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
256 int ret; 303 int ret;
257 304
258 /* start the tracing */ 305 /* start the tracing */
259 ret = trace->init(tr); 306 ret = tracer_init(trace, tr);
260 if (ret) { 307 if (ret) {
261 warn_failed_init_tracer(trace, ret); 308 warn_failed_init_tracer(trace, ret);
262 return ret; 309 return ret;
@@ -268,6 +315,14 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
268 local_irq_disable(); 315 local_irq_disable();
269 udelay(100); 316 udelay(100);
270 local_irq_enable(); 317 local_irq_enable();
318
319 /*
320 * Stop the tracer to avoid a warning subsequent
321 * to buffer flipping failure because tracing_stop()
322 * disables the tr and max buffers, making flipping impossible
323 * in case of parallel max irqs-off latencies.
324 */
325 trace->stop(tr);
271 /* stop the tracing. */ 326 /* stop the tracing. */
272 tracing_stop(); 327 tracing_stop();
273 /* check both trace buffers */ 328 /* check both trace buffers */
@@ -310,7 +365,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
310 } 365 }
311 366
312 /* start the tracing */ 367 /* start the tracing */
313 ret = trace->init(tr); 368 ret = tracer_init(trace, tr);
314 if (ret) { 369 if (ret) {
315 warn_failed_init_tracer(trace, ret); 370 warn_failed_init_tracer(trace, ret);
316 return ret; 371 return ret;
@@ -322,6 +377,14 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
322 preempt_disable(); 377 preempt_disable();
323 udelay(100); 378 udelay(100);
324 preempt_enable(); 379 preempt_enable();
380
381 /*
382 * Stop the tracer to avoid a warning subsequent
383 * to buffer flipping failure because tracing_stop()
384 * disables the tr and max buffers, making flipping impossible
385 * in case of parallel max preempt-off latencies.
386 */
387 trace->stop(tr);
325 /* stop the tracing. */ 388 /* stop the tracing. */
326 tracing_stop(); 389 tracing_stop();
327 /* check both trace buffers */ 390 /* check both trace buffers */
@@ -364,10 +427,10 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
364 } 427 }
365 428
366 /* start the tracing */ 429 /* start the tracing */
367 ret = trace->init(tr); 430 ret = tracer_init(trace, tr);
368 if (ret) { 431 if (ret) {
369 warn_failed_init_tracer(trace, ret); 432 warn_failed_init_tracer(trace, ret);
370 goto out; 433 goto out_no_start;
371 } 434 }
372 435
373 /* reset the max latency */ 436 /* reset the max latency */
@@ -381,31 +444,35 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
381 /* reverse the order of preempt vs irqs */ 444 /* reverse the order of preempt vs irqs */
382 local_irq_enable(); 445 local_irq_enable();
383 446
447 /*
448 * Stop the tracer to avoid a warning subsequent
449 * to buffer flipping failure because tracing_stop()
450 * disables the tr and max buffers, making flipping impossible
451 * in case of parallel max irqs/preempt-off latencies.
452 */
453 trace->stop(tr);
384 /* stop the tracing. */ 454 /* stop the tracing. */
385 tracing_stop(); 455 tracing_stop();
386 /* check both trace buffers */ 456 /* check both trace buffers */
387 ret = trace_test_buffer(tr, NULL); 457 ret = trace_test_buffer(tr, NULL);
388 if (ret) { 458 if (ret)
389 tracing_start();
390 goto out; 459 goto out;
391 }
392 460
393 ret = trace_test_buffer(&max_tr, &count); 461 ret = trace_test_buffer(&max_tr, &count);
394 if (ret) { 462 if (ret)
395 tracing_start();
396 goto out; 463 goto out;
397 }
398 464
399 if (!ret && !count) { 465 if (!ret && !count) {
400 printk(KERN_CONT ".. no entries found .."); 466 printk(KERN_CONT ".. no entries found ..");
401 ret = -1; 467 ret = -1;
402 tracing_start();
403 goto out; 468 goto out;
404 } 469 }
405 470
406 /* do the test by disabling interrupts first this time */ 471 /* do the test by disabling interrupts first this time */
407 tracing_max_latency = 0; 472 tracing_max_latency = 0;
408 tracing_start(); 473 tracing_start();
474 trace->start(tr);
475
409 preempt_disable(); 476 preempt_disable();
410 local_irq_disable(); 477 local_irq_disable();
411 udelay(100); 478 udelay(100);
@@ -413,6 +480,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
413 /* reverse the order of preempt vs irqs */ 480 /* reverse the order of preempt vs irqs */
414 local_irq_enable(); 481 local_irq_enable();
415 482
483 trace->stop(tr);
416 /* stop the tracing. */ 484 /* stop the tracing. */
417 tracing_stop(); 485 tracing_stop();
418 /* check both trace buffers */ 486 /* check both trace buffers */
@@ -428,9 +496,10 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
428 goto out; 496 goto out;
429 } 497 }
430 498
431 out: 499out:
432 trace->reset(tr);
433 tracing_start(); 500 tracing_start();
501out_no_start:
502 trace->reset(tr);
434 tracing_max_latency = save_max; 503 tracing_max_latency = save_max;
435 504
436 return ret; 505 return ret;
@@ -496,7 +565,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
496 wait_for_completion(&isrt); 565 wait_for_completion(&isrt);
497 566
498 /* start the tracing */ 567 /* start the tracing */
499 ret = trace->init(tr); 568 ret = tracer_init(trace, tr);
500 if (ret) { 569 if (ret) {
501 warn_failed_init_tracer(trace, ret); 570 warn_failed_init_tracer(trace, ret);
502 return ret; 571 return ret;
@@ -557,7 +626,7 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
557 int ret; 626 int ret;
558 627
559 /* start the tracing */ 628 /* start the tracing */
560 ret = trace->init(tr); 629 ret = tracer_init(trace, tr);
561 if (ret) { 630 if (ret) {
562 warn_failed_init_tracer(trace, ret); 631 warn_failed_init_tracer(trace, ret);
563 return ret; 632 return ret;
@@ -589,10 +658,10 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
589 int ret; 658 int ret;
590 659
591 /* start the tracing */ 660 /* start the tracing */
592 ret = trace->init(tr); 661 ret = tracer_init(trace, tr);
593 if (ret) { 662 if (ret) {
594 warn_failed_init_tracer(trace, ret); 663 warn_failed_init_tracer(trace, ret);
595 return 0; 664 return ret;
596 } 665 }
597 666
598 /* Sleep for a 1/10 of a second */ 667 /* Sleep for a 1/10 of a second */
@@ -604,6 +673,11 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
604 trace->reset(tr); 673 trace->reset(tr);
605 tracing_start(); 674 tracing_start();
606 675
676 if (!ret && !count) {
677 printk(KERN_CONT ".. no entries found ..");
678 ret = -1;
679 }
680
607 return ret; 681 return ret;
608} 682}
609#endif /* CONFIG_SYSPROF_TRACER */ 683#endif /* CONFIG_SYSPROF_TRACER */
@@ -616,7 +690,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
616 int ret; 690 int ret;
617 691
618 /* start the tracing */ 692 /* start the tracing */
619 ret = trace->init(tr); 693 ret = tracer_init(trace, tr);
620 if (ret) { 694 if (ret) {
621 warn_failed_init_tracer(trace, ret); 695 warn_failed_init_tracer(trace, ret);
622 return ret; 696 return ret;
@@ -631,6 +705,11 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
631 trace->reset(tr); 705 trace->reset(tr);
632 tracing_start(); 706 tracing_start();
633 707
708 if (!ret && !count) {
709 printk(KERN_CONT ".. no entries found ..");
710 ret = -1;
711 }
712
634 return ret; 713 return ret;
635} 714}
636#endif /* CONFIG_BRANCH_TRACER */ 715#endif /* CONFIG_BRANCH_TRACER */
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index d0871bc0aca5..c750f65f9661 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -245,16 +245,31 @@ static int trace_lookup_stack(struct seq_file *m, long i)
245#endif 245#endif
246} 246}
247 247
248static void print_disabled(struct seq_file *m)
249{
250 seq_puts(m, "#\n"
251 "# Stack tracer disabled\n"
252 "#\n"
253 "# To enable the stack tracer, either add 'stacktrace' to the\n"
254 "# kernel command line\n"
255 "# or 'echo 1 > /proc/sys/kernel/stack_tracer_enabled'\n"
256 "#\n");
257}
258
248static int t_show(struct seq_file *m, void *v) 259static int t_show(struct seq_file *m, void *v)
249{ 260{
250 long i; 261 long i;
251 int size; 262 int size;
252 263
253 if (v == SEQ_START_TOKEN) { 264 if (v == SEQ_START_TOKEN) {
254 seq_printf(m, " Depth Size Location" 265 seq_printf(m, " Depth Size Location"
255 " (%d entries)\n" 266 " (%d entries)\n"
256 " ----- ---- --------\n", 267 " ----- ---- --------\n",
257 max_stack_trace.nr_entries); 268 max_stack_trace.nr_entries);
269
270 if (!stack_tracer_enabled && !max_stack_size)
271 print_disabled(m);
272
258 return 0; 273 return 0;
259 } 274 }
260 275
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
new file mode 100644
index 000000000000..39310e3434ee
--- /dev/null
+++ b/kernel/trace/trace_stat.c
@@ -0,0 +1,319 @@
1/*
2 * Infrastructure for statistic tracing (histogram output).
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 * Based on the code from trace_branch.c which is
7 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
8 *
9 */
10
11
12#include <linux/list.h>
13#include <linux/debugfs.h>
14#include "trace_stat.h"
15#include "trace.h"
16
17
18/* List of stat entries from a tracer */
19struct trace_stat_list {
20 struct list_head list;
21 void *stat;
22};
23
24/* A stat session is the stats output in one file */
25struct tracer_stat_session {
26 struct list_head session_list;
27 struct tracer_stat *ts;
28 struct list_head stat_list;
29 struct mutex stat_mutex;
30 struct dentry *file;
31};
32
33/* All of the sessions currently in use. Each stat file embeds one session */
34static LIST_HEAD(all_stat_sessions);
35static DEFINE_MUTEX(all_stat_sessions_mutex);
36
37/* The root directory for all stat files */
38static struct dentry *stat_dir;
39
40
41static void reset_stat_session(struct tracer_stat_session *session)
42{
43 struct trace_stat_list *node, *next;
44
45 list_for_each_entry_safe(node, next, &session->stat_list, list)
46 kfree(node);
47
48 INIT_LIST_HEAD(&session->stat_list);
49}
50
51static void destroy_session(struct tracer_stat_session *session)
52{
53 debugfs_remove(session->file);
54 reset_stat_session(session);
55 mutex_destroy(&session->stat_mutex);
56 kfree(session);
57}
58
59/*
60 * For tracers that don't provide a stat_cmp callback.
61 * This one forces an immediate insertion at the tail of
62 * the list.
63 */
64static int dummy_cmp(void *p1, void *p2)
65{
66 return 1;
67}
68
69/*
70 * Initialize the stat list at each trace_stat file opening.
71 * All of this copying and sorting is required at every open
72 * since the stats could have changed between two file sessions.
73 */
74static int stat_seq_init(struct tracer_stat_session *session)
75{
76 struct trace_stat_list *iter_entry, *new_entry;
77 struct tracer_stat *ts = session->ts;
78 void *prev_stat;
79 int ret = 0;
80 int i;
81
82 mutex_lock(&session->stat_mutex);
83 reset_stat_session(session);
84
85 if (!ts->stat_cmp)
86 ts->stat_cmp = dummy_cmp;
87
88 /*
89 * The first entry. Actually this is the second, but the first
90 * one (the stat_list head) is pointless.
91 */
92 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
93 if (!new_entry) {
94 ret = -ENOMEM;
95 goto exit;
96 }
97
98 INIT_LIST_HEAD(&new_entry->list);
99
100 list_add(&new_entry->list, &session->stat_list);
101
102 new_entry->stat = ts->stat_start();
103 prev_stat = new_entry->stat;
104
105 /*
106 * Iterate over the tracer stat entries and store them in a sorted
107 * list.
108 */
109 for (i = 1; ; i++) {
110 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
111 if (!new_entry) {
112 ret = -ENOMEM;
113 goto exit_free_list;
114 }
115
116 INIT_LIST_HEAD(&new_entry->list);
117 new_entry->stat = ts->stat_next(prev_stat, i);
118
119 /* End of insertion */
120 if (!new_entry->stat)
121 break;
122
123 list_for_each_entry(iter_entry, &session->stat_list, list) {
124
125 /* Insertion keeping a descending sort order */
126 if (ts->stat_cmp(new_entry->stat,
127 iter_entry->stat) > 0) {
128
129 list_add_tail(&new_entry->list,
130 &iter_entry->list);
131 break;
132
133 /* The new entry is the smallest so far */
134 } else if (list_is_last(&iter_entry->list,
135 &session->stat_list)) {
136 list_add(&new_entry->list, &iter_entry->list);
137 break;
138 }
139 }
140
141 prev_stat = new_entry->stat;
142 }
143exit:
144 mutex_unlock(&session->stat_mutex);
145 return ret;
146
147exit_free_list:
148 reset_stat_session(session);
149 mutex_unlock(&session->stat_mutex);
150 return ret;
151}
152
153
154static void *stat_seq_start(struct seq_file *s, loff_t *pos)
155{
156 struct tracer_stat_session *session = s->private;
157
158 /* Prevent a tracer switch or stat_list modification */
159 mutex_lock(&session->stat_mutex);
160
161 /* If we are at the beginning of the file, print the headers */
162 if (!*pos && session->ts->stat_headers)
163 session->ts->stat_headers(s);
164
165 return seq_list_start(&session->stat_list, *pos);
166}
167
168static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
169{
170 struct tracer_stat_session *session = s->private;
171
172 return seq_list_next(p, &session->stat_list, pos);
173}
174
175static void stat_seq_stop(struct seq_file *s, void *p)
176{
177 struct tracer_stat_session *session = s->private;
178 mutex_unlock(&session->stat_mutex);
179}
180
181static int stat_seq_show(struct seq_file *s, void *v)
182{
183 struct tracer_stat_session *session = s->private;
184 struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
185
186 return session->ts->stat_show(s, l->stat);
187}
188
189static const struct seq_operations trace_stat_seq_ops = {
190 .start = stat_seq_start,
191 .next = stat_seq_next,
192 .stop = stat_seq_stop,
193 .show = stat_seq_show
194};
195
196/* The session stat is refilled and resorted at each stat file opening */
197static int tracing_stat_open(struct inode *inode, struct file *file)
198{
199 int ret;
200
201 struct tracer_stat_session *session = inode->i_private;
202
203 ret = seq_open(file, &trace_stat_seq_ops);
204 if (!ret) {
205 struct seq_file *m = file->private_data;
206 m->private = session;
207 ret = stat_seq_init(session);
208 }
209
210 return ret;
211}
212
213/*
214 * Avoid consuming memory with our now useless list.
215 */
216static int tracing_stat_release(struct inode *i, struct file *f)
217{
218 struct tracer_stat_session *session = i->i_private;
219
220 mutex_lock(&session->stat_mutex);
221 reset_stat_session(session);
222 mutex_unlock(&session->stat_mutex);
223
224 return 0;
225}
226
227static const struct file_operations tracing_stat_fops = {
228 .open = tracing_stat_open,
229 .read = seq_read,
230 .llseek = seq_lseek,
231 .release = tracing_stat_release
232};
233
234static int tracing_stat_init(void)
235{
236 struct dentry *d_tracing;
237
238 d_tracing = tracing_init_dentry();
239
240 stat_dir = debugfs_create_dir("trace_stat", d_tracing);
241 if (!stat_dir)
242 pr_warning("Could not create debugfs "
243 "'trace_stat' entry\n");
244 return 0;
245}
246
247static int init_stat_file(struct tracer_stat_session *session)
248{
249 if (!stat_dir && tracing_stat_init())
250 return -ENODEV;
251
252 session->file = debugfs_create_file(session->ts->name, 0644,
253 stat_dir,
254 session, &tracing_stat_fops);
255 if (!session->file)
256 return -ENOMEM;
257 return 0;
258}
259
260int register_stat_tracer(struct tracer_stat *trace)
261{
262 struct tracer_stat_session *session, *node, *tmp;
263 int ret;
264
265 if (!trace)
266 return -EINVAL;
267
268 if (!trace->stat_start || !trace->stat_next || !trace->stat_show)
269 return -EINVAL;
270
271 /* Already registered? */
272 mutex_lock(&all_stat_sessions_mutex);
273 list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
274 if (node->ts == trace) {
275 mutex_unlock(&all_stat_sessions_mutex);
276 return -EINVAL;
277 }
278 }
279 mutex_unlock(&all_stat_sessions_mutex);
280
281 /* Init the session */
282 session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL);
283 if (!session)
284 return -ENOMEM;
285
286 session->ts = trace;
287 INIT_LIST_HEAD(&session->session_list);
288 INIT_LIST_HEAD(&session->stat_list);
289 mutex_init(&session->stat_mutex);
290 session->file = NULL;
291
292 ret = init_stat_file(session);
293 if (ret) {
294 destroy_session(session);
295 return ret;
296 }
297
298 /* Register */
299 mutex_lock(&all_stat_sessions_mutex);
300 list_add_tail(&session->session_list, &all_stat_sessions);
301 mutex_unlock(&all_stat_sessions_mutex);
302
303 return 0;
304}
305
306void unregister_stat_tracer(struct tracer_stat *trace)
307{
308 struct tracer_stat_session *node, *tmp;
309
310 mutex_lock(&all_stat_sessions_mutex);
311 list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
312 if (node->ts == trace) {
313 list_del(&node->session_list);
314 destroy_session(node);
315 break;
316 }
317 }
318 mutex_unlock(&all_stat_sessions_mutex);
319}
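
The insertion loop in stat_seq_init() above keeps the list sorted in descending order: whenever stat_cmp(new, iter) returns a positive value, the new entry is linked in front of iter. As a hypothetical illustration (example_stat_cmp is not part of the patch), a comparator sorting entries by an unsigned long counter could be written as:

/*
 * Return > 0 when p1 should be listed before p2; p1 and p2 are the opaque
 * pointers handed out by the tracer's stat_start()/stat_next() callbacks.
 */
static int example_stat_cmp(void *p1, void *p2)
{
	unsigned long a = *(unsigned long *)p1;
	unsigned long b = *(unsigned long *)p2;

	if (a > b)
		return 1;
	if (a < b)
		return -1;
	return 0;
}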
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
new file mode 100644
index 000000000000..202274cf7f3d
--- /dev/null
+++ b/kernel/trace/trace_stat.h
@@ -0,0 +1,31 @@
1#ifndef __TRACE_STAT_H
2#define __TRACE_STAT_H
3
4#include <linux/seq_file.h>
5
6/*
7 * If you want to provide a stat file (one-shot statistics), fill
8 * an iterator with stat_start/stat_next and a stat_show callback.
9 * The other callbacks are optional.
10 */
11struct tracer_stat {
12 /* The name of your stat file */
13 const char *name;
14 /* Iteration over statistic entries */
15 void *(*stat_start)(void);
16 void *(*stat_next)(void *prev, int idx);
17 /* Compare two entries for stats sorting */
18 int (*stat_cmp)(void *p1, void *p2);
19 /* Print a stat entry */
20 int (*stat_show)(struct seq_file *s, void *p);
21 /* Print the headers of your stat entries */
22 int (*stat_headers)(struct seq_file *s);
23};
24
25/*
26 * Destroy or create a stat file
27 */
28extern int register_stat_tracer(struct tracer_stat *trace);
29extern void unregister_stat_tracer(struct tracer_stat *trace);
30
31#endif /* __TRACE_STAT_H */
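
To make the interface concrete, here is a minimal hypothetical consumer of trace_stat.h. Everything named example_* is invented for illustration; only struct tracer_stat, register_stat_tracer() and the debugfs trace_stat directory come from the code above. With the tracing debugfs mounted in the usual place, this would show up as trace_stat/example:

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/seq_file.h>
#include "trace_stat.h"

static unsigned long example_values[] = { 42, 17, 5 };

/* Hand out the first entry, or NULL if there is nothing to show */
static void *example_stat_start(void)
{
	return &example_values[0];
}

/* Hand out the entry following @prev; idx starts at 1, NULL ends the walk */
static void *example_stat_next(void *prev, int idx)
{
	if (idx >= (int)ARRAY_SIZE(example_values))
		return NULL;
	return &example_values[idx];
}

/* Print one entry; called for every node of the (re)built stat list */
static int example_stat_show(struct seq_file *s, void *p)
{
	seq_printf(s, "%lu\n", *(unsigned long *)p);
	return 0;
}

static int example_stat_headers(struct seq_file *s)
{
	seq_printf(s, "# VALUE\n");
	return 0;
}

static struct tracer_stat example_stats = {
	.name		= "example",
	.stat_start	= example_stat_start,
	.stat_next	= example_stat_next,
	.stat_show	= example_stat_show,
	.stat_headers	= example_stat_headers,
	/* .stat_cmp left NULL: the core falls back to dummy_cmp() */
};

static int __init example_stat_init(void)
{
	return register_stat_tracer(&example_stats);
}
fs_initcall(example_stat_init);

Leaving .stat_cmp NULL means dummy_cmp() is used, so no meaningful ordering is applied to the entries.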
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
new file mode 100644
index 000000000000..a2a3af29c943
--- /dev/null
+++ b/kernel/trace/trace_syscalls.c
@@ -0,0 +1,250 @@
1#include <linux/kernel.h>
2#include <linux/ftrace.h>
3#include <asm/syscall.h>
4
5#include "trace_output.h"
6#include "trace.h"
7
8/* Keep a counter of the syscall tracing users */
9static int refcount;
10
11/* Protect against races when toggling the thread flags */
12static DEFINE_MUTEX(syscall_trace_lock);
13
14/* Option to display the parameter types */
15enum {
16 TRACE_SYSCALLS_OPT_TYPES = 0x1,
17};
18
19static struct tracer_opt syscalls_opts[] = {
20 { TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) },
21 { }
22};
23
24static struct tracer_flags syscalls_flags = {
25 .val = 0, /* By default: no parameter types */
26 .opts = syscalls_opts
27};
28
29enum print_line_t
30print_syscall_enter(struct trace_iterator *iter, int flags)
31{
32 struct trace_seq *s = &iter->seq;
33 struct trace_entry *ent = iter->ent;
34 struct syscall_trace_enter *trace;
35 struct syscall_metadata *entry;
36 int i, ret, syscall;
37
38 trace_assign_type(trace, ent);
39
40 syscall = trace->nr;
41
42 entry = syscall_nr_to_meta(syscall);
43 if (!entry)
44 goto end;
45
46 ret = trace_seq_printf(s, "%s(", entry->name);
47 if (!ret)
48 return TRACE_TYPE_PARTIAL_LINE;
49
50 for (i = 0; i < entry->nb_args; i++) {
51 /* parameter types */
52 if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) {
53 ret = trace_seq_printf(s, "%s ", entry->types[i]);
54 if (!ret)
55 return TRACE_TYPE_PARTIAL_LINE;
56 }
57 /* parameter values */
58 ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i],
59 trace->args[i],
60 i == entry->nb_args - 1 ? ")" : ",");
61 if (!ret)
62 return TRACE_TYPE_PARTIAL_LINE;
63 }
64
65end:
66 trace_seq_printf(s, "\n");
67 return TRACE_TYPE_HANDLED;
68}
69
70enum print_line_t
71print_syscall_exit(struct trace_iterator *iter, int flags)
72{
73 struct trace_seq *s = &iter->seq;
74 struct trace_entry *ent = iter->ent;
75 struct syscall_trace_exit *trace;
76 int syscall;
77 struct syscall_metadata *entry;
78 int ret;
79
80 trace_assign_type(trace, ent);
81
82 syscall = trace->nr;
83
84 entry = syscall_nr_to_meta(syscall);
85 if (!entry) {
86 trace_seq_printf(s, "\n");
87 return TRACE_TYPE_HANDLED;
88 }
89
90 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
91 trace->ret);
92 if (!ret)
93 return TRACE_TYPE_PARTIAL_LINE;
94
95 return TRACE_TYPE_HANDLED;
96}
97
98void start_ftrace_syscalls(void)
99{
100 unsigned long flags;
101 struct task_struct *g, *t;
102
103 mutex_lock(&syscall_trace_lock);
104
105 /* Don't enable the flag on the tasks twice */
106 if (++refcount != 1)
107 goto unlock;
108
109 arch_init_ftrace_syscalls();
110 read_lock_irqsave(&tasklist_lock, flags);
111
112 do_each_thread(g, t) {
113 set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
114 } while_each_thread(g, t);
115
116 read_unlock_irqrestore(&tasklist_lock, flags);
117
118unlock:
119 mutex_unlock(&syscall_trace_lock);
120}
121
122void stop_ftrace_syscalls(void)
123{
124 unsigned long flags;
125 struct task_struct *g, *t;
126
127 mutex_lock(&syscall_trace_lock);
128
129 /* There may still be other users */
130 if (--refcount)
131 goto unlock;
132
133 read_lock_irqsave(&tasklist_lock, flags);
134
135 do_each_thread(g, t) {
136 clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
137 } while_each_thread(g, t);
138
139 read_unlock_irqrestore(&tasklist_lock, flags);
140
141unlock:
142 mutex_unlock(&syscall_trace_lock);
143}
144
145void ftrace_syscall_enter(struct pt_regs *regs)
146{
147 struct syscall_trace_enter *entry;
148 struct syscall_metadata *sys_data;
149 struct ring_buffer_event *event;
150 int size;
151 int syscall_nr;
152
153 syscall_nr = syscall_get_nr(current, regs);
154
155 sys_data = syscall_nr_to_meta(syscall_nr);
156 if (!sys_data)
157 return;
158
159 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
160
161 event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size,
162 0, 0);
163 if (!event)
164 return;
165
166 entry = ring_buffer_event_data(event);
167 entry->nr = syscall_nr;
168 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
169
170 trace_current_buffer_unlock_commit(event, 0, 0);
171 trace_wake_up();
172}
173
174void ftrace_syscall_exit(struct pt_regs *regs)
175{
176 struct syscall_trace_exit *entry;
177 struct syscall_metadata *sys_data;
178 struct ring_buffer_event *event;
179 int syscall_nr;
180
181 syscall_nr = syscall_get_nr(current, regs);
182
183 sys_data = syscall_nr_to_meta(syscall_nr);
184 if (!sys_data)
185 return;
186
187 event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT,
188 sizeof(*entry), 0, 0);
189 if (!event)
190 return;
191
192 entry = ring_buffer_event_data(event);
193 entry->nr = syscall_nr;
194 entry->ret = syscall_get_return_value(current, regs);
195
196 trace_current_buffer_unlock_commit(event, 0, 0);
197 trace_wake_up();
198}
199
200static int init_syscall_tracer(struct trace_array *tr)
201{
202 start_ftrace_syscalls();
203
204 return 0;
205}
206
207static void reset_syscall_tracer(struct trace_array *tr)
208{
209 stop_ftrace_syscalls();
210 tracing_reset_online_cpus(tr);
211}
212
213static struct trace_event syscall_enter_event = {
214 .type = TRACE_SYSCALL_ENTER,
215 .trace = print_syscall_enter,
216};
217
218static struct trace_event syscall_exit_event = {
219 .type = TRACE_SYSCALL_EXIT,
220 .trace = print_syscall_exit,
221};
222
223static struct tracer syscall_tracer __read_mostly = {
224 .name = "syscall",
225 .init = init_syscall_tracer,
226 .reset = reset_syscall_tracer,
227 .flags = &syscalls_flags,
228};
229
230__init int register_ftrace_syscalls(void)
231{
232 int ret;
233
234 ret = register_ftrace_event(&syscall_enter_event);
235 if (!ret) {
236 printk(KERN_WARNING "event %d failed to register\n",
237 syscall_enter_event.type);
238 WARN_ON_ONCE(1);
239 }
240
241 ret = register_ftrace_event(&syscall_exit_event);
242 if (!ret) {
243 printk(KERN_WARNING "event %d failed to register\n",
244 syscall_exit_event.type);
245 WARN_ON_ONCE(1);
246 }
247
248 return register_tracer(&syscall_tracer);
249}
250device_initcall(register_ftrace_syscalls);
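
print_syscall_enter() and print_syscall_exit() depend on per-syscall metadata and on two ring-buffer payload layouts defined elsewhere (trace.h and the arch syscall-tracing support). The following is only a sketch inferred from the fields dereferenced above, not a copy of those headers:

/* Per-syscall description provided by the arch code (inferred layout) */
struct syscall_metadata {
	const char	*name;		/* e.g. "sys_read" */
	int		nb_args;	/* number of parameters */
	const char	**types;	/* e.g. { "unsigned int", "char *", "size_t" } */
	const char	**args;		/* e.g. { "fd", "buf", "count" } */
};

/* Payload written by ftrace_syscall_enter() (inferred layout) */
struct syscall_trace_enter {
	struct trace_entry	ent;
	int			nr;		/* syscall number */
	unsigned long		args[];		/* nb_args argument values */
};

/* Payload written by ftrace_syscall_exit() (inferred layout) */
struct syscall_trace_exit {
	struct trace_entry	ent;
	int			nr;
	unsigned long		ret;		/* return value */
};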
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index eaca5ad803ff..91fd19c2149f 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -88,7 +88,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
88 } 88 }
89} 89}
90 90
91const static struct stacktrace_ops backtrace_ops = { 91static const struct stacktrace_ops backtrace_ops = {
92 .warning = backtrace_warning, 92 .warning = backtrace_warning,
93 .warning_symbol = backtrace_warning_symbol, 93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack, 94 .stack = backtrace_stack,
@@ -226,15 +226,6 @@ static void stop_stack_timers(void)
226 stop_stack_timer(cpu); 226 stop_stack_timer(cpu);
227} 227}
228 228
229static void start_stack_trace(struct trace_array *tr)
230{
231 mutex_lock(&sample_timer_lock);
232 tracing_reset_online_cpus(tr);
233 start_stack_timers();
234 tracer_enabled = 1;
235 mutex_unlock(&sample_timer_lock);
236}
237
238static void stop_stack_trace(struct trace_array *tr) 229static void stop_stack_trace(struct trace_array *tr)
239{ 230{
240 mutex_lock(&sample_timer_lock); 231 mutex_lock(&sample_timer_lock);
@@ -247,12 +238,18 @@ static int stack_trace_init(struct trace_array *tr)
247{ 238{
248 sysprof_trace = tr; 239 sysprof_trace = tr;
249 240
250 start_stack_trace(tr); 241 tracing_start_cmdline_record();
242
243 mutex_lock(&sample_timer_lock);
244 start_stack_timers();
245 tracer_enabled = 1;
246 mutex_unlock(&sample_timer_lock);
251 return 0; 247 return 0;
252} 248}
253 249
254static void stack_trace_reset(struct trace_array *tr) 250static void stack_trace_reset(struct trace_array *tr)
255{ 251{
252 tracing_stop_cmdline_record();
256 stop_stack_trace(tr); 253 stop_stack_trace(tr);
257} 254}
258 255
@@ -317,7 +314,7 @@ sysprof_sample_write(struct file *filp, const char __user *ubuf,
317 return cnt; 314 return cnt;
318} 315}
319 316
320static struct file_operations sysprof_sample_fops = { 317static const struct file_operations sysprof_sample_fops = {
321 .read = sysprof_sample_read, 318 .read = sysprof_sample_read,
322 .write = sysprof_sample_write, 319 .write = sysprof_sample_write,
323}; 320};
@@ -330,5 +327,5 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
330 d_tracer, NULL, &sysprof_sample_fops); 327 d_tracer, NULL, &sysprof_sample_fops);
331 if (entry) 328 if (entry)
332 return; 329 return;
333 pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n"); 330 pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n");
334} 331}
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
new file mode 100644
index 000000000000..9ab035b58cf1
--- /dev/null
+++ b/kernel/trace/trace_workqueue.c
@@ -0,0 +1,288 @@
1/*
2 * Workqueue statistical tracer.
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8
9#include <trace/workqueue.h>
10#include <linux/list.h>
11#include <linux/percpu.h>
12#include "trace_stat.h"
13#include "trace.h"
14
15
16/* A cpu workqueue thread */
17struct cpu_workqueue_stats {
18 struct list_head list;
19/* Useful to know whether we print the cpu headers */
20 bool first_entry;
21 int cpu;
22 pid_t pid;
23/* Can be inserted from interrupt or user context, needs to be atomic */
24 atomic_t inserted;
25/*
26 * Doesn't need to be atomic: works are serialized in a single workqueue thread
27 * on a single CPU.
28 */
29 unsigned int executed;
30};
31
32/* List of workqueue threads on one cpu */
33struct workqueue_global_stats {
34 struct list_head list;
35 spinlock_t lock;
36};
37
38/* No global lock is needed: these are allocated before the workqueues, and
39 * never freed.
40 */
41static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
42#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
43
44/* Insertion of a work */
45static void
46probe_workqueue_insertion(struct task_struct *wq_thread,
47 struct work_struct *work)
48{
49 int cpu = cpumask_first(&wq_thread->cpus_allowed);
50 struct cpu_workqueue_stats *node, *next;
51 unsigned long flags;
52
53 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
54 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
55 list) {
56 if (node->pid == wq_thread->pid) {
57 atomic_inc(&node->inserted);
58 goto found;
59 }
60 }
61 pr_debug("trace_workqueue: entry not found\n");
62found:
63 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
64}
65
66/* Execution of a work */
67static void
68probe_workqueue_execution(struct task_struct *wq_thread,
69 struct work_struct *work)
70{
71 int cpu = cpumask_first(&wq_thread->cpus_allowed);
72 struct cpu_workqueue_stats *node, *next;
73 unsigned long flags;
74
75 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
76 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
77 list) {
78 if (node->pid == wq_thread->pid) {
79 node->executed++;
80 goto found;
81 }
82 }
83 pr_debug("trace_workqueue: entry not found\n");
84found:
85 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
86}
87
88/* Creation of a cpu workqueue thread */
89static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
90{
91 struct cpu_workqueue_stats *cws;
92 unsigned long flags;
93
94 WARN_ON(cpu < 0);
95
96 /* Workqueues are sometimes created in atomic context */
97 cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);
98 if (!cws) {
99 pr_warning("trace_workqueue: not enough memory\n");
100 return;
101 }
102 INIT_LIST_HEAD(&cws->list);
103 cws->cpu = cpu;
104
105 cws->pid = wq_thread->pid;
106
107 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
108 if (list_empty(&workqueue_cpu_stat(cpu)->list))
109 cws->first_entry = true;
110 list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
111 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
112}
113
114/* Destruction of a cpu workqueue thread */
115static void probe_workqueue_destruction(struct task_struct *wq_thread)
116{
117 /* A workqueue thread only executes on one cpu */
118 int cpu = cpumask_first(&wq_thread->cpus_allowed);
119 struct cpu_workqueue_stats *node, *next;
120 unsigned long flags;
121
122 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
123 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
124 list) {
125 if (node->pid == wq_thread->pid) {
126 list_del(&node->list);
127 kfree(node);
128 goto found;
129 }
130 }
131
132 pr_debug("trace_workqueue: couldn't find workqueue to destroy\n");
133found:
134 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
135
136}
137
138static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
139{
140 unsigned long flags;
141 struct cpu_workqueue_stats *ret = NULL;
142
143
144 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
145
146 if (!list_empty(&workqueue_cpu_stat(cpu)->list))
147 ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
148 struct cpu_workqueue_stats, list);
149
150 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
151
152 return ret;
153}
154
155static void *workqueue_stat_start(void)
156{
157 int cpu;
158 void *ret = NULL;
159
160 for_each_possible_cpu(cpu) {
161 ret = workqueue_stat_start_cpu(cpu);
162 if (ret)
163 return ret;
164 }
165 return NULL;
166}
167
168static void *workqueue_stat_next(void *prev, int idx)
169{
170 struct cpu_workqueue_stats *prev_cws = prev;
171 int cpu = prev_cws->cpu;
172 unsigned long flags;
173 void *ret = NULL;
174
175 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
176 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
177 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
178 do {
179 cpu = cpumask_next(cpu, cpu_possible_mask);
180 if (cpu >= nr_cpu_ids)
181 return NULL;
182 } while (!(ret = workqueue_stat_start_cpu(cpu)));
183 return ret;
184 }
185 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
186
187 return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
188 list);
189}
190
191static int workqueue_stat_show(struct seq_file *s, void *p)
192{
193 struct cpu_workqueue_stats *cws = p;
194 unsigned long flags;
195 int cpu = cws->cpu;
196 struct pid *pid;
197 struct task_struct *tsk;
198
199 pid = find_get_pid(cws->pid);
200 if (pid) {
201 tsk = get_pid_task(pid, PIDTYPE_PID);
202 if (tsk) {
203 seq_printf(s, "%3d %6d %6u %s\n", cws->cpu,
204 atomic_read(&cws->inserted), cws->executed,
205 tsk->comm);
206 put_task_struct(tsk);
207 }
208 put_pid(pid);
209 }
210
211 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
212 if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
213 seq_printf(s, "\n");
214 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
215
216 return 0;
217}
218
219static int workqueue_stat_headers(struct seq_file *s)
220{
221 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
222 seq_printf(s, "# | | | |\n\n");
223 return 0;
224}
225
226struct tracer_stat workqueue_stats __read_mostly = {
227 .name = "workqueues",
228 .stat_start = workqueue_stat_start,
229 .stat_next = workqueue_stat_next,
230 .stat_show = workqueue_stat_show,
231 .stat_headers = workqueue_stat_headers
232};
233
234
235int __init stat_workqueue_init(void)
236{
237 if (register_stat_tracer(&workqueue_stats)) {
238 pr_warning("Unable to register workqueue stat tracer\n");
239 return 1;
240 }
241
242 return 0;
243}
244fs_initcall(stat_workqueue_init);
245
246/*
247 * Workqueues are created very early, just after pre-smp initcalls.
248 * So we must register our tracepoints at this stage.
249 */
250int __init trace_workqueue_early_init(void)
251{
252 int ret, cpu;
253
254 ret = register_trace_workqueue_insertion(probe_workqueue_insertion);
255 if (ret)
256 goto out;
257
258 ret = register_trace_workqueue_execution(probe_workqueue_execution);
259 if (ret)
260 goto no_insertion;
261
262 ret = register_trace_workqueue_creation(probe_workqueue_creation);
263 if (ret)
264 goto no_execution;
265
266 ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
267 if (ret)
268 goto no_creation;
269
270 for_each_possible_cpu(cpu) {
271 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
272 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
273 }
274
275 return 0;
276
277no_creation:
278 unregister_trace_workqueue_creation(probe_workqueue_creation);
279no_execution:
280 unregister_trace_workqueue_execution(probe_workqueue_execution);
281no_insertion:
282 unregister_trace_workqueue_insertion(probe_workqueue_insertion);
283out:
284 pr_warning("trace_workqueue: unable to trace workqueues\n");
285
286 return 1;
287}
288early_initcall(trace_workqueue_early_init);
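
The four probe registrations above imply matching tracepoint declarations in <trace/workqueue.h>. As a sketch inferred from the probe signatures (the exact header is not shown here, and the macro spelling of that era — TPPROTO/TPARGS versus TP_PROTO/TP_ARGS — may differ), the declarations would look roughly like:

#include <linux/tracepoint.h>
#include <linux/sched.h>
#include <linux/workqueue.h>

DECLARE_TRACE(workqueue_insertion,
	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
	TP_ARGS(wq_thread, work));

DECLARE_TRACE(workqueue_execution,
	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
	TP_ARGS(wq_thread, work));

DECLARE_TRACE(workqueue_creation,
	TP_PROTO(struct task_struct *wq_thread, int cpu),
	TP_ARGS(wq_thread, cpu));

DECLARE_TRACE(workqueue_destruction,
	TP_PROTO(struct task_struct *wq_thread),
	TP_ARGS(wq_thread));

Each DECLARE_TRACE() is what provides the register_trace_workqueue_*() and unregister_trace_workqueue_*() helpers used by trace_workqueue_early_init().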