aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
authorPaul Mundt <lethal@linux-sh.org>2011-01-13 01:06:28 -0500
committerPaul Mundt <lethal@linux-sh.org>2011-01-13 01:06:28 -0500
commitf43dc23d5ea91fca257be02138a255f02d98e806 (patch)
treeb29722f6e965316e90ac97abf79923ced250dc21 /kernel/trace
parentf8e53553f452dcbf67cb89c8cba63a1cd6eb4cc0 (diff)
parent4162cf64973df51fc885825bc9ca4d055891c49f (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6 into common/serial-rework
Conflicts: arch/sh/kernel/cpu/sh2/setup-sh7619.c arch/sh/kernel/cpu/sh2a/setup-mxg.c arch/sh/kernel/cpu/sh2a/setup-sh7201.c arch/sh/kernel/cpu/sh2a/setup-sh7203.c arch/sh/kernel/cpu/sh2a/setup-sh7206.c arch/sh/kernel/cpu/sh3/setup-sh7705.c arch/sh/kernel/cpu/sh3/setup-sh770x.c arch/sh/kernel/cpu/sh3/setup-sh7710.c arch/sh/kernel/cpu/sh3/setup-sh7720.c arch/sh/kernel/cpu/sh4/setup-sh4-202.c arch/sh/kernel/cpu/sh4/setup-sh7750.c arch/sh/kernel/cpu/sh4/setup-sh7760.c arch/sh/kernel/cpu/sh4a/setup-sh7343.c arch/sh/kernel/cpu/sh4a/setup-sh7366.c arch/sh/kernel/cpu/sh4a/setup-sh7722.c arch/sh/kernel/cpu/sh4a/setup-sh7723.c arch/sh/kernel/cpu/sh4a/setup-sh7724.c arch/sh/kernel/cpu/sh4a/setup-sh7763.c arch/sh/kernel/cpu/sh4a/setup-sh7770.c arch/sh/kernel/cpu/sh4a/setup-sh7780.c arch/sh/kernel/cpu/sh4a/setup-sh7785.c arch/sh/kernel/cpu/sh4a/setup-sh7786.c arch/sh/kernel/cpu/sh4a/setup-shx3.c arch/sh/kernel/cpu/sh5/setup-sh5.c drivers/serial/sh-sci.c drivers/serial/sh-sci.h include/linux/serial_sci.h
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig238
-rw-r--r--kernel/trace/Makefile14
-rw-r--r--kernel/trace/blktrace.c300
-rw-r--r--kernel/trace/ftrace.c1105
-rw-r--r--kernel/trace/kmemtrace.c468
-rw-r--r--kernel/trace/power-traces.c20
-rw-r--r--kernel/trace/ring_buffer.c1849
-rw-r--r--kernel/trace/ring_buffer_benchmark.c91
-rw-r--r--kernel/trace/trace.c1762
-rw-r--r--kernel/trace/trace.h583
-rw-r--r--kernel/trace/trace_boot.c179
-rw-r--r--kernel/trace/trace_branch.c35
-rw-r--r--kernel/trace/trace_clock.c38
-rw-r--r--kernel/trace/trace_entries.h276
-rw-r--r--kernel/trace/trace_event_perf.c216
-rw-r--r--kernel/trace/trace_event_profile.c39
-rw-r--r--kernel/trace/trace_event_types.h175
-rw-r--r--kernel/trace/trace_events.c803
-rw-r--r--kernel/trace/trace_events_filter.c611
-rw-r--r--kernel/trace/trace_export.c273
-rw-r--r--kernel/trace/trace_functions.c15
-rw-r--r--kernel/trace/trace_functions_graph.c824
-rw-r--r--kernel/trace/trace_hw_branches.c309
-rw-r--r--kernel/trace/trace_irqsoff.c279
-rw-r--r--kernel/trace/trace_kdb.c135
-rw-r--r--kernel/trace/trace_kprobe.c1847
-rw-r--r--kernel/trace/trace_mmiotrace.c17
-rw-r--r--kernel/trace/trace_output.c327
-rw-r--r--kernel/trace/trace_output.h4
-rw-r--r--kernel/trace/trace_power.c214
-rw-r--r--kernel/trace/trace_printk.c29
-rw-r--r--kernel/trace/trace_sched_switch.c80
-rw-r--r--kernel/trace/trace_sched_wakeup.c355
-rw-r--r--kernel/trace/trace_selftest.c104
-rw-r--r--kernel/trace/trace_stack.c103
-rw-r--r--kernel/trace/trace_stat.c54
-rw-r--r--kernel/trace/trace_stat.h2
-rw-r--r--kernel/trace/trace_syscalls.c624
-rw-r--r--kernel/trace/trace_sysprof.c328
-rw-r--r--kernel/trace/trace_workqueue.c69
40 files changed, 9416 insertions, 5378 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1551f47e7669..14674dce77a6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -11,38 +11,48 @@ config NOP_TRACER
11 11
12config HAVE_FTRACE_NMI_ENTER 12config HAVE_FTRACE_NMI_ENTER
13 bool 13 bool
14 help
15 See Documentation/trace/ftrace-design.txt
14 16
15config HAVE_FUNCTION_TRACER 17config HAVE_FUNCTION_TRACER
16 bool 18 bool
19 help
20 See Documentation/trace/ftrace-design.txt
17 21
18config HAVE_FUNCTION_GRAPH_TRACER 22config HAVE_FUNCTION_GRAPH_TRACER
19 bool 23 bool
24 help
25 See Documentation/trace/ftrace-design.txt
20 26
21config HAVE_FUNCTION_GRAPH_FP_TEST 27config HAVE_FUNCTION_GRAPH_FP_TEST
22 bool 28 bool
23 help 29 help
24 An arch may pass in a unique value (frame pointer) to both the 30 See Documentation/trace/ftrace-design.txt
25 entering and exiting of a function. On exit, the value is compared
26 and if it does not match, then it will panic the kernel.
27 31
28config HAVE_FUNCTION_TRACE_MCOUNT_TEST 32config HAVE_FUNCTION_TRACE_MCOUNT_TEST
29 bool 33 bool
30 help 34 help
31 This gets selected when the arch tests the function_trace_stop 35 See Documentation/trace/ftrace-design.txt
32 variable at the mcount call site. Otherwise, this variable
33 is tested by the called function.
34 36
35config HAVE_DYNAMIC_FTRACE 37config HAVE_DYNAMIC_FTRACE
36 bool 38 bool
39 help
40 See Documentation/trace/ftrace-design.txt
37 41
38config HAVE_FTRACE_MCOUNT_RECORD 42config HAVE_FTRACE_MCOUNT_RECORD
39 bool 43 bool
44 help
45 See Documentation/trace/ftrace-design.txt
40 46
41config HAVE_HW_BRANCH_TRACER 47config HAVE_SYSCALL_TRACEPOINTS
42 bool 48 bool
49 help
50 See Documentation/trace/ftrace-design.txt
43 51
44config HAVE_FTRACE_SYSCALLS 52config HAVE_C_RECORDMCOUNT
45 bool 53 bool
54 help
55 C version of recordmcount available?
46 56
47config TRACER_MAX_TRACE 57config TRACER_MAX_TRACE
48 bool 58 bool
@@ -59,16 +69,36 @@ config EVENT_TRACING
59 select CONTEXT_SWITCH_TRACER 69 select CONTEXT_SWITCH_TRACER
60 bool 70 bool
61 71
72config EVENT_POWER_TRACING_DEPRECATED
73 depends on EVENT_TRACING
74 bool "Deprecated power event trace API, to be removed"
75 default y
76 help
77 Provides old power event types:
78 C-state/idle accounting events:
79 power:power_start
80 power:power_end
81 and old cpufreq accounting event:
82 power:power_frequency
83 This is for userspace compatibility
84 and will vanish after 5 kernel iterations,
85 namely 2.6.41.
86
62config CONTEXT_SWITCH_TRACER 87config CONTEXT_SWITCH_TRACER
63 select MARKERS
64 bool 88 bool
65 89
90config RING_BUFFER_ALLOW_SWAP
91 bool
92 help
93 Allow the use of ring_buffer_swap_cpu.
94 Adds a very slight overhead to tracing when enabled.
95
66# All tracer options should select GENERIC_TRACER. For those options that are 96# All tracer options should select GENERIC_TRACER. For those options that are
67# enabled by all tracers (context switch and event tracer) they select TRACING. 97# enabled by all tracers (context switch and event tracer) they select TRACING.
68# This allows those options to appear when no other tracer is selected. But the 98# This allows those options to appear when no other tracer is selected. But the
69# options do not appear when something else selects it. We need the two options 99# options do not appear when something else selects it. We need the two options
70# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the 100# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
71# hidding of the automatic options options. 101# hiding of the automatic options.
72 102
73config TRACING 103config TRACING
74 bool 104 bool
@@ -104,21 +134,21 @@ menuconfig FTRACE
104 bool "Tracers" 134 bool "Tracers"
105 default y if DEBUG_KERNEL 135 default y if DEBUG_KERNEL
106 help 136 help
107 Enable the kernel tracing infrastructure. 137 Enable the kernel tracing infrastructure.
108 138
109if FTRACE 139if FTRACE
110 140
111config FUNCTION_TRACER 141config FUNCTION_TRACER
112 bool "Kernel Function Tracer" 142 bool "Kernel Function Tracer"
113 depends on HAVE_FUNCTION_TRACER 143 depends on HAVE_FUNCTION_TRACER
114 select FRAME_POINTER 144 select FRAME_POINTER if !ARM_UNWIND && !S390
115 select KALLSYMS 145 select KALLSYMS
116 select GENERIC_TRACER 146 select GENERIC_TRACER
117 select CONTEXT_SWITCH_TRACER 147 select CONTEXT_SWITCH_TRACER
118 help 148 help
119 Enable the kernel to trace every kernel function. This is done 149 Enable the kernel to trace every kernel function. This is done
120 by using a compiler feature to insert a small, 5-byte No-Operation 150 by using a compiler feature to insert a small, 5-byte No-Operation
121 instruction to the beginning of every kernel function, which NOP 151 instruction at the beginning of every kernel function, which NOP
122 sequence is then dynamically patched into a tracer call when 152 sequence is then dynamically patched into a tracer call when
123 tracing is enabled by the administrator. If it's runtime disabled 153 tracing is enabled by the administrator. If it's runtime disabled
124 (the bootup default), then the overhead of the instructions is very 154 (the bootup default), then the overhead of the instructions is very
@@ -135,7 +165,7 @@ config FUNCTION_GRAPH_TRACER
135 and its entry. 165 and its entry.
136 Its first purpose is to trace the duration of functions and 166 Its first purpose is to trace the duration of functions and
137 draw a call graph for each thread with some information like 167 draw a call graph for each thread with some information like
138 the return value. This is done by setting the current return 168 the return value. This is done by setting the current return
139 address on the current task structure into a stack of calls. 169 address on the current task structure into a stack of calls.
140 170
141 171
@@ -143,10 +173,11 @@ config IRQSOFF_TRACER
143 bool "Interrupts-off Latency Tracer" 173 bool "Interrupts-off Latency Tracer"
144 default n 174 default n
145 depends on TRACE_IRQFLAGS_SUPPORT 175 depends on TRACE_IRQFLAGS_SUPPORT
146 depends on GENERIC_TIME 176 depends on !ARCH_USES_GETTIMEOFFSET
147 select TRACE_IRQFLAGS 177 select TRACE_IRQFLAGS
148 select GENERIC_TRACER 178 select GENERIC_TRACER
149 select TRACER_MAX_TRACE 179 select TRACER_MAX_TRACE
180 select RING_BUFFER_ALLOW_SWAP
150 help 181 help
151 This option measures the time spent in irqs-off critical 182 This option measures the time spent in irqs-off critical
152 sections, with microsecond accuracy. 183 sections, with microsecond accuracy.
@@ -157,19 +188,20 @@ config IRQSOFF_TRACER
157 188
158 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency 189 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
159 190
160 (Note that kernel size and overhead increases with this option 191 (Note that kernel size and overhead increase with this option
161 enabled. This option and the preempt-off timing option can be 192 enabled. This option and the preempt-off timing option can be
162 used together or separately.) 193 used together or separately.)
163 194
164config PREEMPT_TRACER 195config PREEMPT_TRACER
165 bool "Preemption-off Latency Tracer" 196 bool "Preemption-off Latency Tracer"
166 default n 197 default n
167 depends on GENERIC_TIME 198 depends on !ARCH_USES_GETTIMEOFFSET
168 depends on PREEMPT 199 depends on PREEMPT
169 select GENERIC_TRACER 200 select GENERIC_TRACER
170 select TRACER_MAX_TRACE 201 select TRACER_MAX_TRACE
202 select RING_BUFFER_ALLOW_SWAP
171 help 203 help
172 This option measures the time spent in preemption off critical 204 This option measures the time spent in preemption-off critical
173 sections, with microsecond accuracy. 205 sections, with microsecond accuracy.
174 206
175 The default measurement method is a maximum search, which is 207 The default measurement method is a maximum search, which is
@@ -178,19 +210,10 @@ config PREEMPT_TRACER
178 210
179 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency 211 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
180 212
181 (Note that kernel size and overhead increases with this option 213 (Note that kernel size and overhead increase with this option
182 enabled. This option and the irqs-off timing option can be 214 enabled. This option and the irqs-off timing option can be
183 used together or separately.) 215 used together or separately.)
184 216
185config SYSPROF_TRACER
186 bool "Sysprof Tracer"
187 depends on X86
188 select GENERIC_TRACER
189 select CONTEXT_SWITCH_TRACER
190 help
191 This tracer provides the trace needed by the 'Sysprof' userspace
192 tool.
193
194config SCHED_TRACER 217config SCHED_TRACER
195 bool "Scheduling Latency Tracer" 218 bool "Scheduling Latency Tracer"
196 select GENERIC_TRACER 219 select GENERIC_TRACER
@@ -205,35 +228,18 @@ config ENABLE_DEFAULT_TRACERS
205 depends on !GENERIC_TRACER 228 depends on !GENERIC_TRACER
206 select TRACING 229 select TRACING
207 help 230 help
208 This tracer hooks to various trace points in the kernel 231 This tracer hooks to various trace points in the kernel,
209 allowing the user to pick and choose which trace point they 232 allowing the user to pick and choose which trace point they
210 want to trace. It also includes the sched_switch tracer plugin. 233 want to trace. It also includes the sched_switch tracer plugin.
211 234
212config FTRACE_SYSCALLS 235config FTRACE_SYSCALLS
213 bool "Trace syscalls" 236 bool "Trace syscalls"
214 depends on HAVE_FTRACE_SYSCALLS 237 depends on HAVE_SYSCALL_TRACEPOINTS
215 select GENERIC_TRACER 238 select GENERIC_TRACER
216 select KALLSYMS 239 select KALLSYMS
217 help 240 help
218 Basic tracer to catch the syscall entry and exit events. 241 Basic tracer to catch the syscall entry and exit events.
219 242
220config BOOT_TRACER
221 bool "Trace boot initcalls"
222 select GENERIC_TRACER
223 select CONTEXT_SWITCH_TRACER
224 help
225 This tracer helps developers to optimize boot times: it records
226 the timings of the initcalls and traces key events and the identity
227 of tasks that can cause boot delays, such as context-switches.
228
229 Its aim is to be parsed by the /scripts/bootgraph.pl tool to
230 produce pretty graphics about boot inefficiencies, giving a visual
231 representation of the delays during initcalls - but the raw
232 /debug/tracing/trace text output is readable too.
233
234 You must pass in ftrace=initcall to the kernel command line
235 to enable this on bootup.
236
237config TRACE_BRANCH_PROFILING 243config TRACE_BRANCH_PROFILING
238 bool 244 bool
239 select GENERIC_TRACER 245 select GENERIC_TRACER
@@ -248,19 +254,19 @@ choice
248 The likely/unlikely profiler only looks at the conditions that 254 The likely/unlikely profiler only looks at the conditions that
249 are annotated with a likely or unlikely macro. 255 are annotated with a likely or unlikely macro.
250 256
251 The "all branch" profiler will profile every if statement in the 257 The "all branch" profiler will profile every if-statement in the
252 kernel. This profiler will also enable the likely/unlikely 258 kernel. This profiler will also enable the likely/unlikely
253 profiler as well. 259 profiler.
254 260
255 Either of the above profilers add a bit of overhead to the system. 261 Either of the above profilers adds a bit of overhead to the system.
256 If unsure choose "No branch profiling". 262 If unsure, choose "No branch profiling".
257 263
258config BRANCH_PROFILE_NONE 264config BRANCH_PROFILE_NONE
259 bool "No branch profiling" 265 bool "No branch profiling"
260 help 266 help
261 No branch profiling. Branch profiling adds a bit of overhead. 267 No branch profiling. Branch profiling adds a bit of overhead.
262 Only enable it if you want to analyse the branching behavior. 268 Only enable it if you want to analyse the branching behavior.
263 Otherwise keep it disabled. 269 Otherwise keep it disabled.
264 270
265config PROFILE_ANNOTATED_BRANCHES 271config PROFILE_ANNOTATED_BRANCHES
266 bool "Trace likely/unlikely profiler" 272 bool "Trace likely/unlikely profiler"
@@ -271,7 +277,7 @@ config PROFILE_ANNOTATED_BRANCHES
271 277
272 /sys/kernel/debug/tracing/profile_annotated_branch 278 /sys/kernel/debug/tracing/profile_annotated_branch
273 279
274 Note: this will add a significant overhead, only turn this 280 Note: this will add a significant overhead; only turn this
275 on if you need to profile the system's use of these macros. 281 on if you need to profile the system's use of these macros.
276 282
277config PROFILE_ALL_BRANCHES 283config PROFILE_ALL_BRANCHES
@@ -288,7 +294,7 @@ config PROFILE_ALL_BRANCHES
288 294
289 This configuration, when enabled, will impose a great overhead 295 This configuration, when enabled, will impose a great overhead
290 on the system. This should only be enabled when the system 296 on the system. This should only be enabled when the system
291 is to be analyzed 297 is to be analyzed in much detail.
292endchoice 298endchoice
293 299
294config TRACING_BRANCHES 300config TRACING_BRANCHES
@@ -313,16 +319,6 @@ config BRANCH_TRACER
313 319
314 Say N if unsure. 320 Say N if unsure.
315 321
316config POWER_TRACER
317 bool "Trace power consumption behavior"
318 depends on X86
319 select GENERIC_TRACER
320 help
321 This tracer helps developers to analyze and optimize the kernels
322 power management decisions, specifically the C-state and P-state
323 behavior.
324
325
326config STACK_TRACER 322config STACK_TRACER
327 bool "Trace max stack" 323 bool "Trace max stack"
328 depends on HAVE_FUNCTION_TRACER 324 depends on HAVE_FUNCTION_TRACER
@@ -347,47 +343,8 @@ config STACK_TRACER
347 343
348 Say N if unsure. 344 Say N if unsure.
349 345
350config HW_BRANCH_TRACER
351 depends on HAVE_HW_BRANCH_TRACER
352 bool "Trace hw branches"
353 select GENERIC_TRACER
354 help
355 This tracer records all branches on the system in a circular
356 buffer giving access to the last N branches for each cpu.
357
358config KMEMTRACE
359 bool "Trace SLAB allocations"
360 select GENERIC_TRACER
361 help
362 kmemtrace provides tracing for slab allocator functions, such as
363 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
364 data is then fed to the userspace application in order to analyse
365 allocation hotspots, internal fragmentation and so on, making it
366 possible to see how well an allocator performs, as well as debug
367 and profile kernel code.
368
369 This requires an userspace application to use. See
370 Documentation/trace/kmemtrace.txt for more information.
371
372 Saying Y will make the kernel somewhat larger and slower. However,
373 if you disable kmemtrace at run-time or boot-time, the performance
374 impact is minimal (depending on the arch the kernel is built for).
375
376 If unsure, say N.
377
378config WORKQUEUE_TRACER
379 bool "Trace workqueues"
380 select GENERIC_TRACER
381 help
382 The workqueue tracer provides some statistical informations
383 about each cpu workqueue thread such as the number of the
384 works inserted and executed since their creation. It can help
385 to evaluate the amount of work each of them have to perform.
386 For example it can help a developer to decide whether he should
387 choose a per cpu workqueue instead of a singlethreaded one.
388
389config BLK_DEV_IO_TRACE 346config BLK_DEV_IO_TRACE
390 bool "Support for tracing block io actions" 347 bool "Support for tracing block IO actions"
391 depends on SYSFS 348 depends on SYSFS
392 depends on BLOCK 349 depends on BLOCK
393 select RELAY 350 select RELAY
@@ -411,38 +368,55 @@ config BLK_DEV_IO_TRACE
411 368
412 If unsure, say N. 369 If unsure, say N.
413 370
371config KPROBE_EVENT
372 depends on KPROBES
373 depends on HAVE_REGS_AND_STACK_ACCESS_API
374 bool "Enable kprobes-based dynamic events"
375 select TRACING
376 default y
377 help
378 This allows the user to add tracing events (similar to tracepoints)
379 on the fly via the ftrace interface. See
380 Documentation/trace/kprobetrace.txt for more details.
381
382 Those events can be inserted wherever kprobes can probe, and record
383 various register and memory values.
384
385 This option is also required by perf-probe subcommand of perf tools.
386 If you want to use perf tools, this option is strongly recommended.
387
414config DYNAMIC_FTRACE 388config DYNAMIC_FTRACE
415 bool "enable/disable ftrace tracepoints dynamically" 389 bool "enable/disable ftrace tracepoints dynamically"
416 depends on FUNCTION_TRACER 390 depends on FUNCTION_TRACER
417 depends on HAVE_DYNAMIC_FTRACE 391 depends on HAVE_DYNAMIC_FTRACE
418 default y 392 default y
419 help 393 help
420 This option will modify all the calls to ftrace dynamically 394 This option will modify all the calls to ftrace dynamically
421 (will patch them out of the binary image and replaces them 395 (will patch them out of the binary image and replace them
422 with a No-Op instruction) as they are called. A table is 396 with a No-Op instruction) as they are called. A table is
423 created to dynamically enable them again. 397 created to dynamically enable them again.
424 398
425 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise 399 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
426 has native performance as long as no tracing is active. 400 otherwise has native performance as long as no tracing is active.
427 401
428 The changes to the code are done by a kernel thread that 402 The changes to the code are done by a kernel thread that
429 wakes up once a second and checks to see if any ftrace calls 403 wakes up once a second and checks to see if any ftrace calls
430 were made. If so, it runs stop_machine (stops all CPUS) 404 were made. If so, it runs stop_machine (stops all CPUS)
431 and modifies the code to jump over the call to ftrace. 405 and modifies the code to jump over the call to ftrace.
432 406
433config FUNCTION_PROFILER 407config FUNCTION_PROFILER
434 bool "Kernel function profiler" 408 bool "Kernel function profiler"
435 depends on FUNCTION_TRACER 409 depends on FUNCTION_TRACER
436 default n 410 default n
437 help 411 help
438 This option enables the kernel function profiler. A file is created 412 This option enables the kernel function profiler. A file is created
439 in debugfs called function_profile_enabled which defaults to zero. 413 in debugfs called function_profile_enabled which defaults to zero.
440 When a 1 is echoed into this file profiling begins, and when a 414 When a 1 is echoed into this file profiling begins, and when a
441 zero is entered, profiling stops. A file in the trace_stats 415 zero is entered, profiling stops. A "functions" file is created in
442 directory called functions, that show the list of functions that 416 the trace_stats directory; this file shows the list of functions that
443 have been hit and their counters. 417 have been hit and their counters.
444 418
445 If in doubt, say N 419 If in doubt, say N.
446 420
447config FTRACE_MCOUNT_RECORD 421config FTRACE_MCOUNT_RECORD
448 def_bool y 422 def_bool y
@@ -462,6 +436,18 @@ config FTRACE_STARTUP_TEST
462 functioning properly. It will do tests on all the configured 436 functioning properly. It will do tests on all the configured
463 tracers of ftrace. 437 tracers of ftrace.
464 438
439config EVENT_TRACE_TEST_SYSCALLS
440 bool "Run selftest on syscall events"
441 depends on FTRACE_STARTUP_TEST
442 help
443 This option will also enable testing every syscall event.
444 It only enables the event and disables it and runs various loads
445 with the event enabled. This adds a bit more time for kernel boot
446 up since it runs this on every system call defined.
447
448 TBD - enable a way to actually call the syscalls as we test their
449 events
450
465config MMIOTRACE 451config MMIOTRACE
466 bool "Memory mapped IO tracing" 452 bool "Memory mapped IO tracing"
467 depends on HAVE_MMIOTRACE_SUPPORT && PCI 453 depends on HAVE_MMIOTRACE_SUPPORT && PCI
@@ -489,8 +475,8 @@ config RING_BUFFER_BENCHMARK
489 tristate "Ring buffer benchmark stress tester" 475 tristate "Ring buffer benchmark stress tester"
490 depends on RING_BUFFER 476 depends on RING_BUFFER
491 help 477 help
492 This option creates a test to stress the ring buffer and bench mark it. 478 This option creates a test to stress the ring buffer and benchmark it.
493 It creates its own ring buffer such that it will not interfer with 479 It creates its own ring buffer such that it will not interfere with
494 any other users of the ring buffer (such as ftrace). It then creates 480 any other users of the ring buffer (such as ftrace). It then creates
495 a producer and consumer that will run for 10 seconds and sleep for 481 a producer and consumer that will run for 10 seconds and sleep for
496 10 seconds. Each interval it will print out the number of events 482 10 seconds. Each interval it will print out the number of events
@@ -499,7 +485,7 @@ config RING_BUFFER_BENCHMARK
499 It does not disable interrupts or raise its priority, so it may be 485 It does not disable interrupts or raise its priority, so it may be
500 affected by processes that are running. 486 affected by processes that are running.
501 487
502 If unsure, say N 488 If unsure, say N.
503 489
504endif # FTRACE 490endif # FTRACE
505 491
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 844164dca90a..761c510a06c5 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -30,7 +30,6 @@ obj-$(CONFIG_TRACING) += trace_output.o
30obj-$(CONFIG_TRACING) += trace_stat.o 30obj-$(CONFIG_TRACING) += trace_stat.o
31obj-$(CONFIG_TRACING) += trace_printk.o 31obj-$(CONFIG_TRACING) += trace_printk.o
32obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o 32obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
33obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
34obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o 33obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
35obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o 34obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
36obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o 35obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
@@ -38,12 +37,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
38obj-$(CONFIG_NOP_TRACER) += trace_nop.o 37obj-$(CONFIG_NOP_TRACER) += trace_nop.o
39obj-$(CONFIG_STACK_TRACER) += trace_stack.o 38obj-$(CONFIG_STACK_TRACER) += trace_stack.o
40obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o 39obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
41obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o 40obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 41obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
44obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
45obj-$(CONFIG_POWER_TRACER) += trace_power.o
46obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
47obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o 42obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
48obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 43obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
49ifeq ($(CONFIG_BLOCK),y) 44ifeq ($(CONFIG_BLOCK),y)
@@ -52,7 +47,14 @@ endif
52obj-$(CONFIG_EVENT_TRACING) += trace_events.o 47obj-$(CONFIG_EVENT_TRACING) += trace_events.o
53obj-$(CONFIG_EVENT_TRACING) += trace_export.o 48obj-$(CONFIG_EVENT_TRACING) += trace_export.o
54obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 49obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
55obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 50ifeq ($(CONFIG_PERF_EVENTS),y)
51obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
52endif
56obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 53obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
54obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
55obj-$(CONFIG_TRACEPOINTS) += power-traces.o
56ifeq ($(CONFIG_TRACING),y)
57obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
58endif
57 59
58libftrace-y := ftrace.o 60libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 39af8af6fc30..7b8ec0281548 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -21,6 +21,7 @@
21#include <linux/percpu.h> 21#include <linux/percpu.h>
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/slab.h>
24#include <linux/debugfs.h> 25#include <linux/debugfs.h>
25#include <linux/time.h> 26#include <linux/time.h>
26#include <linux/uaccess.h> 27#include <linux/uaccess.h>
@@ -64,13 +65,15 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
64{ 65{
65 struct blk_io_trace *t; 66 struct blk_io_trace *t;
66 struct ring_buffer_event *event = NULL; 67 struct ring_buffer_event *event = NULL;
68 struct ring_buffer *buffer = NULL;
67 int pc = 0; 69 int pc = 0;
68 int cpu = smp_processor_id(); 70 int cpu = smp_processor_id();
69 bool blk_tracer = blk_tracer_enabled; 71 bool blk_tracer = blk_tracer_enabled;
70 72
71 if (blk_tracer) { 73 if (blk_tracer) {
74 buffer = blk_tr->buffer;
72 pc = preempt_count(); 75 pc = preempt_count();
73 event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, 76 event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
74 sizeof(*t) + len, 77 sizeof(*t) + len,
75 0, pc); 78 0, pc);
76 if (!event) 79 if (!event)
@@ -95,7 +98,7 @@ record_it:
95 memcpy((void *) t + sizeof(*t), data, len); 98 memcpy((void *) t + sizeof(*t), data, len);
96 99
97 if (blk_tracer) 100 if (blk_tracer)
98 trace_buffer_unlock_commit(blk_tr, event, 0, pc); 101 trace_buffer_unlock_commit(buffer, event, 0, pc);
99 } 102 }
100} 103}
101 104
@@ -165,9 +168,11 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
165static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), 168static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
166 BLK_TC_ACT(BLK_TC_WRITE) }; 169 BLK_TC_ACT(BLK_TC_WRITE) };
167 170
171#define BLK_TC_RAHEAD BLK_TC_AHEAD
172
168/* The ilog2() calls fall out because they're constant */ 173/* The ilog2() calls fall out because they're constant */
169#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ 174#define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \
170 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name)) 175 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name))
171 176
172/* 177/*
173 * The worker for the various blk_add_trace*() types. Fills out a 178 * The worker for the various blk_add_trace*() types. Fills out a
@@ -178,6 +183,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
178{ 183{
179 struct task_struct *tsk = current; 184 struct task_struct *tsk = current;
180 struct ring_buffer_event *event = NULL; 185 struct ring_buffer_event *event = NULL;
186 struct ring_buffer *buffer = NULL;
181 struct blk_io_trace *t; 187 struct blk_io_trace *t;
182 unsigned long flags = 0; 188 unsigned long flags = 0;
183 unsigned long *sequence; 189 unsigned long *sequence;
@@ -189,9 +195,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
189 return; 195 return;
190 196
191 what |= ddir_act[rw & WRITE]; 197 what |= ddir_act[rw & WRITE];
192 what |= MASK_TC_BIT(rw, BARRIER); 198 what |= MASK_TC_BIT(rw, SYNC);
193 what |= MASK_TC_BIT(rw, SYNCIO); 199 what |= MASK_TC_BIT(rw, RAHEAD);
194 what |= MASK_TC_BIT(rw, AHEAD);
195 what |= MASK_TC_BIT(rw, META); 200 what |= MASK_TC_BIT(rw, META);
196 what |= MASK_TC_BIT(rw, DISCARD); 201 what |= MASK_TC_BIT(rw, DISCARD);
197 202
@@ -203,8 +208,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
203 if (blk_tracer) { 208 if (blk_tracer) {
204 tracing_record_cmdline(current); 209 tracing_record_cmdline(current);
205 210
211 buffer = blk_tr->buffer;
206 pc = preempt_count(); 212 pc = preempt_count();
207 event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, 213 event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
208 sizeof(*t) + pdu_len, 214 sizeof(*t) + pdu_len,
209 0, pc); 215 0, pc);
210 if (!event) 216 if (!event)
@@ -251,7 +257,7 @@ record_it:
251 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); 257 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
252 258
253 if (blk_tracer) { 259 if (blk_tracer) {
254 trace_buffer_unlock_commit(blk_tr, event, 0, pc); 260 trace_buffer_unlock_commit(buffer, event, 0, pc);
255 return; 261 return;
256 } 262 }
257 } 263 }
@@ -266,8 +272,8 @@ static void blk_trace_free(struct blk_trace *bt)
266{ 272{
267 debugfs_remove(bt->msg_file); 273 debugfs_remove(bt->msg_file);
268 debugfs_remove(bt->dropped_file); 274 debugfs_remove(bt->dropped_file);
269 debugfs_remove(bt->dir);
270 relay_close(bt->rchan); 275 relay_close(bt->rchan);
276 debugfs_remove(bt->dir);
271 free_percpu(bt->sequence); 277 free_percpu(bt->sequence);
272 free_percpu(bt->msg_data); 278 free_percpu(bt->msg_data);
273 kfree(bt); 279 kfree(bt);
@@ -317,6 +323,7 @@ static const struct file_operations blk_dropped_fops = {
317 .owner = THIS_MODULE, 323 .owner = THIS_MODULE,
318 .open = blk_dropped_open, 324 .open = blk_dropped_open,
319 .read = blk_dropped_read, 325 .read = blk_dropped_read,
326 .llseek = default_llseek,
320}; 327};
321 328
322static int blk_msg_open(struct inode *inode, struct file *filp) 329static int blk_msg_open(struct inode *inode, struct file *filp)
@@ -356,6 +363,7 @@ static const struct file_operations blk_msg_fops = {
356 .owner = THIS_MODULE, 363 .owner = THIS_MODULE,
357 .open = blk_msg_open, 364 .open = blk_msg_open,
358 .write = blk_msg_write, 365 .write = blk_msg_write,
366 .llseek = noop_llseek,
359}; 367};
360 368
361/* 369/*
@@ -377,18 +385,8 @@ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
377 385
378static int blk_remove_buf_file_callback(struct dentry *dentry) 386static int blk_remove_buf_file_callback(struct dentry *dentry)
379{ 387{
380 struct dentry *parent = dentry->d_parent;
381 debugfs_remove(dentry); 388 debugfs_remove(dentry);
382 389
383 /*
384 * this will fail for all but the last file, but that is ok. what we
385 * care about is the top level buts->name directory going away, when
386 * the last trace file is gone. Then we don't have to rmdir() that
387 * manually on trace stop, so it nicely solves the issue with
388 * force killing of running traces.
389 */
390
391 debugfs_remove(parent);
392 return 0; 390 return 0;
393} 391}
394 392
@@ -545,13 +543,49 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
545 if (ret) 543 if (ret)
546 return ret; 544 return ret;
547 545
548 if (copy_to_user(arg, &buts, sizeof(buts))) 546 if (copy_to_user(arg, &buts, sizeof(buts))) {
547 blk_trace_remove(q);
549 return -EFAULT; 548 return -EFAULT;
550 549 }
551 return 0; 550 return 0;
552} 551}
553EXPORT_SYMBOL_GPL(blk_trace_setup); 552EXPORT_SYMBOL_GPL(blk_trace_setup);
554 553
554#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
555static int compat_blk_trace_setup(struct request_queue *q, char *name,
556 dev_t dev, struct block_device *bdev,
557 char __user *arg)
558{
559 struct blk_user_trace_setup buts;
560 struct compat_blk_user_trace_setup cbuts;
561 int ret;
562
563 if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
564 return -EFAULT;
565
566 buts = (struct blk_user_trace_setup) {
567 .act_mask = cbuts.act_mask,
568 .buf_size = cbuts.buf_size,
569 .buf_nr = cbuts.buf_nr,
570 .start_lba = cbuts.start_lba,
571 .end_lba = cbuts.end_lba,
572 .pid = cbuts.pid,
573 };
574 memcpy(&buts.name, &cbuts.name, 32);
575
576 ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
577 if (ret)
578 return ret;
579
580 if (copy_to_user(arg, &buts.name, 32)) {
581 blk_trace_remove(q);
582 return -EFAULT;
583 }
584
585 return 0;
586}
587#endif
588
555int blk_trace_startstop(struct request_queue *q, int start) 589int blk_trace_startstop(struct request_queue *q, int start)
556{ 590{
557 int ret; 591 int ret;
@@ -611,6 +645,12 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
611 bdevname(bdev, b); 645 bdevname(bdev, b);
612 ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); 646 ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
613 break; 647 break;
648#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
649 case BLKTRACESETUP32:
650 bdevname(bdev, b);
651 ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
652 break;
653#endif
614 case BLKTRACESTART: 654 case BLKTRACESTART:
615 start = 1; 655 start = 1;
616 case BLKTRACESTOP: 656 case BLKTRACESTOP:
@@ -664,10 +704,13 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
664 if (likely(!bt)) 704 if (likely(!bt))
665 return; 705 return;
666 706
667 if (blk_discard_rq(rq)) 707 if (rq->cmd_flags & REQ_DISCARD)
668 rw |= (1 << BIO_RW_DISCARD); 708 rw |= REQ_DISCARD;
709
710 if (rq->cmd_flags & REQ_SECURE)
711 rw |= REQ_SECURE;
669 712
670 if (blk_pc_request(rq)) { 713 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
671 what |= BLK_TC_ACT(BLK_TC_PC); 714 what |= BLK_TC_ACT(BLK_TC_PC);
672 __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, 715 __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw,
673 what, rq->errors, rq->cmd_len, rq->cmd); 716 what, rq->errors, rq->cmd_len, rq->cmd);
@@ -678,28 +721,33 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
678 } 721 }
679} 722}
680 723
681static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq) 724static void blk_add_trace_rq_abort(void *ignore,
725 struct request_queue *q, struct request *rq)
682{ 726{
683 blk_add_trace_rq(q, rq, BLK_TA_ABORT); 727 blk_add_trace_rq(q, rq, BLK_TA_ABORT);
684} 728}
685 729
686static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq) 730static void blk_add_trace_rq_insert(void *ignore,
731 struct request_queue *q, struct request *rq)
687{ 732{
688 blk_add_trace_rq(q, rq, BLK_TA_INSERT); 733 blk_add_trace_rq(q, rq, BLK_TA_INSERT);
689} 734}
690 735
691static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq) 736static void blk_add_trace_rq_issue(void *ignore,
737 struct request_queue *q, struct request *rq)
692{ 738{
693 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 739 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
694} 740}
695 741
696static void blk_add_trace_rq_requeue(struct request_queue *q, 742static void blk_add_trace_rq_requeue(void *ignore,
743 struct request_queue *q,
697 struct request *rq) 744 struct request *rq)
698{ 745{
699 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 746 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
700} 747}
701 748
702static void blk_add_trace_rq_complete(struct request_queue *q, 749static void blk_add_trace_rq_complete(void *ignore,
750 struct request_queue *q,
703 struct request *rq) 751 struct request *rq)
704{ 752{
705 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); 753 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
@@ -727,34 +775,40 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
727 !bio_flagged(bio, BIO_UPTODATE), 0, NULL); 775 !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
728} 776}
729 777
730static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio) 778static void blk_add_trace_bio_bounce(void *ignore,
779 struct request_queue *q, struct bio *bio)
731{ 780{
732 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); 781 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
733} 782}
734 783
735static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio) 784static void blk_add_trace_bio_complete(void *ignore,
785 struct request_queue *q, struct bio *bio)
736{ 786{
737 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); 787 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
738} 788}
739 789
740static void blk_add_trace_bio_backmerge(struct request_queue *q, 790static void blk_add_trace_bio_backmerge(void *ignore,
791 struct request_queue *q,
741 struct bio *bio) 792 struct bio *bio)
742{ 793{
743 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 794 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
744} 795}
745 796
746static void blk_add_trace_bio_frontmerge(struct request_queue *q, 797static void blk_add_trace_bio_frontmerge(void *ignore,
798 struct request_queue *q,
747 struct bio *bio) 799 struct bio *bio)
748{ 800{
749 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 801 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
750} 802}
751 803
752static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio) 804static void blk_add_trace_bio_queue(void *ignore,
805 struct request_queue *q, struct bio *bio)
753{ 806{
754 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 807 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
755} 808}
756 809
757static void blk_add_trace_getrq(struct request_queue *q, 810static void blk_add_trace_getrq(void *ignore,
811 struct request_queue *q,
758 struct bio *bio, int rw) 812 struct bio *bio, int rw)
759{ 813{
760 if (bio) 814 if (bio)
@@ -768,7 +822,8 @@ static void blk_add_trace_getrq(struct request_queue *q,
768} 822}
769 823
770 824
771static void blk_add_trace_sleeprq(struct request_queue *q, 825static void blk_add_trace_sleeprq(void *ignore,
826 struct request_queue *q,
772 struct bio *bio, int rw) 827 struct bio *bio, int rw)
773{ 828{
774 if (bio) 829 if (bio)
@@ -782,7 +837,7 @@ static void blk_add_trace_sleeprq(struct request_queue *q,
782 } 837 }
783} 838}
784 839
785static void blk_add_trace_plug(struct request_queue *q) 840static void blk_add_trace_plug(void *ignore, struct request_queue *q)
786{ 841{
787 struct blk_trace *bt = q->blk_trace; 842 struct blk_trace *bt = q->blk_trace;
788 843
@@ -790,7 +845,7 @@ static void blk_add_trace_plug(struct request_queue *q)
790 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); 845 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
791} 846}
792 847
793static void blk_add_trace_unplug_io(struct request_queue *q) 848static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q)
794{ 849{
795 struct blk_trace *bt = q->blk_trace; 850 struct blk_trace *bt = q->blk_trace;
796 851
@@ -803,7 +858,7 @@ static void blk_add_trace_unplug_io(struct request_queue *q)
803 } 858 }
804} 859}
805 860
806static void blk_add_trace_unplug_timer(struct request_queue *q) 861static void blk_add_trace_unplug_timer(void *ignore, struct request_queue *q)
807{ 862{
808 struct blk_trace *bt = q->blk_trace; 863 struct blk_trace *bt = q->blk_trace;
809 864
@@ -816,7 +871,8 @@ static void blk_add_trace_unplug_timer(struct request_queue *q)
816 } 871 }
817} 872}
818 873
819static void blk_add_trace_split(struct request_queue *q, struct bio *bio, 874static void blk_add_trace_split(void *ignore,
875 struct request_queue *q, struct bio *bio,
820 unsigned int pdu) 876 unsigned int pdu)
821{ 877{
822 struct blk_trace *bt = q->blk_trace; 878 struct blk_trace *bt = q->blk_trace;
@@ -832,6 +888,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
832 888
833/** 889/**
834 * blk_add_trace_remap - Add a trace for a remap operation 890 * blk_add_trace_remap - Add a trace for a remap operation
891 * @ignore: trace callback data parameter (not used)
835 * @q: queue the io is for 892 * @q: queue the io is for
836 * @bio: the source bio 893 * @bio: the source bio
837 * @dev: target device 894 * @dev: target device
@@ -842,8 +899,9 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
842 * it spans a stripe (or similar). Add a trace for that action. 899 * it spans a stripe (or similar). Add a trace for that action.
843 * 900 *
844 **/ 901 **/
845static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, 902static void blk_add_trace_remap(void *ignore,
846 dev_t dev, sector_t from) 903 struct request_queue *q, struct bio *bio,
904 dev_t dev, sector_t from)
847{ 905{
848 struct blk_trace *bt = q->blk_trace; 906 struct blk_trace *bt = q->blk_trace;
849 struct blk_io_trace_remap r; 907 struct blk_io_trace_remap r;
@@ -861,6 +919,39 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
861} 919}
862 920
863/** 921/**
922 * blk_add_trace_rq_remap - Add a trace for a request-remap operation
923 * @ignore: trace callback data parameter (not used)
924 * @q: queue the io is for
925 * @rq: the source request
926 * @dev: target device
927 * @from: source sector
928 *
929 * Description:
930 * Device mapper remaps request to other devices.
931 * Add a trace for that action.
932 *
933 **/
934static void blk_add_trace_rq_remap(void *ignore,
935 struct request_queue *q,
936 struct request *rq, dev_t dev,
937 sector_t from)
938{
939 struct blk_trace *bt = q->blk_trace;
940 struct blk_io_trace_remap r;
941
942 if (likely(!bt))
943 return;
944
945 r.device_from = cpu_to_be32(dev);
946 r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
947 r.sector_from = cpu_to_be64(from);
948
949 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
950 rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors,
951 sizeof(r), &r);
952}
953
954/**
864 * blk_add_driver_data - Add binary message with driver-specific data 955 * blk_add_driver_data - Add binary message with driver-specific data
865 * @q: queue the io is for 956 * @q: queue the io is for
866 * @rq: io request 957 * @rq: io request
@@ -880,7 +971,7 @@ void blk_add_driver_data(struct request_queue *q,
880 if (likely(!bt)) 971 if (likely(!bt))
881 return; 972 return;
882 973
883 if (blk_pc_request(rq)) 974 if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
884 __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, 975 __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
885 BLK_TA_DRV_DATA, rq->errors, len, data); 976 BLK_TA_DRV_DATA, rq->errors, len, data);
886 else 977 else
@@ -893,61 +984,64 @@ static void blk_register_tracepoints(void)
893{ 984{
894 int ret; 985 int ret;
895 986
896 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort); 987 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort, NULL);
988 WARN_ON(ret);
989 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
897 WARN_ON(ret); 990 WARN_ON(ret);
898 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert); 991 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
899 WARN_ON(ret); 992 WARN_ON(ret);
900 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue); 993 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
901 WARN_ON(ret); 994 WARN_ON(ret);
902 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue); 995 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
903 WARN_ON(ret); 996 WARN_ON(ret);
904 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete); 997 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
905 WARN_ON(ret); 998 WARN_ON(ret);
906 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce); 999 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
907 WARN_ON(ret); 1000 WARN_ON(ret);
908 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete); 1001 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
909 WARN_ON(ret); 1002 WARN_ON(ret);
910 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); 1003 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
911 WARN_ON(ret); 1004 WARN_ON(ret);
912 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); 1005 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
913 WARN_ON(ret); 1006 WARN_ON(ret);
914 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue); 1007 ret = register_trace_block_getrq(blk_add_trace_getrq, NULL);
915 WARN_ON(ret); 1008 WARN_ON(ret);
916 ret = register_trace_block_getrq(blk_add_trace_getrq); 1009 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
917 WARN_ON(ret); 1010 WARN_ON(ret);
918 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq); 1011 ret = register_trace_block_plug(blk_add_trace_plug, NULL);
919 WARN_ON(ret); 1012 WARN_ON(ret);
920 ret = register_trace_block_plug(blk_add_trace_plug); 1013 ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
921 WARN_ON(ret); 1014 WARN_ON(ret);
922 ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer); 1015 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
923 WARN_ON(ret); 1016 WARN_ON(ret);
924 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io); 1017 ret = register_trace_block_split(blk_add_trace_split, NULL);
925 WARN_ON(ret); 1018 WARN_ON(ret);
926 ret = register_trace_block_split(blk_add_trace_split); 1019 ret = register_trace_block_remap(blk_add_trace_remap, NULL);
927 WARN_ON(ret); 1020 WARN_ON(ret);
928 ret = register_trace_block_remap(blk_add_trace_remap); 1021 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
929 WARN_ON(ret); 1022 WARN_ON(ret);
930} 1023}
931 1024
932static void blk_unregister_tracepoints(void) 1025static void blk_unregister_tracepoints(void)
933{ 1026{
934 unregister_trace_block_remap(blk_add_trace_remap); 1027 unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
935 unregister_trace_block_split(blk_add_trace_split); 1028 unregister_trace_block_remap(blk_add_trace_remap, NULL);
936 unregister_trace_block_unplug_io(blk_add_trace_unplug_io); 1029 unregister_trace_block_split(blk_add_trace_split, NULL);
937 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer); 1030 unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
938 unregister_trace_block_plug(blk_add_trace_plug); 1031 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
939 unregister_trace_block_sleeprq(blk_add_trace_sleeprq); 1032 unregister_trace_block_plug(blk_add_trace_plug, NULL);
940 unregister_trace_block_getrq(blk_add_trace_getrq); 1033 unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
941 unregister_trace_block_bio_queue(blk_add_trace_bio_queue); 1034 unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
942 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); 1035 unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
943 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); 1036 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
944 unregister_trace_block_bio_complete(blk_add_trace_bio_complete); 1037 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
945 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce); 1038 unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
946 unregister_trace_block_rq_complete(blk_add_trace_rq_complete); 1039 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
947 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue); 1040 unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
948 unregister_trace_block_rq_issue(blk_add_trace_rq_issue); 1041 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
949 unregister_trace_block_rq_insert(blk_add_trace_rq_insert); 1042 unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
950 unregister_trace_block_rq_abort(blk_add_trace_rq_abort); 1043 unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
1044 unregister_trace_block_rq_abort(blk_add_trace_rq_abort, NULL);
951 1045
952 tracepoint_synchronize_unregister(); 1046 tracepoint_synchronize_unregister();
953} 1047}
@@ -1290,7 +1384,7 @@ out:
1290} 1384}
1291 1385
1292static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, 1386static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
1293 int flags) 1387 int flags, struct trace_event *event)
1294{ 1388{
1295 return print_one_line(iter, false); 1389 return print_one_line(iter, false);
1296} 1390}
@@ -1312,7 +1406,8 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
1312} 1406}
1313 1407
1314static enum print_line_t 1408static enum print_line_t
1315blk_trace_event_print_binary(struct trace_iterator *iter, int flags) 1409blk_trace_event_print_binary(struct trace_iterator *iter, int flags,
1410 struct trace_event *event)
1316{ 1411{
1317 return blk_trace_synthesize_old_trace(iter) ? 1412 return blk_trace_synthesize_old_trace(iter) ?
1318 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; 1413 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
@@ -1350,12 +1445,16 @@ static struct tracer blk_tracer __read_mostly = {
1350 .set_flag = blk_tracer_set_flag, 1445 .set_flag = blk_tracer_set_flag,
1351}; 1446};
1352 1447
1353static struct trace_event trace_blk_event = { 1448static struct trace_event_functions trace_blk_event_funcs = {
1354 .type = TRACE_BLK,
1355 .trace = blk_trace_event_print, 1449 .trace = blk_trace_event_print,
1356 .binary = blk_trace_event_print_binary, 1450 .binary = blk_trace_event_print_binary,
1357}; 1451};
1358 1452
1453static struct trace_event trace_blk_event = {
1454 .type = TRACE_BLK,
1455 .funcs = &trace_blk_event_funcs,
1456};
1457
1359static int __init init_blk_tracer(void) 1458static int __init init_blk_tracer(void)
1360{ 1459{
1361 if (!register_ftrace_event(&trace_blk_event)) { 1460 if (!register_ftrace_event(&trace_blk_event)) {
@@ -1550,10 +1649,9 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1550 struct block_device *bdev; 1649 struct block_device *bdev;
1551 ssize_t ret = -ENXIO; 1650 ssize_t ret = -ENXIO;
1552 1651
1553 lock_kernel();
1554 bdev = bdget(part_devt(p)); 1652 bdev = bdget(part_devt(p));
1555 if (bdev == NULL) 1653 if (bdev == NULL)
1556 goto out_unlock_kernel; 1654 goto out;
1557 1655
1558 q = blk_trace_get_queue(bdev); 1656 q = blk_trace_get_queue(bdev);
1559 if (q == NULL) 1657 if (q == NULL)
@@ -1581,8 +1679,7 @@ out_unlock_bdev:
1581 mutex_unlock(&bdev->bd_mutex); 1679 mutex_unlock(&bdev->bd_mutex);
1582out_bdput: 1680out_bdput:
1583 bdput(bdev); 1681 bdput(bdev);
1584out_unlock_kernel: 1682out:
1585 unlock_kernel();
1586 return ret; 1683 return ret;
1587} 1684}
1588 1685
@@ -1612,11 +1709,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1612 1709
1613 ret = -ENXIO; 1710 ret = -ENXIO;
1614 1711
1615 lock_kernel();
1616 p = dev_to_part(dev); 1712 p = dev_to_part(dev);
1617 bdev = bdget(part_devt(p)); 1713 bdev = bdget(part_devt(p));
1618 if (bdev == NULL) 1714 if (bdev == NULL)
1619 goto out_unlock_kernel; 1715 goto out;
1620 1716
1621 q = blk_trace_get_queue(bdev); 1717 q = blk_trace_get_queue(bdev);
1622 if (q == NULL) 1718 if (q == NULL)
@@ -1651,8 +1747,6 @@ out_unlock_bdev:
1651 mutex_unlock(&bdev->bd_mutex); 1747 mutex_unlock(&bdev->bd_mutex);
1652out_bdput: 1748out_bdput:
1653 bdput(bdev); 1749 bdput(bdev);
1654out_unlock_kernel:
1655 unlock_kernel();
1656out: 1750out:
1657 return ret ? ret : count; 1751 return ret ? ret : count;
1658} 1752}
@@ -1662,6 +1756,11 @@ int blk_trace_init_sysfs(struct device *dev)
1662 return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); 1756 return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
1663} 1757}
1664 1758
1759void blk_trace_remove_sysfs(struct device *dev)
1760{
1761 sysfs_remove_group(&dev->kobj, &blk_trace_attr_group);
1762}
1763
1665#endif /* CONFIG_BLK_DEV_IO_TRACE */ 1764#endif /* CONFIG_BLK_DEV_IO_TRACE */
1666 1765
1667#ifdef CONFIG_EVENT_TRACING 1766#ifdef CONFIG_EVENT_TRACING
@@ -1672,7 +1771,7 @@ void blk_dump_cmd(char *buf, struct request *rq)
1672 int len = rq->cmd_len; 1771 int len = rq->cmd_len;
1673 unsigned char *cmd = rq->cmd; 1772 unsigned char *cmd = rq->cmd;
1674 1773
1675 if (!blk_pc_request(rq)) { 1774 if (rq->cmd_type != REQ_TYPE_BLOCK_PC) {
1676 buf[0] = '\0'; 1775 buf[0] = '\0';
1677 return; 1776 return;
1678 } 1777 }
@@ -1697,21 +1796,21 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1697 1796
1698 if (rw & WRITE) 1797 if (rw & WRITE)
1699 rwbs[i++] = 'W'; 1798 rwbs[i++] = 'W';
1700 else if (rw & 1 << BIO_RW_DISCARD) 1799 else if (rw & REQ_DISCARD)
1701 rwbs[i++] = 'D'; 1800 rwbs[i++] = 'D';
1702 else if (bytes) 1801 else if (bytes)
1703 rwbs[i++] = 'R'; 1802 rwbs[i++] = 'R';
1704 else 1803 else
1705 rwbs[i++] = 'N'; 1804 rwbs[i++] = 'N';
1706 1805
1707 if (rw & 1 << BIO_RW_AHEAD) 1806 if (rw & REQ_RAHEAD)
1708 rwbs[i++] = 'A'; 1807 rwbs[i++] = 'A';
1709 if (rw & 1 << BIO_RW_BARRIER) 1808 if (rw & REQ_SYNC)
1710 rwbs[i++] = 'B';
1711 if (rw & 1 << BIO_RW_SYNCIO)
1712 rwbs[i++] = 'S'; 1809 rwbs[i++] = 'S';
1713 if (rw & 1 << BIO_RW_META) 1810 if (rw & REQ_META)
1714 rwbs[i++] = 'M'; 1811 rwbs[i++] = 'M';
1812 if (rw & REQ_SECURE)
1813 rwbs[i++] = 'E';
1715 1814
1716 rwbs[i] = '\0'; 1815 rwbs[i] = '\0';
1717} 1816}
@@ -1721,8 +1820,11 @@ void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
1721 int rw = rq->cmd_flags & 0x03; 1820 int rw = rq->cmd_flags & 0x03;
1722 int bytes; 1821 int bytes;
1723 1822
1724 if (blk_discard_rq(rq)) 1823 if (rq->cmd_flags & REQ_DISCARD)
1725 rw |= (1 << BIO_RW_DISCARD); 1824 rw |= REQ_DISCARD;
1825
1826 if (rq->cmd_flags & REQ_SECURE)
1827 rw |= REQ_SECURE;
1726 1828
1727 bytes = blk_rq_bytes(rq); 1829 bytes = blk_rq_bytes(rq);
1728 1830
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3718d55fb4c3..f3dadae83883 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,12 +22,13 @@
22#include <linux/hardirq.h> 22#include <linux/hardirq.h>
23#include <linux/kthread.h> 23#include <linux/kthread.h>
24#include <linux/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/kprobes.h>
26#include <linux/ftrace.h> 25#include <linux/ftrace.h>
27#include <linux/sysctl.h> 26#include <linux/sysctl.h>
27#include <linux/slab.h>
28#include <linux/ctype.h> 28#include <linux/ctype.h>
29#include <linux/list.h> 29#include <linux/list.h>
30#include <linux/hash.h> 30#include <linux/hash.h>
31#include <linux/rcupdate.h>
31 32
32#include <trace/events/sched.h> 33#include <trace/events/sched.h>
33 34
@@ -60,6 +61,13 @@ static int last_ftrace_enabled;
60/* Quick disabling of function tracer. */ 61/* Quick disabling of function tracer. */
61int function_trace_stop; 62int function_trace_stop;
62 63
64/* List for set_ftrace_pid's pids. */
65LIST_HEAD(ftrace_pids);
66struct ftrace_pid {
67 struct list_head list;
68 struct pid *pid;
69};
70
63/* 71/*
64 * ftrace_disabled is set when an anomaly is discovered. 72 * ftrace_disabled is set when an anomaly is discovered.
65 * ftrace_disabled is much stronger than ftrace_enabled. 73 * ftrace_disabled is much stronger than ftrace_enabled.
@@ -78,18 +86,22 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
78ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; 86ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
79ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 87ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
80 88
89/*
90 * Traverse the ftrace_list, invoking all entries. The reason that we
91 * can use rcu_dereference_raw() is that elements removed from this list
92 * are simply leaked, so there is no need to interact with a grace-period
93 * mechanism. The rcu_dereference_raw() calls are needed to handle
94 * concurrent insertions into the ftrace_list.
95 *
96 * Silly Alpha and silly pointer-speculation compiler optimizations!
97 */
81static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 98static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
82{ 99{
83 struct ftrace_ops *op = ftrace_list; 100 struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
84
85 /* in case someone actually ports this to alpha! */
86 read_barrier_depends();
87 101
88 while (op != &ftrace_list_end) { 102 while (op != &ftrace_list_end) {
89 /* silly alpha */
90 read_barrier_depends();
91 op->func(ip, parent_ip); 103 op->func(ip, parent_ip);
92 op = op->next; 104 op = rcu_dereference_raw(op->next); /*see above*/
93 }; 105 };
94} 106}
95 107
@@ -144,8 +156,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
144 * the ops->next pointer is valid before another CPU sees 156 * the ops->next pointer is valid before another CPU sees
145 * the ops pointer included into the ftrace_list. 157 * the ops pointer included into the ftrace_list.
146 */ 158 */
147 smp_wmb(); 159 rcu_assign_pointer(ftrace_list, ops);
148 ftrace_list = ops;
149 160
150 if (ftrace_enabled) { 161 if (ftrace_enabled) {
151 ftrace_func_t func; 162 ftrace_func_t func;
@@ -155,7 +166,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
155 else 166 else
156 func = ftrace_list_func; 167 func = ftrace_list_func;
157 168
158 if (ftrace_pid_trace) { 169 if (!list_empty(&ftrace_pids)) {
159 set_ftrace_pid_function(func); 170 set_ftrace_pid_function(func);
160 func = ftrace_pid_func; 171 func = ftrace_pid_func;
161 } 172 }
@@ -203,7 +214,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
203 if (ftrace_list->next == &ftrace_list_end) { 214 if (ftrace_list->next == &ftrace_list_end) {
204 ftrace_func_t func = ftrace_list->func; 215 ftrace_func_t func = ftrace_list->func;
205 216
206 if (ftrace_pid_trace) { 217 if (!list_empty(&ftrace_pids)) {
207 set_ftrace_pid_function(func); 218 set_ftrace_pid_function(func);
208 func = ftrace_pid_func; 219 func = ftrace_pid_func;
209 } 220 }
@@ -225,9 +236,13 @@ static void ftrace_update_pid_func(void)
225 if (ftrace_trace_function == ftrace_stub) 236 if (ftrace_trace_function == ftrace_stub)
226 return; 237 return;
227 238
239#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
228 func = ftrace_trace_function; 240 func = ftrace_trace_function;
241#else
242 func = __ftrace_trace_function;
243#endif
229 244
230 if (ftrace_pid_trace) { 245 if (!list_empty(&ftrace_pids)) {
231 set_ftrace_pid_function(func); 246 set_ftrace_pid_function(func);
232 func = ftrace_pid_func; 247 func = ftrace_pid_func;
233 } else { 248 } else {
@@ -249,6 +264,7 @@ struct ftrace_profile {
249 unsigned long counter; 264 unsigned long counter;
250#ifdef CONFIG_FUNCTION_GRAPH_TRACER 265#ifdef CONFIG_FUNCTION_GRAPH_TRACER
251 unsigned long long time; 266 unsigned long long time;
267 unsigned long long time_squared;
252#endif 268#endif
253}; 269};
254 270
@@ -291,7 +307,9 @@ function_stat_next(void *v, int idx)
291 pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK); 307 pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
292 308
293 again: 309 again:
294 rec++; 310 if (idx != 0)
311 rec++;
312
295 if ((void *)rec >= (void *)&pg->records[pg->index]) { 313 if ((void *)rec >= (void *)&pg->records[pg->index]) {
296 pg = pg->next; 314 pg = pg->next;
297 if (!pg) 315 if (!pg)
@@ -349,9 +367,9 @@ static int function_stat_headers(struct seq_file *m)
349{ 367{
350#ifdef CONFIG_FUNCTION_GRAPH_TRACER 368#ifdef CONFIG_FUNCTION_GRAPH_TRACER
351 seq_printf(m, " Function " 369 seq_printf(m, " Function "
352 "Hit Time Avg\n" 370 "Hit Time Avg s^2\n"
353 " -------- " 371 " -------- "
354 "--- ---- ---\n"); 372 "--- ---- --- ---\n");
355#else 373#else
356 seq_printf(m, " Function Hit\n" 374 seq_printf(m, " Function Hit\n"
357 " -------- ---\n"); 375 " -------- ---\n");
@@ -363,11 +381,19 @@ static int function_stat_show(struct seq_file *m, void *v)
363{ 381{
364 struct ftrace_profile *rec = v; 382 struct ftrace_profile *rec = v;
365 char str[KSYM_SYMBOL_LEN]; 383 char str[KSYM_SYMBOL_LEN];
384 int ret = 0;
366#ifdef CONFIG_FUNCTION_GRAPH_TRACER 385#ifdef CONFIG_FUNCTION_GRAPH_TRACER
367 static DEFINE_MUTEX(mutex);
368 static struct trace_seq s; 386 static struct trace_seq s;
369 unsigned long long avg; 387 unsigned long long avg;
388 unsigned long long stddev;
370#endif 389#endif
390 mutex_lock(&ftrace_profile_lock);
391
392 /* we raced with function_profile_reset() */
393 if (unlikely(rec->counter == 0)) {
394 ret = -EBUSY;
395 goto out;
396 }
371 397
372 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 398 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
373 seq_printf(m, " %-30.30s %10lu", str, rec->counter); 399 seq_printf(m, " %-30.30s %10lu", str, rec->counter);
@@ -377,17 +403,31 @@ static int function_stat_show(struct seq_file *m, void *v)
377 avg = rec->time; 403 avg = rec->time;
378 do_div(avg, rec->counter); 404 do_div(avg, rec->counter);
379 405
380 mutex_lock(&mutex); 406 /* Sample standard deviation (s^2) */
407 if (rec->counter <= 1)
408 stddev = 0;
409 else {
410 stddev = rec->time_squared - rec->counter * avg * avg;
411 /*
412 * Divide only 1000 for ns^2 -> us^2 conversion.
413 * trace_print_graph_duration will divide 1000 again.
414 */
415 do_div(stddev, (rec->counter - 1) * 1000);
416 }
417
381 trace_seq_init(&s); 418 trace_seq_init(&s);
382 trace_print_graph_duration(rec->time, &s); 419 trace_print_graph_duration(rec->time, &s);
383 trace_seq_puts(&s, " "); 420 trace_seq_puts(&s, " ");
384 trace_print_graph_duration(avg, &s); 421 trace_print_graph_duration(avg, &s);
422 trace_seq_puts(&s, " ");
423 trace_print_graph_duration(stddev, &s);
385 trace_print_seq(m, &s); 424 trace_print_seq(m, &s);
386 mutex_unlock(&mutex);
387#endif 425#endif
388 seq_putc(m, '\n'); 426 seq_putc(m, '\n');
427out:
428 mutex_unlock(&ftrace_profile_lock);
389 429
390 return 0; 430 return ret;
391} 431}
392 432
393static void ftrace_profile_reset(struct ftrace_profile_stat *stat) 433static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
@@ -633,6 +673,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
633 if (!stat->hash || !ftrace_profile_enabled) 673 if (!stat->hash || !ftrace_profile_enabled)
634 goto out; 674 goto out;
635 675
676 /* If the calltime was zero'd ignore it */
677 if (!trace->calltime)
678 goto out;
679
636 calltime = trace->rettime - trace->calltime; 680 calltime = trace->rettime - trace->calltime;
637 681
638 if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { 682 if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
@@ -651,8 +695,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
651 } 695 }
652 696
653 rec = ftrace_find_profiled_func(stat, trace->func); 697 rec = ftrace_find_profiled_func(stat, trace->func);
654 if (rec) 698 if (rec) {
655 rec->time += calltime; 699 rec->time += calltime;
700 rec->time_squared += calltime * calltime;
701 }
656 702
657 out: 703 out:
658 local_irq_restore(flags); 704 local_irq_restore(flags);
@@ -734,7 +780,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
734 out: 780 out:
735 mutex_unlock(&ftrace_profile_lock); 781 mutex_unlock(&ftrace_profile_lock);
736 782
737 filp->f_pos += cnt; 783 *ppos += cnt;
738 784
739 return cnt; 785 return cnt;
740} 786}
@@ -754,6 +800,7 @@ static const struct file_operations ftrace_profile_fops = {
754 .open = tracing_open_generic, 800 .open = tracing_open_generic,
755 .read = ftrace_profile_read, 801 .read = ftrace_profile_read,
756 .write = ftrace_profile_write, 802 .write = ftrace_profile_write,
803 .llseek = default_llseek,
757}; 804};
758 805
759/* used to initialize the real stat files */ 806/* used to initialize the real stat files */
@@ -766,7 +813,7 @@ static struct tracer_stat function_stats __initdata = {
766 .stat_show = function_stat_show 813 .stat_show = function_stat_show
767}; 814};
768 815
769static void ftrace_profile_debugfs(struct dentry *d_tracer) 816static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
770{ 817{
771 struct ftrace_profile_stat *stat; 818 struct ftrace_profile_stat *stat;
772 struct dentry *entry; 819 struct dentry *entry;
@@ -784,7 +831,6 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
784 * The files created are permanent, if something happens 831 * The files created are permanent, if something happens
785 * we still do not free memory. 832 * we still do not free memory.
786 */ 833 */
787 kfree(stat);
788 WARN(1, 834 WARN(1,
789 "Could not allocate stat file for cpu %d\n", 835 "Could not allocate stat file for cpu %d\n",
790 cpu); 836 cpu);
@@ -811,13 +857,11 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
811} 857}
812 858
813#else /* CONFIG_FUNCTION_PROFILER */ 859#else /* CONFIG_FUNCTION_PROFILER */
814static void ftrace_profile_debugfs(struct dentry *d_tracer) 860static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
815{ 861{
816} 862}
817#endif /* CONFIG_FUNCTION_PROFILER */ 863#endif /* CONFIG_FUNCTION_PROFILER */
818 864
819/* set when tracing only a pid */
820struct pid *ftrace_pid_trace;
821static struct pid * const ftrace_swapper_pid = &init_struct_pid; 865static struct pid * const ftrace_swapper_pid = &init_struct_pid;
822 866
823#ifdef CONFIG_DYNAMIC_FTRACE 867#ifdef CONFIG_DYNAMIC_FTRACE
@@ -841,10 +885,8 @@ enum {
841 FTRACE_ENABLE_CALLS = (1 << 0), 885 FTRACE_ENABLE_CALLS = (1 << 0),
842 FTRACE_DISABLE_CALLS = (1 << 1), 886 FTRACE_DISABLE_CALLS = (1 << 1),
843 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 887 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
844 FTRACE_ENABLE_MCOUNT = (1 << 3), 888 FTRACE_START_FUNC_RET = (1 << 3),
845 FTRACE_DISABLE_MCOUNT = (1 << 4), 889 FTRACE_STOP_FUNC_RET = (1 << 4),
846 FTRACE_START_FUNC_RET = (1 << 5),
847 FTRACE_STOP_FUNC_RET = (1 << 6),
848}; 890};
849 891
850static int ftrace_filtered; 892static int ftrace_filtered;
@@ -884,36 +926,6 @@ static struct dyn_ftrace *ftrace_free_records;
884 } \ 926 } \
885 } 927 }
886 928
887#ifdef CONFIG_KPROBES
888
889static int frozen_record_count;
890
891static inline void freeze_record(struct dyn_ftrace *rec)
892{
893 if (!(rec->flags & FTRACE_FL_FROZEN)) {
894 rec->flags |= FTRACE_FL_FROZEN;
895 frozen_record_count++;
896 }
897}
898
899static inline void unfreeze_record(struct dyn_ftrace *rec)
900{
901 if (rec->flags & FTRACE_FL_FROZEN) {
902 rec->flags &= ~FTRACE_FL_FROZEN;
903 frozen_record_count--;
904 }
905}
906
907static inline int record_frozen(struct dyn_ftrace *rec)
908{
909 return rec->flags & FTRACE_FL_FROZEN;
910}
911#else
912# define freeze_record(rec) ({ 0; })
913# define unfreeze_record(rec) ({ 0; })
914# define record_frozen(rec) ({ 0; })
915#endif /* CONFIG_KPROBES */
916
917static void ftrace_free_rec(struct dyn_ftrace *rec) 929static void ftrace_free_rec(struct dyn_ftrace *rec)
918{ 930{
919 rec->freelist = ftrace_free_records; 931 rec->freelist = ftrace_free_records;
@@ -1011,75 +1023,54 @@ static void ftrace_bug(int failed, unsigned long ip)
1011} 1023}
1012 1024
1013 1025
1026/* Return 1 if the address range is reserved for ftrace */
1027int ftrace_text_reserved(void *start, void *end)
1028{
1029 struct dyn_ftrace *rec;
1030 struct ftrace_page *pg;
1031
1032 do_for_each_ftrace_rec(pg, rec) {
1033 if (rec->ip <= (unsigned long)end &&
1034 rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
1035 return 1;
1036 } while_for_each_ftrace_rec();
1037 return 0;
1038}
1039
1040
1014static int 1041static int
1015__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1042__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1016{ 1043{
1017 unsigned long ftrace_addr; 1044 unsigned long ftrace_addr;
1018 unsigned long ip, fl; 1045 unsigned long flag = 0UL;
1019 1046
1020 ftrace_addr = (unsigned long)FTRACE_ADDR; 1047 ftrace_addr = (unsigned long)FTRACE_ADDR;
1021 1048
1022 ip = rec->ip;
1023
1024 /* 1049 /*
1025 * If this record is not to be traced and 1050 * If this record is not to be traced or we want to disable it,
1026 * it is not enabled then do nothing. 1051 * then disable it.
1027 * 1052 *
1028 * If this record is not to be traced and 1053 * If we want to enable it and filtering is off, then enable it.
1029 * it is enabled then disable it.
1030 * 1054 *
1055 * If we want to enable it and filtering is on, enable it only if
1056 * it's filtered
1031 */ 1057 */
1032 if (rec->flags & FTRACE_FL_NOTRACE) { 1058 if (enable && !(rec->flags & FTRACE_FL_NOTRACE)) {
1033 if (rec->flags & FTRACE_FL_ENABLED) 1059 if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER))
1034 rec->flags &= ~FTRACE_FL_ENABLED; 1060 flag = FTRACE_FL_ENABLED;
1035 else 1061 }
1036 return 0;
1037
1038 } else if (ftrace_filtered && enable) {
1039 /*
1040 * Filtering is on:
1041 */
1042
1043 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
1044
1045 /* Record is filtered and enabled, do nothing */
1046 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
1047 return 0;
1048
1049 /* Record is not filtered or enabled, do nothing */
1050 if (!fl)
1051 return 0;
1052
1053 /* Record is not filtered but enabled, disable it */
1054 if (fl == FTRACE_FL_ENABLED)
1055 rec->flags &= ~FTRACE_FL_ENABLED;
1056 else
1057 /* Otherwise record is filtered but not enabled, enable it */
1058 rec->flags |= FTRACE_FL_ENABLED;
1059 } else {
1060 /* Disable or not filtered */
1061
1062 if (enable) {
1063 /* if record is enabled, do nothing */
1064 if (rec->flags & FTRACE_FL_ENABLED)
1065 return 0;
1066
1067 rec->flags |= FTRACE_FL_ENABLED;
1068
1069 } else {
1070 1062
1071 /* if record is not enabled, do nothing */ 1063 /* If the state of this record hasn't changed, then do nothing */
1072 if (!(rec->flags & FTRACE_FL_ENABLED)) 1064 if ((rec->flags & FTRACE_FL_ENABLED) == flag)
1073 return 0; 1065 return 0;
1074 1066
1075 rec->flags &= ~FTRACE_FL_ENABLED; 1067 if (flag) {
1076 } 1068 rec->flags |= FTRACE_FL_ENABLED;
1069 return ftrace_make_call(rec, ftrace_addr);
1077 } 1070 }
1078 1071
1079 if (rec->flags & FTRACE_FL_ENABLED) 1072 rec->flags &= ~FTRACE_FL_ENABLED;
1080 return ftrace_make_call(rec, ftrace_addr); 1073 return ftrace_make_nop(NULL, rec, ftrace_addr);
1081 else
1082 return ftrace_make_nop(NULL, rec, ftrace_addr);
1083} 1074}
1084 1075
1085static void ftrace_replace_code(int enable) 1076static void ftrace_replace_code(int enable)
@@ -1098,25 +1089,12 @@ static void ftrace_replace_code(int enable)
1098 !(rec->flags & FTRACE_FL_CONVERTED)) 1089 !(rec->flags & FTRACE_FL_CONVERTED))
1099 continue; 1090 continue;
1100 1091
1101 /* ignore updates to this record's mcount site */
1102 if (get_kprobe((void *)rec->ip)) {
1103 freeze_record(rec);
1104 continue;
1105 } else {
1106 unfreeze_record(rec);
1107 }
1108
1109 failed = __ftrace_replace_code(rec, enable); 1092 failed = __ftrace_replace_code(rec, enable);
1110 if (failed) { 1093 if (failed) {
1111 rec->flags |= FTRACE_FL_FAILED; 1094 rec->flags |= FTRACE_FL_FAILED;
1112 if ((system_state == SYSTEM_BOOTING) || 1095 ftrace_bug(failed, rec->ip);
1113 !core_kernel_text(rec->ip)) { 1096 /* Stop processing */
1114 ftrace_free_rec(rec); 1097 return;
1115 } else {
1116 ftrace_bug(failed, rec->ip);
1117 /* Stop processing */
1118 return;
1119 }
1120 } 1098 }
1121 } while_for_each_ftrace_rec(); 1099 } while_for_each_ftrace_rec();
1122} 1100}
@@ -1247,8 +1225,6 @@ static void ftrace_shutdown(int command)
1247 1225
1248static void ftrace_startup_sysctl(void) 1226static void ftrace_startup_sysctl(void)
1249{ 1227{
1250 int command = FTRACE_ENABLE_MCOUNT;
1251
1252 if (unlikely(ftrace_disabled)) 1228 if (unlikely(ftrace_disabled))
1253 return; 1229 return;
1254 1230
@@ -1256,23 +1232,17 @@ static void ftrace_startup_sysctl(void)
1256 saved_ftrace_func = NULL; 1232 saved_ftrace_func = NULL;
1257 /* ftrace_start_up is true if we want ftrace running */ 1233 /* ftrace_start_up is true if we want ftrace running */
1258 if (ftrace_start_up) 1234 if (ftrace_start_up)
1259 command |= FTRACE_ENABLE_CALLS; 1235 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1260
1261 ftrace_run_update_code(command);
1262} 1236}
1263 1237
1264static void ftrace_shutdown_sysctl(void) 1238static void ftrace_shutdown_sysctl(void)
1265{ 1239{
1266 int command = FTRACE_DISABLE_MCOUNT;
1267
1268 if (unlikely(ftrace_disabled)) 1240 if (unlikely(ftrace_disabled))
1269 return; 1241 return;
1270 1242
1271 /* ftrace_start_up is true if ftrace is running */ 1243 /* ftrace_start_up is true if ftrace is running */
1272 if (ftrace_start_up) 1244 if (ftrace_start_up)
1273 command |= FTRACE_DISABLE_CALLS; 1245 ftrace_run_update_code(FTRACE_DISABLE_CALLS);
1274
1275 ftrace_run_update_code(command);
1276} 1246}
1277 1247
1278static cycle_t ftrace_update_time; 1248static cycle_t ftrace_update_time;
@@ -1297,12 +1267,34 @@ static int ftrace_update_code(struct module *mod)
1297 ftrace_new_addrs = p->newlist; 1267 ftrace_new_addrs = p->newlist;
1298 p->flags = 0L; 1268 p->flags = 0L;
1299 1269
1300 /* convert record (i.e, patch mcount-call with NOP) */ 1270 /*
1301 if (ftrace_code_disable(mod, p)) { 1271 * Do the initial record convertion from mcount jump
1302 p->flags |= FTRACE_FL_CONVERTED; 1272 * to the NOP instructions.
1303 ftrace_update_cnt++; 1273 */
1304 } else 1274 if (!ftrace_code_disable(mod, p)) {
1305 ftrace_free_rec(p); 1275 ftrace_free_rec(p);
1276 continue;
1277 }
1278
1279 p->flags |= FTRACE_FL_CONVERTED;
1280 ftrace_update_cnt++;
1281
1282 /*
1283 * If the tracing is enabled, go ahead and enable the record.
1284 *
1285 * The reason not to enable the record immediatelly is the
1286 * inherent check of ftrace_make_nop/ftrace_make_call for
1287 * correct previous instructions. Making first the NOP
1288 * conversion puts the module to the correct state, thus
1289 * passing the ftrace_make_call check.
1290 */
1291 if (ftrace_start_up) {
1292 int failed = __ftrace_replace_code(p, 1);
1293 if (failed) {
1294 ftrace_bug(failed, p->ip);
1295 ftrace_free_rec(p);
1296 }
1297 }
1306 } 1298 }
1307 1299
1308 stop = ftrace_now(raw_smp_processor_id()); 1300 stop = ftrace_now(raw_smp_processor_id());
@@ -1358,36 +1350,38 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
1358 1350
1359enum { 1351enum {
1360 FTRACE_ITER_FILTER = (1 << 0), 1352 FTRACE_ITER_FILTER = (1 << 0),
1361 FTRACE_ITER_CONT = (1 << 1), 1353 FTRACE_ITER_NOTRACE = (1 << 1),
1362 FTRACE_ITER_NOTRACE = (1 << 2), 1354 FTRACE_ITER_FAILURES = (1 << 2),
1363 FTRACE_ITER_FAILURES = (1 << 3), 1355 FTRACE_ITER_PRINTALL = (1 << 3),
1364 FTRACE_ITER_PRINTALL = (1 << 4), 1356 FTRACE_ITER_HASH = (1 << 4),
1365 FTRACE_ITER_HASH = (1 << 5),
1366}; 1357};
1367 1358
1368#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 1359#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
1369 1360
1370struct ftrace_iterator { 1361struct ftrace_iterator {
1371 struct ftrace_page *pg; 1362 loff_t pos;
1372 int hidx; 1363 loff_t func_pos;
1373 int idx; 1364 struct ftrace_page *pg;
1374 unsigned flags; 1365 struct dyn_ftrace *func;
1375 unsigned char buffer[FTRACE_BUFF_MAX+1]; 1366 struct ftrace_func_probe *probe;
1376 unsigned buffer_idx; 1367 struct trace_parser parser;
1377 unsigned filtered; 1368 int hidx;
1369 int idx;
1370 unsigned flags;
1378}; 1371};
1379 1372
1380static void * 1373static void *
1381t_hash_next(struct seq_file *m, void *v, loff_t *pos) 1374t_hash_next(struct seq_file *m, loff_t *pos)
1382{ 1375{
1383 struct ftrace_iterator *iter = m->private; 1376 struct ftrace_iterator *iter = m->private;
1384 struct hlist_node *hnd = v; 1377 struct hlist_node *hnd = NULL;
1385 struct hlist_head *hhd; 1378 struct hlist_head *hhd;
1386 1379
1387 WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
1388
1389 (*pos)++; 1380 (*pos)++;
1381 iter->pos = *pos;
1390 1382
1383 if (iter->probe)
1384 hnd = &iter->probe->node;
1391 retry: 1385 retry:
1392 if (iter->hidx >= FTRACE_FUNC_HASHSIZE) 1386 if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
1393 return NULL; 1387 return NULL;
@@ -1410,35 +1404,51 @@ t_hash_next(struct seq_file *m, void *v, loff_t *pos)
1410 } 1404 }
1411 } 1405 }
1412 1406
1413 return hnd; 1407 if (WARN_ON_ONCE(!hnd))
1408 return NULL;
1409
1410 iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node);
1411
1412 return iter;
1414} 1413}
1415 1414
1416static void *t_hash_start(struct seq_file *m, loff_t *pos) 1415static void *t_hash_start(struct seq_file *m, loff_t *pos)
1417{ 1416{
1418 struct ftrace_iterator *iter = m->private; 1417 struct ftrace_iterator *iter = m->private;
1419 void *p = NULL; 1418 void *p = NULL;
1419 loff_t l;
1420 1420
1421 if (iter->func_pos > *pos)
1422 return NULL;
1423
1424 iter->hidx = 0;
1425 for (l = 0; l <= (*pos - iter->func_pos); ) {
1426 p = t_hash_next(m, &l);
1427 if (!p)
1428 break;
1429 }
1430 if (!p)
1431 return NULL;
1432
1433 /* Only set this if we have an item */
1421 iter->flags |= FTRACE_ITER_HASH; 1434 iter->flags |= FTRACE_ITER_HASH;
1422 1435
1423 return t_hash_next(m, p, pos); 1436 return iter;
1424} 1437}
1425 1438
1426static int t_hash_show(struct seq_file *m, void *v) 1439static int
1440t_hash_show(struct seq_file *m, struct ftrace_iterator *iter)
1427{ 1441{
1428 struct ftrace_func_probe *rec; 1442 struct ftrace_func_probe *rec;
1429 struct hlist_node *hnd = v;
1430 char str[KSYM_SYMBOL_LEN];
1431 1443
1432 rec = hlist_entry(hnd, struct ftrace_func_probe, node); 1444 rec = iter->probe;
1445 if (WARN_ON_ONCE(!rec))
1446 return -EIO;
1433 1447
1434 if (rec->ops->print) 1448 if (rec->ops->print)
1435 return rec->ops->print(m, rec->ip, rec->ops, rec->data); 1449 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
1436 1450
1437 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 1451 seq_printf(m, "%ps:%ps", (void *)rec->ip, (void *)rec->ops->func);
1438 seq_printf(m, "%s:", str);
1439
1440 kallsyms_lookup((unsigned long)rec->ops->func, NULL, NULL, NULL, str);
1441 seq_printf(m, "%s", str);
1442 1452
1443 if (rec->data) 1453 if (rec->data)
1444 seq_printf(m, ":%p", rec->data); 1454 seq_printf(m, ":%p", rec->data);
@@ -1454,12 +1464,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1454 struct dyn_ftrace *rec = NULL; 1464 struct dyn_ftrace *rec = NULL;
1455 1465
1456 if (iter->flags & FTRACE_ITER_HASH) 1466 if (iter->flags & FTRACE_ITER_HASH)
1457 return t_hash_next(m, v, pos); 1467 return t_hash_next(m, pos);
1458 1468
1459 (*pos)++; 1469 (*pos)++;
1470 iter->pos = *pos;
1460 1471
1461 if (iter->flags & FTRACE_ITER_PRINTALL) 1472 if (iter->flags & FTRACE_ITER_PRINTALL)
1462 return NULL; 1473 return t_hash_start(m, pos);
1463 1474
1464 retry: 1475 retry:
1465 if (iter->idx >= iter->pg->index) { 1476 if (iter->idx >= iter->pg->index) {
@@ -1467,8 +1478,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1467 iter->pg = iter->pg->next; 1478 iter->pg = iter->pg->next;
1468 iter->idx = 0; 1479 iter->idx = 0;
1469 goto retry; 1480 goto retry;
1470 } else {
1471 iter->idx = -1;
1472 } 1481 }
1473 } else { 1482 } else {
1474 rec = &iter->pg->records[iter->idx++]; 1483 rec = &iter->pg->records[iter->idx++];
@@ -1490,16 +1499,36 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1490 } 1499 }
1491 } 1500 }
1492 1501
1493 return rec; 1502 if (!rec)
1503 return t_hash_start(m, pos);
1504
1505 iter->func_pos = *pos;
1506 iter->func = rec;
1507
1508 return iter;
1509}
1510
1511static void reset_iter_read(struct ftrace_iterator *iter)
1512{
1513 iter->pos = 0;
1514 iter->func_pos = 0;
1515 iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH);
1494} 1516}
1495 1517
1496static void *t_start(struct seq_file *m, loff_t *pos) 1518static void *t_start(struct seq_file *m, loff_t *pos)
1497{ 1519{
1498 struct ftrace_iterator *iter = m->private; 1520 struct ftrace_iterator *iter = m->private;
1499 void *p = NULL; 1521 void *p = NULL;
1522 loff_t l;
1500 1523
1501 mutex_lock(&ftrace_lock); 1524 mutex_lock(&ftrace_lock);
1502 /* 1525 /*
1526 * If an lseek was done, then reset and start from beginning.
1527 */
1528 if (*pos < iter->pos)
1529 reset_iter_read(iter);
1530
1531 /*
1503 * For set_ftrace_filter reading, if we have the filter 1532 * For set_ftrace_filter reading, if we have the filter
1504 * off, we can short cut and just print out that all 1533 * off, we can short cut and just print out that all
1505 * functions are enabled. 1534 * functions are enabled.
@@ -1508,26 +1537,35 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1508 if (*pos > 0) 1537 if (*pos > 0)
1509 return t_hash_start(m, pos); 1538 return t_hash_start(m, pos);
1510 iter->flags |= FTRACE_ITER_PRINTALL; 1539 iter->flags |= FTRACE_ITER_PRINTALL;
1511 (*pos)++; 1540 /* reset in case of seek/pread */
1541 iter->flags &= ~FTRACE_ITER_HASH;
1512 return iter; 1542 return iter;
1513 } 1543 }
1514 1544
1515 if (iter->flags & FTRACE_ITER_HASH) 1545 if (iter->flags & FTRACE_ITER_HASH)
1516 return t_hash_start(m, pos); 1546 return t_hash_start(m, pos);
1517 1547
1518 if (*pos > 0) { 1548 /*
1519 if (iter->idx < 0) 1549 * Unfortunately, we need to restart at ftrace_pages_start
1520 return p; 1550 * every time we let go of the ftrace_mutex. This is because
1521 (*pos)--; 1551 * those pointers can change without the lock.
1522 iter->idx--; 1552 */
1553 iter->pg = ftrace_pages_start;
1554 iter->idx = 0;
1555 for (l = 0; l <= *pos; ) {
1556 p = t_next(m, p, &l);
1557 if (!p)
1558 break;
1523 } 1559 }
1524 1560
1525 p = t_next(m, p, pos); 1561 if (!p) {
1562 if (iter->flags & FTRACE_ITER_FILTER)
1563 return t_hash_start(m, pos);
1526 1564
1527 if (!p) 1565 return NULL;
1528 return t_hash_start(m, pos); 1566 }
1529 1567
1530 return p; 1568 return iter;
1531} 1569}
1532 1570
1533static void t_stop(struct seq_file *m, void *p) 1571static void t_stop(struct seq_file *m, void *p)
@@ -1538,28 +1576,27 @@ static void t_stop(struct seq_file *m, void *p)
1538static int t_show(struct seq_file *m, void *v) 1576static int t_show(struct seq_file *m, void *v)
1539{ 1577{
1540 struct ftrace_iterator *iter = m->private; 1578 struct ftrace_iterator *iter = m->private;
1541 struct dyn_ftrace *rec = v; 1579 struct dyn_ftrace *rec;
1542 char str[KSYM_SYMBOL_LEN];
1543 1580
1544 if (iter->flags & FTRACE_ITER_HASH) 1581 if (iter->flags & FTRACE_ITER_HASH)
1545 return t_hash_show(m, v); 1582 return t_hash_show(m, iter);
1546 1583
1547 if (iter->flags & FTRACE_ITER_PRINTALL) { 1584 if (iter->flags & FTRACE_ITER_PRINTALL) {
1548 seq_printf(m, "#### all functions enabled ####\n"); 1585 seq_printf(m, "#### all functions enabled ####\n");
1549 return 0; 1586 return 0;
1550 } 1587 }
1551 1588
1589 rec = iter->func;
1590
1552 if (!rec) 1591 if (!rec)
1553 return 0; 1592 return 0;
1554 1593
1555 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 1594 seq_printf(m, "%ps\n", (void *)rec->ip);
1556
1557 seq_printf(m, "%s\n", str);
1558 1595
1559 return 0; 1596 return 0;
1560} 1597}
1561 1598
1562static struct seq_operations show_ftrace_seq_ops = { 1599static const struct seq_operations show_ftrace_seq_ops = {
1563 .start = t_start, 1600 .start = t_start,
1564 .next = t_next, 1601 .next = t_next,
1565 .stop = t_stop, 1602 .stop = t_stop,
@@ -1593,17 +1630,6 @@ ftrace_avail_open(struct inode *inode, struct file *file)
1593 return ret; 1630 return ret;
1594} 1631}
1595 1632
1596int ftrace_avail_release(struct inode *inode, struct file *file)
1597{
1598 struct seq_file *m = (struct seq_file *)file->private_data;
1599 struct ftrace_iterator *iter = m->private;
1600
1601 seq_release(inode, file);
1602 kfree(iter);
1603
1604 return 0;
1605}
1606
1607static int 1633static int
1608ftrace_failures_open(struct inode *inode, struct file *file) 1634ftrace_failures_open(struct inode *inode, struct file *file)
1609{ 1635{
@@ -1613,8 +1639,8 @@ ftrace_failures_open(struct inode *inode, struct file *file)
1613 1639
1614 ret = ftrace_avail_open(inode, file); 1640 ret = ftrace_avail_open(inode, file);
1615 if (!ret) { 1641 if (!ret) {
1616 m = (struct seq_file *)file->private_data; 1642 m = file->private_data;
1617 iter = (struct ftrace_iterator *)m->private; 1643 iter = m->private;
1618 iter->flags = FTRACE_ITER_FAILURES; 1644 iter->flags = FTRACE_ITER_FAILURES;
1619 } 1645 }
1620 1646
@@ -1652,9 +1678,14 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1652 if (!iter) 1678 if (!iter)
1653 return -ENOMEM; 1679 return -ENOMEM;
1654 1680
1681 if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) {
1682 kfree(iter);
1683 return -ENOMEM;
1684 }
1685
1655 mutex_lock(&ftrace_regex_lock); 1686 mutex_lock(&ftrace_regex_lock);
1656 if ((file->f_mode & FMODE_WRITE) && 1687 if ((file->f_mode & FMODE_WRITE) &&
1657 !(file->f_flags & O_APPEND)) 1688 (file->f_flags & O_TRUNC))
1658 ftrace_filter_reset(enable); 1689 ftrace_filter_reset(enable);
1659 1690
1660 if (file->f_mode & FMODE_READ) { 1691 if (file->f_mode & FMODE_READ) {
@@ -1666,8 +1697,10 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1666 if (!ret) { 1697 if (!ret) {
1667 struct seq_file *m = file->private_data; 1698 struct seq_file *m = file->private_data;
1668 m->private = iter; 1699 m->private = iter;
1669 } else 1700 } else {
1701 trace_parser_put(&iter->parser);
1670 kfree(iter); 1702 kfree(iter);
1703 }
1671 } else 1704 } else
1672 file->private_data = iter; 1705 file->private_data = iter;
1673 mutex_unlock(&ftrace_regex_lock); 1706 mutex_unlock(&ftrace_regex_lock);
@@ -1700,64 +1733,10 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
1700 return ret; 1733 return ret;
1701} 1734}
1702 1735
1703enum {
1704 MATCH_FULL,
1705 MATCH_FRONT_ONLY,
1706 MATCH_MIDDLE_ONLY,
1707 MATCH_END_ONLY,
1708};
1709
1710/*
1711 * (static function - no need for kernel doc)
1712 *
1713 * Pass in a buffer containing a glob and this function will
1714 * set search to point to the search part of the buffer and
1715 * return the type of search it is (see enum above).
1716 * This does modify buff.
1717 *
1718 * Returns enum type.
1719 * search returns the pointer to use for comparison.
1720 * not returns 1 if buff started with a '!'
1721 * 0 otherwise.
1722 */
1723static int
1724ftrace_setup_glob(char *buff, int len, char **search, int *not)
1725{
1726 int type = MATCH_FULL;
1727 int i;
1728
1729 if (buff[0] == '!') {
1730 *not = 1;
1731 buff++;
1732 len--;
1733 } else
1734 *not = 0;
1735
1736 *search = buff;
1737
1738 for (i = 0; i < len; i++) {
1739 if (buff[i] == '*') {
1740 if (!i) {
1741 *search = buff + 1;
1742 type = MATCH_END_ONLY;
1743 } else {
1744 if (type == MATCH_END_ONLY)
1745 type = MATCH_MIDDLE_ONLY;
1746 else
1747 type = MATCH_FRONT_ONLY;
1748 buff[i] = 0;
1749 break;
1750 }
1751 }
1752 }
1753
1754 return type;
1755}
1756
1757static int ftrace_match(char *str, char *regex, int len, int type) 1736static int ftrace_match(char *str, char *regex, int len, int type)
1758{ 1737{
1759 int matched = 0; 1738 int matched = 0;
1760 char *ptr; 1739 int slen;
1761 1740
1762 switch (type) { 1741 switch (type) {
1763 case MATCH_FULL: 1742 case MATCH_FULL:
@@ -1773,8 +1752,8 @@ static int ftrace_match(char *str, char *regex, int len, int type)
1773 matched = 1; 1752 matched = 1;
1774 break; 1753 break;
1775 case MATCH_END_ONLY: 1754 case MATCH_END_ONLY:
1776 ptr = strstr(str, regex); 1755 slen = strlen(str);
1777 if (ptr && (ptr[len] == 0)) 1756 if (slen >= len && memcmp(str + slen - len, regex, len) == 0)
1778 matched = 1; 1757 matched = 1;
1779 break; 1758 break;
1780 } 1759 }
@@ -1791,7 +1770,7 @@ ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
1791 return ftrace_match(str, regex, len, type); 1770 return ftrace_match(str, regex, len, type);
1792} 1771}
1793 1772
1794static void ftrace_match_records(char *buff, int len, int enable) 1773static int ftrace_match_records(char *buff, int len, int enable)
1795{ 1774{
1796 unsigned int search_len; 1775 unsigned int search_len;
1797 struct ftrace_page *pg; 1776 struct ftrace_page *pg;
@@ -1800,9 +1779,10 @@ static void ftrace_match_records(char *buff, int len, int enable)
1800 char *search; 1779 char *search;
1801 int type; 1780 int type;
1802 int not; 1781 int not;
1782 int found = 0;
1803 1783
1804 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1784 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1805 type = ftrace_setup_glob(buff, len, &search, &not); 1785 type = filter_parse_regex(buff, len, &search, &not);
1806 1786
1807 search_len = strlen(search); 1787 search_len = strlen(search);
1808 1788
@@ -1817,6 +1797,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
1817 rec->flags &= ~flag; 1797 rec->flags &= ~flag;
1818 else 1798 else
1819 rec->flags |= flag; 1799 rec->flags |= flag;
1800 found = 1;
1820 } 1801 }
1821 /* 1802 /*
1822 * Only enable filtering if we have a function that 1803 * Only enable filtering if we have a function that
@@ -1826,6 +1807,8 @@ static void ftrace_match_records(char *buff, int len, int enable)
1826 ftrace_filtered = 1; 1807 ftrace_filtered = 1;
1827 } while_for_each_ftrace_rec(); 1808 } while_for_each_ftrace_rec();
1828 mutex_unlock(&ftrace_lock); 1809 mutex_unlock(&ftrace_lock);
1810
1811 return found;
1829} 1812}
1830 1813
1831static int 1814static int
@@ -1847,7 +1830,7 @@ ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
1847 return 1; 1830 return 1;
1848} 1831}
1849 1832
1850static void ftrace_match_module_records(char *buff, char *mod, int enable) 1833static int ftrace_match_module_records(char *buff, char *mod, int enable)
1851{ 1834{
1852 unsigned search_len = 0; 1835 unsigned search_len = 0;
1853 struct ftrace_page *pg; 1836 struct ftrace_page *pg;
@@ -1856,6 +1839,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1856 char *search = buff; 1839 char *search = buff;
1857 unsigned long flag; 1840 unsigned long flag;
1858 int not = 0; 1841 int not = 0;
1842 int found = 0;
1859 1843
1860 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1844 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1861 1845
@@ -1870,7 +1854,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1870 } 1854 }
1871 1855
1872 if (strlen(buff)) { 1856 if (strlen(buff)) {
1873 type = ftrace_setup_glob(buff, strlen(buff), &search, &not); 1857 type = filter_parse_regex(buff, strlen(buff), &search, &not);
1874 search_len = strlen(search); 1858 search_len = strlen(search);
1875 } 1859 }
1876 1860
@@ -1886,12 +1870,15 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1886 rec->flags &= ~flag; 1870 rec->flags &= ~flag;
1887 else 1871 else
1888 rec->flags |= flag; 1872 rec->flags |= flag;
1873 found = 1;
1889 } 1874 }
1890 if (enable && (rec->flags & FTRACE_FL_FILTER)) 1875 if (enable && (rec->flags & FTRACE_FL_FILTER))
1891 ftrace_filtered = 1; 1876 ftrace_filtered = 1;
1892 1877
1893 } while_for_each_ftrace_rec(); 1878 } while_for_each_ftrace_rec();
1894 mutex_unlock(&ftrace_lock); 1879 mutex_unlock(&ftrace_lock);
1880
1881 return found;
1895} 1882}
1896 1883
1897/* 1884/*
@@ -1920,8 +1907,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
1920 if (!strlen(mod)) 1907 if (!strlen(mod))
1921 return -EINVAL; 1908 return -EINVAL;
1922 1909
1923 ftrace_match_module_records(func, mod, enable); 1910 if (ftrace_match_module_records(func, mod, enable))
1924 return 0; 1911 return 0;
1912 return -EINVAL;
1925} 1913}
1926 1914
1927static struct ftrace_func_command ftrace_mod_cmd = { 1915static struct ftrace_func_command ftrace_mod_cmd = {
@@ -1942,7 +1930,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1942 struct hlist_head *hhd; 1930 struct hlist_head *hhd;
1943 struct hlist_node *n; 1931 struct hlist_node *n;
1944 unsigned long key; 1932 unsigned long key;
1945 int resched;
1946 1933
1947 key = hash_long(ip, FTRACE_HASH_BITS); 1934 key = hash_long(ip, FTRACE_HASH_BITS);
1948 1935
@@ -1956,12 +1943,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1956 * period. This syncs the hash iteration and freeing of items 1943 * period. This syncs the hash iteration and freeing of items
1957 * on the hash. rcu_read_lock is too dangerous here. 1944 * on the hash. rcu_read_lock is too dangerous here.
1958 */ 1945 */
1959 resched = ftrace_preempt_disable(); 1946 preempt_disable_notrace();
1960 hlist_for_each_entry_rcu(entry, n, hhd, node) { 1947 hlist_for_each_entry_rcu(entry, n, hhd, node) {
1961 if (entry->ip == ip) 1948 if (entry->ip == ip)
1962 entry->ops->func(ip, parent_ip, &entry->data); 1949 entry->ops->func(ip, parent_ip, &entry->data);
1963 } 1950 }
1964 ftrace_preempt_enable(resched); 1951 preempt_enable_notrace();
1965} 1952}
1966 1953
1967static struct ftrace_ops trace_probe_ops __read_mostly = 1954static struct ftrace_ops trace_probe_ops __read_mostly =
@@ -2035,7 +2022,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
2035 int count = 0; 2022 int count = 0;
2036 char *search; 2023 char *search;
2037 2024
2038 type = ftrace_setup_glob(glob, strlen(glob), &search, &not); 2025 type = filter_parse_regex(glob, strlen(glob), &search, &not);
2039 len = strlen(search); 2026 len = strlen(search);
2040 2027
2041 /* we do not support '!' for function probes */ 2028 /* we do not support '!' for function probes */
@@ -2107,12 +2094,12 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
2107 int i, len = 0; 2094 int i, len = 0;
2108 char *search; 2095 char *search;
2109 2096
2110 if (glob && (strcmp(glob, "*") || !strlen(glob))) 2097 if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
2111 glob = NULL; 2098 glob = NULL;
2112 else { 2099 else if (glob) {
2113 int not; 2100 int not;
2114 2101
2115 type = ftrace_setup_glob(glob, strlen(glob), &search, &not); 2102 type = filter_parse_regex(glob, strlen(glob), &search, &not);
2116 len = strlen(search); 2103 len = strlen(search);
2117 2104
2118 /* we do not support '!' for function probes */ 2105 /* we do not support '!' for function probes */
@@ -2218,8 +2205,9 @@ static int ftrace_process_regex(char *buff, int len, int enable)
2218 func = strsep(&next, ":"); 2205 func = strsep(&next, ":");
2219 2206
2220 if (!next) { 2207 if (!next) {
2221 ftrace_match_records(func, len, enable); 2208 if (ftrace_match_records(func, len, enable))
2222 return 0; 2209 return 0;
2210 return ret;
2223 } 2211 }
2224 2212
2225 /* command found */ 2213 /* command found */
@@ -2244,11 +2232,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2244 size_t cnt, loff_t *ppos, int enable) 2232 size_t cnt, loff_t *ppos, int enable)
2245{ 2233{
2246 struct ftrace_iterator *iter; 2234 struct ftrace_iterator *iter;
2247 char ch; 2235 struct trace_parser *parser;
2248 size_t read = 0; 2236 ssize_t ret, read;
2249 ssize_t ret;
2250 2237
2251 if (!cnt || cnt < 0) 2238 if (!cnt)
2252 return 0; 2239 return 0;
2253 2240
2254 mutex_lock(&ftrace_regex_lock); 2241 mutex_lock(&ftrace_regex_lock);
@@ -2259,66 +2246,20 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2259 } else 2246 } else
2260 iter = file->private_data; 2247 iter = file->private_data;
2261 2248
2262 if (!*ppos) { 2249 parser = &iter->parser;
2263 iter->flags &= ~FTRACE_ITER_CONT; 2250 read = trace_get_user(parser, ubuf, cnt, ppos);
2264 iter->buffer_idx = 0;
2265 }
2266
2267 ret = get_user(ch, ubuf++);
2268 if (ret)
2269 goto out;
2270 read++;
2271 cnt--;
2272
2273 if (!(iter->flags & ~FTRACE_ITER_CONT)) {
2274 /* skip white space */
2275 while (cnt && isspace(ch)) {
2276 ret = get_user(ch, ubuf++);
2277 if (ret)
2278 goto out;
2279 read++;
2280 cnt--;
2281 }
2282 2251
2283 if (isspace(ch)) { 2252 if (read >= 0 && trace_parser_loaded(parser) &&
2284 file->f_pos += read; 2253 !trace_parser_cont(parser)) {
2285 ret = read; 2254 ret = ftrace_process_regex(parser->buffer,
2286 goto out; 2255 parser->idx, enable);
2287 } 2256 trace_parser_clear(parser);
2288
2289 iter->buffer_idx = 0;
2290 }
2291
2292 while (cnt && !isspace(ch)) {
2293 if (iter->buffer_idx < FTRACE_BUFF_MAX)
2294 iter->buffer[iter->buffer_idx++] = ch;
2295 else {
2296 ret = -EINVAL;
2297 goto out;
2298 }
2299 ret = get_user(ch, ubuf++);
2300 if (ret) 2257 if (ret)
2301 goto out; 2258 goto out_unlock;
2302 read++;
2303 cnt--;
2304 } 2259 }
2305 2260
2306 if (isspace(ch)) {
2307 iter->filtered++;
2308 iter->buffer[iter->buffer_idx] = 0;
2309 ret = ftrace_process_regex(iter->buffer,
2310 iter->buffer_idx, enable);
2311 if (ret)
2312 goto out;
2313 iter->buffer_idx = 0;
2314 } else
2315 iter->flags |= FTRACE_ITER_CONT;
2316
2317
2318 file->f_pos += read;
2319
2320 ret = read; 2261 ret = read;
2321 out: 2262out_unlock:
2322 mutex_unlock(&ftrace_regex_lock); 2263 mutex_unlock(&ftrace_regex_lock);
2323 2264
2324 return ret; 2265 return ret;
@@ -2402,6 +2343,34 @@ static int __init set_ftrace_filter(char *str)
2402} 2343}
2403__setup("ftrace_filter=", set_ftrace_filter); 2344__setup("ftrace_filter=", set_ftrace_filter);
2404 2345
2346#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2347static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
2348static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
2349
2350static int __init set_graph_function(char *str)
2351{
2352 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
2353 return 1;
2354}
2355__setup("ftrace_graph_filter=", set_graph_function);
2356
2357static void __init set_ftrace_early_graph(char *buf)
2358{
2359 int ret;
2360 char *func;
2361
2362 while (buf) {
2363 func = strsep(&buf, ",");
2364 /* we allow only one expression at a time */
2365 ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
2366 func);
2367 if (ret)
2368 printk(KERN_DEBUG "ftrace: function %s not "
2369 "traceable\n", func);
2370 }
2371}
2372#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2373
2405static void __init set_ftrace_early_filter(char *buf, int enable) 2374static void __init set_ftrace_early_filter(char *buf, int enable)
2406{ 2375{
2407 char *func; 2376 char *func;
@@ -2418,6 +2387,10 @@ static void __init set_ftrace_early_filters(void)
2418 set_ftrace_early_filter(ftrace_filter_buf, 1); 2387 set_ftrace_early_filter(ftrace_filter_buf, 1);
2419 if (ftrace_notrace_buf[0]) 2388 if (ftrace_notrace_buf[0])
2420 set_ftrace_early_filter(ftrace_notrace_buf, 0); 2389 set_ftrace_early_filter(ftrace_notrace_buf, 0);
2390#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2391 if (ftrace_graph_buf[0])
2392 set_ftrace_early_graph(ftrace_graph_buf);
2393#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2421} 2394}
2422 2395
2423static int 2396static int
@@ -2425,6 +2398,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
2425{ 2398{
2426 struct seq_file *m = (struct seq_file *)file->private_data; 2399 struct seq_file *m = (struct seq_file *)file->private_data;
2427 struct ftrace_iterator *iter; 2400 struct ftrace_iterator *iter;
2401 struct trace_parser *parser;
2428 2402
2429 mutex_lock(&ftrace_regex_lock); 2403 mutex_lock(&ftrace_regex_lock);
2430 if (file->f_mode & FMODE_READ) { 2404 if (file->f_mode & FMODE_READ) {
@@ -2434,10 +2408,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
2434 } else 2408 } else
2435 iter = file->private_data; 2409 iter = file->private_data;
2436 2410
2437 if (iter->buffer_idx) { 2411 parser = &iter->parser;
2438 iter->filtered++; 2412 if (trace_parser_loaded(parser)) {
2439 iter->buffer[iter->buffer_idx] = 0; 2413 parser->buffer[parser->idx] = 0;
2440 ftrace_match_records(iter->buffer, iter->buffer_idx, enable); 2414 ftrace_match_records(parser->buffer, parser->idx, enable);
2441 } 2415 }
2442 2416
2443 mutex_lock(&ftrace_lock); 2417 mutex_lock(&ftrace_lock);
@@ -2445,7 +2419,9 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
2445 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 2419 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
2446 mutex_unlock(&ftrace_lock); 2420 mutex_unlock(&ftrace_lock);
2447 2421
2422 trace_parser_put(parser);
2448 kfree(iter); 2423 kfree(iter);
2424
2449 mutex_unlock(&ftrace_regex_lock); 2425 mutex_unlock(&ftrace_regex_lock);
2450 return 0; 2426 return 0;
2451} 2427}
@@ -2466,14 +2442,14 @@ static const struct file_operations ftrace_avail_fops = {
2466 .open = ftrace_avail_open, 2442 .open = ftrace_avail_open,
2467 .read = seq_read, 2443 .read = seq_read,
2468 .llseek = seq_lseek, 2444 .llseek = seq_lseek,
2469 .release = ftrace_avail_release, 2445 .release = seq_release_private,
2470}; 2446};
2471 2447
2472static const struct file_operations ftrace_failures_fops = { 2448static const struct file_operations ftrace_failures_fops = {
2473 .open = ftrace_failures_open, 2449 .open = ftrace_failures_open,
2474 .read = seq_read, 2450 .read = seq_read,
2475 .llseek = seq_lseek, 2451 .llseek = seq_lseek,
2476 .release = ftrace_avail_release, 2452 .release = seq_release_private,
2477}; 2453};
2478 2454
2479static const struct file_operations ftrace_filter_fops = { 2455static const struct file_operations ftrace_filter_fops = {
@@ -2497,35 +2473,33 @@ static const struct file_operations ftrace_notrace_fops = {
2497static DEFINE_MUTEX(graph_lock); 2473static DEFINE_MUTEX(graph_lock);
2498 2474
2499int ftrace_graph_count; 2475int ftrace_graph_count;
2476int ftrace_graph_filter_enabled;
2500unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; 2477unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
2501 2478
2502static void * 2479static void *
2503g_next(struct seq_file *m, void *v, loff_t *pos) 2480__g_next(struct seq_file *m, loff_t *pos)
2504{ 2481{
2505 unsigned long *array = m->private; 2482 if (*pos >= ftrace_graph_count)
2506 int index = *pos;
2507
2508 (*pos)++;
2509
2510 if (index >= ftrace_graph_count)
2511 return NULL; 2483 return NULL;
2484 return &ftrace_graph_funcs[*pos];
2485}
2512 2486
2513 return &array[index]; 2487static void *
2488g_next(struct seq_file *m, void *v, loff_t *pos)
2489{
2490 (*pos)++;
2491 return __g_next(m, pos);
2514} 2492}
2515 2493
2516static void *g_start(struct seq_file *m, loff_t *pos) 2494static void *g_start(struct seq_file *m, loff_t *pos)
2517{ 2495{
2518 void *p = NULL;
2519
2520 mutex_lock(&graph_lock); 2496 mutex_lock(&graph_lock);
2521 2497
2522 /* Nothing, tell g_show to print all functions are enabled */ 2498 /* Nothing, tell g_show to print all functions are enabled */
2523 if (!ftrace_graph_count && !*pos) 2499 if (!ftrace_graph_filter_enabled && !*pos)
2524 return (void *)1; 2500 return (void *)1;
2525 2501
2526 p = g_next(m, p, pos); 2502 return __g_next(m, pos);
2527
2528 return p;
2529} 2503}
2530 2504
2531static void g_stop(struct seq_file *m, void *p) 2505static void g_stop(struct seq_file *m, void *p)
@@ -2536,7 +2510,6 @@ static void g_stop(struct seq_file *m, void *p)
2536static int g_show(struct seq_file *m, void *v) 2510static int g_show(struct seq_file *m, void *v)
2537{ 2511{
2538 unsigned long *ptr = v; 2512 unsigned long *ptr = v;
2539 char str[KSYM_SYMBOL_LEN];
2540 2513
2541 if (!ptr) 2514 if (!ptr)
2542 return 0; 2515 return 0;
@@ -2546,14 +2519,12 @@ static int g_show(struct seq_file *m, void *v)
2546 return 0; 2519 return 0;
2547 } 2520 }
2548 2521
2549 kallsyms_lookup(*ptr, NULL, NULL, NULL, str); 2522 seq_printf(m, "%ps\n", (void *)*ptr);
2550
2551 seq_printf(m, "%s\n", str);
2552 2523
2553 return 0; 2524 return 0;
2554} 2525}
2555 2526
2556static struct seq_operations ftrace_graph_seq_ops = { 2527static const struct seq_operations ftrace_graph_seq_ops = {
2557 .start = g_start, 2528 .start = g_start,
2558 .next = g_next, 2529 .next = g_next,
2559 .stop = g_stop, 2530 .stop = g_stop,
@@ -2570,31 +2541,34 @@ ftrace_graph_open(struct inode *inode, struct file *file)
2570 2541
2571 mutex_lock(&graph_lock); 2542 mutex_lock(&graph_lock);
2572 if ((file->f_mode & FMODE_WRITE) && 2543 if ((file->f_mode & FMODE_WRITE) &&
2573 !(file->f_flags & O_APPEND)) { 2544 (file->f_flags & O_TRUNC)) {
2545 ftrace_graph_filter_enabled = 0;
2574 ftrace_graph_count = 0; 2546 ftrace_graph_count = 0;
2575 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); 2547 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
2576 } 2548 }
2549 mutex_unlock(&graph_lock);
2577 2550
2578 if (file->f_mode & FMODE_READ) { 2551 if (file->f_mode & FMODE_READ)
2579 ret = seq_open(file, &ftrace_graph_seq_ops); 2552 ret = seq_open(file, &ftrace_graph_seq_ops);
2580 if (!ret) {
2581 struct seq_file *m = file->private_data;
2582 m->private = ftrace_graph_funcs;
2583 }
2584 } else
2585 file->private_data = ftrace_graph_funcs;
2586 mutex_unlock(&graph_lock);
2587 2553
2588 return ret; 2554 return ret;
2589} 2555}
2590 2556
2591static int 2557static int
2558ftrace_graph_release(struct inode *inode, struct file *file)
2559{
2560 if (file->f_mode & FMODE_READ)
2561 seq_release(inode, file);
2562 return 0;
2563}
2564
2565static int
2592ftrace_set_func(unsigned long *array, int *idx, char *buffer) 2566ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2593{ 2567{
2594 struct dyn_ftrace *rec; 2568 struct dyn_ftrace *rec;
2595 struct ftrace_page *pg; 2569 struct ftrace_page *pg;
2596 int search_len; 2570 int search_len;
2597 int found = 0; 2571 int fail = 1;
2598 int type, not; 2572 int type, not;
2599 char *search; 2573 char *search;
2600 bool exists; 2574 bool exists;
@@ -2604,122 +2578,99 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2604 return -ENODEV; 2578 return -ENODEV;
2605 2579
2606 /* decode regex */ 2580 /* decode regex */
2607 type = ftrace_setup_glob(buffer, strlen(buffer), &search, &not); 2581 type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
2608 if (not) 2582 if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
2609 return -EINVAL; 2583 return -EBUSY;
2610 2584
2611 search_len = strlen(search); 2585 search_len = strlen(search);
2612 2586
2613 mutex_lock(&ftrace_lock); 2587 mutex_lock(&ftrace_lock);
2614 do_for_each_ftrace_rec(pg, rec) { 2588 do_for_each_ftrace_rec(pg, rec) {
2615 2589
2616 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2617 break;
2618
2619 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) 2590 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
2620 continue; 2591 continue;
2621 2592
2622 if (ftrace_match_record(rec, search, search_len, type)) { 2593 if (ftrace_match_record(rec, search, search_len, type)) {
2623 /* ensure it is not already in the array */ 2594 /* if it is in the array */
2624 exists = false; 2595 exists = false;
2625 for (i = 0; i < *idx; i++) 2596 for (i = 0; i < *idx; i++) {
2626 if (array[i] == rec->ip) { 2597 if (array[i] == rec->ip) {
2627 exists = true; 2598 exists = true;
2628 break; 2599 break;
2629 } 2600 }
2630 if (!exists) { 2601 }
2631 array[(*idx)++] = rec->ip; 2602
2632 found = 1; 2603 if (!not) {
2604 fail = 0;
2605 if (!exists) {
2606 array[(*idx)++] = rec->ip;
2607 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2608 goto out;
2609 }
2610 } else {
2611 if (exists) {
2612 array[i] = array[--(*idx)];
2613 array[*idx] = 0;
2614 fail = 0;
2615 }
2633 } 2616 }
2634 } 2617 }
2635 } while_for_each_ftrace_rec(); 2618 } while_for_each_ftrace_rec();
2636 2619out:
2637 mutex_unlock(&ftrace_lock); 2620 mutex_unlock(&ftrace_lock);
2638 2621
2639 return found ? 0 : -EINVAL; 2622 if (fail)
2623 return -EINVAL;
2624
2625 ftrace_graph_filter_enabled = 1;
2626 return 0;
2640} 2627}
2641 2628
2642static ssize_t 2629static ssize_t
2643ftrace_graph_write(struct file *file, const char __user *ubuf, 2630ftrace_graph_write(struct file *file, const char __user *ubuf,
2644 size_t cnt, loff_t *ppos) 2631 size_t cnt, loff_t *ppos)
2645{ 2632{
2646 unsigned char buffer[FTRACE_BUFF_MAX+1]; 2633 struct trace_parser parser;
2647 unsigned long *array; 2634 ssize_t read, ret;
2648 size_t read = 0;
2649 ssize_t ret;
2650 int index = 0;
2651 char ch;
2652 2635
2653 if (!cnt || cnt < 0) 2636 if (!cnt)
2654 return 0; 2637 return 0;
2655 2638
2656 mutex_lock(&graph_lock); 2639 mutex_lock(&graph_lock);
2657 2640
2658 if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) { 2641 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
2659 ret = -EBUSY; 2642 ret = -ENOMEM;
2660 goto out; 2643 goto out_unlock;
2661 } 2644 }
2662 2645
2663 if (file->f_mode & FMODE_READ) { 2646 read = trace_get_user(&parser, ubuf, cnt, ppos);
2664 struct seq_file *m = file->private_data;
2665 array = m->private;
2666 } else
2667 array = file->private_data;
2668 2647
2669 ret = get_user(ch, ubuf++); 2648 if (read >= 0 && trace_parser_loaded((&parser))) {
2670 if (ret) 2649 parser.buffer[parser.idx] = 0;
2671 goto out;
2672 read++;
2673 cnt--;
2674 2650
2675 /* skip white space */ 2651 /* we allow only one expression at a time */
2676 while (cnt && isspace(ch)) { 2652 ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
2677 ret = get_user(ch, ubuf++); 2653 parser.buffer);
2678 if (ret) 2654 if (ret)
2679 goto out; 2655 goto out_free;
2680 read++;
2681 cnt--;
2682 }
2683
2684 if (isspace(ch)) {
2685 *ppos += read;
2686 ret = read;
2687 goto out;
2688 }
2689
2690 while (cnt && !isspace(ch)) {
2691 if (index < FTRACE_BUFF_MAX)
2692 buffer[index++] = ch;
2693 else {
2694 ret = -EINVAL;
2695 goto out;
2696 }
2697 ret = get_user(ch, ubuf++);
2698 if (ret)
2699 goto out;
2700 read++;
2701 cnt--;
2702 } 2656 }
2703 buffer[index] = 0;
2704
2705 /* we allow only one expression at a time */
2706 ret = ftrace_set_func(array, &ftrace_graph_count, buffer);
2707 if (ret)
2708 goto out;
2709
2710 file->f_pos += read;
2711 2657
2712 ret = read; 2658 ret = read;
2713 out: 2659
2660out_free:
2661 trace_parser_put(&parser);
2662out_unlock:
2714 mutex_unlock(&graph_lock); 2663 mutex_unlock(&graph_lock);
2715 2664
2716 return ret; 2665 return ret;
2717} 2666}
2718 2667
2719static const struct file_operations ftrace_graph_fops = { 2668static const struct file_operations ftrace_graph_fops = {
2720 .open = ftrace_graph_open, 2669 .open = ftrace_graph_open,
2721 .read = seq_read, 2670 .read = seq_read,
2722 .write = ftrace_graph_write, 2671 .write = ftrace_graph_write,
2672 .release = ftrace_graph_release,
2673 .llseek = seq_lseek,
2723}; 2674};
2724#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 2675#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2725 2676
@@ -2747,7 +2698,7 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
2747 return 0; 2698 return 0;
2748} 2699}
2749 2700
2750static int ftrace_convert_nops(struct module *mod, 2701static int ftrace_process_locs(struct module *mod,
2751 unsigned long *start, 2702 unsigned long *start,
2752 unsigned long *end) 2703 unsigned long *end)
2753{ 2704{
@@ -2780,19 +2731,17 @@ static int ftrace_convert_nops(struct module *mod,
2780} 2731}
2781 2732
2782#ifdef CONFIG_MODULES 2733#ifdef CONFIG_MODULES
2783void ftrace_release(void *start, void *end) 2734void ftrace_release_mod(struct module *mod)
2784{ 2735{
2785 struct dyn_ftrace *rec; 2736 struct dyn_ftrace *rec;
2786 struct ftrace_page *pg; 2737 struct ftrace_page *pg;
2787 unsigned long s = (unsigned long)start;
2788 unsigned long e = (unsigned long)end;
2789 2738
2790 if (ftrace_disabled || !start || start == end) 2739 if (ftrace_disabled)
2791 return; 2740 return;
2792 2741
2793 mutex_lock(&ftrace_lock); 2742 mutex_lock(&ftrace_lock);
2794 do_for_each_ftrace_rec(pg, rec) { 2743 do_for_each_ftrace_rec(pg, rec) {
2795 if ((rec->ip >= s) && (rec->ip < e)) { 2744 if (within_module_core(rec->ip, mod)) {
2796 /* 2745 /*
2797 * rec->ip is changed in ftrace_free_rec() 2746 * rec->ip is changed in ftrace_free_rec()
2798 * It should not between s and e if record was freed. 2747 * It should not between s and e if record was freed.
@@ -2809,7 +2758,7 @@ static void ftrace_init_module(struct module *mod,
2809{ 2758{
2810 if (ftrace_disabled || start == end) 2759 if (ftrace_disabled || start == end)
2811 return; 2760 return;
2812 ftrace_convert_nops(mod, start, end); 2761 ftrace_process_locs(mod, start, end);
2813} 2762}
2814 2763
2815static int ftrace_module_notify(struct notifier_block *self, 2764static int ftrace_module_notify(struct notifier_block *self,
@@ -2824,9 +2773,7 @@ static int ftrace_module_notify(struct notifier_block *self,
2824 mod->num_ftrace_callsites); 2773 mod->num_ftrace_callsites);
2825 break; 2774 break;
2826 case MODULE_STATE_GOING: 2775 case MODULE_STATE_GOING:
2827 ftrace_release(mod->ftrace_callsites, 2776 ftrace_release_mod(mod);
2828 mod->ftrace_callsites +
2829 mod->num_ftrace_callsites);
2830 break; 2777 break;
2831 } 2778 }
2832 2779
@@ -2872,7 +2819,7 @@ void __init ftrace_init(void)
2872 2819
2873 last_ftrace_enabled = ftrace_enabled = 1; 2820 last_ftrace_enabled = ftrace_enabled = 1;
2874 2821
2875 ret = ftrace_convert_nops(NULL, 2822 ret = ftrace_process_locs(NULL,
2876 __start_mcount_loc, 2823 __start_mcount_loc,
2877 __stop_mcount_loc); 2824 __stop_mcount_loc);
2878 2825
@@ -2905,23 +2852,6 @@ static inline void ftrace_startup_enable(int command) { }
2905# define ftrace_shutdown_sysctl() do { } while (0) 2852# define ftrace_shutdown_sysctl() do { } while (0)
2906#endif /* CONFIG_DYNAMIC_FTRACE */ 2853#endif /* CONFIG_DYNAMIC_FTRACE */
2907 2854
2908static ssize_t
2909ftrace_pid_read(struct file *file, char __user *ubuf,
2910 size_t cnt, loff_t *ppos)
2911{
2912 char buf[64];
2913 int r;
2914
2915 if (ftrace_pid_trace == ftrace_swapper_pid)
2916 r = sprintf(buf, "swapper tasks\n");
2917 else if (ftrace_pid_trace)
2918 r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace));
2919 else
2920 r = sprintf(buf, "no pid\n");
2921
2922 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2923}
2924
2925static void clear_ftrace_swapper(void) 2855static void clear_ftrace_swapper(void)
2926{ 2856{
2927 struct task_struct *p; 2857 struct task_struct *p;
@@ -2972,14 +2902,12 @@ static void set_ftrace_pid(struct pid *pid)
2972 rcu_read_unlock(); 2902 rcu_read_unlock();
2973} 2903}
2974 2904
2975static void clear_ftrace_pid_task(struct pid **pid) 2905static void clear_ftrace_pid_task(struct pid *pid)
2976{ 2906{
2977 if (*pid == ftrace_swapper_pid) 2907 if (pid == ftrace_swapper_pid)
2978 clear_ftrace_swapper(); 2908 clear_ftrace_swapper();
2979 else 2909 else
2980 clear_ftrace_pid(*pid); 2910 clear_ftrace_pid(pid);
2981
2982 *pid = NULL;
2983} 2911}
2984 2912
2985static void set_ftrace_pid_task(struct pid *pid) 2913static void set_ftrace_pid_task(struct pid *pid)
@@ -2990,74 +2918,184 @@ static void set_ftrace_pid_task(struct pid *pid)
2990 set_ftrace_pid(pid); 2918 set_ftrace_pid(pid);
2991} 2919}
2992 2920
2993static ssize_t 2921static int ftrace_pid_add(int p)
2994ftrace_pid_write(struct file *filp, const char __user *ubuf,
2995 size_t cnt, loff_t *ppos)
2996{ 2922{
2997 struct pid *pid; 2923 struct pid *pid;
2998 char buf[64]; 2924 struct ftrace_pid *fpid;
2999 long val; 2925 int ret = -EINVAL;
3000 int ret;
3001 2926
3002 if (cnt >= sizeof(buf)) 2927 mutex_lock(&ftrace_lock);
3003 return -EINVAL;
3004 2928
3005 if (copy_from_user(&buf, ubuf, cnt)) 2929 if (!p)
3006 return -EFAULT; 2930 pid = ftrace_swapper_pid;
2931 else
2932 pid = find_get_pid(p);
3007 2933
3008 buf[cnt] = 0; 2934 if (!pid)
2935 goto out;
3009 2936
3010 ret = strict_strtol(buf, 10, &val); 2937 ret = 0;
3011 if (ret < 0)
3012 return ret;
3013 2938
3014 mutex_lock(&ftrace_lock); 2939 list_for_each_entry(fpid, &ftrace_pids, list)
3015 if (val < 0) { 2940 if (fpid->pid == pid)
3016 /* disable pid tracing */ 2941 goto out_put;
3017 if (!ftrace_pid_trace)
3018 goto out;
3019 2942
3020 clear_ftrace_pid_task(&ftrace_pid_trace); 2943 ret = -ENOMEM;
3021 2944
3022 } else { 2945 fpid = kmalloc(sizeof(*fpid), GFP_KERNEL);
3023 /* swapper task is special */ 2946 if (!fpid)
3024 if (!val) { 2947 goto out_put;
3025 pid = ftrace_swapper_pid;
3026 if (pid == ftrace_pid_trace)
3027 goto out;
3028 } else {
3029 pid = find_get_pid(val);
3030 2948
3031 if (pid == ftrace_pid_trace) { 2949 list_add(&fpid->list, &ftrace_pids);
3032 put_pid(pid); 2950 fpid->pid = pid;
3033 goto out;
3034 }
3035 }
3036 2951
3037 if (ftrace_pid_trace) 2952 set_ftrace_pid_task(pid);
3038 clear_ftrace_pid_task(&ftrace_pid_trace);
3039 2953
3040 if (!pid) 2954 ftrace_update_pid_func();
3041 goto out; 2955 ftrace_startup_enable(0);
2956
2957 mutex_unlock(&ftrace_lock);
2958 return 0;
2959
2960out_put:
2961 if (pid != ftrace_swapper_pid)
2962 put_pid(pid);
2963
2964out:
2965 mutex_unlock(&ftrace_lock);
2966 return ret;
2967}
2968
2969static void ftrace_pid_reset(void)
2970{
2971 struct ftrace_pid *fpid, *safe;
2972
2973 mutex_lock(&ftrace_lock);
2974 list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) {
2975 struct pid *pid = fpid->pid;
3042 2976
3043 ftrace_pid_trace = pid; 2977 clear_ftrace_pid_task(pid);
3044 2978
3045 set_ftrace_pid_task(ftrace_pid_trace); 2979 list_del(&fpid->list);
2980 kfree(fpid);
3046 } 2981 }
3047 2982
3048 /* update the function call */
3049 ftrace_update_pid_func(); 2983 ftrace_update_pid_func();
3050 ftrace_startup_enable(0); 2984 ftrace_startup_enable(0);
3051 2985
3052 out:
3053 mutex_unlock(&ftrace_lock); 2986 mutex_unlock(&ftrace_lock);
2987}
3054 2988
3055 return cnt; 2989static void *fpid_start(struct seq_file *m, loff_t *pos)
2990{
2991 mutex_lock(&ftrace_lock);
2992
2993 if (list_empty(&ftrace_pids) && (!*pos))
2994 return (void *) 1;
2995
2996 return seq_list_start(&ftrace_pids, *pos);
2997}
2998
2999static void *fpid_next(struct seq_file *m, void *v, loff_t *pos)
3000{
3001 if (v == (void *)1)
3002 return NULL;
3003
3004 return seq_list_next(v, &ftrace_pids, pos);
3005}
3006
3007static void fpid_stop(struct seq_file *m, void *p)
3008{
3009 mutex_unlock(&ftrace_lock);
3010}
3011
3012static int fpid_show(struct seq_file *m, void *v)
3013{
3014 const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list);
3015
3016 if (v == (void *)1) {
3017 seq_printf(m, "no pid\n");
3018 return 0;
3019 }
3020
3021 if (fpid->pid == ftrace_swapper_pid)
3022 seq_printf(m, "swapper tasks\n");
3023 else
3024 seq_printf(m, "%u\n", pid_vnr(fpid->pid));
3025
3026 return 0;
3027}
3028
3029static const struct seq_operations ftrace_pid_sops = {
3030 .start = fpid_start,
3031 .next = fpid_next,
3032 .stop = fpid_stop,
3033 .show = fpid_show,
3034};
3035
3036static int
3037ftrace_pid_open(struct inode *inode, struct file *file)
3038{
3039 int ret = 0;
3040
3041 if ((file->f_mode & FMODE_WRITE) &&
3042 (file->f_flags & O_TRUNC))
3043 ftrace_pid_reset();
3044
3045 if (file->f_mode & FMODE_READ)
3046 ret = seq_open(file, &ftrace_pid_sops);
3047
3048 return ret;
3049}
3050
3051static ssize_t
3052ftrace_pid_write(struct file *filp, const char __user *ubuf,
3053 size_t cnt, loff_t *ppos)
3054{
3055 char buf[64], *tmp;
3056 long val;
3057 int ret;
3058
3059 if (cnt >= sizeof(buf))
3060 return -EINVAL;
3061
3062 if (copy_from_user(&buf, ubuf, cnt))
3063 return -EFAULT;
3064
3065 buf[cnt] = 0;
3066
3067 /*
3068 * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid"
3069 * to clean the filter quietly.
3070 */
3071 tmp = strstrip(buf);
3072 if (strlen(tmp) == 0)
3073 return 1;
3074
3075 ret = strict_strtol(tmp, 10, &val);
3076 if (ret < 0)
3077 return ret;
3078
3079 ret = ftrace_pid_add(val);
3080
3081 return ret ? ret : cnt;
3082}
3083
3084static int
3085ftrace_pid_release(struct inode *inode, struct file *file)
3086{
3087 if (file->f_mode & FMODE_READ)
3088 seq_release(inode, file);
3089
3090 return 0;
3056} 3091}
3057 3092
3058static const struct file_operations ftrace_pid_fops = { 3093static const struct file_operations ftrace_pid_fops = {
3059 .read = ftrace_pid_read, 3094 .open = ftrace_pid_open,
3060 .write = ftrace_pid_write, 3095 .write = ftrace_pid_write,
3096 .read = seq_read,
3097 .llseek = seq_lseek,
3098 .release = ftrace_pid_release,
3061}; 3099};
3062 3100
3063static __init int ftrace_init_debugfs(void) 3101static __init int ftrace_init_debugfs(void)
@@ -3140,7 +3178,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
3140 3178
3141int 3179int
3142ftrace_enable_sysctl(struct ctl_table *table, int write, 3180ftrace_enable_sysctl(struct ctl_table *table, int write,
3143 struct file *file, void __user *buffer, size_t *lenp, 3181 void __user *buffer, size_t *lenp,
3144 loff_t *ppos) 3182 loff_t *ppos)
3145{ 3183{
3146 int ret; 3184 int ret;
@@ -3150,12 +3188,12 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
3150 3188
3151 mutex_lock(&ftrace_lock); 3189 mutex_lock(&ftrace_lock);
3152 3190
3153 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 3191 ret = proc_dointvec(table, write, buffer, lenp, ppos);
3154 3192
3155 if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) 3193 if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
3156 goto out; 3194 goto out;
3157 3195
3158 last_ftrace_enabled = ftrace_enabled; 3196 last_ftrace_enabled = !!ftrace_enabled;
3159 3197
3160 if (ftrace_enabled) { 3198 if (ftrace_enabled) {
3161 3199
@@ -3243,8 +3281,8 @@ free:
3243} 3281}
3244 3282
3245static void 3283static void
3246ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev, 3284ftrace_graph_probe_sched_switch(void *ignore,
3247 struct task_struct *next) 3285 struct task_struct *prev, struct task_struct *next)
3248{ 3286{
3249 unsigned long long timestamp; 3287 unsigned long long timestamp;
3250 int index; 3288 int index;
@@ -3298,7 +3336,7 @@ static int start_graph_tracing(void)
3298 } while (ret == -EAGAIN); 3336 } while (ret == -EAGAIN);
3299 3337
3300 if (!ret) { 3338 if (!ret) {
3301 ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch); 3339 ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
3302 if (ret) 3340 if (ret)
3303 pr_info("ftrace_graph: Couldn't activate tracepoint" 3341 pr_info("ftrace_graph: Couldn't activate tracepoint"
3304 " probe to kernel_sched_switch\n"); 3342 " probe to kernel_sched_switch\n");
@@ -3370,11 +3408,11 @@ void unregister_ftrace_graph(void)
3370 goto out; 3408 goto out;
3371 3409
3372 ftrace_graph_active--; 3410 ftrace_graph_active--;
3373 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
3374 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 3411 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
3375 ftrace_graph_entry = ftrace_graph_entry_stub; 3412 ftrace_graph_entry = ftrace_graph_entry_stub;
3376 ftrace_shutdown(FTRACE_STOP_FUNC_RET); 3413 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
3377 unregister_pm_notifier(&ftrace_suspend_notifier); 3414 unregister_pm_notifier(&ftrace_suspend_notifier);
3415 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
3378 3416
3379 out: 3417 out:
3380 mutex_unlock(&ftrace_lock); 3418 mutex_unlock(&ftrace_lock);
@@ -3385,6 +3423,7 @@ void ftrace_graph_init_task(struct task_struct *t)
3385{ 3423{
3386 /* Make sure we do not use the parent ret_stack */ 3424 /* Make sure we do not use the parent ret_stack */
3387 t->ret_stack = NULL; 3425 t->ret_stack = NULL;
3426 t->curr_ret_stack = -1;
3388 3427
3389 if (ftrace_graph_active) { 3428 if (ftrace_graph_active) {
3390 struct ftrace_ret_stack *ret_stack; 3429 struct ftrace_ret_stack *ret_stack;
@@ -3394,7 +3433,6 @@ void ftrace_graph_init_task(struct task_struct *t)
3394 GFP_KERNEL); 3433 GFP_KERNEL);
3395 if (!ret_stack) 3434 if (!ret_stack)
3396 return; 3435 return;
3397 t->curr_ret_stack = -1;
3398 atomic_set(&t->tracing_graph_pause, 0); 3436 atomic_set(&t->tracing_graph_pause, 0);
3399 atomic_set(&t->trace_overrun, 0); 3437 atomic_set(&t->trace_overrun, 0);
3400 t->ftrace_timestamp = 0; 3438 t->ftrace_timestamp = 0;
@@ -3420,4 +3458,3 @@ void ftrace_graph_stop(void)
3420 ftrace_stop(); 3458 ftrace_stop();
3421} 3459}
3422#endif 3460#endif
3423
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
deleted file mode 100644
index 1edaa9516e81..000000000000
--- a/kernel/trace/kmemtrace.c
+++ /dev/null
@@ -1,468 +0,0 @@
1/*
2 * Memory allocator tracing
3 *
4 * Copyright (C) 2008 Eduard - Gabriel Munteanu
5 * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
6 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
7 */
8
9#include <linux/tracepoint.h>
10#include <linux/seq_file.h>
11#include <linux/debugfs.h>
12#include <linux/dcache.h>
13#include <linux/fs.h>
14
15#include <linux/kmemtrace.h>
16
17#include "trace_output.h"
18#include "trace.h"
19
20/* Select an alternative, minimalistic output than the original one */
21#define TRACE_KMEM_OPT_MINIMAL 0x1
22
23static struct tracer_opt kmem_opts[] = {
24 /* Default disable the minimalistic output */
25 { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
26 { }
27};
28
29static struct tracer_flags kmem_tracer_flags = {
30 .val = 0,
31 .opts = kmem_opts
32};
33
34static struct trace_array *kmemtrace_array;
35
36/* Trace allocations */
37static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
38 unsigned long call_site,
39 const void *ptr,
40 size_t bytes_req,
41 size_t bytes_alloc,
42 gfp_t gfp_flags,
43 int node)
44{
45 struct ftrace_event_call *call = &event_kmem_alloc;
46 struct trace_array *tr = kmemtrace_array;
47 struct kmemtrace_alloc_entry *entry;
48 struct ring_buffer_event *event;
49
50 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
51 if (!event)
52 return;
53
54 entry = ring_buffer_event_data(event);
55 tracing_generic_entry_update(&entry->ent, 0, 0);
56
57 entry->ent.type = TRACE_KMEM_ALLOC;
58 entry->type_id = type_id;
59 entry->call_site = call_site;
60 entry->ptr = ptr;
61 entry->bytes_req = bytes_req;
62 entry->bytes_alloc = bytes_alloc;
63 entry->gfp_flags = gfp_flags;
64 entry->node = node;
65
66 if (!filter_check_discard(call, entry, tr->buffer, event))
67 ring_buffer_unlock_commit(tr->buffer, event);
68
69 trace_wake_up();
70}
71
72static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
73 unsigned long call_site,
74 const void *ptr)
75{
76 struct ftrace_event_call *call = &event_kmem_free;
77 struct trace_array *tr = kmemtrace_array;
78 struct kmemtrace_free_entry *entry;
79 struct ring_buffer_event *event;
80
81 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
82 if (!event)
83 return;
84 entry = ring_buffer_event_data(event);
85 tracing_generic_entry_update(&entry->ent, 0, 0);
86
87 entry->ent.type = TRACE_KMEM_FREE;
88 entry->type_id = type_id;
89 entry->call_site = call_site;
90 entry->ptr = ptr;
91
92 if (!filter_check_discard(call, entry, tr->buffer, event))
93 ring_buffer_unlock_commit(tr->buffer, event);
94
95 trace_wake_up();
96}
97
98static void kmemtrace_kmalloc(unsigned long call_site,
99 const void *ptr,
100 size_t bytes_req,
101 size_t bytes_alloc,
102 gfp_t gfp_flags)
103{
104 kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
105 bytes_req, bytes_alloc, gfp_flags, -1);
106}
107
108static void kmemtrace_kmem_cache_alloc(unsigned long call_site,
109 const void *ptr,
110 size_t bytes_req,
111 size_t bytes_alloc,
112 gfp_t gfp_flags)
113{
114 kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
115 bytes_req, bytes_alloc, gfp_flags, -1);
116}
117
118static void kmemtrace_kmalloc_node(unsigned long call_site,
119 const void *ptr,
120 size_t bytes_req,
121 size_t bytes_alloc,
122 gfp_t gfp_flags,
123 int node)
124{
125 kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
126 bytes_req, bytes_alloc, gfp_flags, node);
127}
128
129static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site,
130 const void *ptr,
131 size_t bytes_req,
132 size_t bytes_alloc,
133 gfp_t gfp_flags,
134 int node)
135{
136 kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
137 bytes_req, bytes_alloc, gfp_flags, node);
138}
139
140static void kmemtrace_kfree(unsigned long call_site, const void *ptr)
141{
142 kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
143}
144
145static void kmemtrace_kmem_cache_free(unsigned long call_site, const void *ptr)
146{
147 kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
148}
149
150static int kmemtrace_start_probes(void)
151{
152 int err;
153
154 err = register_trace_kmalloc(kmemtrace_kmalloc);
155 if (err)
156 return err;
157 err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc);
158 if (err)
159 return err;
160 err = register_trace_kmalloc_node(kmemtrace_kmalloc_node);
161 if (err)
162 return err;
163 err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node);
164 if (err)
165 return err;
166 err = register_trace_kfree(kmemtrace_kfree);
167 if (err)
168 return err;
169 err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free);
170
171 return err;
172}
173
174static void kmemtrace_stop_probes(void)
175{
176 unregister_trace_kmalloc(kmemtrace_kmalloc);
177 unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc);
178 unregister_trace_kmalloc_node(kmemtrace_kmalloc_node);
179 unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node);
180 unregister_trace_kfree(kmemtrace_kfree);
181 unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free);
182}
183
184static int kmem_trace_init(struct trace_array *tr)
185{
186 int cpu;
187 kmemtrace_array = tr;
188
189 for_each_cpu(cpu, cpu_possible_mask)
190 tracing_reset(tr, cpu);
191
192 kmemtrace_start_probes();
193
194 return 0;
195}
196
197static void kmem_trace_reset(struct trace_array *tr)
198{
199 kmemtrace_stop_probes();
200}
201
202static void kmemtrace_headers(struct seq_file *s)
203{
204 /* Don't need headers for the original kmemtrace output */
205 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
206 return;
207
208 seq_printf(s, "#\n");
209 seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
210 " POINTER NODE CALLER\n");
211 seq_printf(s, "# FREE | | | | "
212 " | | | |\n");
213 seq_printf(s, "# |\n\n");
214}
215
216/*
217 * The following functions give the original output from kmemtrace,
218 * plus the origin CPU, since reordering occurs in-kernel now.
219 */
220
221#define KMEMTRACE_USER_ALLOC 0
222#define KMEMTRACE_USER_FREE 1
223
224struct kmemtrace_user_event {
225 u8 event_id;
226 u8 type_id;
227 u16 event_size;
228 u32 cpu;
229 u64 timestamp;
230 unsigned long call_site;
231 unsigned long ptr;
232};
233
234struct kmemtrace_user_event_alloc {
235 size_t bytes_req;
236 size_t bytes_alloc;
237 unsigned gfp_flags;
238 int node;
239};
240
241static enum print_line_t
242kmemtrace_print_alloc_user(struct trace_iterator *iter,
243 struct kmemtrace_alloc_entry *entry)
244{
245 struct kmemtrace_user_event_alloc *ev_alloc;
246 struct trace_seq *s = &iter->seq;
247 struct kmemtrace_user_event *ev;
248
249 ev = trace_seq_reserve(s, sizeof(*ev));
250 if (!ev)
251 return TRACE_TYPE_PARTIAL_LINE;
252
253 ev->event_id = KMEMTRACE_USER_ALLOC;
254 ev->type_id = entry->type_id;
255 ev->event_size = sizeof(*ev) + sizeof(*ev_alloc);
256 ev->cpu = iter->cpu;
257 ev->timestamp = iter->ts;
258 ev->call_site = entry->call_site;
259 ev->ptr = (unsigned long)entry->ptr;
260
261 ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
262 if (!ev_alloc)
263 return TRACE_TYPE_PARTIAL_LINE;
264
265 ev_alloc->bytes_req = entry->bytes_req;
266 ev_alloc->bytes_alloc = entry->bytes_alloc;
267 ev_alloc->gfp_flags = entry->gfp_flags;
268 ev_alloc->node = entry->node;
269
270 return TRACE_TYPE_HANDLED;
271}
272
273static enum print_line_t
274kmemtrace_print_free_user(struct trace_iterator *iter,
275 struct kmemtrace_free_entry *entry)
276{
277 struct trace_seq *s = &iter->seq;
278 struct kmemtrace_user_event *ev;
279
280 ev = trace_seq_reserve(s, sizeof(*ev));
281 if (!ev)
282 return TRACE_TYPE_PARTIAL_LINE;
283
284 ev->event_id = KMEMTRACE_USER_FREE;
285 ev->type_id = entry->type_id;
286 ev->event_size = sizeof(*ev);
287 ev->cpu = iter->cpu;
288 ev->timestamp = iter->ts;
289 ev->call_site = entry->call_site;
290 ev->ptr = (unsigned long)entry->ptr;
291
292 return TRACE_TYPE_HANDLED;
293}
294
295/* The two other following provide a more minimalistic output */
296static enum print_line_t
297kmemtrace_print_alloc_compress(struct trace_iterator *iter,
298 struct kmemtrace_alloc_entry *entry)
299{
300 struct trace_seq *s = &iter->seq;
301 int ret;
302
303 /* Alloc entry */
304 ret = trace_seq_printf(s, " + ");
305 if (!ret)
306 return TRACE_TYPE_PARTIAL_LINE;
307
308 /* Type */
309 switch (entry->type_id) {
310 case KMEMTRACE_TYPE_KMALLOC:
311 ret = trace_seq_printf(s, "K ");
312 break;
313 case KMEMTRACE_TYPE_CACHE:
314 ret = trace_seq_printf(s, "C ");
315 break;
316 case KMEMTRACE_TYPE_PAGES:
317 ret = trace_seq_printf(s, "P ");
318 break;
319 default:
320 ret = trace_seq_printf(s, "? ");
321 }
322
323 if (!ret)
324 return TRACE_TYPE_PARTIAL_LINE;
325
326 /* Requested */
327 ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
328 if (!ret)
329 return TRACE_TYPE_PARTIAL_LINE;
330
331 /* Allocated */
332 ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
333 if (!ret)
334 return TRACE_TYPE_PARTIAL_LINE;
335
336 /* Flags
337 * TODO: would be better to see the name of the GFP flag names
338 */
339 ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
340 if (!ret)
341 return TRACE_TYPE_PARTIAL_LINE;
342
343 /* Pointer to allocated */
344 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
345 if (!ret)
346 return TRACE_TYPE_PARTIAL_LINE;
347
348 /* Node */
349 ret = trace_seq_printf(s, "%4d ", entry->node);
350 if (!ret)
351 return TRACE_TYPE_PARTIAL_LINE;
352
353 /* Call site */
354 ret = seq_print_ip_sym(s, entry->call_site, 0);
355 if (!ret)
356 return TRACE_TYPE_PARTIAL_LINE;
357
358 if (!trace_seq_printf(s, "\n"))
359 return TRACE_TYPE_PARTIAL_LINE;
360
361 return TRACE_TYPE_HANDLED;
362}
363
364static enum print_line_t
365kmemtrace_print_free_compress(struct trace_iterator *iter,
366 struct kmemtrace_free_entry *entry)
367{
368 struct trace_seq *s = &iter->seq;
369 int ret;
370
371 /* Free entry */
372 ret = trace_seq_printf(s, " - ");
373 if (!ret)
374 return TRACE_TYPE_PARTIAL_LINE;
375
376 /* Type */
377 switch (entry->type_id) {
378 case KMEMTRACE_TYPE_KMALLOC:
379 ret = trace_seq_printf(s, "K ");
380 break;
381 case KMEMTRACE_TYPE_CACHE:
382 ret = trace_seq_printf(s, "C ");
383 break;
384 case KMEMTRACE_TYPE_PAGES:
385 ret = trace_seq_printf(s, "P ");
386 break;
387 default:
388 ret = trace_seq_printf(s, "? ");
389 }
390
391 if (!ret)
392 return TRACE_TYPE_PARTIAL_LINE;
393
394 /* Skip requested/allocated/flags */
395 ret = trace_seq_printf(s, " ");
396 if (!ret)
397 return TRACE_TYPE_PARTIAL_LINE;
398
399 /* Pointer to allocated */
400 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
401 if (!ret)
402 return TRACE_TYPE_PARTIAL_LINE;
403
404 /* Skip node */
405 ret = trace_seq_printf(s, " ");
406 if (!ret)
407 return TRACE_TYPE_PARTIAL_LINE;
408
409 /* Call site */
410 ret = seq_print_ip_sym(s, entry->call_site, 0);
411 if (!ret)
412 return TRACE_TYPE_PARTIAL_LINE;
413
414 if (!trace_seq_printf(s, "\n"))
415 return TRACE_TYPE_PARTIAL_LINE;
416
417 return TRACE_TYPE_HANDLED;
418}
419
420static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
421{
422 struct trace_entry *entry = iter->ent;
423
424 switch (entry->type) {
425 case TRACE_KMEM_ALLOC: {
426 struct kmemtrace_alloc_entry *field;
427
428 trace_assign_type(field, entry);
429 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
430 return kmemtrace_print_alloc_compress(iter, field);
431 else
432 return kmemtrace_print_alloc_user(iter, field);
433 }
434
435 case TRACE_KMEM_FREE: {
436 struct kmemtrace_free_entry *field;
437
438 trace_assign_type(field, entry);
439 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
440 return kmemtrace_print_free_compress(iter, field);
441 else
442 return kmemtrace_print_free_user(iter, field);
443 }
444
445 default:
446 return TRACE_TYPE_UNHANDLED;
447 }
448}
449
450static struct tracer kmem_tracer __read_mostly = {
451 .name = "kmemtrace",
452 .init = kmem_trace_init,
453 .reset = kmem_trace_reset,
454 .print_line = kmemtrace_print_line,
455 .print_header = kmemtrace_headers,
456 .flags = &kmem_tracer_flags
457};
458
459void kmemtrace_init(void)
460{
461 /* earliest opportunity to start kmem tracing */
462}
463
464static int __init init_kmem_tracer(void)
465{
466 return register_tracer(&kmem_tracer);
467}
468device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
new file mode 100644
index 000000000000..f55fcf61b223
--- /dev/null
+++ b/kernel/trace/power-traces.c
@@ -0,0 +1,20 @@
1/*
2 * Power trace points
3 *
4 * Copyright (C) 2009 Arjan van de Ven <arjan@linux.intel.com>
5 */
6
7#include <linux/string.h>
8#include <linux/types.h>
9#include <linux/workqueue.h>
10#include <linux/sched.h>
11#include <linux/module.h>
12
13#define CREATE_TRACE_POINTS
14#include <trace/events/power.h>
15
16#ifdef EVENT_POWER_TRACING_DEPRECATED
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
18#endif
19EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
20
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 04dac2638258..bd1c35a4fbcc 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -14,12 +14,14 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/percpu.h> 15#include <linux/percpu.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/slab.h>
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/hash.h> 19#include <linux/hash.h>
19#include <linux/list.h> 20#include <linux/list.h>
20#include <linux/cpu.h> 21#include <linux/cpu.h>
21#include <linux/fs.h> 22#include <linux/fs.h>
22 23
24#include <asm/local.h>
23#include "trace.h" 25#include "trace.h"
24 26
25/* 27/*
@@ -201,13 +203,19 @@ int tracing_is_on(void)
201} 203}
202EXPORT_SYMBOL_GPL(tracing_is_on); 204EXPORT_SYMBOL_GPL(tracing_is_on);
203 205
204#include "trace.h"
205
206#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) 206#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
207#define RB_ALIGNMENT 4U 207#define RB_ALIGNMENT 4U
208#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 208#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
209#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ 209#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
210 210
211#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
212# define RB_FORCE_8BYTE_ALIGNMENT 0
213# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
214#else
215# define RB_FORCE_8BYTE_ALIGNMENT 1
216# define RB_ARCH_ALIGNMENT 8U
217#endif
218
211/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ 219/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
212#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX 220#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
213 221
@@ -216,19 +224,17 @@ enum {
216 RB_LEN_TIME_STAMP = 16, 224 RB_LEN_TIME_STAMP = 16,
217}; 225};
218 226
219static inline int rb_null_event(struct ring_buffer_event *event) 227#define skip_time_extend(event) \
220{ 228 ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
221 return event->type_len == RINGBUF_TYPE_PADDING
222 && event->time_delta == 0;
223}
224 229
225static inline int rb_discarded_event(struct ring_buffer_event *event) 230static inline int rb_null_event(struct ring_buffer_event *event)
226{ 231{
227 return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta; 232 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
228} 233}
229 234
230static void rb_event_set_padding(struct ring_buffer_event *event) 235static void rb_event_set_padding(struct ring_buffer_event *event)
231{ 236{
237 /* padding has a NULL time_delta */
232 event->type_len = RINGBUF_TYPE_PADDING; 238 event->type_len = RINGBUF_TYPE_PADDING;
233 event->time_delta = 0; 239 event->time_delta = 0;
234} 240}
@@ -245,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
245 return length + RB_EVNT_HDR_SIZE; 251 return length + RB_EVNT_HDR_SIZE;
246} 252}
247 253
248/* inline for ring buffer fast paths */ 254/*
249static unsigned 255 * Return the length of the given event. Will return
256 * the length of the time extend if the event is a
257 * time extend.
258 */
259static inline unsigned
250rb_event_length(struct ring_buffer_event *event) 260rb_event_length(struct ring_buffer_event *event)
251{ 261{
252 switch (event->type_len) { 262 switch (event->type_len) {
@@ -271,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
271 return 0; 281 return 0;
272} 282}
273 283
284/*
285 * Return total length of time extend and data,
286 * or just the event length for all other events.
287 */
288static inline unsigned
289rb_event_ts_length(struct ring_buffer_event *event)
290{
291 unsigned len = 0;
292
293 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
294 /* time extends include the data event after it */
295 len = RB_LEN_TIME_EXTEND;
296 event = skip_time_extend(event);
297 }
298 return len + rb_event_length(event);
299}
300
274/** 301/**
275 * ring_buffer_event_length - return the length of the event 302 * ring_buffer_event_length - return the length of the event
276 * @event: the event to get the length of 303 * @event: the event to get the length of
304 *
305 * Returns the size of the data load of a data event.
306 * If the event is something other than a data event, it
307 * returns the size of the event itself. With the exception
308 * of a TIME EXTEND, where it still returns the size of the
309 * data load of the data event after it.
277 */ 310 */
278unsigned ring_buffer_event_length(struct ring_buffer_event *event) 311unsigned ring_buffer_event_length(struct ring_buffer_event *event)
279{ 312{
280 unsigned length = rb_event_length(event); 313 unsigned length;
314
315 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
316 event = skip_time_extend(event);
317
318 length = rb_event_length(event);
281 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 319 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
282 return length; 320 return length;
283 length -= RB_EVNT_HDR_SIZE; 321 length -= RB_EVNT_HDR_SIZE;
@@ -291,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
291static void * 329static void *
292rb_event_data(struct ring_buffer_event *event) 330rb_event_data(struct ring_buffer_event *event)
293{ 331{
332 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
333 event = skip_time_extend(event);
294 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); 334 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
295 /* If length is in len field, then array[0] has the data */ 335 /* If length is in len field, then array[0] has the data */
296 if (event->type_len) 336 if (event->type_len)
@@ -316,20 +356,49 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
316#define TS_MASK ((1ULL << TS_SHIFT) - 1) 356#define TS_MASK ((1ULL << TS_SHIFT) - 1)
317#define TS_DELTA_TEST (~TS_MASK) 357#define TS_DELTA_TEST (~TS_MASK)
318 358
359/* Flag when events were overwritten */
360#define RB_MISSED_EVENTS (1 << 31)
361/* Missed count stored at end */
362#define RB_MISSED_STORED (1 << 30)
363
319struct buffer_data_page { 364struct buffer_data_page {
320 u64 time_stamp; /* page time stamp */ 365 u64 time_stamp; /* page time stamp */
321 local_t commit; /* write committed index */ 366 local_t commit; /* write committed index */
322 unsigned char data[]; /* data of buffer page */ 367 unsigned char data[]; /* data of buffer page */
323}; 368};
324 369
370/*
371 * Note, the buffer_page list must be first. The buffer pages
372 * are allocated in cache lines, which means that each buffer
373 * page will be at the beginning of a cache line, and thus
374 * the least significant bits will be zero. We use this to
375 * add flags in the list struct pointers, to make the ring buffer
376 * lockless.
377 */
325struct buffer_page { 378struct buffer_page {
326 struct list_head list; /* list of buffer pages */ 379 struct list_head list; /* list of buffer pages */
327 local_t write; /* index for next write */ 380 local_t write; /* index for next write */
328 unsigned read; /* index for next read */ 381 unsigned read; /* index for next read */
329 local_t entries; /* entries on this page */ 382 local_t entries; /* entries on this page */
383 unsigned long real_end; /* real end of data */
330 struct buffer_data_page *page; /* Actual data page */ 384 struct buffer_data_page *page; /* Actual data page */
331}; 385};
332 386
387/*
388 * The buffer page counters, write and entries, must be reset
389 * atomically when crossing page boundaries. To synchronize this
390 * update, two counters are inserted into the number. One is
391 * the actual counter for the write position or count on the page.
392 *
393 * The other is a counter of updaters. Before an update happens
394 * the update partition of the counter is incremented. This will
395 * allow the updater to update the counter atomically.
396 *
397 * The counter is 20 bits, and the state data is 12.
398 */
399#define RB_WRITE_MASK 0xfffff
400#define RB_WRITE_INTCNT (1 << 20)
401
333static void rb_init_page(struct buffer_data_page *bpage) 402static void rb_init_page(struct buffer_data_page *bpage)
334{ 403{
335 local_set(&bpage->commit, 0); 404 local_set(&bpage->commit, 0);
@@ -372,27 +441,33 @@ static inline int test_time_stamp(u64 delta)
372/* Max payload is BUF_PAGE_SIZE - header (8bytes) */ 441/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
373#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) 442#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
374 443
375/* Max number of timestamps that can fit on a page */
376#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_STAMP)
377
378int ring_buffer_print_page_header(struct trace_seq *s) 444int ring_buffer_print_page_header(struct trace_seq *s)
379{ 445{
380 struct buffer_data_page field; 446 struct buffer_data_page field;
381 int ret; 447 int ret;
382 448
383 ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" 449 ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
384 "offset:0;\tsize:%u;\n", 450 "offset:0;\tsize:%u;\tsigned:%u;\n",
385 (unsigned int)sizeof(field.time_stamp)); 451 (unsigned int)sizeof(field.time_stamp),
452 (unsigned int)is_signed_type(u64));
386 453
387 ret = trace_seq_printf(s, "\tfield: local_t commit;\t" 454 ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
388 "offset:%u;\tsize:%u;\n", 455 "offset:%u;\tsize:%u;\tsigned:%u;\n",
456 (unsigned int)offsetof(typeof(field), commit),
457 (unsigned int)sizeof(field.commit),
458 (unsigned int)is_signed_type(long));
459
460 ret = trace_seq_printf(s, "\tfield: int overwrite;\t"
461 "offset:%u;\tsize:%u;\tsigned:%u;\n",
389 (unsigned int)offsetof(typeof(field), commit), 462 (unsigned int)offsetof(typeof(field), commit),
390 (unsigned int)sizeof(field.commit)); 463 1,
464 (unsigned int)is_signed_type(long));
391 465
392 ret = trace_seq_printf(s, "\tfield: char data;\t" 466 ret = trace_seq_printf(s, "\tfield: char data;\t"
393 "offset:%u;\tsize:%u;\n", 467 "offset:%u;\tsize:%u;\tsigned:%u;\n",
394 (unsigned int)offsetof(typeof(field), data), 468 (unsigned int)offsetof(typeof(field), data),
395 (unsigned int)BUF_PAGE_SIZE); 469 (unsigned int)BUF_PAGE_SIZE,
470 (unsigned int)is_signed_type(char));
396 471
397 return ret; 472 return ret;
398} 473}
@@ -402,25 +477,26 @@ int ring_buffer_print_page_header(struct trace_seq *s)
402 */ 477 */
403struct ring_buffer_per_cpu { 478struct ring_buffer_per_cpu {
404 int cpu; 479 int cpu;
480 atomic_t record_disabled;
405 struct ring_buffer *buffer; 481 struct ring_buffer *buffer;
406 spinlock_t reader_lock; /* serialize readers */ 482 spinlock_t reader_lock; /* serialize readers */
407 raw_spinlock_t lock; 483 arch_spinlock_t lock;
408 struct lock_class_key lock_key; 484 struct lock_class_key lock_key;
409 struct list_head pages; 485 struct list_head *pages;
410 struct buffer_page *head_page; /* read from head */ 486 struct buffer_page *head_page; /* read from head */
411 struct buffer_page *tail_page; /* write to tail */ 487 struct buffer_page *tail_page; /* write to tail */
412 struct buffer_page *commit_page; /* committed pages */ 488 struct buffer_page *commit_page; /* committed pages */
413 struct buffer_page *reader_page; 489 struct buffer_page *reader_page;
414 unsigned long nmi_dropped; 490 unsigned long lost_events;
415 unsigned long commit_overrun; 491 unsigned long last_overrun;
416 unsigned long overrun; 492 local_t commit_overrun;
417 unsigned long read; 493 local_t overrun;
418 local_t entries; 494 local_t entries;
419 local_t committing; 495 local_t committing;
420 local_t commits; 496 local_t commits;
497 unsigned long read;
421 u64 write_stamp; 498 u64 write_stamp;
422 u64 read_stamp; 499 u64 read_stamp;
423 atomic_t record_disabled;
424}; 500};
425 501
426struct ring_buffer { 502struct ring_buffer {
@@ -446,24 +522,31 @@ struct ring_buffer_iter {
446 struct ring_buffer_per_cpu *cpu_buffer; 522 struct ring_buffer_per_cpu *cpu_buffer;
447 unsigned long head; 523 unsigned long head;
448 struct buffer_page *head_page; 524 struct buffer_page *head_page;
525 struct buffer_page *cache_reader_page;
526 unsigned long cache_read;
449 u64 read_stamp; 527 u64 read_stamp;
450}; 528};
451 529
452/* buffer may be either ring_buffer or ring_buffer_per_cpu */ 530/* buffer may be either ring_buffer or ring_buffer_per_cpu */
453#define RB_WARN_ON(buffer, cond) \ 531#define RB_WARN_ON(b, cond) \
454 ({ \ 532 ({ \
455 int _____ret = unlikely(cond); \ 533 int _____ret = unlikely(cond); \
456 if (_____ret) { \ 534 if (_____ret) { \
457 atomic_inc(&buffer->record_disabled); \ 535 if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
458 WARN_ON(1); \ 536 struct ring_buffer_per_cpu *__b = \
459 } \ 537 (void *)b; \
460 _____ret; \ 538 atomic_inc(&__b->buffer->record_disabled); \
539 } else \
540 atomic_inc(&b->record_disabled); \
541 WARN_ON(1); \
542 } \
543 _____ret; \
461 }) 544 })
462 545
463/* Up this if you want to test the TIME_EXTENTS and normalization */ 546/* Up this if you want to test the TIME_EXTENTS and normalization */
464#define DEBUG_SHIFT 0 547#define DEBUG_SHIFT 0
465 548
466static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu) 549static inline u64 rb_time_stamp(struct ring_buffer *buffer)
467{ 550{
468 /* shift to debug/test normalization and TIME_EXTENTS */ 551 /* shift to debug/test normalization and TIME_EXTENTS */
469 return buffer->clock() << DEBUG_SHIFT; 552 return buffer->clock() << DEBUG_SHIFT;
@@ -474,7 +557,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
474 u64 time; 557 u64 time;
475 558
476 preempt_disable_notrace(); 559 preempt_disable_notrace();
477 time = rb_time_stamp(buffer, cpu); 560 time = rb_time_stamp(buffer);
478 preempt_enable_no_resched_notrace(); 561 preempt_enable_no_resched_notrace();
479 562
480 return time; 563 return time;
@@ -489,6 +572,390 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
489} 572}
490EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); 573EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
491 574
575/*
576 * Making the ring buffer lockless makes things tricky.
577 * Although writes only happen on the CPU that they are on,
578 * and they only need to worry about interrupts. Reads can
579 * happen on any CPU.
580 *
581 * The reader page is always off the ring buffer, but when the
582 * reader finishes with a page, it needs to swap its page with
583 * a new one from the buffer. The reader needs to take from
584 * the head (writes go to the tail). But if a writer is in overwrite
585 * mode and wraps, it must push the head page forward.
586 *
587 * Here lies the problem.
588 *
589 * The reader must be careful to replace only the head page, and
590 * not another one. As described at the top of the file in the
591 * ASCII art, the reader sets its old page to point to the next
592 * page after head. It then sets the page after head to point to
593 * the old reader page. But if the writer moves the head page
594 * during this operation, the reader could end up with the tail.
595 *
596 * We use cmpxchg to help prevent this race. We also do something
597 * special with the page before head. We set the LSB to 1.
598 *
599 * When the writer must push the page forward, it will clear the
600 * bit that points to the head page, move the head, and then set
601 * the bit that points to the new head page.
602 *
603 * We also don't want an interrupt coming in and moving the head
604 * page on another writer. Thus we use the second LSB to catch
605 * that too. Thus:
606 *
607 * head->list->prev->next bit 1 bit 0
608 * ------- -------
609 * Normal page 0 0
610 * Points to head page 0 1
611 * New head page 1 0
612 *
613 * Note we can not trust the prev pointer of the head page, because:
614 *
615 * +----+ +-----+ +-----+
616 * | |------>| T |---X--->| N |
617 * | |<------| | | |
618 * +----+ +-----+ +-----+
619 * ^ ^ |
620 * | +-----+ | |
621 * +----------| R |----------+ |
622 * | |<-----------+
623 * +-----+
624 *
625 * Key: ---X--> HEAD flag set in pointer
626 * T Tail page
627 * R Reader page
628 * N Next page
629 *
630 * (see __rb_reserve_next() to see where this happens)
631 *
632 * What the above shows is that the reader just swapped out
633 * the reader page with a page in the buffer, but before it
634 * could make the new header point back to the new page added
635 * it was preempted by a writer. The writer moved forward onto
636 * the new page added by the reader and is about to move forward
637 * again.
638 *
639 * You can see, it is legitimate for the previous pointer of
640 * the head (or any page) not to point back to itself. But only
641 * temporarially.
642 */
643
644#define RB_PAGE_NORMAL 0UL
645#define RB_PAGE_HEAD 1UL
646#define RB_PAGE_UPDATE 2UL
647
648
649#define RB_FLAG_MASK 3UL
650
651/* PAGE_MOVED is not part of the mask */
652#define RB_PAGE_MOVED 4UL
653
654/*
655 * rb_list_head - remove any bit
656 */
657static struct list_head *rb_list_head(struct list_head *list)
658{
659 unsigned long val = (unsigned long)list;
660
661 return (struct list_head *)(val & ~RB_FLAG_MASK);
662}
663
664/*
665 * rb_is_head_page - test if the given page is the head page
666 *
667 * Because the reader may move the head_page pointer, we can
668 * not trust what the head page is (it may be pointing to
669 * the reader page). But if the next page is a header page,
670 * its flags will be non zero.
671 */
672static int inline
673rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
674 struct buffer_page *page, struct list_head *list)
675{
676 unsigned long val;
677
678 val = (unsigned long)list->next;
679
680 if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
681 return RB_PAGE_MOVED;
682
683 return val & RB_FLAG_MASK;
684}
685
686/*
687 * rb_is_reader_page
688 *
689 * The unique thing about the reader page, is that, if the
690 * writer is ever on it, the previous pointer never points
691 * back to the reader page.
692 */
693static int rb_is_reader_page(struct buffer_page *page)
694{
695 struct list_head *list = page->list.prev;
696
697 return rb_list_head(list->next) != &page->list;
698}
699
700/*
701 * rb_set_list_to_head - set a list_head to be pointing to head.
702 */
703static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
704 struct list_head *list)
705{
706 unsigned long *ptr;
707
708 ptr = (unsigned long *)&list->next;
709 *ptr |= RB_PAGE_HEAD;
710 *ptr &= ~RB_PAGE_UPDATE;
711}
712
713/*
714 * rb_head_page_activate - sets up head page
715 */
716static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
717{
718 struct buffer_page *head;
719
720 head = cpu_buffer->head_page;
721 if (!head)
722 return;
723
724 /*
725 * Set the previous list pointer to have the HEAD flag.
726 */
727 rb_set_list_to_head(cpu_buffer, head->list.prev);
728}
729
730static void rb_list_head_clear(struct list_head *list)
731{
732 unsigned long *ptr = (unsigned long *)&list->next;
733
734 *ptr &= ~RB_FLAG_MASK;
735}
736
737/*
738 * rb_head_page_dactivate - clears head page ptr (for free list)
739 */
740static void
741rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
742{
743 struct list_head *hd;
744
745 /* Go through the whole list and clear any pointers found. */
746 rb_list_head_clear(cpu_buffer->pages);
747
748 list_for_each(hd, cpu_buffer->pages)
749 rb_list_head_clear(hd);
750}
751
752static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
753 struct buffer_page *head,
754 struct buffer_page *prev,
755 int old_flag, int new_flag)
756{
757 struct list_head *list;
758 unsigned long val = (unsigned long)&head->list;
759 unsigned long ret;
760
761 list = &prev->list;
762
763 val &= ~RB_FLAG_MASK;
764
765 ret = cmpxchg((unsigned long *)&list->next,
766 val | old_flag, val | new_flag);
767
768 /* check if the reader took the page */
769 if ((ret & ~RB_FLAG_MASK) != val)
770 return RB_PAGE_MOVED;
771
772 return ret & RB_FLAG_MASK;
773}
774
775static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
776 struct buffer_page *head,
777 struct buffer_page *prev,
778 int old_flag)
779{
780 return rb_head_page_set(cpu_buffer, head, prev,
781 old_flag, RB_PAGE_UPDATE);
782}
783
784static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
785 struct buffer_page *head,
786 struct buffer_page *prev,
787 int old_flag)
788{
789 return rb_head_page_set(cpu_buffer, head, prev,
790 old_flag, RB_PAGE_HEAD);
791}
792
793static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
794 struct buffer_page *head,
795 struct buffer_page *prev,
796 int old_flag)
797{
798 return rb_head_page_set(cpu_buffer, head, prev,
799 old_flag, RB_PAGE_NORMAL);
800}
801
802static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
803 struct buffer_page **bpage)
804{
805 struct list_head *p = rb_list_head((*bpage)->list.next);
806
807 *bpage = list_entry(p, struct buffer_page, list);
808}
809
810static struct buffer_page *
811rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
812{
813 struct buffer_page *head;
814 struct buffer_page *page;
815 struct list_head *list;
816 int i;
817
818 if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
819 return NULL;
820
821 /* sanity check */
822 list = cpu_buffer->pages;
823 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
824 return NULL;
825
826 page = head = cpu_buffer->head_page;
827 /*
828 * It is possible that the writer moves the header behind
829 * where we started, and we miss in one loop.
830 * A second loop should grab the header, but we'll do
831 * three loops just because I'm paranoid.
832 */
833 for (i = 0; i < 3; i++) {
834 do {
835 if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
836 cpu_buffer->head_page = page;
837 return page;
838 }
839 rb_inc_page(cpu_buffer, &page);
840 } while (page != head);
841 }
842
843 RB_WARN_ON(cpu_buffer, 1);
844
845 return NULL;
846}
847
848static int rb_head_page_replace(struct buffer_page *old,
849 struct buffer_page *new)
850{
851 unsigned long *ptr = (unsigned long *)&old->list.prev->next;
852 unsigned long val;
853 unsigned long ret;
854
855 val = *ptr & ~RB_FLAG_MASK;
856 val |= RB_PAGE_HEAD;
857
858 ret = cmpxchg(ptr, val, (unsigned long)&new->list);
859
860 return ret == val;
861}
862
863/*
864 * rb_tail_page_update - move the tail page forward
865 *
866 * Returns 1 if moved tail page, 0 if someone else did.
867 */
868static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
869 struct buffer_page *tail_page,
870 struct buffer_page *next_page)
871{
872 struct buffer_page *old_tail;
873 unsigned long old_entries;
874 unsigned long old_write;
875 int ret = 0;
876
877 /*
878 * The tail page now needs to be moved forward.
879 *
880 * We need to reset the tail page, but without messing
881 * with possible erasing of data brought in by interrupts
882 * that have moved the tail page and are currently on it.
883 *
884 * We add a counter to the write field to denote this.
885 */
886 old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
887 old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
888
889 /*
890 * Just make sure we have seen our old_write and synchronize
891 * with any interrupts that come in.
892 */
893 barrier();
894
895 /*
896 * If the tail page is still the same as what we think
897 * it is, then it is up to us to update the tail
898 * pointer.
899 */
900 if (tail_page == cpu_buffer->tail_page) {
901 /* Zero the write counter */
902 unsigned long val = old_write & ~RB_WRITE_MASK;
903 unsigned long eval = old_entries & ~RB_WRITE_MASK;
904
905 /*
906 * This will only succeed if an interrupt did
907 * not come in and change it. In which case, we
908 * do not want to modify it.
909 *
910 * We add (void) to let the compiler know that we do not care
911 * about the return value of these functions. We use the
912 * cmpxchg to only update if an interrupt did not already
913 * do it for us. If the cmpxchg fails, we don't care.
914 */
915 (void)local_cmpxchg(&next_page->write, old_write, val);
916 (void)local_cmpxchg(&next_page->entries, old_entries, eval);
917
918 /*
919 * No need to worry about races with clearing out the commit.
920 * it only can increment when a commit takes place. But that
921 * only happens in the outer most nested commit.
922 */
923 local_set(&next_page->page->commit, 0);
924
925 old_tail = cmpxchg(&cpu_buffer->tail_page,
926 tail_page, next_page);
927
928 if (old_tail == tail_page)
929 ret = 1;
930 }
931
932 return ret;
933}
934
935static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
936 struct buffer_page *bpage)
937{
938 unsigned long val = (unsigned long)bpage;
939
940 if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
941 return 1;
942
943 return 0;
944}
945
946/**
947 * rb_check_list - make sure a pointer to a list has the last bits zero
948 */
949static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
950 struct list_head *list)
951{
952 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
953 return 1;
954 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
955 return 1;
956 return 0;
957}
958
492/** 959/**
493 * check_pages - integrity check of buffer pages 960 * check_pages - integrity check of buffer pages
494 * @cpu_buffer: CPU buffer with pages to test 961 * @cpu_buffer: CPU buffer with pages to test
@@ -498,14 +965,19 @@ EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
498 */ 965 */
499static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 966static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
500{ 967{
501 struct list_head *head = &cpu_buffer->pages; 968 struct list_head *head = cpu_buffer->pages;
502 struct buffer_page *bpage, *tmp; 969 struct buffer_page *bpage, *tmp;
503 970
971 rb_head_page_deactivate(cpu_buffer);
972
504 if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) 973 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
505 return -1; 974 return -1;
506 if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) 975 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
507 return -1; 976 return -1;
508 977
978 if (rb_check_list(cpu_buffer, head))
979 return -1;
980
509 list_for_each_entry_safe(bpage, tmp, head, list) { 981 list_for_each_entry_safe(bpage, tmp, head, list) {
510 if (RB_WARN_ON(cpu_buffer, 982 if (RB_WARN_ON(cpu_buffer,
511 bpage->list.next->prev != &bpage->list)) 983 bpage->list.next->prev != &bpage->list))
@@ -513,25 +985,33 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
513 if (RB_WARN_ON(cpu_buffer, 985 if (RB_WARN_ON(cpu_buffer,
514 bpage->list.prev->next != &bpage->list)) 986 bpage->list.prev->next != &bpage->list))
515 return -1; 987 return -1;
988 if (rb_check_list(cpu_buffer, &bpage->list))
989 return -1;
516 } 990 }
517 991
992 rb_head_page_activate(cpu_buffer);
993
518 return 0; 994 return 0;
519} 995}
520 996
521static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, 997static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
522 unsigned nr_pages) 998 unsigned nr_pages)
523{ 999{
524 struct list_head *head = &cpu_buffer->pages;
525 struct buffer_page *bpage, *tmp; 1000 struct buffer_page *bpage, *tmp;
526 unsigned long addr; 1001 unsigned long addr;
527 LIST_HEAD(pages); 1002 LIST_HEAD(pages);
528 unsigned i; 1003 unsigned i;
529 1004
1005 WARN_ON(!nr_pages);
1006
530 for (i = 0; i < nr_pages; i++) { 1007 for (i = 0; i < nr_pages; i++) {
531 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1008 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
532 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 1009 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
533 if (!bpage) 1010 if (!bpage)
534 goto free_pages; 1011 goto free_pages;
1012
1013 rb_check_bpage(cpu_buffer, bpage);
1014
535 list_add(&bpage->list, &pages); 1015 list_add(&bpage->list, &pages);
536 1016
537 addr = __get_free_page(GFP_KERNEL); 1017 addr = __get_free_page(GFP_KERNEL);
@@ -541,7 +1021,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
541 rb_init_page(bpage->page); 1021 rb_init_page(bpage->page);
542 } 1022 }
543 1023
544 list_splice(&pages, head); 1024 /*
1025 * The ring buffer page list is a circular list that does not
1026 * start and end with a list head. All page list items point to
1027 * other pages.
1028 */
1029 cpu_buffer->pages = pages.next;
1030 list_del(&pages);
545 1031
546 rb_check_pages(cpu_buffer); 1032 rb_check_pages(cpu_buffer);
547 1033
@@ -572,14 +1058,15 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
572 cpu_buffer->buffer = buffer; 1058 cpu_buffer->buffer = buffer;
573 spin_lock_init(&cpu_buffer->reader_lock); 1059 spin_lock_init(&cpu_buffer->reader_lock);
574 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); 1060 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
575 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1061 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
576 INIT_LIST_HEAD(&cpu_buffer->pages);
577 1062
578 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1063 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
579 GFP_KERNEL, cpu_to_node(cpu)); 1064 GFP_KERNEL, cpu_to_node(cpu));
580 if (!bpage) 1065 if (!bpage)
581 goto fail_free_buffer; 1066 goto fail_free_buffer;
582 1067
1068 rb_check_bpage(cpu_buffer, bpage);
1069
583 cpu_buffer->reader_page = bpage; 1070 cpu_buffer->reader_page = bpage;
584 addr = __get_free_page(GFP_KERNEL); 1071 addr = __get_free_page(GFP_KERNEL);
585 if (!addr) 1072 if (!addr)
@@ -594,9 +1081,11 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
594 goto fail_free_reader; 1081 goto fail_free_reader;
595 1082
596 cpu_buffer->head_page 1083 cpu_buffer->head_page
597 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 1084 = list_entry(cpu_buffer->pages, struct buffer_page, list);
598 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; 1085 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
599 1086
1087 rb_head_page_activate(cpu_buffer);
1088
600 return cpu_buffer; 1089 return cpu_buffer;
601 1090
602 fail_free_reader: 1091 fail_free_reader:
@@ -609,15 +1098,22 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
609 1098
610static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 1099static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
611{ 1100{
612 struct list_head *head = &cpu_buffer->pages; 1101 struct list_head *head = cpu_buffer->pages;
613 struct buffer_page *bpage, *tmp; 1102 struct buffer_page *bpage, *tmp;
614 1103
615 free_buffer_page(cpu_buffer->reader_page); 1104 free_buffer_page(cpu_buffer->reader_page);
616 1105
617 list_for_each_entry_safe(bpage, tmp, head, list) { 1106 rb_head_page_deactivate(cpu_buffer);
618 list_del_init(&bpage->list); 1107
1108 if (head) {
1109 list_for_each_entry_safe(bpage, tmp, head, list) {
1110 list_del_init(&bpage->list);
1111 free_buffer_page(bpage);
1112 }
1113 bpage = list_entry(head, struct buffer_page, list);
619 free_buffer_page(bpage); 1114 free_buffer_page(bpage);
620 } 1115 }
1116
621 kfree(cpu_buffer); 1117 kfree(cpu_buffer);
622} 1118}
623 1119
@@ -735,6 +1231,7 @@ ring_buffer_free(struct ring_buffer *buffer)
735 1231
736 put_online_cpus(); 1232 put_online_cpus();
737 1233
1234 kfree(buffer->buffers);
738 free_cpumask_var(buffer->cpumask); 1235 free_cpumask_var(buffer->cpumask);
739 1236
740 kfree(buffer); 1237 kfree(buffer);
@@ -756,26 +1253,25 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
756 struct list_head *p; 1253 struct list_head *p;
757 unsigned i; 1254 unsigned i;
758 1255
759 atomic_inc(&cpu_buffer->record_disabled); 1256 spin_lock_irq(&cpu_buffer->reader_lock);
760 synchronize_sched(); 1257 rb_head_page_deactivate(cpu_buffer);
761 1258
762 for (i = 0; i < nr_pages; i++) { 1259 for (i = 0; i < nr_pages; i++) {
763 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) 1260 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
764 return; 1261 goto out;
765 p = cpu_buffer->pages.next; 1262 p = cpu_buffer->pages->next;
766 bpage = list_entry(p, struct buffer_page, list); 1263 bpage = list_entry(p, struct buffer_page, list);
767 list_del_init(&bpage->list); 1264 list_del_init(&bpage->list);
768 free_buffer_page(bpage); 1265 free_buffer_page(bpage);
769 } 1266 }
770 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) 1267 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
771 return; 1268 goto out;
772 1269
773 rb_reset_cpu(cpu_buffer); 1270 rb_reset_cpu(cpu_buffer);
774
775 rb_check_pages(cpu_buffer); 1271 rb_check_pages(cpu_buffer);
776 1272
777 atomic_dec(&cpu_buffer->record_disabled); 1273out:
778 1274 spin_unlock_irq(&cpu_buffer->reader_lock);
779} 1275}
780 1276
781static void 1277static void
@@ -786,22 +1282,22 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
786 struct list_head *p; 1282 struct list_head *p;
787 unsigned i; 1283 unsigned i;
788 1284
789 atomic_inc(&cpu_buffer->record_disabled); 1285 spin_lock_irq(&cpu_buffer->reader_lock);
790 synchronize_sched(); 1286 rb_head_page_deactivate(cpu_buffer);
791 1287
792 for (i = 0; i < nr_pages; i++) { 1288 for (i = 0; i < nr_pages; i++) {
793 if (RB_WARN_ON(cpu_buffer, list_empty(pages))) 1289 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
794 return; 1290 goto out;
795 p = pages->next; 1291 p = pages->next;
796 bpage = list_entry(p, struct buffer_page, list); 1292 bpage = list_entry(p, struct buffer_page, list);
797 list_del_init(&bpage->list); 1293 list_del_init(&bpage->list);
798 list_add_tail(&bpage->list, &cpu_buffer->pages); 1294 list_add_tail(&bpage->list, cpu_buffer->pages);
799 } 1295 }
800 rb_reset_cpu(cpu_buffer); 1296 rb_reset_cpu(cpu_buffer);
801
802 rb_check_pages(cpu_buffer); 1297 rb_check_pages(cpu_buffer);
803 1298
804 atomic_dec(&cpu_buffer->record_disabled); 1299out:
1300 spin_unlock_irq(&cpu_buffer->reader_lock);
805} 1301}
806 1302
807/** 1303/**
@@ -809,11 +1305,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
809 * @buffer: the buffer to resize. 1305 * @buffer: the buffer to resize.
810 * @size: the new size. 1306 * @size: the new size.
811 * 1307 *
812 * The tracer is responsible for making sure that the buffer is
813 * not being used while changing the size.
814 * Note: We may be able to change the above requirement by using
815 * RCU synchronizations.
816 *
817 * Minimum size is 2 * BUF_PAGE_SIZE. 1308 * Minimum size is 2 * BUF_PAGE_SIZE.
818 * 1309 *
819 * Returns -1 on failure. 1310 * Returns -1 on failure.
@@ -845,6 +1336,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
845 if (size == buffer_size) 1336 if (size == buffer_size)
846 return size; 1337 return size;
847 1338
1339 atomic_inc(&buffer->record_disabled);
1340
1341 /* Make sure all writers are done with this buffer. */
1342 synchronize_sched();
1343
848 mutex_lock(&buffer->mutex); 1344 mutex_lock(&buffer->mutex);
849 get_online_cpus(); 1345 get_online_cpus();
850 1346
@@ -907,6 +1403,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
907 put_online_cpus(); 1403 put_online_cpus();
908 mutex_unlock(&buffer->mutex); 1404 mutex_unlock(&buffer->mutex);
909 1405
1406 atomic_dec(&buffer->record_disabled);
1407
910 return size; 1408 return size;
911 1409
912 free_pages: 1410 free_pages:
@@ -916,6 +1414,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
916 } 1414 }
917 put_online_cpus(); 1415 put_online_cpus();
918 mutex_unlock(&buffer->mutex); 1416 mutex_unlock(&buffer->mutex);
1417 atomic_dec(&buffer->record_disabled);
919 return -ENOMEM; 1418 return -ENOMEM;
920 1419
921 /* 1420 /*
@@ -925,6 +1424,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
925 out_fail: 1424 out_fail:
926 put_online_cpus(); 1425 put_online_cpus();
927 mutex_unlock(&buffer->mutex); 1426 mutex_unlock(&buffer->mutex);
1427 atomic_dec(&buffer->record_disabled);
928 return -1; 1428 return -1;
929} 1429}
930EXPORT_SYMBOL_GPL(ring_buffer_resize); 1430EXPORT_SYMBOL_GPL(ring_buffer_resize);
@@ -948,21 +1448,14 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
948} 1448}
949 1449
950static inline struct ring_buffer_event * 1450static inline struct ring_buffer_event *
951rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
952{
953 return __rb_page_index(cpu_buffer->head_page,
954 cpu_buffer->head_page->read);
955}
956
957static inline struct ring_buffer_event *
958rb_iter_head_event(struct ring_buffer_iter *iter) 1451rb_iter_head_event(struct ring_buffer_iter *iter)
959{ 1452{
960 return __rb_page_index(iter->head_page, iter->head); 1453 return __rb_page_index(iter->head_page, iter->head);
961} 1454}
962 1455
963static inline unsigned rb_page_write(struct buffer_page *bpage) 1456static inline unsigned long rb_page_write(struct buffer_page *bpage)
964{ 1457{
965 return local_read(&bpage->write); 1458 return local_read(&bpage->write) & RB_WRITE_MASK;
966} 1459}
967 1460
968static inline unsigned rb_page_commit(struct buffer_page *bpage) 1461static inline unsigned rb_page_commit(struct buffer_page *bpage)
@@ -970,6 +1463,11 @@ static inline unsigned rb_page_commit(struct buffer_page *bpage)
970 return local_read(&bpage->page->commit); 1463 return local_read(&bpage->page->commit);
971} 1464}
972 1465
1466static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1467{
1468 return local_read(&bpage->entries) & RB_WRITE_MASK;
1469}
1470
973/* Size is determined by what has been commited */ 1471/* Size is determined by what has been commited */
974static inline unsigned rb_page_size(struct buffer_page *bpage) 1472static inline unsigned rb_page_size(struct buffer_page *bpage)
975{ 1473{
@@ -982,22 +1480,6 @@ rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
982 return rb_page_commit(cpu_buffer->commit_page); 1480 return rb_page_commit(cpu_buffer->commit_page);
983} 1481}
984 1482
985static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
986{
987 return rb_page_commit(cpu_buffer->head_page);
988}
989
990static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
991 struct buffer_page **bpage)
992{
993 struct list_head *p = (*bpage)->list.next;
994
995 if (p == &cpu_buffer->pages)
996 p = p->next;
997
998 *bpage = list_entry(p, struct buffer_page, list);
999}
1000
1001static inline unsigned 1483static inline unsigned
1002rb_event_index(struct ring_buffer_event *event) 1484rb_event_index(struct ring_buffer_event *event)
1003{ 1485{
@@ -1023,6 +1505,8 @@ rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
1023static void 1505static void
1024rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) 1506rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1025{ 1507{
1508 unsigned long max_count;
1509
1026 /* 1510 /*
1027 * We only race with interrupts and NMIs on this CPU. 1511 * We only race with interrupts and NMIs on this CPU.
1028 * If we own the commit event, then we can commit 1512 * If we own the commit event, then we can commit
@@ -1032,9 +1516,16 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1032 * assign the commit to the tail. 1516 * assign the commit to the tail.
1033 */ 1517 */
1034 again: 1518 again:
1519 max_count = cpu_buffer->buffer->pages * 100;
1520
1035 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 1521 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
1036 cpu_buffer->commit_page->page->commit = 1522 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
1037 cpu_buffer->commit_page->write; 1523 return;
1524 if (RB_WARN_ON(cpu_buffer,
1525 rb_is_reader_page(cpu_buffer->tail_page)))
1526 return;
1527 local_set(&cpu_buffer->commit_page->page->commit,
1528 rb_page_write(cpu_buffer->commit_page));
1038 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 1529 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
1039 cpu_buffer->write_stamp = 1530 cpu_buffer->write_stamp =
1040 cpu_buffer->commit_page->page->time_stamp; 1531 cpu_buffer->commit_page->page->time_stamp;
@@ -1043,8 +1534,12 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1043 } 1534 }
1044 while (rb_commit_index(cpu_buffer) != 1535 while (rb_commit_index(cpu_buffer) !=
1045 rb_page_write(cpu_buffer->commit_page)) { 1536 rb_page_write(cpu_buffer->commit_page)) {
1046 cpu_buffer->commit_page->page->commit = 1537
1047 cpu_buffer->commit_page->write; 1538 local_set(&cpu_buffer->commit_page->page->commit,
1539 rb_page_write(cpu_buffer->commit_page));
1540 RB_WARN_ON(cpu_buffer,
1541 local_read(&cpu_buffer->commit_page->page->commit) &
1542 ~RB_WRITE_MASK);
1048 barrier(); 1543 barrier();
1049 } 1544 }
1050 1545
@@ -1077,7 +1572,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1077 * to the head page instead of next. 1572 * to the head page instead of next.
1078 */ 1573 */
1079 if (iter->head_page == cpu_buffer->reader_page) 1574 if (iter->head_page == cpu_buffer->reader_page)
1080 iter->head_page = cpu_buffer->head_page; 1575 iter->head_page = rb_set_head_page(cpu_buffer);
1081 else 1576 else
1082 rb_inc_page(cpu_buffer, &iter->head_page); 1577 rb_inc_page(cpu_buffer, &iter->head_page);
1083 1578
@@ -1085,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1085 iter->head = 0; 1580 iter->head = 0;
1086} 1581}
1087 1582
1583/* Slow path, do not inline */
1584static noinline struct ring_buffer_event *
1585rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
1586{
1587 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
1588
1589 /* Not the first event on the page? */
1590 if (rb_event_index(event)) {
1591 event->time_delta = delta & TS_MASK;
1592 event->array[0] = delta >> TS_SHIFT;
1593 } else {
1594 /* nope, just zero it */
1595 event->time_delta = 0;
1596 event->array[0] = 0;
1597 }
1598
1599 return skip_time_extend(event);
1600}
1601
1088/** 1602/**
1089 * ring_buffer_update_event - update event type and data 1603 * ring_buffer_update_event - update event type and data
1090 * @event: the even to update 1604 * @event: the even to update
@@ -1097,28 +1611,188 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1097 * data field. 1611 * data field.
1098 */ 1612 */
1099static void 1613static void
1100rb_update_event(struct ring_buffer_event *event, 1614rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
1101 unsigned type, unsigned length) 1615 struct ring_buffer_event *event, unsigned length,
1616 int add_timestamp, u64 delta)
1102{ 1617{
1103 event->type_len = type; 1618 /* Only a commit updates the timestamp */
1619 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
1620 delta = 0;
1621
1622 /*
1623 * If we need to add a timestamp, then we
1624 * add it to the start of the resevered space.
1625 */
1626 if (unlikely(add_timestamp)) {
1627 event = rb_add_time_stamp(event, delta);
1628 length -= RB_LEN_TIME_EXTEND;
1629 delta = 0;
1630 }
1631
1632 event->time_delta = delta;
1633 length -= RB_EVNT_HDR_SIZE;
1634 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
1635 event->type_len = 0;
1636 event->array[0] = length;
1637 } else
1638 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
1639}
1640
1641/*
1642 * rb_handle_head_page - writer hit the head page
1643 *
1644 * Returns: +1 to retry page
1645 * 0 to continue
1646 * -1 on error
1647 */
1648static int
1649rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
1650 struct buffer_page *tail_page,
1651 struct buffer_page *next_page)
1652{
1653 struct buffer_page *new_head;
1654 int entries;
1655 int type;
1656 int ret;
1657
1658 entries = rb_page_entries(next_page);
1659
1660 /*
1661 * The hard part is here. We need to move the head
1662 * forward, and protect against both readers on
1663 * other CPUs and writers coming in via interrupts.
1664 */
1665 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
1666 RB_PAGE_HEAD);
1667
1668 /*
1669 * type can be one of four:
1670 * NORMAL - an interrupt already moved it for us
1671 * HEAD - we are the first to get here.
1672 * UPDATE - we are the interrupt interrupting
1673 * a current move.
1674 * MOVED - a reader on another CPU moved the next
1675 * pointer to its reader page. Give up
1676 * and try again.
1677 */
1104 1678
1105 switch (type) { 1679 switch (type) {
1680 case RB_PAGE_HEAD:
1681 /*
1682 * We changed the head to UPDATE, thus
1683 * it is our responsibility to update
1684 * the counters.
1685 */
1686 local_add(entries, &cpu_buffer->overrun);
1106 1687
1107 case RINGBUF_TYPE_PADDING: 1688 /*
1108 case RINGBUF_TYPE_TIME_EXTEND: 1689 * The entries will be zeroed out when we move the
1109 case RINGBUF_TYPE_TIME_STAMP: 1690 * tail page.
1691 */
1692
1693 /* still more to do */
1110 break; 1694 break;
1111 1695
1112 case 0: 1696 case RB_PAGE_UPDATE:
1113 length -= RB_EVNT_HDR_SIZE; 1697 /*
1114 if (length > RB_MAX_SMALL_DATA) 1698 * This is an interrupt that interrupt the
1115 event->array[0] = length; 1699 * previous update. Still more to do.
1116 else 1700 */
1117 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
1118 break; 1701 break;
1702 case RB_PAGE_NORMAL:
1703 /*
1704 * An interrupt came in before the update
1705 * and processed this for us.
1706 * Nothing left to do.
1707 */
1708 return 1;
1709 case RB_PAGE_MOVED:
1710 /*
1711 * The reader is on another CPU and just did
1712 * a swap with our next_page.
1713 * Try again.
1714 */
1715 return 1;
1119 default: 1716 default:
1120 BUG(); 1717 RB_WARN_ON(cpu_buffer, 1); /* WTF??? */
1718 return -1;
1719 }
1720
1721 /*
1722 * Now that we are here, the old head pointer is
1723 * set to UPDATE. This will keep the reader from
1724 * swapping the head page with the reader page.
1725 * The reader (on another CPU) will spin till
1726 * we are finished.
1727 *
1728 * We just need to protect against interrupts
1729 * doing the job. We will set the next pointer
1730 * to HEAD. After that, we set the old pointer
1731 * to NORMAL, but only if it was HEAD before.
1732 * otherwise we are an interrupt, and only
1733 * want the outer most commit to reset it.
1734 */
1735 new_head = next_page;
1736 rb_inc_page(cpu_buffer, &new_head);
1737
1738 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
1739 RB_PAGE_NORMAL);
1740
1741 /*
1742 * Valid returns are:
1743 * HEAD - an interrupt came in and already set it.
1744 * NORMAL - One of two things:
1745 * 1) We really set it.
1746 * 2) A bunch of interrupts came in and moved
1747 * the page forward again.
1748 */
1749 switch (ret) {
1750 case RB_PAGE_HEAD:
1751 case RB_PAGE_NORMAL:
1752 /* OK */
1753 break;
1754 default:
1755 RB_WARN_ON(cpu_buffer, 1);
1756 return -1;
1121 } 1757 }
1758
1759 /*
1760 * It is possible that an interrupt came in,
1761 * set the head up, then more interrupts came in
1762 * and moved it again. When we get back here,
1763 * the page would have been set to NORMAL but we
1764 * just set it back to HEAD.
1765 *
1766 * How do you detect this? Well, if that happened
1767 * the tail page would have moved.
1768 */
1769 if (ret == RB_PAGE_NORMAL) {
1770 /*
1771 * If the tail had moved passed next, then we need
1772 * to reset the pointer.
1773 */
1774 if (cpu_buffer->tail_page != tail_page &&
1775 cpu_buffer->tail_page != next_page)
1776 rb_head_page_set_normal(cpu_buffer, new_head,
1777 next_page,
1778 RB_PAGE_HEAD);
1779 }
1780
1781 /*
1782 * If this was the outer most commit (the one that
1783 * changed the original pointer from HEAD to UPDATE),
1784 * then it is up to us to reset it to NORMAL.
1785 */
1786 if (type == RB_PAGE_HEAD) {
1787 ret = rb_head_page_set_normal(cpu_buffer, next_page,
1788 tail_page,
1789 RB_PAGE_UPDATE);
1790 if (RB_WARN_ON(cpu_buffer,
1791 ret != RB_PAGE_UPDATE))
1792 return -1;
1793 }
1794
1795 return 0;
1122} 1796}
1123 1797
1124static unsigned rb_calculate_event_length(unsigned length) 1798static unsigned rb_calculate_event_length(unsigned length)
@@ -1129,11 +1803,11 @@ static unsigned rb_calculate_event_length(unsigned length)
1129 if (!length) 1803 if (!length)
1130 length = 1; 1804 length = 1;
1131 1805
1132 if (length > RB_MAX_SMALL_DATA) 1806 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
1133 length += sizeof(event.array[0]); 1807 length += sizeof(event.array[0]);
1134 1808
1135 length += RB_EVNT_HDR_SIZE; 1809 length += RB_EVNT_HDR_SIZE;
1136 length = ALIGN(length, RB_ALIGNMENT); 1810 length = ALIGN(length, RB_ARCH_ALIGNMENT);
1137 1811
1138 return length; 1812 return length;
1139} 1813}
@@ -1150,6 +1824,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1150 * must fill the old tail_page with padding. 1824 * must fill the old tail_page with padding.
1151 */ 1825 */
1152 if (tail >= BUF_PAGE_SIZE) { 1826 if (tail >= BUF_PAGE_SIZE) {
1827 /*
1828 * If the page was filled, then we still need
1829 * to update the real_end. Reset it to zero
1830 * and the reader will ignore it.
1831 */
1832 if (tail == BUF_PAGE_SIZE)
1833 tail_page->real_end = 0;
1834
1153 local_sub(length, &tail_page->write); 1835 local_sub(length, &tail_page->write);
1154 return; 1836 return;
1155 } 1837 }
@@ -1158,6 +1840,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1158 kmemcheck_annotate_bitfield(event, bitfield); 1840 kmemcheck_annotate_bitfield(event, bitfield);
1159 1841
1160 /* 1842 /*
1843 * Save the original length to the meta data.
1844 * This will be used by the reader to add lost event
1845 * counter.
1846 */
1847 tail_page->real_end = tail;
1848
1849 /*
1161 * If this event is bigger than the minimum size, then 1850 * If this event is bigger than the minimum size, then
1162 * we need to be careful that we don't subtract the 1851 * we need to be careful that we don't subtract the
1163 * write counter enough to allow another writer to slip 1852 * write counter enough to allow another writer to slip
@@ -1184,111 +1873,108 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1184 event->type_len = RINGBUF_TYPE_PADDING; 1873 event->type_len = RINGBUF_TYPE_PADDING;
1185 /* time delta must be non zero */ 1874 /* time delta must be non zero */
1186 event->time_delta = 1; 1875 event->time_delta = 1;
1187 /* Account for this as an entry */
1188 local_inc(&tail_page->entries);
1189 local_inc(&cpu_buffer->entries);
1190 1876
1191 /* Set write to end of buffer */ 1877 /* Set write to end of buffer */
1192 length = (tail + length) - BUF_PAGE_SIZE; 1878 length = (tail + length) - BUF_PAGE_SIZE;
1193 local_sub(length, &tail_page->write); 1879 local_sub(length, &tail_page->write);
1194} 1880}
1195 1881
1196static struct ring_buffer_event * 1882/*
1883 * This is the slow path, force gcc not to inline it.
1884 */
1885static noinline struct ring_buffer_event *
1197rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, 1886rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1198 unsigned long length, unsigned long tail, 1887 unsigned long length, unsigned long tail,
1199 struct buffer_page *commit_page, 1888 struct buffer_page *tail_page, u64 ts)
1200 struct buffer_page *tail_page, u64 *ts)
1201{ 1889{
1202 struct buffer_page *next_page, *head_page, *reader_page; 1890 struct buffer_page *commit_page = cpu_buffer->commit_page;
1203 struct ring_buffer *buffer = cpu_buffer->buffer; 1891 struct ring_buffer *buffer = cpu_buffer->buffer;
1204 bool lock_taken = false; 1892 struct buffer_page *next_page;
1205 unsigned long flags; 1893 int ret;
1206 1894
1207 next_page = tail_page; 1895 next_page = tail_page;
1208 1896
1209 local_irq_save(flags);
1210 /*
1211 * Since the write to the buffer is still not
1212 * fully lockless, we must be careful with NMIs.
1213 * The locks in the writers are taken when a write
1214 * crosses to a new page. The locks protect against
1215 * races with the readers (this will soon be fixed
1216 * with a lockless solution).
1217 *
1218 * Because we can not protect against NMIs, and we
1219 * want to keep traces reentrant, we need to manage
1220 * what happens when we are in an NMI.
1221 *
1222 * NMIs can happen after we take the lock.
1223 * If we are in an NMI, only take the lock
1224 * if it is not already taken. Otherwise
1225 * simply fail.
1226 */
1227 if (unlikely(in_nmi())) {
1228 if (!__raw_spin_trylock(&cpu_buffer->lock)) {
1229 cpu_buffer->nmi_dropped++;
1230 goto out_reset;
1231 }
1232 } else
1233 __raw_spin_lock(&cpu_buffer->lock);
1234
1235 lock_taken = true;
1236
1237 rb_inc_page(cpu_buffer, &next_page); 1897 rb_inc_page(cpu_buffer, &next_page);
1238 1898
1239 head_page = cpu_buffer->head_page;
1240 reader_page = cpu_buffer->reader_page;
1241
1242 /* we grabbed the lock before incrementing */
1243 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
1244 goto out_reset;
1245
1246 /* 1899 /*
1247 * If for some reason, we had an interrupt storm that made 1900 * If for some reason, we had an interrupt storm that made
1248 * it all the way around the buffer, bail, and warn 1901 * it all the way around the buffer, bail, and warn
1249 * about it. 1902 * about it.
1250 */ 1903 */
1251 if (unlikely(next_page == commit_page)) { 1904 if (unlikely(next_page == commit_page)) {
1252 cpu_buffer->commit_overrun++; 1905 local_inc(&cpu_buffer->commit_overrun);
1253 goto out_reset; 1906 goto out_reset;
1254 } 1907 }
1255 1908
1256 if (next_page == head_page) { 1909 /*
1257 if (!(buffer->flags & RB_FL_OVERWRITE)) 1910 * This is where the fun begins!
1258 goto out_reset; 1911 *
1259 1912 * We are fighting against races between a reader that
1260 /* tail_page has not moved yet? */ 1913 * could be on another CPU trying to swap its reader
1261 if (tail_page == cpu_buffer->tail_page) { 1914 * page with the buffer head.
1262 /* count overflows */ 1915 *
1263 cpu_buffer->overrun += 1916 * We are also fighting against interrupts coming in and
1264 local_read(&head_page->entries); 1917 * moving the head or tail on us as well.
1918 *
1919 * If the next page is the head page then we have filled
1920 * the buffer, unless the commit page is still on the
1921 * reader page.
1922 */
1923 if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
1265 1924
1266 rb_inc_page(cpu_buffer, &head_page); 1925 /*
1267 cpu_buffer->head_page = head_page; 1926 * If the commit is not on the reader page, then
1268 cpu_buffer->head_page->read = 0; 1927 * move the header page.
1928 */
1929 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
1930 /*
1931 * If we are not in overwrite mode,
1932 * this is easy, just stop here.
1933 */
1934 if (!(buffer->flags & RB_FL_OVERWRITE))
1935 goto out_reset;
1936
1937 ret = rb_handle_head_page(cpu_buffer,
1938 tail_page,
1939 next_page);
1940 if (ret < 0)
1941 goto out_reset;
1942 if (ret)
1943 goto out_again;
1944 } else {
1945 /*
1946 * We need to be careful here too. The
1947 * commit page could still be on the reader
1948 * page. We could have a small buffer, and
1949 * have filled up the buffer with events
1950 * from interrupts and such, and wrapped.
1951 *
1952 * Note, if the tail page is also the on the
1953 * reader_page, we let it move out.
1954 */
1955 if (unlikely((cpu_buffer->commit_page !=
1956 cpu_buffer->tail_page) &&
1957 (cpu_buffer->commit_page ==
1958 cpu_buffer->reader_page))) {
1959 local_inc(&cpu_buffer->commit_overrun);
1960 goto out_reset;
1961 }
1269 } 1962 }
1270 } 1963 }
1271 1964
1272 /* 1965 ret = rb_tail_page_update(cpu_buffer, tail_page, next_page);
1273 * If the tail page is still the same as what we think 1966 if (ret) {
1274 * it is, then it is up to us to update the tail 1967 /*
1275 * pointer. 1968 * Nested commits always have zero deltas, so
1276 */ 1969 * just reread the time stamp
1277 if (tail_page == cpu_buffer->tail_page) { 1970 */
1278 local_set(&next_page->write, 0); 1971 ts = rb_time_stamp(buffer);
1279 local_set(&next_page->entries, 0); 1972 next_page->page->time_stamp = ts;
1280 local_set(&next_page->page->commit, 0);
1281 cpu_buffer->tail_page = next_page;
1282
1283 /* reread the time stamp */
1284 *ts = rb_time_stamp(buffer, cpu_buffer->cpu);
1285 cpu_buffer->tail_page->page->time_stamp = *ts;
1286 } 1973 }
1287 1974
1288 rb_reset_tail(cpu_buffer, tail_page, tail, length); 1975 out_again:
1289 1976
1290 __raw_spin_unlock(&cpu_buffer->lock); 1977 rb_reset_tail(cpu_buffer, tail_page, tail, length);
1291 local_irq_restore(flags);
1292 1978
1293 /* fail and let the caller try again */ 1979 /* fail and let the caller try again */
1294 return ERR_PTR(-EAGAIN); 1980 return ERR_PTR(-EAGAIN);
@@ -1297,48 +1983,52 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1297 /* reset write */ 1983 /* reset write */
1298 rb_reset_tail(cpu_buffer, tail_page, tail, length); 1984 rb_reset_tail(cpu_buffer, tail_page, tail, length);
1299 1985
1300 if (likely(lock_taken))
1301 __raw_spin_unlock(&cpu_buffer->lock);
1302 local_irq_restore(flags);
1303 return NULL; 1986 return NULL;
1304} 1987}
1305 1988
1306static struct ring_buffer_event * 1989static struct ring_buffer_event *
1307__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 1990__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1308 unsigned type, unsigned long length, u64 *ts) 1991 unsigned long length, u64 ts,
1992 u64 delta, int add_timestamp)
1309{ 1993{
1310 struct buffer_page *tail_page, *commit_page; 1994 struct buffer_page *tail_page;
1311 struct ring_buffer_event *event; 1995 struct ring_buffer_event *event;
1312 unsigned long tail, write; 1996 unsigned long tail, write;
1313 1997
1314 commit_page = cpu_buffer->commit_page; 1998 /*
1315 /* we just need to protect against interrupts */ 1999 * If the time delta since the last event is too big to
1316 barrier(); 2000 * hold in the time field of the event, then we append a
2001 * TIME EXTEND event ahead of the data event.
2002 */
2003 if (unlikely(add_timestamp))
2004 length += RB_LEN_TIME_EXTEND;
2005
1317 tail_page = cpu_buffer->tail_page; 2006 tail_page = cpu_buffer->tail_page;
1318 write = local_add_return(length, &tail_page->write); 2007 write = local_add_return(length, &tail_page->write);
2008
2009 /* set write to only the index of the write */
2010 write &= RB_WRITE_MASK;
1319 tail = write - length; 2011 tail = write - length;
1320 2012
1321 /* See if we shot pass the end of this buffer page */ 2013 /* See if we shot pass the end of this buffer page */
1322 if (write > BUF_PAGE_SIZE) 2014 if (unlikely(write > BUF_PAGE_SIZE))
1323 return rb_move_tail(cpu_buffer, length, tail, 2015 return rb_move_tail(cpu_buffer, length, tail,
1324 commit_page, tail_page, ts); 2016 tail_page, ts);
1325 2017
1326 /* We reserved something on the buffer */ 2018 /* We reserved something on the buffer */
1327 2019
1328 event = __rb_page_index(tail_page, tail); 2020 event = __rb_page_index(tail_page, tail);
1329 kmemcheck_annotate_bitfield(event, bitfield); 2021 kmemcheck_annotate_bitfield(event, bitfield);
1330 rb_update_event(event, type, length); 2022 rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
1331 2023
1332 /* The passed in type is zero for DATA */ 2024 local_inc(&tail_page->entries);
1333 if (likely(!type))
1334 local_inc(&tail_page->entries);
1335 2025
1336 /* 2026 /*
1337 * If this is the first commit on the page, then update 2027 * If this is the first commit on the page, then update
1338 * its timestamp. 2028 * its timestamp.
1339 */ 2029 */
1340 if (!tail) 2030 if (!tail)
1341 tail_page->page->time_stamp = *ts; 2031 tail_page->page->time_stamp = ts;
1342 2032
1343 return event; 2033 return event;
1344} 2034}
@@ -1353,19 +2043,23 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1353 unsigned long addr; 2043 unsigned long addr;
1354 2044
1355 new_index = rb_event_index(event); 2045 new_index = rb_event_index(event);
1356 old_index = new_index + rb_event_length(event); 2046 old_index = new_index + rb_event_ts_length(event);
1357 addr = (unsigned long)event; 2047 addr = (unsigned long)event;
1358 addr &= PAGE_MASK; 2048 addr &= PAGE_MASK;
1359 2049
1360 bpage = cpu_buffer->tail_page; 2050 bpage = cpu_buffer->tail_page;
1361 2051
1362 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { 2052 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2053 unsigned long write_mask =
2054 local_read(&bpage->write) & ~RB_WRITE_MASK;
1363 /* 2055 /*
1364 * This is on the tail page. It is possible that 2056 * This is on the tail page. It is possible that
1365 * a write could come in and move the tail page 2057 * a write could come in and move the tail page
1366 * and write to the next page. That is fine 2058 * and write to the next page. That is fine
1367 * because we just shorten what is on this page. 2059 * because we just shorten what is on this page.
1368 */ 2060 */
2061 old_index += write_mask;
2062 new_index += write_mask;
1369 index = local_cmpxchg(&bpage->write, old_index, new_index); 2063 index = local_cmpxchg(&bpage->write, old_index, new_index);
1370 if (index == old_index) 2064 if (index == old_index)
1371 return 1; 2065 return 1;
@@ -1375,80 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1375 return 0; 2069 return 0;
1376} 2070}
1377 2071
1378static int
1379rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1380 u64 *ts, u64 *delta)
1381{
1382 struct ring_buffer_event *event;
1383 static int once;
1384 int ret;
1385
1386 if (unlikely(*delta > (1ULL << 59) && !once++)) {
1387 printk(KERN_WARNING "Delta way too big! %llu"
1388 " ts=%llu write stamp = %llu\n",
1389 (unsigned long long)*delta,
1390 (unsigned long long)*ts,
1391 (unsigned long long)cpu_buffer->write_stamp);
1392 WARN_ON(1);
1393 }
1394
1395 /*
1396 * The delta is too big, we to add a
1397 * new timestamp.
1398 */
1399 event = __rb_reserve_next(cpu_buffer,
1400 RINGBUF_TYPE_TIME_EXTEND,
1401 RB_LEN_TIME_EXTEND,
1402 ts);
1403 if (!event)
1404 return -EBUSY;
1405
1406 if (PTR_ERR(event) == -EAGAIN)
1407 return -EAGAIN;
1408
1409 /* Only a commited time event can update the write stamp */
1410 if (rb_event_is_commit(cpu_buffer, event)) {
1411 /*
1412 * If this is the first on the page, then it was
1413 * updated with the page itself. Try to discard it
1414 * and if we can't just make it zero.
1415 */
1416 if (rb_event_index(event)) {
1417 event->time_delta = *delta & TS_MASK;
1418 event->array[0] = *delta >> TS_SHIFT;
1419 } else {
1420 /* try to discard, since we do not need this */
1421 if (!rb_try_to_discard(cpu_buffer, event)) {
1422 /* nope, just zero it */
1423 event->time_delta = 0;
1424 event->array[0] = 0;
1425 }
1426 }
1427 cpu_buffer->write_stamp = *ts;
1428 /* let the caller know this was the commit */
1429 ret = 1;
1430 } else {
1431 /* Try to discard the event */
1432 if (!rb_try_to_discard(cpu_buffer, event)) {
1433 /* Darn, this is just wasted space */
1434 event->time_delta = 0;
1435 event->array[0] = 0;
1436 }
1437 ret = 0;
1438 }
1439
1440 *delta = 0;
1441
1442 return ret;
1443}
1444
1445static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) 2072static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
1446{ 2073{
1447 local_inc(&cpu_buffer->committing); 2074 local_inc(&cpu_buffer->committing);
1448 local_inc(&cpu_buffer->commits); 2075 local_inc(&cpu_buffer->commits);
1449} 2076}
1450 2077
1451static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) 2078static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
1452{ 2079{
1453 unsigned long commits; 2080 unsigned long commits;
1454 2081
@@ -1481,18 +2108,38 @@ static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
1481} 2108}
1482 2109
1483static struct ring_buffer_event * 2110static struct ring_buffer_event *
1484rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, 2111rb_reserve_next_event(struct ring_buffer *buffer,
2112 struct ring_buffer_per_cpu *cpu_buffer,
1485 unsigned long length) 2113 unsigned long length)
1486{ 2114{
1487 struct ring_buffer_event *event; 2115 struct ring_buffer_event *event;
1488 u64 ts, delta = 0; 2116 u64 ts, delta;
1489 int commit = 0;
1490 int nr_loops = 0; 2117 int nr_loops = 0;
2118 int add_timestamp;
2119 u64 diff;
1491 2120
1492 rb_start_commit(cpu_buffer); 2121 rb_start_commit(cpu_buffer);
1493 2122
2123#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2124 /*
2125 * Due to the ability to swap a cpu buffer from a buffer
2126 * it is possible it was swapped before we committed.
2127 * (committing stops a swap). We check for it here and
2128 * if it happened, we have to fail the write.
2129 */
2130 barrier();
2131 if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
2132 local_dec(&cpu_buffer->committing);
2133 local_dec(&cpu_buffer->commits);
2134 return NULL;
2135 }
2136#endif
2137
1494 length = rb_calculate_event_length(length); 2138 length = rb_calculate_event_length(length);
1495 again: 2139 again:
2140 add_timestamp = 0;
2141 delta = 0;
2142
1496 /* 2143 /*
1497 * We allow for interrupts to reenter here and do a trace. 2144 * We allow for interrupts to reenter here and do a trace.
1498 * If one does, it will cause this original code to loop 2145 * If one does, it will cause this original code to loop
@@ -1505,57 +2152,33 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1505 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) 2152 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1506 goto out_fail; 2153 goto out_fail;
1507 2154
1508 ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); 2155 ts = rb_time_stamp(cpu_buffer->buffer);
2156 diff = ts - cpu_buffer->write_stamp;
1509 2157
1510 /* 2158 /* make sure this diff is calculated here */
1511 * Only the first commit can update the timestamp. 2159 barrier();
1512 * Yes there is a race here. If an interrupt comes in
1513 * just after the conditional and it traces too, then it
1514 * will also check the deltas. More than one timestamp may
1515 * also be made. But only the entry that did the actual
1516 * commit will be something other than zero.
1517 */
1518 if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
1519 rb_page_write(cpu_buffer->tail_page) ==
1520 rb_commit_index(cpu_buffer))) {
1521 u64 diff;
1522
1523 diff = ts - cpu_buffer->write_stamp;
1524
1525 /* make sure this diff is calculated here */
1526 barrier();
1527
1528 /* Did the write stamp get updated already? */
1529 if (unlikely(ts < cpu_buffer->write_stamp))
1530 goto get_event;
1531 2160
2161 /* Did the write stamp get updated already? */
2162 if (likely(ts >= cpu_buffer->write_stamp)) {
1532 delta = diff; 2163 delta = diff;
1533 if (unlikely(test_time_stamp(delta))) { 2164 if (unlikely(test_time_stamp(delta))) {
1534 2165 WARN_ONCE(delta > (1ULL << 59),
1535 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); 2166 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
1536 if (commit == -EBUSY) 2167 (unsigned long long)delta,
1537 goto out_fail; 2168 (unsigned long long)ts,
1538 2169 (unsigned long long)cpu_buffer->write_stamp);
1539 if (commit == -EAGAIN) 2170 add_timestamp = 1;
1540 goto again;
1541
1542 RB_WARN_ON(cpu_buffer, commit < 0);
1543 } 2171 }
1544 } 2172 }
1545 2173
1546 get_event: 2174 event = __rb_reserve_next(cpu_buffer, length, ts,
1547 event = __rb_reserve_next(cpu_buffer, 0, length, &ts); 2175 delta, add_timestamp);
1548 if (unlikely(PTR_ERR(event) == -EAGAIN)) 2176 if (unlikely(PTR_ERR(event) == -EAGAIN))
1549 goto again; 2177 goto again;
1550 2178
1551 if (!event) 2179 if (!event)
1552 goto out_fail; 2180 goto out_fail;
1553 2181
1554 if (!rb_event_is_commit(cpu_buffer, event))
1555 delta = 0;
1556
1557 event->time_delta = delta;
1558
1559 return event; 2182 return event;
1560 2183
1561 out_fail: 2184 out_fail:
@@ -1563,15 +2186,13 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1563 return NULL; 2186 return NULL;
1564} 2187}
1565 2188
2189#ifdef CONFIG_TRACING
2190
1566#define TRACE_RECURSIVE_DEPTH 16 2191#define TRACE_RECURSIVE_DEPTH 16
1567 2192
1568static int trace_recursive_lock(void) 2193/* Keep this code out of the fast path cache */
2194static noinline void trace_recursive_fail(void)
1569{ 2195{
1570 current->trace_recursion++;
1571
1572 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
1573 return 0;
1574
1575 /* Disable all tracing before we do anything else */ 2196 /* Disable all tracing before we do anything else */
1576 tracing_off_permanent(); 2197 tracing_off_permanent();
1577 2198
@@ -1583,17 +2204,33 @@ static int trace_recursive_lock(void)
1583 in_nmi()); 2204 in_nmi());
1584 2205
1585 WARN_ON_ONCE(1); 2206 WARN_ON_ONCE(1);
2207}
2208
2209static inline int trace_recursive_lock(void)
2210{
2211 current->trace_recursion++;
2212
2213 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
2214 return 0;
2215
2216 trace_recursive_fail();
2217
1586 return -1; 2218 return -1;
1587} 2219}
1588 2220
1589static void trace_recursive_unlock(void) 2221static inline void trace_recursive_unlock(void)
1590{ 2222{
1591 WARN_ON_ONCE(!current->trace_recursion); 2223 WARN_ON_ONCE(!current->trace_recursion);
1592 2224
1593 current->trace_recursion--; 2225 current->trace_recursion--;
1594} 2226}
1595 2227
1596static DEFINE_PER_CPU(int, rb_need_resched); 2228#else
2229
2230#define trace_recursive_lock() (0)
2231#define trace_recursive_unlock() do { } while (0)
2232
2233#endif
1597 2234
1598/** 2235/**
1599 * ring_buffer_lock_reserve - reserve a part of the buffer 2236 * ring_buffer_lock_reserve - reserve a part of the buffer
@@ -1615,16 +2252,16 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1615{ 2252{
1616 struct ring_buffer_per_cpu *cpu_buffer; 2253 struct ring_buffer_per_cpu *cpu_buffer;
1617 struct ring_buffer_event *event; 2254 struct ring_buffer_event *event;
1618 int cpu, resched; 2255 int cpu;
1619 2256
1620 if (ring_buffer_flags != RB_BUFFERS_ON) 2257 if (ring_buffer_flags != RB_BUFFERS_ON)
1621 return NULL; 2258 return NULL;
1622 2259
1623 if (atomic_read(&buffer->record_disabled))
1624 return NULL;
1625
1626 /* If we are tracing schedule, we don't want to recurse */ 2260 /* If we are tracing schedule, we don't want to recurse */
1627 resched = ftrace_preempt_disable(); 2261 preempt_disable_notrace();
2262
2263 if (atomic_read(&buffer->record_disabled))
2264 goto out_nocheck;
1628 2265
1629 if (trace_recursive_lock()) 2266 if (trace_recursive_lock())
1630 goto out_nocheck; 2267 goto out_nocheck;
@@ -1642,41 +2279,54 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1642 if (length > BUF_MAX_DATA_SIZE) 2279 if (length > BUF_MAX_DATA_SIZE)
1643 goto out; 2280 goto out;
1644 2281
1645 event = rb_reserve_next_event(cpu_buffer, length); 2282 event = rb_reserve_next_event(buffer, cpu_buffer, length);
1646 if (!event) 2283 if (!event)
1647 goto out; 2284 goto out;
1648 2285
1649 /*
1650 * Need to store resched state on this cpu.
1651 * Only the first needs to.
1652 */
1653
1654 if (preempt_count() == 1)
1655 per_cpu(rb_need_resched, cpu) = resched;
1656
1657 return event; 2286 return event;
1658 2287
1659 out: 2288 out:
1660 trace_recursive_unlock(); 2289 trace_recursive_unlock();
1661 2290
1662 out_nocheck: 2291 out_nocheck:
1663 ftrace_preempt_enable(resched); 2292 preempt_enable_notrace();
1664 return NULL; 2293 return NULL;
1665} 2294}
1666EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); 2295EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
1667 2296
1668static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 2297static void
2298rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1669 struct ring_buffer_event *event) 2299 struct ring_buffer_event *event)
1670{ 2300{
1671 local_inc(&cpu_buffer->entries); 2301 u64 delta;
1672 2302
1673 /* 2303 /*
1674 * The event first in the commit queue updates the 2304 * The event first in the commit queue updates the
1675 * time stamp. 2305 * time stamp.
1676 */ 2306 */
1677 if (rb_event_is_commit(cpu_buffer, event)) 2307 if (rb_event_is_commit(cpu_buffer, event)) {
1678 cpu_buffer->write_stamp += event->time_delta; 2308 /*
2309 * A commit event that is first on a page
2310 * updates the write timestamp with the page stamp
2311 */
2312 if (!rb_event_index(event))
2313 cpu_buffer->write_stamp =
2314 cpu_buffer->commit_page->page->time_stamp;
2315 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2316 delta = event->array[0];
2317 delta <<= TS_SHIFT;
2318 delta += event->time_delta;
2319 cpu_buffer->write_stamp += delta;
2320 } else
2321 cpu_buffer->write_stamp += event->time_delta;
2322 }
2323}
1679 2324
2325static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2326 struct ring_buffer_event *event)
2327{
2328 local_inc(&cpu_buffer->entries);
2329 rb_update_write_stamp(cpu_buffer, event);
1680 rb_end_commit(cpu_buffer); 2330 rb_end_commit(cpu_buffer);
1681} 2331}
1682 2332
@@ -1701,13 +2351,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1701 2351
1702 trace_recursive_unlock(); 2352 trace_recursive_unlock();
1703 2353
1704 /* 2354 preempt_enable_notrace();
1705 * Only the last preempt count needs to restore preemption.
1706 */
1707 if (preempt_count() == 1)
1708 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1709 else
1710 preempt_enable_no_resched_notrace();
1711 2355
1712 return 0; 2356 return 0;
1713} 2357}
@@ -1715,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
1715 2359
1716static inline void rb_event_discard(struct ring_buffer_event *event) 2360static inline void rb_event_discard(struct ring_buffer_event *event)
1717{ 2361{
2362 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2363 event = skip_time_extend(event);
2364
1718 /* array[0] holds the actual length for the discarded event */ 2365 /* array[0] holds the actual length for the discarded event */
1719 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; 2366 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
1720 event->type_len = RINGBUF_TYPE_PADDING; 2367 event->type_len = RINGBUF_TYPE_PADDING;
@@ -1723,32 +2370,57 @@ static inline void rb_event_discard(struct ring_buffer_event *event)
1723 event->time_delta = 1; 2370 event->time_delta = 1;
1724} 2371}
1725 2372
1726/** 2373/*
1727 * ring_buffer_event_discard - discard any event in the ring buffer 2374 * Decrement the entries to the page that an event is on.
1728 * @event: the event to discard 2375 * The event does not even need to exist, only the pointer
1729 * 2376 * to the page it is on. This may only be called before the commit
1730 * Sometimes a event that is in the ring buffer needs to be ignored. 2377 * takes place.
1731 * This function lets the user discard an event in the ring buffer
1732 * and then that event will not be read later.
1733 *
1734 * Note, it is up to the user to be careful with this, and protect
1735 * against races. If the user discards an event that has been consumed
1736 * it is possible that it could corrupt the ring buffer.
1737 */ 2378 */
1738void ring_buffer_event_discard(struct ring_buffer_event *event) 2379static inline void
2380rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
2381 struct ring_buffer_event *event)
1739{ 2382{
1740 rb_event_discard(event); 2383 unsigned long addr = (unsigned long)event;
2384 struct buffer_page *bpage = cpu_buffer->commit_page;
2385 struct buffer_page *start;
2386
2387 addr &= PAGE_MASK;
2388
2389 /* Do the likely case first */
2390 if (likely(bpage->page == (void *)addr)) {
2391 local_dec(&bpage->entries);
2392 return;
2393 }
2394
2395 /*
2396 * Because the commit page may be on the reader page we
2397 * start with the next page and check the end loop there.
2398 */
2399 rb_inc_page(cpu_buffer, &bpage);
2400 start = bpage;
2401 do {
2402 if (bpage->page == (void *)addr) {
2403 local_dec(&bpage->entries);
2404 return;
2405 }
2406 rb_inc_page(cpu_buffer, &bpage);
2407 } while (bpage != start);
2408
2409 /* commit not part of this buffer?? */
2410 RB_WARN_ON(cpu_buffer, 1);
1741} 2411}
1742EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
1743 2412
1744/** 2413/**
1745 * ring_buffer_commit_discard - discard an event that has not been committed 2414 * ring_buffer_commit_discard - discard an event that has not been committed
1746 * @buffer: the ring buffer 2415 * @buffer: the ring buffer
1747 * @event: non committed event to discard 2416 * @event: non committed event to discard
1748 * 2417 *
1749 * This is similar to ring_buffer_event_discard but must only be 2418 * Sometimes an event that is in the ring buffer needs to be ignored.
1750 * performed on an event that has not been committed yet. The difference 2419 * This function lets the user discard an event in the ring buffer
1751 * is that this will also try to free the event from the ring buffer 2420 * and then that event will not be read later.
2421 *
2422 * This function only works if it is called before the the item has been
2423 * committed. It will try to free the event from the ring buffer
1752 * if another event has not been added behind it. 2424 * if another event has not been added behind it.
1753 * 2425 *
1754 * If another event has been added behind it, it will set the event 2426 * If another event has been added behind it, it will set the event
@@ -1776,26 +2448,21 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
1776 */ 2448 */
1777 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); 2449 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
1778 2450
1779 if (!rb_try_to_discard(cpu_buffer, event)) 2451 rb_decrement_entry(cpu_buffer, event);
2452 if (rb_try_to_discard(cpu_buffer, event))
1780 goto out; 2453 goto out;
1781 2454
1782 /* 2455 /*
1783 * The commit is still visible by the reader, so we 2456 * The commit is still visible by the reader, so we
1784 * must increment entries. 2457 * must still update the timestamp.
1785 */ 2458 */
1786 local_inc(&cpu_buffer->entries); 2459 rb_update_write_stamp(cpu_buffer, event);
1787 out: 2460 out:
1788 rb_end_commit(cpu_buffer); 2461 rb_end_commit(cpu_buffer);
1789 2462
1790 trace_recursive_unlock(); 2463 trace_recursive_unlock();
1791 2464
1792 /* 2465 preempt_enable_notrace();
1793 * Only the last preempt count needs to restore preemption.
1794 */
1795 if (preempt_count() == 1)
1796 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1797 else
1798 preempt_enable_no_resched_notrace();
1799 2466
1800} 2467}
1801EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); 2468EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
@@ -1821,15 +2488,15 @@ int ring_buffer_write(struct ring_buffer *buffer,
1821 struct ring_buffer_event *event; 2488 struct ring_buffer_event *event;
1822 void *body; 2489 void *body;
1823 int ret = -EBUSY; 2490 int ret = -EBUSY;
1824 int cpu, resched; 2491 int cpu;
1825 2492
1826 if (ring_buffer_flags != RB_BUFFERS_ON) 2493 if (ring_buffer_flags != RB_BUFFERS_ON)
1827 return -EBUSY; 2494 return -EBUSY;
1828 2495
1829 if (atomic_read(&buffer->record_disabled)) 2496 preempt_disable_notrace();
1830 return -EBUSY;
1831 2497
1832 resched = ftrace_preempt_disable(); 2498 if (atomic_read(&buffer->record_disabled))
2499 goto out;
1833 2500
1834 cpu = raw_smp_processor_id(); 2501 cpu = raw_smp_processor_id();
1835 2502
@@ -1844,7 +2511,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1844 if (length > BUF_MAX_DATA_SIZE) 2511 if (length > BUF_MAX_DATA_SIZE)
1845 goto out; 2512 goto out;
1846 2513
1847 event = rb_reserve_next_event(cpu_buffer, length); 2514 event = rb_reserve_next_event(buffer, cpu_buffer, length);
1848 if (!event) 2515 if (!event)
1849 goto out; 2516 goto out;
1850 2517
@@ -1856,7 +2523,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1856 2523
1857 ret = 0; 2524 ret = 0;
1858 out: 2525 out:
1859 ftrace_preempt_enable(resched); 2526 preempt_enable_notrace();
1860 2527
1861 return ret; 2528 return ret;
1862} 2529}
@@ -1865,9 +2532,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_write);
1865static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) 2532static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1866{ 2533{
1867 struct buffer_page *reader = cpu_buffer->reader_page; 2534 struct buffer_page *reader = cpu_buffer->reader_page;
1868 struct buffer_page *head = cpu_buffer->head_page; 2535 struct buffer_page *head = rb_set_head_page(cpu_buffer);
1869 struct buffer_page *commit = cpu_buffer->commit_page; 2536 struct buffer_page *commit = cpu_buffer->commit_page;
1870 2537
2538 /* In case of error, head will be NULL */
2539 if (unlikely(!head))
2540 return 1;
2541
1871 return reader->read == rb_page_commit(reader) && 2542 return reader->read == rb_page_commit(reader) &&
1872 (commit == reader || 2543 (commit == reader ||
1873 (commit == head && 2544 (commit == head &&
@@ -1894,7 +2565,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
1894 * @buffer: The ring buffer to enable writes 2565 * @buffer: The ring buffer to enable writes
1895 * 2566 *
1896 * Note, multiple disables will need the same number of enables 2567 * Note, multiple disables will need the same number of enables
1897 * to truely enable the writing (much like preempt_disable). 2568 * to truly enable the writing (much like preempt_disable).
1898 */ 2569 */
1899void ring_buffer_record_enable(struct ring_buffer *buffer) 2570void ring_buffer_record_enable(struct ring_buffer *buffer)
1900{ 2571{
@@ -1930,7 +2601,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
1930 * @cpu: The CPU to enable. 2601 * @cpu: The CPU to enable.
1931 * 2602 *
1932 * Note, multiple disables will need the same number of enables 2603 * Note, multiple disables will need the same number of enables
1933 * to truely enable the writing (much like preempt_disable). 2604 * to truly enable the writing (much like preempt_disable).
1934 */ 2605 */
1935void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) 2606void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
1936{ 2607{
@@ -1944,6 +2615,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
1944} 2615}
1945EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); 2616EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
1946 2617
2618/*
2619 * The total entries in the ring buffer is the running counter
2620 * of entries entered into the ring buffer, minus the sum of
2621 * the entries read from the ring buffer and the number of
2622 * entries that were overwritten.
2623 */
2624static inline unsigned long
2625rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
2626{
2627 return local_read(&cpu_buffer->entries) -
2628 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
2629}
2630
1947/** 2631/**
1948 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer 2632 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
1949 * @buffer: The ring buffer 2633 * @buffer: The ring buffer
@@ -1952,16 +2636,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
1952unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) 2636unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1953{ 2637{
1954 struct ring_buffer_per_cpu *cpu_buffer; 2638 struct ring_buffer_per_cpu *cpu_buffer;
1955 unsigned long ret;
1956 2639
1957 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2640 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1958 return 0; 2641 return 0;
1959 2642
1960 cpu_buffer = buffer->buffers[cpu]; 2643 cpu_buffer = buffer->buffers[cpu];
1961 ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun)
1962 - cpu_buffer->read;
1963 2644
1964 return ret; 2645 return rb_num_of_entries(cpu_buffer);
1965} 2646}
1966EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); 2647EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
1967 2648
@@ -1979,33 +2660,13 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1979 return 0; 2660 return 0;
1980 2661
1981 cpu_buffer = buffer->buffers[cpu]; 2662 cpu_buffer = buffer->buffers[cpu];
1982 ret = cpu_buffer->overrun; 2663 ret = local_read(&cpu_buffer->overrun);
1983 2664
1984 return ret; 2665 return ret;
1985} 2666}
1986EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); 2667EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
1987 2668
1988/** 2669/**
1989 * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped
1990 * @buffer: The ring buffer
1991 * @cpu: The per CPU buffer to get the number of overruns from
1992 */
1993unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
1994{
1995 struct ring_buffer_per_cpu *cpu_buffer;
1996 unsigned long ret;
1997
1998 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1999 return 0;
2000
2001 cpu_buffer = buffer->buffers[cpu];
2002 ret = cpu_buffer->nmi_dropped;
2003
2004 return ret;
2005}
2006EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
2007
2008/**
2009 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits 2670 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
2010 * @buffer: The ring buffer 2671 * @buffer: The ring buffer
2011 * @cpu: The per CPU buffer to get the number of overruns from 2672 * @cpu: The per CPU buffer to get the number of overruns from
@@ -2020,7 +2681,7 @@ ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
2020 return 0; 2681 return 0;
2021 2682
2022 cpu_buffer = buffer->buffers[cpu]; 2683 cpu_buffer = buffer->buffers[cpu];
2023 ret = cpu_buffer->commit_overrun; 2684 ret = local_read(&cpu_buffer->commit_overrun);
2024 2685
2025 return ret; 2686 return ret;
2026} 2687}
@@ -2042,8 +2703,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2042 /* if you care about this being correct, lock the buffer */ 2703 /* if you care about this being correct, lock the buffer */
2043 for_each_buffer_cpu(buffer, cpu) { 2704 for_each_buffer_cpu(buffer, cpu) {
2044 cpu_buffer = buffer->buffers[cpu]; 2705 cpu_buffer = buffer->buffers[cpu];
2045 entries += (local_read(&cpu_buffer->entries) - 2706 entries += rb_num_of_entries(cpu_buffer);
2046 cpu_buffer->overrun) - cpu_buffer->read;
2047 } 2707 }
2048 2708
2049 return entries; 2709 return entries;
@@ -2051,7 +2711,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2051EXPORT_SYMBOL_GPL(ring_buffer_entries); 2711EXPORT_SYMBOL_GPL(ring_buffer_entries);
2052 2712
2053/** 2713/**
2054 * ring_buffer_overrun_cpu - get the number of overruns in buffer 2714 * ring_buffer_overruns - get the number of overruns in buffer
2055 * @buffer: The ring buffer 2715 * @buffer: The ring buffer
2056 * 2716 *
2057 * Returns the total number of overruns in the ring buffer 2717 * Returns the total number of overruns in the ring buffer
@@ -2066,7 +2726,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
2066 /* if you care about this being correct, lock the buffer */ 2726 /* if you care about this being correct, lock the buffer */
2067 for_each_buffer_cpu(buffer, cpu) { 2727 for_each_buffer_cpu(buffer, cpu) {
2068 cpu_buffer = buffer->buffers[cpu]; 2728 cpu_buffer = buffer->buffers[cpu];
2069 overruns += cpu_buffer->overrun; 2729 overruns += local_read(&cpu_buffer->overrun);
2070 } 2730 }
2071 2731
2072 return overruns; 2732 return overruns;
@@ -2079,8 +2739,10 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2079 2739
2080 /* Iterator usage is expected to have record disabled */ 2740 /* Iterator usage is expected to have record disabled */
2081 if (list_empty(&cpu_buffer->reader_page->list)) { 2741 if (list_empty(&cpu_buffer->reader_page->list)) {
2082 iter->head_page = cpu_buffer->head_page; 2742 iter->head_page = rb_set_head_page(cpu_buffer);
2083 iter->head = cpu_buffer->head_page->read; 2743 if (unlikely(!iter->head_page))
2744 return;
2745 iter->head = iter->head_page->read;
2084 } else { 2746 } else {
2085 iter->head_page = cpu_buffer->reader_page; 2747 iter->head_page = cpu_buffer->reader_page;
2086 iter->head = cpu_buffer->reader_page->read; 2748 iter->head = cpu_buffer->reader_page->read;
@@ -2089,6 +2751,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2089 iter->read_stamp = cpu_buffer->read_stamp; 2751 iter->read_stamp = cpu_buffer->read_stamp;
2090 else 2752 else
2091 iter->read_stamp = iter->head_page->page->time_stamp; 2753 iter->read_stamp = iter->head_page->page->time_stamp;
2754 iter->cache_reader_page = cpu_buffer->reader_page;
2755 iter->cache_read = cpu_buffer->read;
2092} 2756}
2093 2757
2094/** 2758/**
@@ -2195,11 +2859,13 @@ static struct buffer_page *
2195rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 2859rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2196{ 2860{
2197 struct buffer_page *reader = NULL; 2861 struct buffer_page *reader = NULL;
2862 unsigned long overwrite;
2198 unsigned long flags; 2863 unsigned long flags;
2199 int nr_loops = 0; 2864 int nr_loops = 0;
2865 int ret;
2200 2866
2201 local_irq_save(flags); 2867 local_irq_save(flags);
2202 __raw_spin_lock(&cpu_buffer->lock); 2868 arch_spin_lock(&cpu_buffer->lock);
2203 2869
2204 again: 2870 again:
2205 /* 2871 /*
@@ -2230,39 +2896,83 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2230 goto out; 2896 goto out;
2231 2897
2232 /* 2898 /*
2233 * Splice the empty reader page into the list around the head.
2234 * Reset the reader page to size zero. 2899 * Reset the reader page to size zero.
2235 */ 2900 */
2236
2237 reader = cpu_buffer->head_page;
2238 cpu_buffer->reader_page->list.next = reader->list.next;
2239 cpu_buffer->reader_page->list.prev = reader->list.prev;
2240
2241 local_set(&cpu_buffer->reader_page->write, 0); 2901 local_set(&cpu_buffer->reader_page->write, 0);
2242 local_set(&cpu_buffer->reader_page->entries, 0); 2902 local_set(&cpu_buffer->reader_page->entries, 0);
2243 local_set(&cpu_buffer->reader_page->page->commit, 0); 2903 local_set(&cpu_buffer->reader_page->page->commit, 0);
2904 cpu_buffer->reader_page->real_end = 0;
2905
2906 spin:
2907 /*
2908 * Splice the empty reader page into the list around the head.
2909 */
2910 reader = rb_set_head_page(cpu_buffer);
2911 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
2912 cpu_buffer->reader_page->list.prev = reader->list.prev;
2913
2914 /*
2915 * cpu_buffer->pages just needs to point to the buffer, it
2916 * has no specific buffer page to point to. Lets move it out
2917 * of our way so we don't accidently swap it.
2918 */
2919 cpu_buffer->pages = reader->list.prev;
2244 2920
2245 /* Make the reader page now replace the head */ 2921 /* The reader page will be pointing to the new head */
2246 reader->list.prev->next = &cpu_buffer->reader_page->list; 2922 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
2247 reader->list.next->prev = &cpu_buffer->reader_page->list;
2248 2923
2249 /* 2924 /*
2250 * If the tail is on the reader, then we must set the head 2925 * We want to make sure we read the overruns after we set up our
2251 * to the inserted page, otherwise we set it one before. 2926 * pointers to the next object. The writer side does a
2927 * cmpxchg to cross pages which acts as the mb on the writer
2928 * side. Note, the reader will constantly fail the swap
2929 * while the writer is updating the pointers, so this
2930 * guarantees that the overwrite recorded here is the one we
2931 * want to compare with the last_overrun.
2252 */ 2932 */
2253 cpu_buffer->head_page = cpu_buffer->reader_page; 2933 smp_mb();
2934 overwrite = local_read(&(cpu_buffer->overrun));
2254 2935
2255 if (cpu_buffer->commit_page != reader) 2936 /*
2256 rb_inc_page(cpu_buffer, &cpu_buffer->head_page); 2937 * Here's the tricky part.
2938 *
2939 * We need to move the pointer past the header page.
2940 * But we can only do that if a writer is not currently
2941 * moving it. The page before the header page has the
2942 * flag bit '1' set if it is pointing to the page we want.
2943 * but if the writer is in the process of moving it
2944 * than it will be '2' or already moved '0'.
2945 */
2946
2947 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
2948
2949 /*
2950 * If we did not convert it, then we must try again.
2951 */
2952 if (!ret)
2953 goto spin;
2954
2955 /*
2956 * Yeah! We succeeded in replacing the page.
2957 *
2958 * Now make the new head point back to the reader page.
2959 */
2960 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
2961 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
2257 2962
2258 /* Finally update the reader page to the new head */ 2963 /* Finally update the reader page to the new head */
2259 cpu_buffer->reader_page = reader; 2964 cpu_buffer->reader_page = reader;
2260 rb_reset_reader_page(cpu_buffer); 2965 rb_reset_reader_page(cpu_buffer);
2261 2966
2967 if (overwrite != cpu_buffer->last_overrun) {
2968 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
2969 cpu_buffer->last_overrun = overwrite;
2970 }
2971
2262 goto again; 2972 goto again;
2263 2973
2264 out: 2974 out:
2265 __raw_spin_unlock(&cpu_buffer->lock); 2975 arch_spin_unlock(&cpu_buffer->lock);
2266 local_irq_restore(flags); 2976 local_irq_restore(flags);
2267 2977
2268 return reader; 2978 return reader;
@@ -2282,8 +2992,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
2282 2992
2283 event = rb_reader_event(cpu_buffer); 2993 event = rb_reader_event(cpu_buffer);
2284 2994
2285 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX 2995 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
2286 || rb_discarded_event(event))
2287 cpu_buffer->read++; 2996 cpu_buffer->read++;
2288 2997
2289 rb_update_read_stamp(cpu_buffer, event); 2998 rb_update_read_stamp(cpu_buffer, event);
@@ -2294,13 +3003,11 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
2294 3003
2295static void rb_advance_iter(struct ring_buffer_iter *iter) 3004static void rb_advance_iter(struct ring_buffer_iter *iter)
2296{ 3005{
2297 struct ring_buffer *buffer;
2298 struct ring_buffer_per_cpu *cpu_buffer; 3006 struct ring_buffer_per_cpu *cpu_buffer;
2299 struct ring_buffer_event *event; 3007 struct ring_buffer_event *event;
2300 unsigned length; 3008 unsigned length;
2301 3009
2302 cpu_buffer = iter->cpu_buffer; 3010 cpu_buffer = iter->cpu_buffer;
2303 buffer = cpu_buffer->buffer;
2304 3011
2305 /* 3012 /*
2306 * Check if we are at the end of the buffer. 3013 * Check if we are at the end of the buffer.
@@ -2336,24 +3043,27 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
2336 rb_advance_iter(iter); 3043 rb_advance_iter(iter);
2337} 3044}
2338 3045
3046static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
3047{
3048 return cpu_buffer->lost_events;
3049}
3050
2339static struct ring_buffer_event * 3051static struct ring_buffer_event *
2340rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) 3052rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3053 unsigned long *lost_events)
2341{ 3054{
2342 struct ring_buffer_per_cpu *cpu_buffer;
2343 struct ring_buffer_event *event; 3055 struct ring_buffer_event *event;
2344 struct buffer_page *reader; 3056 struct buffer_page *reader;
2345 int nr_loops = 0; 3057 int nr_loops = 0;
2346 3058
2347 cpu_buffer = buffer->buffers[cpu];
2348
2349 again: 3059 again:
2350 /* 3060 /*
2351 * We repeat when a timestamp is encountered. It is possible 3061 * We repeat when a time extend is encountered.
2352 * to get multiple timestamps from an interrupt entering just 3062 * Since the time extend is always attached to a data event,
2353 * as one timestamp is about to be written, or from discarded 3063 * we should never loop more than once.
2354 * commits. The most that we can have is the number on a single page. 3064 * (We never hit the following condition more than twice).
2355 */ 3065 */
2356 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3066 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
2357 return NULL; 3067 return NULL;
2358 3068
2359 reader = rb_get_reader_page(cpu_buffer); 3069 reader = rb_get_reader_page(cpu_buffer);
@@ -2374,7 +3084,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2374 * the box. Return the padding, and we will release 3084 * the box. Return the padding, and we will release
2375 * the current locks, and try again. 3085 * the current locks, and try again.
2376 */ 3086 */
2377 rb_advance_reader(cpu_buffer);
2378 return event; 3087 return event;
2379 3088
2380 case RINGBUF_TYPE_TIME_EXTEND: 3089 case RINGBUF_TYPE_TIME_EXTEND:
@@ -2390,9 +3099,11 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2390 case RINGBUF_TYPE_DATA: 3099 case RINGBUF_TYPE_DATA:
2391 if (ts) { 3100 if (ts) {
2392 *ts = cpu_buffer->read_stamp + event->time_delta; 3101 *ts = cpu_buffer->read_stamp + event->time_delta;
2393 ring_buffer_normalize_time_stamp(buffer, 3102 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
2394 cpu_buffer->cpu, ts); 3103 cpu_buffer->cpu, ts);
2395 } 3104 }
3105 if (lost_events)
3106 *lost_events = rb_lost_events(cpu_buffer);
2396 return event; 3107 return event;
2397 3108
2398 default: 3109 default:
@@ -2411,27 +3122,39 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2411 struct ring_buffer_event *event; 3122 struct ring_buffer_event *event;
2412 int nr_loops = 0; 3123 int nr_loops = 0;
2413 3124
2414 if (ring_buffer_iter_empty(iter))
2415 return NULL;
2416
2417 cpu_buffer = iter->cpu_buffer; 3125 cpu_buffer = iter->cpu_buffer;
2418 buffer = cpu_buffer->buffer; 3126 buffer = cpu_buffer->buffer;
2419 3127
3128 /*
3129 * Check if someone performed a consuming read to
3130 * the buffer. A consuming read invalidates the iterator
3131 * and we need to reset the iterator in this case.
3132 */
3133 if (unlikely(iter->cache_read != cpu_buffer->read ||
3134 iter->cache_reader_page != cpu_buffer->reader_page))
3135 rb_iter_reset(iter);
3136
2420 again: 3137 again:
3138 if (ring_buffer_iter_empty(iter))
3139 return NULL;
3140
2421 /* 3141 /*
2422 * We repeat when a timestamp is encountered. 3142 * We repeat when a time extend is encountered.
2423 * We can get multiple timestamps by nested interrupts or also 3143 * Since the time extend is always attached to a data event,
2424 * if filtering is on (discarding commits). Since discarding 3144 * we should never loop more than once.
2425 * commits can be frequent we can get a lot of timestamps. 3145 * (We never hit the following condition more than twice).
2426 * But we limit them by not adding timestamps if they begin
2427 * at the start of a page.
2428 */ 3146 */
2429 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3147 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
2430 return NULL; 3148 return NULL;
2431 3149
2432 if (rb_per_cpu_empty(cpu_buffer)) 3150 if (rb_per_cpu_empty(cpu_buffer))
2433 return NULL; 3151 return NULL;
2434 3152
3153 if (iter->head >= local_read(&iter->head_page->page->commit)) {
3154 rb_inc_iter(iter);
3155 goto again;
3156 }
3157
2435 event = rb_iter_head_event(iter); 3158 event = rb_iter_head_event(iter);
2436 3159
2437 switch (event->type_len) { 3160 switch (event->type_len) {
@@ -2477,7 +3200,7 @@ static inline int rb_ok_to_lock(void)
2477 * buffer too. A one time deal is all you get from reading 3200 * buffer too. A one time deal is all you get from reading
2478 * the ring buffer from an NMI. 3201 * the ring buffer from an NMI.
2479 */ 3202 */
2480 if (likely(!in_nmi() && !oops_in_progress)) 3203 if (likely(!in_nmi()))
2481 return 1; 3204 return 1;
2482 3205
2483 tracing_off_permanent(); 3206 tracing_off_permanent();
@@ -2489,12 +3212,14 @@ static inline int rb_ok_to_lock(void)
2489 * @buffer: The ring buffer to read 3212 * @buffer: The ring buffer to read
2490 * @cpu: The cpu to peak at 3213 * @cpu: The cpu to peak at
2491 * @ts: The timestamp counter of this event. 3214 * @ts: The timestamp counter of this event.
3215 * @lost_events: a variable to store if events were lost (may be NULL)
2492 * 3216 *
2493 * This will return the event that will be read next, but does 3217 * This will return the event that will be read next, but does
2494 * not consume the data. 3218 * not consume the data.
2495 */ 3219 */
2496struct ring_buffer_event * 3220struct ring_buffer_event *
2497ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) 3221ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
3222 unsigned long *lost_events)
2498{ 3223{
2499 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 3224 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2500 struct ring_buffer_event *event; 3225 struct ring_buffer_event *event;
@@ -2509,15 +3234,15 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2509 local_irq_save(flags); 3234 local_irq_save(flags);
2510 if (dolock) 3235 if (dolock)
2511 spin_lock(&cpu_buffer->reader_lock); 3236 spin_lock(&cpu_buffer->reader_lock);
2512 event = rb_buffer_peek(buffer, cpu, ts); 3237 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3238 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3239 rb_advance_reader(cpu_buffer);
2513 if (dolock) 3240 if (dolock)
2514 spin_unlock(&cpu_buffer->reader_lock); 3241 spin_unlock(&cpu_buffer->reader_lock);
2515 local_irq_restore(flags); 3242 local_irq_restore(flags);
2516 3243
2517 if (event && event->type_len == RINGBUF_TYPE_PADDING) { 3244 if (event && event->type_len == RINGBUF_TYPE_PADDING)
2518 cpu_relax();
2519 goto again; 3245 goto again;
2520 }
2521 3246
2522 return event; 3247 return event;
2523} 3248}
@@ -2542,10 +3267,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2542 event = rb_iter_peek(iter, ts); 3267 event = rb_iter_peek(iter, ts);
2543 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3268 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2544 3269
2545 if (event && event->type_len == RINGBUF_TYPE_PADDING) { 3270 if (event && event->type_len == RINGBUF_TYPE_PADDING)
2546 cpu_relax();
2547 goto again; 3271 goto again;
2548 }
2549 3272
2550 return event; 3273 return event;
2551} 3274}
@@ -2553,13 +3276,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2553/** 3276/**
2554 * ring_buffer_consume - return an event and consume it 3277 * ring_buffer_consume - return an event and consume it
2555 * @buffer: The ring buffer to get the next event from 3278 * @buffer: The ring buffer to get the next event from
3279 * @cpu: the cpu to read the buffer from
3280 * @ts: a variable to store the timestamp (may be NULL)
3281 * @lost_events: a variable to store if events were lost (may be NULL)
2556 * 3282 *
2557 * Returns the next event in the ring buffer, and that event is consumed. 3283 * Returns the next event in the ring buffer, and that event is consumed.
2558 * Meaning, that sequential reads will keep returning a different event, 3284 * Meaning, that sequential reads will keep returning a different event,
2559 * and eventually empty the ring buffer if the producer is slower. 3285 * and eventually empty the ring buffer if the producer is slower.
2560 */ 3286 */
2561struct ring_buffer_event * 3287struct ring_buffer_event *
2562ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 3288ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
3289 unsigned long *lost_events)
2563{ 3290{
2564 struct ring_buffer_per_cpu *cpu_buffer; 3291 struct ring_buffer_per_cpu *cpu_buffer;
2565 struct ring_buffer_event *event = NULL; 3292 struct ring_buffer_event *event = NULL;
@@ -2580,13 +3307,12 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2580 if (dolock) 3307 if (dolock)
2581 spin_lock(&cpu_buffer->reader_lock); 3308 spin_lock(&cpu_buffer->reader_lock);
2582 3309
2583 event = rb_buffer_peek(buffer, cpu, ts); 3310 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
2584 if (!event) 3311 if (event) {
2585 goto out_unlock; 3312 cpu_buffer->lost_events = 0;
2586 3313 rb_advance_reader(cpu_buffer);
2587 rb_advance_reader(cpu_buffer); 3314 }
2588 3315
2589 out_unlock:
2590 if (dolock) 3316 if (dolock)
2591 spin_unlock(&cpu_buffer->reader_lock); 3317 spin_unlock(&cpu_buffer->reader_lock);
2592 local_irq_restore(flags); 3318 local_irq_restore(flags);
@@ -2594,33 +3320,38 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2594 out: 3320 out:
2595 preempt_enable(); 3321 preempt_enable();
2596 3322
2597 if (event && event->type_len == RINGBUF_TYPE_PADDING) { 3323 if (event && event->type_len == RINGBUF_TYPE_PADDING)
2598 cpu_relax();
2599 goto again; 3324 goto again;
2600 }
2601 3325
2602 return event; 3326 return event;
2603} 3327}
2604EXPORT_SYMBOL_GPL(ring_buffer_consume); 3328EXPORT_SYMBOL_GPL(ring_buffer_consume);
2605 3329
2606/** 3330/**
2607 * ring_buffer_read_start - start a non consuming read of the buffer 3331 * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
2608 * @buffer: The ring buffer to read from 3332 * @buffer: The ring buffer to read from
2609 * @cpu: The cpu buffer to iterate over 3333 * @cpu: The cpu buffer to iterate over
2610 * 3334 *
2611 * This starts up an iteration through the buffer. It also disables 3335 * This performs the initial preparations necessary to iterate
2612 * the recording to the buffer until the reading is finished. 3336 * through the buffer. Memory is allocated, buffer recording
2613 * This prevents the reading from being corrupted. This is not 3337 * is disabled, and the iterator pointer is returned to the caller.
2614 * a consuming read, so a producer is not expected.
2615 * 3338 *
2616 * Must be paired with ring_buffer_finish. 3339 * Disabling buffer recordng prevents the reading from being
3340 * corrupted. This is not a consuming read, so a producer is not
3341 * expected.
3342 *
3343 * After a sequence of ring_buffer_read_prepare calls, the user is
3344 * expected to make at least one call to ring_buffer_prepare_sync.
3345 * Afterwards, ring_buffer_read_start is invoked to get things going
3346 * for real.
3347 *
3348 * This overall must be paired with ring_buffer_finish.
2617 */ 3349 */
2618struct ring_buffer_iter * 3350struct ring_buffer_iter *
2619ring_buffer_read_start(struct ring_buffer *buffer, int cpu) 3351ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
2620{ 3352{
2621 struct ring_buffer_per_cpu *cpu_buffer; 3353 struct ring_buffer_per_cpu *cpu_buffer;
2622 struct ring_buffer_iter *iter; 3354 struct ring_buffer_iter *iter;
2623 unsigned long flags;
2624 3355
2625 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3356 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2626 return NULL; 3357 return NULL;
@@ -2634,15 +3365,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
2634 iter->cpu_buffer = cpu_buffer; 3365 iter->cpu_buffer = cpu_buffer;
2635 3366
2636 atomic_inc(&cpu_buffer->record_disabled); 3367 atomic_inc(&cpu_buffer->record_disabled);
3368
3369 return iter;
3370}
3371EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
3372
3373/**
3374 * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
3375 *
3376 * All previously invoked ring_buffer_read_prepare calls to prepare
3377 * iterators will be synchronized. Afterwards, read_buffer_read_start
3378 * calls on those iterators are allowed.
3379 */
3380void
3381ring_buffer_read_prepare_sync(void)
3382{
2637 synchronize_sched(); 3383 synchronize_sched();
3384}
3385EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
3386
3387/**
3388 * ring_buffer_read_start - start a non consuming read of the buffer
3389 * @iter: The iterator returned by ring_buffer_read_prepare
3390 *
3391 * This finalizes the startup of an iteration through the buffer.
3392 * The iterator comes from a call to ring_buffer_read_prepare and
3393 * an intervening ring_buffer_read_prepare_sync must have been
3394 * performed.
3395 *
3396 * Must be paired with ring_buffer_finish.
3397 */
3398void
3399ring_buffer_read_start(struct ring_buffer_iter *iter)
3400{
3401 struct ring_buffer_per_cpu *cpu_buffer;
3402 unsigned long flags;
3403
3404 if (!iter)
3405 return;
3406
3407 cpu_buffer = iter->cpu_buffer;
2638 3408
2639 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3409 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2640 __raw_spin_lock(&cpu_buffer->lock); 3410 arch_spin_lock(&cpu_buffer->lock);
2641 rb_iter_reset(iter); 3411 rb_iter_reset(iter);
2642 __raw_spin_unlock(&cpu_buffer->lock); 3412 arch_spin_unlock(&cpu_buffer->lock);
2643 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3413 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2644
2645 return iter;
2646} 3414}
2647EXPORT_SYMBOL_GPL(ring_buffer_read_start); 3415EXPORT_SYMBOL_GPL(ring_buffer_read_start);
2648 3416
@@ -2677,21 +3445,19 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
2677 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 3445 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2678 unsigned long flags; 3446 unsigned long flags;
2679 3447
2680 again:
2681 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3448 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3449 again:
2682 event = rb_iter_peek(iter, ts); 3450 event = rb_iter_peek(iter, ts);
2683 if (!event) 3451 if (!event)
2684 goto out; 3452 goto out;
2685 3453
3454 if (event->type_len == RINGBUF_TYPE_PADDING)
3455 goto again;
3456
2686 rb_advance_iter(iter); 3457 rb_advance_iter(iter);
2687 out: 3458 out:
2688 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3459 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2689 3460
2690 if (event && event->type_len == RINGBUF_TYPE_PADDING) {
2691 cpu_relax();
2692 goto again;
2693 }
2694
2695 return event; 3461 return event;
2696} 3462}
2697EXPORT_SYMBOL_GPL(ring_buffer_read); 3463EXPORT_SYMBOL_GPL(ring_buffer_read);
@@ -2709,8 +3475,10 @@ EXPORT_SYMBOL_GPL(ring_buffer_size);
2709static void 3475static void
2710rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) 3476rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2711{ 3477{
3478 rb_head_page_deactivate(cpu_buffer);
3479
2712 cpu_buffer->head_page 3480 cpu_buffer->head_page
2713 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 3481 = list_entry(cpu_buffer->pages, struct buffer_page, list);
2714 local_set(&cpu_buffer->head_page->write, 0); 3482 local_set(&cpu_buffer->head_page->write, 0);
2715 local_set(&cpu_buffer->head_page->entries, 0); 3483 local_set(&cpu_buffer->head_page->entries, 0);
2716 local_set(&cpu_buffer->head_page->page->commit, 0); 3484 local_set(&cpu_buffer->head_page->page->commit, 0);
@@ -2726,16 +3494,20 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2726 local_set(&cpu_buffer->reader_page->page->commit, 0); 3494 local_set(&cpu_buffer->reader_page->page->commit, 0);
2727 cpu_buffer->reader_page->read = 0; 3495 cpu_buffer->reader_page->read = 0;
2728 3496
2729 cpu_buffer->nmi_dropped = 0; 3497 local_set(&cpu_buffer->commit_overrun, 0);
2730 cpu_buffer->commit_overrun = 0; 3498 local_set(&cpu_buffer->overrun, 0);
2731 cpu_buffer->overrun = 0;
2732 cpu_buffer->read = 0;
2733 local_set(&cpu_buffer->entries, 0); 3499 local_set(&cpu_buffer->entries, 0);
2734 local_set(&cpu_buffer->committing, 0); 3500 local_set(&cpu_buffer->committing, 0);
2735 local_set(&cpu_buffer->commits, 0); 3501 local_set(&cpu_buffer->commits, 0);
3502 cpu_buffer->read = 0;
2736 3503
2737 cpu_buffer->write_stamp = 0; 3504 cpu_buffer->write_stamp = 0;
2738 cpu_buffer->read_stamp = 0; 3505 cpu_buffer->read_stamp = 0;
3506
3507 cpu_buffer->lost_events = 0;
3508 cpu_buffer->last_overrun = 0;
3509
3510 rb_head_page_activate(cpu_buffer);
2739} 3511}
2740 3512
2741/** 3513/**
@@ -2755,12 +3527,16 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2755 3527
2756 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3528 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2757 3529
2758 __raw_spin_lock(&cpu_buffer->lock); 3530 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3531 goto out;
3532
3533 arch_spin_lock(&cpu_buffer->lock);
2759 3534
2760 rb_reset_cpu(cpu_buffer); 3535 rb_reset_cpu(cpu_buffer);
2761 3536
2762 __raw_spin_unlock(&cpu_buffer->lock); 3537 arch_spin_unlock(&cpu_buffer->lock);
2763 3538
3539 out:
2764 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3540 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2765 3541
2766 atomic_dec(&cpu_buffer->record_disabled); 3542 atomic_dec(&cpu_buffer->record_disabled);
@@ -2843,6 +3619,7 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
2843} 3619}
2844EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); 3620EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
2845 3621
3622#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2846/** 3623/**
2847 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers 3624 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
2848 * @buffer_a: One buffer to swap with 3625 * @buffer_a: One buffer to swap with
@@ -2897,20 +3674,28 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2897 atomic_inc(&cpu_buffer_a->record_disabled); 3674 atomic_inc(&cpu_buffer_a->record_disabled);
2898 atomic_inc(&cpu_buffer_b->record_disabled); 3675 atomic_inc(&cpu_buffer_b->record_disabled);
2899 3676
3677 ret = -EBUSY;
3678 if (local_read(&cpu_buffer_a->committing))
3679 goto out_dec;
3680 if (local_read(&cpu_buffer_b->committing))
3681 goto out_dec;
3682
2900 buffer_a->buffers[cpu] = cpu_buffer_b; 3683 buffer_a->buffers[cpu] = cpu_buffer_b;
2901 buffer_b->buffers[cpu] = cpu_buffer_a; 3684 buffer_b->buffers[cpu] = cpu_buffer_a;
2902 3685
2903 cpu_buffer_b->buffer = buffer_a; 3686 cpu_buffer_b->buffer = buffer_a;
2904 cpu_buffer_a->buffer = buffer_b; 3687 cpu_buffer_a->buffer = buffer_b;
2905 3688
3689 ret = 0;
3690
3691out_dec:
2906 atomic_dec(&cpu_buffer_a->record_disabled); 3692 atomic_dec(&cpu_buffer_a->record_disabled);
2907 atomic_dec(&cpu_buffer_b->record_disabled); 3693 atomic_dec(&cpu_buffer_b->record_disabled);
2908
2909 ret = 0;
2910out: 3694out:
2911 return ret; 3695 return ret;
2912} 3696}
2913EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); 3697EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
3698#endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */
2914 3699
2915/** 3700/**
2916 * ring_buffer_alloc_read_page - allocate a page to read from buffer 3701 * ring_buffer_alloc_read_page - allocate a page to read from buffer
@@ -2997,6 +3782,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
2997 struct ring_buffer_event *event; 3782 struct ring_buffer_event *event;
2998 struct buffer_data_page *bpage; 3783 struct buffer_data_page *bpage;
2999 struct buffer_page *reader; 3784 struct buffer_page *reader;
3785 unsigned long missed_events;
3000 unsigned long flags; 3786 unsigned long flags;
3001 unsigned int commit; 3787 unsigned int commit;
3002 unsigned int read; 3788 unsigned int read;
@@ -3033,6 +3819,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3033 read = reader->read; 3819 read = reader->read;
3034 commit = rb_page_commit(reader); 3820 commit = rb_page_commit(reader);
3035 3821
3822 /* Check if any events were dropped */
3823 missed_events = cpu_buffer->lost_events;
3824
3036 /* 3825 /*
3037 * If this page has been partially read or 3826 * If this page has been partially read or
3038 * if len is not big enough to read the rest of the page or 3827 * if len is not big enough to read the rest of the page or
@@ -3053,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3053 if (len > (commit - read)) 3842 if (len > (commit - read))
3054 len = (commit - read); 3843 len = (commit - read);
3055 3844
3056 size = rb_event_length(event); 3845 /* Always keep the time extend and data together */
3846 size = rb_event_ts_length(event);
3057 3847
3058 if (len < size) 3848 if (len < size)
3059 goto out_unlock; 3849 goto out_unlock;
@@ -3063,6 +3853,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3063 3853
3064 /* Need to copy one event at a time */ 3854 /* Need to copy one event at a time */
3065 do { 3855 do {
3856 /* We need the size of one event, because
3857 * rb_advance_reader only advances by one event,
3858 * whereas rb_event_ts_length may include the size of
3859 * one or two events.
3860 * We have already ensured there's enough space if this
3861 * is a time extend. */
3862 size = rb_event_length(event);
3066 memcpy(bpage->data + pos, rpage->data + rpos, size); 3863 memcpy(bpage->data + pos, rpage->data + rpos, size);
3067 3864
3068 len -= size; 3865 len -= size;
@@ -3071,9 +3868,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3071 rpos = reader->read; 3868 rpos = reader->read;
3072 pos += size; 3869 pos += size;
3073 3870
3871 if (rpos >= commit)
3872 break;
3873
3074 event = rb_reader_event(cpu_buffer); 3874 event = rb_reader_event(cpu_buffer);
3075 size = rb_event_length(event); 3875 /* Always keep the time extend and data together */
3076 } while (len > size); 3876 size = rb_event_ts_length(event);
3877 } while (len >= size);
3077 3878
3078 /* update bpage */ 3879 /* update bpage */
3079 local_set(&bpage->commit, pos); 3880 local_set(&bpage->commit, pos);
@@ -3083,7 +3884,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3083 read = 0; 3884 read = 0;
3084 } else { 3885 } else {
3085 /* update the entry counter */ 3886 /* update the entry counter */
3086 cpu_buffer->read += local_read(&reader->entries); 3887 cpu_buffer->read += rb_page_entries(reader);
3087 3888
3088 /* swap the pages */ 3889 /* swap the pages */
3089 rb_init_page(bpage); 3890 rb_init_page(bpage);
@@ -3093,9 +3894,42 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3093 local_set(&reader->entries, 0); 3894 local_set(&reader->entries, 0);
3094 reader->read = 0; 3895 reader->read = 0;
3095 *data_page = bpage; 3896 *data_page = bpage;
3897
3898 /*
3899 * Use the real_end for the data size,
3900 * This gives us a chance to store the lost events
3901 * on the page.
3902 */
3903 if (reader->real_end)
3904 local_set(&bpage->commit, reader->real_end);
3096 } 3905 }
3097 ret = read; 3906 ret = read;
3098 3907
3908 cpu_buffer->lost_events = 0;
3909
3910 commit = local_read(&bpage->commit);
3911 /*
3912 * Set a flag in the commit field if we lost events
3913 */
3914 if (missed_events) {
3915 /* If there is room at the end of the page to save the
3916 * missed events, then record it there.
3917 */
3918 if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
3919 memcpy(&bpage->data[commit], &missed_events,
3920 sizeof(missed_events));
3921 local_add(RB_MISSED_STORED, &bpage->commit);
3922 commit += sizeof(missed_events);
3923 }
3924 local_add(RB_MISSED_EVENTS, &bpage->commit);
3925 }
3926
3927 /*
3928 * This page may be off to user land. Zero it out here.
3929 */
3930 if (commit < BUF_PAGE_SIZE)
3931 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
3932
3099 out_unlock: 3933 out_unlock:
3100 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3934 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3101 3935
@@ -3104,6 +3938,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3104} 3938}
3105EXPORT_SYMBOL_GPL(ring_buffer_read_page); 3939EXPORT_SYMBOL_GPL(ring_buffer_read_page);
3106 3940
3941#ifdef CONFIG_TRACING
3107static ssize_t 3942static ssize_t
3108rb_simple_read(struct file *filp, char __user *ubuf, 3943rb_simple_read(struct file *filp, char __user *ubuf,
3109 size_t cnt, loff_t *ppos) 3944 size_t cnt, loff_t *ppos)
@@ -3155,6 +3990,7 @@ static const struct file_operations rb_simple_fops = {
3155 .open = tracing_open_generic, 3990 .open = tracing_open_generic,
3156 .read = rb_simple_read, 3991 .read = rb_simple_read,
3157 .write = rb_simple_write, 3992 .write = rb_simple_write,
3993 .llseek = default_llseek,
3158}; 3994};
3159 3995
3160 3996
@@ -3171,6 +4007,7 @@ static __init int rb_init_debugfs(void)
3171} 4007}
3172 4008
3173fs_initcall(rb_init_debugfs); 4009fs_initcall(rb_init_debugfs);
4010#endif
3174 4011
3175#ifdef CONFIG_HOTPLUG_CPU 4012#ifdef CONFIG_HOTPLUG_CPU
3176static int rb_cpu_notify(struct notifier_block *self, 4013static int rb_cpu_notify(struct notifier_block *self,
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 573d3cc762c3..302f8a614635 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -8,6 +8,7 @@
8#include <linux/kthread.h> 8#include <linux/kthread.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <asm/local.h>
11 12
12struct rb_page { 13struct rb_page {
13 u64 ts; 14 u64 ts;
@@ -35,6 +36,28 @@ static int disable_reader;
35module_param(disable_reader, uint, 0644); 36module_param(disable_reader, uint, 0644);
36MODULE_PARM_DESC(disable_reader, "only run producer"); 37MODULE_PARM_DESC(disable_reader, "only run producer");
37 38
39static int write_iteration = 50;
40module_param(write_iteration, uint, 0644);
41MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings");
42
43static int producer_nice = 19;
44static int consumer_nice = 19;
45
46static int producer_fifo = -1;
47static int consumer_fifo = -1;
48
49module_param(producer_nice, uint, 0644);
50MODULE_PARM_DESC(producer_nice, "nice prio for producer");
51
52module_param(consumer_nice, uint, 0644);
53MODULE_PARM_DESC(consumer_nice, "nice prio for consumer");
54
55module_param(producer_fifo, uint, 0644);
56MODULE_PARM_DESC(producer_fifo, "fifo prio for producer");
57
58module_param(consumer_fifo, uint, 0644);
59MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer");
60
38static int read_events; 61static int read_events;
39 62
40static int kill_test; 63static int kill_test;
@@ -58,7 +81,7 @@ static enum event_status read_event(int cpu)
58 int *entry; 81 int *entry;
59 u64 ts; 82 u64 ts;
60 83
61 event = ring_buffer_consume(buffer, cpu, &ts); 84 event = ring_buffer_consume(buffer, cpu, &ts, NULL);
62 if (!event) 85 if (!event)
63 return EVENT_DROPPED; 86 return EVENT_DROPPED;
64 87
@@ -90,7 +113,8 @@ static enum event_status read_page(int cpu)
90 ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1); 113 ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1);
91 if (ret >= 0) { 114 if (ret >= 0) {
92 rpage = bpage; 115 rpage = bpage;
93 commit = local_read(&rpage->commit); 116 /* The commit may have missed event flags set, clear them */
117 commit = local_read(&rpage->commit) & 0xfffff;
94 for (i = 0; i < commit && !kill_test; i += inc) { 118 for (i = 0; i < commit && !kill_test; i += inc) {
95 119
96 if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) { 120 if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) {
@@ -208,15 +232,18 @@ static void ring_buffer_producer(void)
208 do { 232 do {
209 struct ring_buffer_event *event; 233 struct ring_buffer_event *event;
210 int *entry; 234 int *entry;
211 235 int i;
212 event = ring_buffer_lock_reserve(buffer, 10); 236
213 if (!event) { 237 for (i = 0; i < write_iteration; i++) {
214 missed++; 238 event = ring_buffer_lock_reserve(buffer, 10);
215 } else { 239 if (!event) {
216 hit++; 240 missed++;
217 entry = ring_buffer_event_data(event); 241 } else {
218 *entry = smp_processor_id(); 242 hit++;
219 ring_buffer_unlock_commit(buffer, event); 243 entry = ring_buffer_event_data(event);
244 *entry = smp_processor_id();
245 ring_buffer_unlock_commit(buffer, event);
246 }
220 } 247 }
221 do_gettimeofday(&end_tv); 248 do_gettimeofday(&end_tv);
222 249
@@ -263,6 +290,27 @@ static void ring_buffer_producer(void)
263 290
264 if (kill_test) 291 if (kill_test)
265 trace_printk("ERROR!\n"); 292 trace_printk("ERROR!\n");
293
294 if (!disable_reader) {
295 if (consumer_fifo < 0)
296 trace_printk("Running Consumer at nice: %d\n",
297 consumer_nice);
298 else
299 trace_printk("Running Consumer at SCHED_FIFO %d\n",
300 consumer_fifo);
301 }
302 if (producer_fifo < 0)
303 trace_printk("Running Producer at nice: %d\n",
304 producer_nice);
305 else
306 trace_printk("Running Producer at SCHED_FIFO %d\n",
307 producer_fifo);
308
309 /* Let the user know that the test is running at low priority */
310 if (producer_fifo < 0 && consumer_fifo < 0 &&
311 producer_nice == 19 && consumer_nice == 19)
312 trace_printk("WARNING!!! This test is running at lowest priority.\n");
313
266 trace_printk("Time: %lld (usecs)\n", time); 314 trace_printk("Time: %lld (usecs)\n", time);
267 trace_printk("Overruns: %lld\n", overruns); 315 trace_printk("Overruns: %lld\n", overruns);
268 if (disable_reader) 316 if (disable_reader)
@@ -392,6 +440,27 @@ static int __init ring_buffer_benchmark_init(void)
392 if (IS_ERR(producer)) 440 if (IS_ERR(producer))
393 goto out_kill; 441 goto out_kill;
394 442
443 /*
444 * Run them as low-prio background tasks by default:
445 */
446 if (!disable_reader) {
447 if (consumer_fifo >= 0) {
448 struct sched_param param = {
449 .sched_priority = consumer_fifo
450 };
451 sched_setscheduler(consumer, SCHED_FIFO, &param);
452 } else
453 set_user_nice(consumer, consumer_nice);
454 }
455
456 if (producer_fifo >= 0) {
457 struct sched_param param = {
458 .sched_priority = consumer_fifo
459 };
460 sched_setscheduler(producer, SCHED_FIFO, &param);
461 } else
462 set_user_nice(producer, producer_nice);
463
395 return 0; 464 return 0;
396 465
397 out_kill: 466 out_kill:
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 076fa6f0ee48..dc53ecb80589 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -12,7 +12,7 @@
12 * Copyright (C) 2004 William Lee Irwin III 12 * Copyright (C) 2004 William Lee Irwin III
13 */ 13 */
14#include <linux/ring_buffer.h> 14#include <linux/ring_buffer.h>
15#include <linux/utsrelease.h> 15#include <generated/utsrelease.h>
16#include <linux/stacktrace.h> 16#include <linux/stacktrace.h>
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
@@ -31,10 +31,11 @@
31#include <linux/splice.h> 31#include <linux/splice.h>
32#include <linux/kdebug.h> 32#include <linux/kdebug.h>
33#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/rwsem.h>
35#include <linux/slab.h>
34#include <linux/ctype.h> 36#include <linux/ctype.h>
35#include <linux/init.h> 37#include <linux/init.h>
36#include <linux/poll.h> 38#include <linux/poll.h>
37#include <linux/gfp.h>
38#include <linux/fs.h> 39#include <linux/fs.h>
39 40
40#include "trace.h" 41#include "trace.h"
@@ -42,14 +43,11 @@
42 43
43#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) 44#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
44 45
45unsigned long __read_mostly tracing_max_latency;
46unsigned long __read_mostly tracing_thresh;
47
48/* 46/*
49 * On boot up, the ring buffer is set to the minimum size, so that 47 * On boot up, the ring buffer is set to the minimum size, so that
50 * we do not waste memory on systems that are not using tracing. 48 * we do not waste memory on systems that are not using tracing.
51 */ 49 */
52static int ring_buffer_expanded; 50int ring_buffer_expanded;
53 51
54/* 52/*
55 * We need to change this state when a selftest is running. 53 * We need to change this state when a selftest is running.
@@ -63,7 +61,7 @@ static bool __read_mostly tracing_selftest_running;
63/* 61/*
64 * If a tracer is running, we do not want to run SELFTEST. 62 * If a tracer is running, we do not want to run SELFTEST.
65 */ 63 */
66static bool __read_mostly tracing_selftest_disabled; 64bool __read_mostly tracing_selftest_disabled;
67 65
68/* For tracers that don't implement custom flags */ 66/* For tracers that don't implement custom flags */
69static struct tracer_opt dummy_tracer_opt[] = { 67static struct tracer_opt dummy_tracer_opt[] = {
@@ -88,27 +86,21 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
88 */ 86 */
89static int tracing_disabled = 1; 87static int tracing_disabled = 1;
90 88
91static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 89DEFINE_PER_CPU(int, ftrace_cpu_disabled);
92 90
93static inline void ftrace_disable_cpu(void) 91static inline void ftrace_disable_cpu(void)
94{ 92{
95 preempt_disable(); 93 preempt_disable();
96 local_inc(&__get_cpu_var(ftrace_cpu_disabled)); 94 __this_cpu_inc(ftrace_cpu_disabled);
97} 95}
98 96
99static inline void ftrace_enable_cpu(void) 97static inline void ftrace_enable_cpu(void)
100{ 98{
101 local_dec(&__get_cpu_var(ftrace_cpu_disabled)); 99 __this_cpu_dec(ftrace_cpu_disabled);
102 preempt_enable(); 100 preempt_enable();
103} 101}
104 102
105static cpumask_var_t __read_mostly tracing_buffer_mask; 103cpumask_var_t __read_mostly tracing_buffer_mask;
106
107/* Define which cpu buffers are currently read in trace_pipe */
108static cpumask_var_t tracing_reader_cpumask;
109
110#define for_each_tracing_cpu(cpu) \
111 for_each_cpu(cpu, tracing_buffer_mask)
112 104
113/* 105/*
114 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops 106 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
@@ -121,30 +113,42 @@ static cpumask_var_t tracing_reader_cpumask;
121 * 113 *
122 * It is default off, but you can enable it with either specifying 114 * It is default off, but you can enable it with either specifying
123 * "ftrace_dump_on_oops" in the kernel command line, or setting 115 * "ftrace_dump_on_oops" in the kernel command line, or setting
124 * /proc/sys/kernel/ftrace_dump_on_oops to true. 116 * /proc/sys/kernel/ftrace_dump_on_oops
117 * Set 1 if you want to dump buffers of all CPUs
118 * Set 2 if you want to dump the buffer of the CPU that triggered oops
125 */ 119 */
126int ftrace_dump_on_oops; 120
121enum ftrace_dump_mode ftrace_dump_on_oops;
127 122
128static int tracing_set_tracer(const char *buf); 123static int tracing_set_tracer(const char *buf);
129 124
130#define BOOTUP_TRACER_SIZE 100 125#define MAX_TRACER_SIZE 100
131static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata; 126static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
132static char *default_bootup_tracer; 127static char *default_bootup_tracer;
133 128
134static int __init set_ftrace(char *str) 129static int __init set_cmdline_ftrace(char *str)
135{ 130{
136 strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE); 131 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
137 default_bootup_tracer = bootup_tracer_buf; 132 default_bootup_tracer = bootup_tracer_buf;
138 /* We are using ftrace early, expand it */ 133 /* We are using ftrace early, expand it */
139 ring_buffer_expanded = 1; 134 ring_buffer_expanded = 1;
140 return 1; 135 return 1;
141} 136}
142__setup("ftrace=", set_ftrace); 137__setup("ftrace=", set_cmdline_ftrace);
143 138
144static int __init set_ftrace_dump_on_oops(char *str) 139static int __init set_ftrace_dump_on_oops(char *str)
145{ 140{
146 ftrace_dump_on_oops = 1; 141 if (*str++ != '=' || !*str) {
147 return 1; 142 ftrace_dump_on_oops = DUMP_ALL;
143 return 1;
144 }
145
146 if (!strcmp("orig_cpu", str)) {
147 ftrace_dump_on_oops = DUMP_ORIG;
148 return 1;
149 }
150
151 return 0;
148} 152}
149__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); 153__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
150 154
@@ -171,10 +175,11 @@ static struct trace_array global_trace;
171 175
172static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); 176static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
173 177
174int filter_current_check_discard(struct ftrace_event_call *call, void *rec, 178int filter_current_check_discard(struct ring_buffer *buffer,
179 struct ftrace_event_call *call, void *rec,
175 struct ring_buffer_event *event) 180 struct ring_buffer_event *event)
176{ 181{
177 return filter_check_discard(call, rec, global_trace.buffer, event); 182 return filter_check_discard(call, rec, buffer, event);
178} 183}
179EXPORT_SYMBOL_GPL(filter_current_check_discard); 184EXPORT_SYMBOL_GPL(filter_current_check_discard);
180 185
@@ -204,7 +209,7 @@ cycle_t ftrace_now(int cpu)
204 */ 209 */
205static struct trace_array max_tr; 210static struct trace_array max_tr;
206 211
207static DEFINE_PER_CPU(struct trace_array_cpu, max_data); 212static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
208 213
209/* tracer_enabled is used to toggle activation of a tracer */ 214/* tracer_enabled is used to toggle activation of a tracer */
210static int tracer_enabled = 1; 215static int tracer_enabled = 1;
@@ -243,19 +248,91 @@ static struct tracer *trace_types __read_mostly;
243static struct tracer *current_trace __read_mostly; 248static struct tracer *current_trace __read_mostly;
244 249
245/* 250/*
246 * max_tracer_type_len is used to simplify the allocating of 251 * trace_types_lock is used to protect the trace_types list.
247 * buffers to read userspace tracer names. We keep track of
248 * the longest tracer name registered.
249 */ 252 */
250static int max_tracer_type_len; 253static DEFINE_MUTEX(trace_types_lock);
251 254
252/* 255/*
253 * trace_types_lock is used to protect the trace_types list. 256 * serialize the access of the ring buffer
254 * This lock is also used to keep user access serialized. 257 *
255 * Accesses from userspace will grab this lock while userspace 258 * ring buffer serializes readers, but it is low level protection.
256 * activities happen inside the kernel. 259 * The validity of the events (which returns by ring_buffer_peek() ..etc)
260 * are not protected by ring buffer.
261 *
262 * The content of events may become garbage if we allow other process consumes
263 * these events concurrently:
264 * A) the page of the consumed events may become a normal page
265 * (not reader page) in ring buffer, and this page will be rewrited
266 * by events producer.
267 * B) The page of the consumed events may become a page for splice_read,
268 * and this page will be returned to system.
269 *
270 * These primitives allow multi process access to different cpu ring buffer
271 * concurrently.
272 *
273 * These primitives don't distinguish read-only and read-consume access.
274 * Multi read-only access are also serialized.
257 */ 275 */
258static DEFINE_MUTEX(trace_types_lock); 276
277#ifdef CONFIG_SMP
278static DECLARE_RWSEM(all_cpu_access_lock);
279static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
280
281static inline void trace_access_lock(int cpu)
282{
283 if (cpu == TRACE_PIPE_ALL_CPU) {
284 /* gain it for accessing the whole ring buffer. */
285 down_write(&all_cpu_access_lock);
286 } else {
287 /* gain it for accessing a cpu ring buffer. */
288
289 /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
290 down_read(&all_cpu_access_lock);
291
292 /* Secondly block other access to this @cpu ring buffer. */
293 mutex_lock(&per_cpu(cpu_access_lock, cpu));
294 }
295}
296
297static inline void trace_access_unlock(int cpu)
298{
299 if (cpu == TRACE_PIPE_ALL_CPU) {
300 up_write(&all_cpu_access_lock);
301 } else {
302 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
303 up_read(&all_cpu_access_lock);
304 }
305}
306
307static inline void trace_access_lock_init(void)
308{
309 int cpu;
310
311 for_each_possible_cpu(cpu)
312 mutex_init(&per_cpu(cpu_access_lock, cpu));
313}
314
315#else
316
317static DEFINE_MUTEX(access_lock);
318
319static inline void trace_access_lock(int cpu)
320{
321 (void)cpu;
322 mutex_lock(&access_lock);
323}
324
325static inline void trace_access_unlock(int cpu)
326{
327 (void)cpu;
328 mutex_unlock(&access_lock);
329}
330
331static inline void trace_access_lock_init(void)
332{
333}
334
335#endif
259 336
260/* trace_wait is a waitqueue for tasks blocked on trace_poll */ 337/* trace_wait is a waitqueue for tasks blocked on trace_poll */
261static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 338static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
@@ -263,7 +340,10 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
263/* trace_flags holds trace_options default values */ 340/* trace_flags holds trace_options default values */
264unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 341unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
265 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | 342 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
266 TRACE_ITER_GRAPH_TIME; 343 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD;
344
345static int trace_stop_count;
346static DEFINE_SPINLOCK(tracing_start_lock);
267 347
268/** 348/**
269 * trace_wake_up - wake up tasks waiting for trace input 349 * trace_wake_up - wake up tasks waiting for trace input
@@ -273,30 +353,50 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
273 */ 353 */
274void trace_wake_up(void) 354void trace_wake_up(void)
275{ 355{
356 int cpu;
357
358 if (trace_flags & TRACE_ITER_BLOCK)
359 return;
276 /* 360 /*
277 * The runqueue_is_locked() can fail, but this is the best we 361 * The runqueue_is_locked() can fail, but this is the best we
278 * have for now: 362 * have for now:
279 */ 363 */
280 if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked()) 364 cpu = get_cpu();
365 if (!runqueue_is_locked(cpu))
281 wake_up(&trace_wait); 366 wake_up(&trace_wait);
367 put_cpu();
282} 368}
283 369
284static int __init set_buf_size(char *str) 370static int __init set_buf_size(char *str)
285{ 371{
286 unsigned long buf_size; 372 unsigned long buf_size;
287 int ret;
288 373
289 if (!str) 374 if (!str)
290 return 0; 375 return 0;
291 ret = strict_strtoul(str, 0, &buf_size); 376 buf_size = memparse(str, &str);
292 /* nr_entries can not be zero */ 377 /* nr_entries can not be zero */
293 if (ret < 0 || buf_size == 0) 378 if (buf_size == 0)
294 return 0; 379 return 0;
295 trace_buf_size = buf_size; 380 trace_buf_size = buf_size;
296 return 1; 381 return 1;
297} 382}
298__setup("trace_buf_size=", set_buf_size); 383__setup("trace_buf_size=", set_buf_size);
299 384
385static int __init set_tracing_thresh(char *str)
386{
387 unsigned long threshhold;
388 int ret;
389
390 if (!str)
391 return 0;
392 ret = strict_strtoul(str, 0, &threshhold);
393 if (ret < 0)
394 return 0;
395 tracing_thresh = threshhold * 1000;
396 return 1;
397}
398__setup("tracing_thresh=", set_tracing_thresh);
399
300unsigned long nsecs_to_usecs(unsigned long nsecs) 400unsigned long nsecs_to_usecs(unsigned long nsecs)
301{ 401{
302 return nsecs / 1000; 402 return nsecs / 1000;
@@ -313,7 +413,6 @@ static const char *trace_options[] = {
313 "bin", 413 "bin",
314 "block", 414 "block",
315 "stacktrace", 415 "stacktrace",
316 "sched-tree",
317 "trace_printk", 416 "trace_printk",
318 "ftrace_preempt", 417 "ftrace_preempt",
319 "branch", 418 "branch",
@@ -323,49 +422,126 @@ static const char *trace_options[] = {
323 "printk-msg-only", 422 "printk-msg-only",
324 "context-info", 423 "context-info",
325 "latency-format", 424 "latency-format",
326 "global-clock",
327 "sleep-time", 425 "sleep-time",
328 "graph-time", 426 "graph-time",
427 "record-cmd",
329 NULL 428 NULL
330}; 429};
331 430
431static struct {
432 u64 (*func)(void);
433 const char *name;
434} trace_clocks[] = {
435 { trace_clock_local, "local" },
436 { trace_clock_global, "global" },
437};
438
439int trace_clock_id;
440
332/* 441/*
333 * ftrace_max_lock is used to protect the swapping of buffers 442 * trace_parser_get_init - gets the buffer for trace parser
334 * when taking a max snapshot. The buffers themselves are
335 * protected by per_cpu spinlocks. But the action of the swap
336 * needs its own lock.
337 *
338 * This is defined as a raw_spinlock_t in order to help
339 * with performance when lockdep debugging is enabled.
340 */ 443 */
341static raw_spinlock_t ftrace_max_lock = 444int trace_parser_get_init(struct trace_parser *parser, int size)
342 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 445{
446 memset(parser, 0, sizeof(*parser));
447
448 parser->buffer = kmalloc(size, GFP_KERNEL);
449 if (!parser->buffer)
450 return 1;
451
452 parser->size = size;
453 return 0;
454}
343 455
344/* 456/*
345 * Copy the new maximum trace into the separate maximum-trace 457 * trace_parser_put - frees the buffer for trace parser
346 * structure. (this way the maximum trace is permanently saved,
347 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
348 */ 458 */
349static void 459void trace_parser_put(struct trace_parser *parser)
350__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
351{ 460{
352 struct trace_array_cpu *data = tr->data[cpu]; 461 kfree(parser->buffer);
462}
353 463
354 max_tr.cpu = cpu; 464/*
355 max_tr.time_start = data->preempt_timestamp; 465 * trace_get_user - reads the user input string separated by space
466 * (matched by isspace(ch))
467 *
468 * For each string found the 'struct trace_parser' is updated,
469 * and the function returns.
470 *
471 * Returns number of bytes read.
472 *
473 * See kernel/trace/trace.h for 'struct trace_parser' details.
474 */
475int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
476 size_t cnt, loff_t *ppos)
477{
478 char ch;
479 size_t read = 0;
480 ssize_t ret;
481
482 if (!*ppos)
483 trace_parser_clear(parser);
356 484
357 data = max_tr.data[cpu]; 485 ret = get_user(ch, ubuf++);
358 data->saved_latency = tracing_max_latency; 486 if (ret)
487 goto out;
359 488
360 memcpy(data->comm, tsk->comm, TASK_COMM_LEN); 489 read++;
361 data->pid = tsk->pid; 490 cnt--;
362 data->uid = task_uid(tsk);
363 data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
364 data->policy = tsk->policy;
365 data->rt_priority = tsk->rt_priority;
366 491
367 /* record this tasks comm */ 492 /*
368 tracing_record_cmdline(tsk); 493 * The parser is not finished with the last write,
494 * continue reading the user input without skipping spaces.
495 */
496 if (!parser->cont) {
497 /* skip white space */
498 while (cnt && isspace(ch)) {
499 ret = get_user(ch, ubuf++);
500 if (ret)
501 goto out;
502 read++;
503 cnt--;
504 }
505
506 /* only spaces were written */
507 if (isspace(ch)) {
508 *ppos += read;
509 ret = read;
510 goto out;
511 }
512
513 parser->idx = 0;
514 }
515
516 /* read the non-space input */
517 while (cnt && !isspace(ch)) {
518 if (parser->idx < parser->size - 1)
519 parser->buffer[parser->idx++] = ch;
520 else {
521 ret = -EINVAL;
522 goto out;
523 }
524 ret = get_user(ch, ubuf++);
525 if (ret)
526 goto out;
527 read++;
528 cnt--;
529 }
530
531 /* We either got finished input or we have to wait for another call. */
532 if (isspace(ch)) {
533 parser->buffer[parser->idx] = 0;
534 parser->cont = false;
535 } else {
536 parser->cont = true;
537 parser->buffer[parser->idx++] = ch;
538 }
539
540 *ppos += read;
541 ret = read;
542
543out:
544 return ret;
369} 545}
370 546
371ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) 547ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
@@ -411,6 +587,57 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
411 return cnt; 587 return cnt;
412} 588}
413 589
590/*
591 * ftrace_max_lock is used to protect the swapping of buffers
592 * when taking a max snapshot. The buffers themselves are
593 * protected by per_cpu spinlocks. But the action of the swap
594 * needs its own lock.
595 *
596 * This is defined as a arch_spinlock_t in order to help
597 * with performance when lockdep debugging is enabled.
598 *
599 * It is also used in other places outside the update_max_tr
600 * so it needs to be defined outside of the
601 * CONFIG_TRACER_MAX_TRACE.
602 */
603static arch_spinlock_t ftrace_max_lock =
604 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
605
606unsigned long __read_mostly tracing_thresh;
607
608#ifdef CONFIG_TRACER_MAX_TRACE
609unsigned long __read_mostly tracing_max_latency;
610
611/*
612 * Copy the new maximum trace into the separate maximum-trace
613 * structure. (this way the maximum trace is permanently saved,
614 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
615 */
616static void
617__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
618{
619 struct trace_array_cpu *data = tr->data[cpu];
620 struct trace_array_cpu *max_data;
621
622 max_tr.cpu = cpu;
623 max_tr.time_start = data->preempt_timestamp;
624
625 max_data = max_tr.data[cpu];
626 max_data->saved_latency = tracing_max_latency;
627 max_data->critical_start = data->critical_start;
628 max_data->critical_end = data->critical_end;
629
630 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
631 max_data->pid = tsk->pid;
632 max_data->uid = task_uid(tsk);
633 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
634 max_data->policy = tsk->policy;
635 max_data->rt_priority = tsk->rt_priority;
636
637 /* record this tasks comm */
638 tracing_record_cmdline(tsk);
639}
640
414/** 641/**
415 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 642 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
416 * @tr: tracer 643 * @tr: tracer
@@ -425,18 +652,21 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
425{ 652{
426 struct ring_buffer *buf = tr->buffer; 653 struct ring_buffer *buf = tr->buffer;
427 654
655 if (trace_stop_count)
656 return;
657
428 WARN_ON_ONCE(!irqs_disabled()); 658 WARN_ON_ONCE(!irqs_disabled());
429 __raw_spin_lock(&ftrace_max_lock); 659 if (!current_trace->use_max_tr) {
660 WARN_ON_ONCE(1);
661 return;
662 }
663 arch_spin_lock(&ftrace_max_lock);
430 664
431 tr->buffer = max_tr.buffer; 665 tr->buffer = max_tr.buffer;
432 max_tr.buffer = buf; 666 max_tr.buffer = buf;
433 667
434 ftrace_disable_cpu();
435 ring_buffer_reset(tr->buffer);
436 ftrace_enable_cpu();
437
438 __update_max_tr(tr, tsk, cpu); 668 __update_max_tr(tr, tsk, cpu);
439 __raw_spin_unlock(&ftrace_max_lock); 669 arch_spin_unlock(&ftrace_max_lock);
440} 670}
441 671
442/** 672/**
@@ -452,21 +682,40 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
452{ 682{
453 int ret; 683 int ret;
454 684
685 if (trace_stop_count)
686 return;
687
455 WARN_ON_ONCE(!irqs_disabled()); 688 WARN_ON_ONCE(!irqs_disabled());
456 __raw_spin_lock(&ftrace_max_lock); 689 if (!current_trace->use_max_tr) {
690 WARN_ON_ONCE(1);
691 return;
692 }
693
694 arch_spin_lock(&ftrace_max_lock);
457 695
458 ftrace_disable_cpu(); 696 ftrace_disable_cpu();
459 697
460 ring_buffer_reset(max_tr.buffer);
461 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); 698 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
462 699
700 if (ret == -EBUSY) {
701 /*
702 * We failed to swap the buffer due to a commit taking
703 * place on this CPU. We fail to record, but we reset
704 * the max trace buffer (no one writes directly to it)
705 * and flag that it failed.
706 */
707 trace_array_printk(&max_tr, _THIS_IP_,
708 "Failed to swap buffers due to commit in progress\n");
709 }
710
463 ftrace_enable_cpu(); 711 ftrace_enable_cpu();
464 712
465 WARN_ON_ONCE(ret && ret != -EAGAIN); 713 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
466 714
467 __update_max_tr(tr, tsk, cpu); 715 __update_max_tr(tr, tsk, cpu);
468 __raw_spin_unlock(&ftrace_max_lock); 716 arch_spin_unlock(&ftrace_max_lock);
469} 717}
718#endif /* CONFIG_TRACER_MAX_TRACE */
470 719
471/** 720/**
472 * register_tracer - register a tracer with the ftrace system. 721 * register_tracer - register a tracer with the ftrace system.
@@ -479,7 +728,6 @@ __releases(kernel_lock)
479__acquires(kernel_lock) 728__acquires(kernel_lock)
480{ 729{
481 struct tracer *t; 730 struct tracer *t;
482 int len;
483 int ret = 0; 731 int ret = 0;
484 732
485 if (!type->name) { 733 if (!type->name) {
@@ -487,13 +735,11 @@ __acquires(kernel_lock)
487 return -1; 735 return -1;
488 } 736 }
489 737
490 /* 738 if (strlen(type->name) >= MAX_TRACER_SIZE) {
491 * When this gets called we hold the BKL which means that 739 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
492 * preemption is disabled. Various trace selftests however 740 return -1;
493 * need to disable and enable preemption for successful tests. 741 }
494 * So we drop the BKL here and grab it after the tests again. 742
495 */
496 unlock_kernel();
497 mutex_lock(&trace_types_lock); 743 mutex_lock(&trace_types_lock);
498 744
499 tracing_selftest_running = true; 745 tracing_selftest_running = true;
@@ -501,7 +747,7 @@ __acquires(kernel_lock)
501 for (t = trace_types; t; t = t->next) { 747 for (t = trace_types; t; t = t->next) {
502 if (strcmp(type->name, t->name) == 0) { 748 if (strcmp(type->name, t->name) == 0) {
503 /* already found */ 749 /* already found */
504 pr_info("Trace %s already registered\n", 750 pr_info("Tracer %s already registered\n",
505 type->name); 751 type->name);
506 ret = -1; 752 ret = -1;
507 goto out; 753 goto out;
@@ -523,7 +769,6 @@ __acquires(kernel_lock)
523 if (type->selftest && !tracing_selftest_disabled) { 769 if (type->selftest && !tracing_selftest_disabled) {
524 struct tracer *saved_tracer = current_trace; 770 struct tracer *saved_tracer = current_trace;
525 struct trace_array *tr = &global_trace; 771 struct trace_array *tr = &global_trace;
526 int i;
527 772
528 /* 773 /*
529 * Run a selftest on this tracer. 774 * Run a selftest on this tracer.
@@ -532,8 +777,7 @@ __acquires(kernel_lock)
532 * internal tracing to verify that everything is in order. 777 * internal tracing to verify that everything is in order.
533 * If we fail, we do not register this tracer. 778 * If we fail, we do not register this tracer.
534 */ 779 */
535 for_each_tracing_cpu(i) 780 tracing_reset_online_cpus(tr);
536 tracing_reset(tr, i);
537 781
538 current_trace = type; 782 current_trace = type;
539 /* the test is responsible for initializing and enabling */ 783 /* the test is responsible for initializing and enabling */
@@ -546,8 +790,7 @@ __acquires(kernel_lock)
546 goto out; 790 goto out;
547 } 791 }
548 /* Only reset on passing, to avoid touching corrupted buffers */ 792 /* Only reset on passing, to avoid touching corrupted buffers */
549 for_each_tracing_cpu(i) 793 tracing_reset_online_cpus(tr);
550 tracing_reset(tr, i);
551 794
552 printk(KERN_CONT "PASSED\n"); 795 printk(KERN_CONT "PASSED\n");
553 } 796 }
@@ -555,9 +798,6 @@ __acquires(kernel_lock)
555 798
556 type->next = trace_types; 799 type->next = trace_types;
557 trace_types = type; 800 trace_types = type;
558 len = strlen(type->name);
559 if (len > max_tracer_type_len)
560 max_tracer_type_len = len;
561 801
562 out: 802 out:
563 tracing_selftest_running = false; 803 tracing_selftest_running = false;
@@ -566,7 +806,7 @@ __acquires(kernel_lock)
566 if (ret || !default_bootup_tracer) 806 if (ret || !default_bootup_tracer)
567 goto out_unlock; 807 goto out_unlock;
568 808
569 if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE)) 809 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
570 goto out_unlock; 810 goto out_unlock;
571 811
572 printk(KERN_INFO "Starting tracer '%s'\n", type->name); 812 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
@@ -581,21 +821,19 @@ __acquires(kernel_lock)
581#endif 821#endif
582 822
583 out_unlock: 823 out_unlock:
584 lock_kernel();
585 return ret; 824 return ret;
586} 825}
587 826
588void unregister_tracer(struct tracer *type) 827void unregister_tracer(struct tracer *type)
589{ 828{
590 struct tracer **t; 829 struct tracer **t;
591 int len;
592 830
593 mutex_lock(&trace_types_lock); 831 mutex_lock(&trace_types_lock);
594 for (t = &trace_types; *t; t = &(*t)->next) { 832 for (t = &trace_types; *t; t = &(*t)->next) {
595 if (*t == type) 833 if (*t == type)
596 goto found; 834 goto found;
597 } 835 }
598 pr_info("Trace %s not registered\n", type->name); 836 pr_info("Tracer %s not registered\n", type->name);
599 goto out; 837 goto out;
600 838
601 found: 839 found:
@@ -608,35 +846,46 @@ void unregister_tracer(struct tracer *type)
608 current_trace->stop(&global_trace); 846 current_trace->stop(&global_trace);
609 current_trace = &nop_trace; 847 current_trace = &nop_trace;
610 } 848 }
611 849out:
612 if (strlen(type->name) != max_tracer_type_len)
613 goto out;
614
615 max_tracer_type_len = 0;
616 for (t = &trace_types; *t; t = &(*t)->next) {
617 len = strlen((*t)->name);
618 if (len > max_tracer_type_len)
619 max_tracer_type_len = len;
620 }
621 out:
622 mutex_unlock(&trace_types_lock); 850 mutex_unlock(&trace_types_lock);
623} 851}
624 852
625void tracing_reset(struct trace_array *tr, int cpu) 853static void __tracing_reset(struct ring_buffer *buffer, int cpu)
626{ 854{
627 ftrace_disable_cpu(); 855 ftrace_disable_cpu();
628 ring_buffer_reset_cpu(tr->buffer, cpu); 856 ring_buffer_reset_cpu(buffer, cpu);
629 ftrace_enable_cpu(); 857 ftrace_enable_cpu();
630} 858}
631 859
860void tracing_reset(struct trace_array *tr, int cpu)
861{
862 struct ring_buffer *buffer = tr->buffer;
863
864 ring_buffer_record_disable(buffer);
865
866 /* Make sure all commits have finished */
867 synchronize_sched();
868 __tracing_reset(buffer, cpu);
869
870 ring_buffer_record_enable(buffer);
871}
872
632void tracing_reset_online_cpus(struct trace_array *tr) 873void tracing_reset_online_cpus(struct trace_array *tr)
633{ 874{
875 struct ring_buffer *buffer = tr->buffer;
634 int cpu; 876 int cpu;
635 877
878 ring_buffer_record_disable(buffer);
879
880 /* Make sure all commits have finished */
881 synchronize_sched();
882
636 tr->time_start = ftrace_now(tr->cpu); 883 tr->time_start = ftrace_now(tr->cpu);
637 884
638 for_each_online_cpu(cpu) 885 for_each_online_cpu(cpu)
639 tracing_reset(tr, cpu); 886 __tracing_reset(buffer, cpu);
887
888 ring_buffer_record_enable(buffer);
640} 889}
641 890
642void tracing_reset_current(int cpu) 891void tracing_reset_current(int cpu)
@@ -655,7 +904,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
655static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 904static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
656static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; 905static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
657static int cmdline_idx; 906static int cmdline_idx;
658static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; 907static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
659 908
660/* temporary disable recording */ 909/* temporary disable recording */
661static atomic_t trace_record_cmdline_disabled __read_mostly; 910static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -667,8 +916,10 @@ static void trace_init_cmdlines(void)
667 cmdline_idx = 0; 916 cmdline_idx = 0;
668} 917}
669 918
670static int trace_stop_count; 919int is_tracing_stopped(void)
671static DEFINE_SPINLOCK(tracing_start_lock); 920{
921 return trace_stop_count;
922}
672 923
673/** 924/**
674 * ftrace_off_permanent - disable all ftrace code permanently 925 * ftrace_off_permanent - disable all ftrace code permanently
@@ -709,6 +960,8 @@ void tracing_start(void)
709 goto out; 960 goto out;
710 } 961 }
711 962
963 /* Prevent the buffers from switching */
964 arch_spin_lock(&ftrace_max_lock);
712 965
713 buffer = global_trace.buffer; 966 buffer = global_trace.buffer;
714 if (buffer) 967 if (buffer)
@@ -718,6 +971,8 @@ void tracing_start(void)
718 if (buffer) 971 if (buffer)
719 ring_buffer_record_enable(buffer); 972 ring_buffer_record_enable(buffer);
720 973
974 arch_spin_unlock(&ftrace_max_lock);
975
721 ftrace_start(); 976 ftrace_start();
722 out: 977 out:
723 spin_unlock_irqrestore(&tracing_start_lock, flags); 978 spin_unlock_irqrestore(&tracing_start_lock, flags);
@@ -739,6 +994,9 @@ void tracing_stop(void)
739 if (trace_stop_count++) 994 if (trace_stop_count++)
740 goto out; 995 goto out;
741 996
997 /* Prevent the buffers from switching */
998 arch_spin_lock(&ftrace_max_lock);
999
742 buffer = global_trace.buffer; 1000 buffer = global_trace.buffer;
743 if (buffer) 1001 if (buffer)
744 ring_buffer_record_disable(buffer); 1002 ring_buffer_record_disable(buffer);
@@ -747,6 +1005,8 @@ void tracing_stop(void)
747 if (buffer) 1005 if (buffer)
748 ring_buffer_record_disable(buffer); 1006 ring_buffer_record_disable(buffer);
749 1007
1008 arch_spin_unlock(&ftrace_max_lock);
1009
750 out: 1010 out:
751 spin_unlock_irqrestore(&tracing_start_lock, flags); 1011 spin_unlock_irqrestore(&tracing_start_lock, flags);
752} 1012}
@@ -766,7 +1026,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
766 * nor do we want to disable interrupts, 1026 * nor do we want to disable interrupts,
767 * so if we miss here, then better luck next time. 1027 * so if we miss here, then better luck next time.
768 */ 1028 */
769 if (!__raw_spin_trylock(&trace_cmdline_lock)) 1029 if (!arch_spin_trylock(&trace_cmdline_lock))
770 return; 1030 return;
771 1031
772 idx = map_pid_to_cmdline[tsk->pid]; 1032 idx = map_pid_to_cmdline[tsk->pid];
@@ -791,7 +1051,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
791 1051
792 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); 1052 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
793 1053
794 __raw_spin_unlock(&trace_cmdline_lock); 1054 arch_spin_unlock(&trace_cmdline_lock);
795} 1055}
796 1056
797void trace_find_cmdline(int pid, char comm[]) 1057void trace_find_cmdline(int pid, char comm[])
@@ -803,20 +1063,25 @@ void trace_find_cmdline(int pid, char comm[])
803 return; 1063 return;
804 } 1064 }
805 1065
1066 if (WARN_ON_ONCE(pid < 0)) {
1067 strcpy(comm, "<XXX>");
1068 return;
1069 }
1070
806 if (pid > PID_MAX_DEFAULT) { 1071 if (pid > PID_MAX_DEFAULT) {
807 strcpy(comm, "<...>"); 1072 strcpy(comm, "<...>");
808 return; 1073 return;
809 } 1074 }
810 1075
811 preempt_disable(); 1076 preempt_disable();
812 __raw_spin_lock(&trace_cmdline_lock); 1077 arch_spin_lock(&trace_cmdline_lock);
813 map = map_pid_to_cmdline[pid]; 1078 map = map_pid_to_cmdline[pid];
814 if (map != NO_CMDLINE_MAP) 1079 if (map != NO_CMDLINE_MAP)
815 strcpy(comm, saved_cmdlines[map]); 1080 strcpy(comm, saved_cmdlines[map]);
816 else 1081 else
817 strcpy(comm, "<...>"); 1082 strcpy(comm, "<...>");
818 1083
819 __raw_spin_unlock(&trace_cmdline_lock); 1084 arch_spin_unlock(&trace_cmdline_lock);
820 preempt_enable(); 1085 preempt_enable();
821} 1086}
822 1087
@@ -837,7 +1102,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
837 1102
838 entry->preempt_count = pc & 0xff; 1103 entry->preempt_count = pc & 0xff;
839 entry->pid = (tsk) ? tsk->pid : 0; 1104 entry->pid = (tsk) ? tsk->pid : 0;
840 entry->tgid = (tsk) ? tsk->tgid : 0; 1105 entry->lock_depth = (tsk) ? tsk->lock_depth : 0;
841 entry->flags = 1106 entry->flags =
842#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 1107#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
843 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 1108 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -848,15 +1113,17 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
848 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | 1113 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
849 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 1114 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
850} 1115}
1116EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
851 1117
852struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, 1118struct ring_buffer_event *
853 int type, 1119trace_buffer_lock_reserve(struct ring_buffer *buffer,
854 unsigned long len, 1120 int type,
855 unsigned long flags, int pc) 1121 unsigned long len,
1122 unsigned long flags, int pc)
856{ 1123{
857 struct ring_buffer_event *event; 1124 struct ring_buffer_event *event;
858 1125
859 event = ring_buffer_lock_reserve(tr->buffer, len); 1126 event = ring_buffer_lock_reserve(buffer, len);
860 if (event != NULL) { 1127 if (event != NULL) {
861 struct trace_entry *ent = ring_buffer_event_data(event); 1128 struct trace_entry *ent = ring_buffer_event_data(event);
862 1129
@@ -866,58 +1133,60 @@ struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
866 1133
867 return event; 1134 return event;
868} 1135}
869static void ftrace_trace_stack(struct trace_array *tr,
870 unsigned long flags, int skip, int pc);
871static void ftrace_trace_userstack(struct trace_array *tr,
872 unsigned long flags, int pc);
873 1136
874static inline void __trace_buffer_unlock_commit(struct trace_array *tr, 1137static inline void
875 struct ring_buffer_event *event, 1138__trace_buffer_unlock_commit(struct ring_buffer *buffer,
876 unsigned long flags, int pc, 1139 struct ring_buffer_event *event,
877 int wake) 1140 unsigned long flags, int pc,
1141 int wake)
878{ 1142{
879 ring_buffer_unlock_commit(tr->buffer, event); 1143 ring_buffer_unlock_commit(buffer, event);
880 1144
881 ftrace_trace_stack(tr, flags, 6, pc); 1145 ftrace_trace_stack(buffer, flags, 6, pc);
882 ftrace_trace_userstack(tr, flags, pc); 1146 ftrace_trace_userstack(buffer, flags, pc);
883 1147
884 if (wake) 1148 if (wake)
885 trace_wake_up(); 1149 trace_wake_up();
886} 1150}
887 1151
888void trace_buffer_unlock_commit(struct trace_array *tr, 1152void trace_buffer_unlock_commit(struct ring_buffer *buffer,
889 struct ring_buffer_event *event, 1153 struct ring_buffer_event *event,
890 unsigned long flags, int pc) 1154 unsigned long flags, int pc)
891{ 1155{
892 __trace_buffer_unlock_commit(tr, event, flags, pc, 1); 1156 __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
893} 1157}
894 1158
895struct ring_buffer_event * 1159struct ring_buffer_event *
896trace_current_buffer_lock_reserve(int type, unsigned long len, 1160trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1161 int type, unsigned long len,
897 unsigned long flags, int pc) 1162 unsigned long flags, int pc)
898{ 1163{
899 return trace_buffer_lock_reserve(&global_trace, 1164 *current_rb = global_trace.buffer;
1165 return trace_buffer_lock_reserve(*current_rb,
900 type, len, flags, pc); 1166 type, len, flags, pc);
901} 1167}
902EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); 1168EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
903 1169
904void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, 1170void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1171 struct ring_buffer_event *event,
905 unsigned long flags, int pc) 1172 unsigned long flags, int pc)
906{ 1173{
907 __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); 1174 __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
908} 1175}
909EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); 1176EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
910 1177
911void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, 1178void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
912 unsigned long flags, int pc) 1179 struct ring_buffer_event *event,
1180 unsigned long flags, int pc)
913{ 1181{
914 __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); 1182 __trace_buffer_unlock_commit(buffer, event, flags, pc, 0);
915} 1183}
916EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); 1184EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
917 1185
918void trace_current_buffer_discard_commit(struct ring_buffer_event *event) 1186void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1187 struct ring_buffer_event *event)
919{ 1188{
920 ring_buffer_discard_commit(global_trace.buffer, event); 1189 ring_buffer_discard_commit(buffer, event);
921} 1190}
922EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit); 1191EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
923 1192
@@ -927,14 +1196,15 @@ trace_function(struct trace_array *tr,
927 int pc) 1196 int pc)
928{ 1197{
929 struct ftrace_event_call *call = &event_function; 1198 struct ftrace_event_call *call = &event_function;
1199 struct ring_buffer *buffer = tr->buffer;
930 struct ring_buffer_event *event; 1200 struct ring_buffer_event *event;
931 struct ftrace_entry *entry; 1201 struct ftrace_entry *entry;
932 1202
933 /* If we are reading the ring buffer, don't trace */ 1203 /* If we are reading the ring buffer, don't trace */
934 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 1204 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
935 return; 1205 return;
936 1206
937 event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry), 1207 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
938 flags, pc); 1208 flags, pc);
939 if (!event) 1209 if (!event)
940 return; 1210 return;
@@ -942,57 +1212,9 @@ trace_function(struct trace_array *tr,
942 entry->ip = ip; 1212 entry->ip = ip;
943 entry->parent_ip = parent_ip; 1213 entry->parent_ip = parent_ip;
944 1214
945 if (!filter_check_discard(call, entry, tr->buffer, event)) 1215 if (!filter_check_discard(call, entry, buffer, event))
946 ring_buffer_unlock_commit(tr->buffer, event); 1216 ring_buffer_unlock_commit(buffer, event);
947}
948
949#ifdef CONFIG_FUNCTION_GRAPH_TRACER
950static int __trace_graph_entry(struct trace_array *tr,
951 struct ftrace_graph_ent *trace,
952 unsigned long flags,
953 int pc)
954{
955 struct ftrace_event_call *call = &event_funcgraph_entry;
956 struct ring_buffer_event *event;
957 struct ftrace_graph_ent_entry *entry;
958
959 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
960 return 0;
961
962 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
963 sizeof(*entry), flags, pc);
964 if (!event)
965 return 0;
966 entry = ring_buffer_event_data(event);
967 entry->graph_ent = *trace;
968 if (!filter_current_check_discard(call, entry, event))
969 ring_buffer_unlock_commit(global_trace.buffer, event);
970
971 return 1;
972}
973
974static void __trace_graph_return(struct trace_array *tr,
975 struct ftrace_graph_ret *trace,
976 unsigned long flags,
977 int pc)
978{
979 struct ftrace_event_call *call = &event_funcgraph_exit;
980 struct ring_buffer_event *event;
981 struct ftrace_graph_ret_entry *entry;
982
983 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
984 return;
985
986 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET,
987 sizeof(*entry), flags, pc);
988 if (!event)
989 return;
990 entry = ring_buffer_event_data(event);
991 entry->ret = *trace;
992 if (!filter_current_check_discard(call, entry, event))
993 ring_buffer_unlock_commit(global_trace.buffer, event);
994} 1217}
995#endif
996 1218
997void 1219void
998ftrace(struct trace_array *tr, struct trace_array_cpu *data, 1220ftrace(struct trace_array *tr, struct trace_array_cpu *data,
@@ -1003,17 +1225,17 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1003 trace_function(tr, ip, parent_ip, flags, pc); 1225 trace_function(tr, ip, parent_ip, flags, pc);
1004} 1226}
1005 1227
1006static void __ftrace_trace_stack(struct trace_array *tr, 1228#ifdef CONFIG_STACKTRACE
1229static void __ftrace_trace_stack(struct ring_buffer *buffer,
1007 unsigned long flags, 1230 unsigned long flags,
1008 int skip, int pc) 1231 int skip, int pc)
1009{ 1232{
1010#ifdef CONFIG_STACKTRACE
1011 struct ftrace_event_call *call = &event_kernel_stack; 1233 struct ftrace_event_call *call = &event_kernel_stack;
1012 struct ring_buffer_event *event; 1234 struct ring_buffer_event *event;
1013 struct stack_entry *entry; 1235 struct stack_entry *entry;
1014 struct stack_trace trace; 1236 struct stack_trace trace;
1015 1237
1016 event = trace_buffer_lock_reserve(tr, TRACE_STACK, 1238 event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1017 sizeof(*entry), flags, pc); 1239 sizeof(*entry), flags, pc);
1018 if (!event) 1240 if (!event)
1019 return; 1241 return;
@@ -1026,32 +1248,46 @@ static void __ftrace_trace_stack(struct trace_array *tr,
1026 trace.entries = entry->caller; 1248 trace.entries = entry->caller;
1027 1249
1028 save_stack_trace(&trace); 1250 save_stack_trace(&trace);
1029 if (!filter_check_discard(call, entry, tr->buffer, event)) 1251 if (!filter_check_discard(call, entry, buffer, event))
1030 ring_buffer_unlock_commit(tr->buffer, event); 1252 ring_buffer_unlock_commit(buffer, event);
1031#endif
1032} 1253}
1033 1254
1034static void ftrace_trace_stack(struct trace_array *tr, 1255void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1035 unsigned long flags, 1256 int skip, int pc)
1036 int skip, int pc)
1037{ 1257{
1038 if (!(trace_flags & TRACE_ITER_STACKTRACE)) 1258 if (!(trace_flags & TRACE_ITER_STACKTRACE))
1039 return; 1259 return;
1040 1260
1041 __ftrace_trace_stack(tr, flags, skip, pc); 1261 __ftrace_trace_stack(buffer, flags, skip, pc);
1042} 1262}
1043 1263
1044void __trace_stack(struct trace_array *tr, 1264void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1045 unsigned long flags, 1265 int pc)
1046 int skip, int pc)
1047{ 1266{
1048 __ftrace_trace_stack(tr, flags, skip, pc); 1267 __ftrace_trace_stack(tr->buffer, flags, skip, pc);
1049} 1268}
1050 1269
1051static void ftrace_trace_userstack(struct trace_array *tr, 1270/**
1052 unsigned long flags, int pc) 1271 * trace_dump_stack - record a stack back trace in the trace buffer
1272 */
1273void trace_dump_stack(void)
1274{
1275 unsigned long flags;
1276
1277 if (tracing_disabled || tracing_selftest_running)
1278 return;
1279
1280 local_save_flags(flags);
1281
1282 /* skipping 3 traces, seems to get us at the caller of this function */
1283 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
1284}
1285
1286static DEFINE_PER_CPU(int, user_stack_count);
1287
1288void
1289ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1053{ 1290{
1054#ifdef CONFIG_STACKTRACE
1055 struct ftrace_event_call *call = &event_user_stack; 1291 struct ftrace_event_call *call = &event_user_stack;
1056 struct ring_buffer_event *event; 1292 struct ring_buffer_event *event;
1057 struct userstack_entry *entry; 1293 struct userstack_entry *entry;
@@ -1060,12 +1296,30 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1060 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 1296 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1061 return; 1297 return;
1062 1298
1063 event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK, 1299 /*
1300 * NMIs can not handle page faults, even with fix ups.
1301 * The save user stack can (and often does) fault.
1302 */
1303 if (unlikely(in_nmi()))
1304 return;
1305
1306 /*
1307 * prevent recursion, since the user stack tracing may
1308 * trigger other kernel events.
1309 */
1310 preempt_disable();
1311 if (__this_cpu_read(user_stack_count))
1312 goto out;
1313
1314 __this_cpu_inc(user_stack_count);
1315
1316 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1064 sizeof(*entry), flags, pc); 1317 sizeof(*entry), flags, pc);
1065 if (!event) 1318 if (!event)
1066 return; 1319 goto out_drop_count;
1067 entry = ring_buffer_event_data(event); 1320 entry = ring_buffer_event_data(event);
1068 1321
1322 entry->tgid = current->tgid;
1069 memset(&entry->caller, 0, sizeof(entry->caller)); 1323 memset(&entry->caller, 0, sizeof(entry->caller));
1070 1324
1071 trace.nr_entries = 0; 1325 trace.nr_entries = 0;
@@ -1074,9 +1328,13 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1074 trace.entries = entry->caller; 1328 trace.entries = entry->caller;
1075 1329
1076 save_stack_trace_user(&trace); 1330 save_stack_trace_user(&trace);
1077 if (!filter_check_discard(call, entry, tr->buffer, event)) 1331 if (!filter_check_discard(call, entry, buffer, event))
1078 ring_buffer_unlock_commit(tr->buffer, event); 1332 ring_buffer_unlock_commit(buffer, event);
1079#endif 1333
1334 out_drop_count:
1335 __this_cpu_dec(user_stack_count);
1336 out:
1337 preempt_enable();
1080} 1338}
1081 1339
1082#ifdef UNUSED 1340#ifdef UNUSED
@@ -1086,174 +1344,7 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1086} 1344}
1087#endif /* UNUSED */ 1345#endif /* UNUSED */
1088 1346
1089static void 1347#endif /* CONFIG_STACKTRACE */
1090ftrace_trace_special(void *__tr,
1091 unsigned long arg1, unsigned long arg2, unsigned long arg3,
1092 int pc)
1093{
1094 struct ring_buffer_event *event;
1095 struct trace_array *tr = __tr;
1096 struct special_entry *entry;
1097
1098 event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL,
1099 sizeof(*entry), 0, pc);
1100 if (!event)
1101 return;
1102 entry = ring_buffer_event_data(event);
1103 entry->arg1 = arg1;
1104 entry->arg2 = arg2;
1105 entry->arg3 = arg3;
1106 trace_buffer_unlock_commit(tr, event, 0, pc);
1107}
1108
1109void
1110__trace_special(void *__tr, void *__data,
1111 unsigned long arg1, unsigned long arg2, unsigned long arg3)
1112{
1113 ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
1114}
1115
1116void
1117tracing_sched_switch_trace(struct trace_array *tr,
1118 struct task_struct *prev,
1119 struct task_struct *next,
1120 unsigned long flags, int pc)
1121{
1122 struct ftrace_event_call *call = &event_context_switch;
1123 struct ring_buffer_event *event;
1124 struct ctx_switch_entry *entry;
1125
1126 event = trace_buffer_lock_reserve(tr, TRACE_CTX,
1127 sizeof(*entry), flags, pc);
1128 if (!event)
1129 return;
1130 entry = ring_buffer_event_data(event);
1131 entry->prev_pid = prev->pid;
1132 entry->prev_prio = prev->prio;
1133 entry->prev_state = prev->state;
1134 entry->next_pid = next->pid;
1135 entry->next_prio = next->prio;
1136 entry->next_state = next->state;
1137 entry->next_cpu = task_cpu(next);
1138
1139 if (!filter_check_discard(call, entry, tr->buffer, event))
1140 trace_buffer_unlock_commit(tr, event, flags, pc);
1141}
1142
1143void
1144tracing_sched_wakeup_trace(struct trace_array *tr,
1145 struct task_struct *wakee,
1146 struct task_struct *curr,
1147 unsigned long flags, int pc)
1148{
1149 struct ftrace_event_call *call = &event_wakeup;
1150 struct ring_buffer_event *event;
1151 struct ctx_switch_entry *entry;
1152
1153 event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
1154 sizeof(*entry), flags, pc);
1155 if (!event)
1156 return;
1157 entry = ring_buffer_event_data(event);
1158 entry->prev_pid = curr->pid;
1159 entry->prev_prio = curr->prio;
1160 entry->prev_state = curr->state;
1161 entry->next_pid = wakee->pid;
1162 entry->next_prio = wakee->prio;
1163 entry->next_state = wakee->state;
1164 entry->next_cpu = task_cpu(wakee);
1165
1166 if (!filter_check_discard(call, entry, tr->buffer, event))
1167 ring_buffer_unlock_commit(tr->buffer, event);
1168 ftrace_trace_stack(tr, flags, 6, pc);
1169 ftrace_trace_userstack(tr, flags, pc);
1170}
1171
1172void
1173ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1174{
1175 struct trace_array *tr = &global_trace;
1176 struct trace_array_cpu *data;
1177 unsigned long flags;
1178 int cpu;
1179 int pc;
1180
1181 if (tracing_disabled)
1182 return;
1183
1184 pc = preempt_count();
1185 local_irq_save(flags);
1186 cpu = raw_smp_processor_id();
1187 data = tr->data[cpu];
1188
1189 if (likely(atomic_inc_return(&data->disabled) == 1))
1190 ftrace_trace_special(tr, arg1, arg2, arg3, pc);
1191
1192 atomic_dec(&data->disabled);
1193 local_irq_restore(flags);
1194}
1195
1196#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1197int trace_graph_entry(struct ftrace_graph_ent *trace)
1198{
1199 struct trace_array *tr = &global_trace;
1200 struct trace_array_cpu *data;
1201 unsigned long flags;
1202 long disabled;
1203 int ret;
1204 int cpu;
1205 int pc;
1206
1207 if (!ftrace_trace_task(current))
1208 return 0;
1209
1210 if (!ftrace_graph_addr(trace->func))
1211 return 0;
1212
1213 local_irq_save(flags);
1214 cpu = raw_smp_processor_id();
1215 data = tr->data[cpu];
1216 disabled = atomic_inc_return(&data->disabled);
1217 if (likely(disabled == 1)) {
1218 pc = preempt_count();
1219 ret = __trace_graph_entry(tr, trace, flags, pc);
1220 } else {
1221 ret = 0;
1222 }
1223 /* Only do the atomic if it is not already set */
1224 if (!test_tsk_trace_graph(current))
1225 set_tsk_trace_graph(current);
1226
1227 atomic_dec(&data->disabled);
1228 local_irq_restore(flags);
1229
1230 return ret;
1231}
1232
1233void trace_graph_return(struct ftrace_graph_ret *trace)
1234{
1235 struct trace_array *tr = &global_trace;
1236 struct trace_array_cpu *data;
1237 unsigned long flags;
1238 long disabled;
1239 int cpu;
1240 int pc;
1241
1242 local_irq_save(flags);
1243 cpu = raw_smp_processor_id();
1244 data = tr->data[cpu];
1245 disabled = atomic_inc_return(&data->disabled);
1246 if (likely(disabled == 1)) {
1247 pc = preempt_count();
1248 __trace_graph_return(tr, trace, flags, pc);
1249 }
1250 if (!trace->depth)
1251 clear_tsk_trace_graph(current);
1252 atomic_dec(&data->disabled);
1253 local_irq_restore(flags);
1254}
1255#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1256
1257 1348
1258/** 1349/**
1259 * trace_vbprintk - write binary msg to tracing buffer 1350 * trace_vbprintk - write binary msg to tracing buffer
@@ -1261,18 +1352,18 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
1261 */ 1352 */
1262int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 1353int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1263{ 1354{
1264 static raw_spinlock_t trace_buf_lock = 1355 static arch_spinlock_t trace_buf_lock =
1265 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1356 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1266 static u32 trace_buf[TRACE_BUF_SIZE]; 1357 static u32 trace_buf[TRACE_BUF_SIZE];
1267 1358
1268 struct ftrace_event_call *call = &event_bprint; 1359 struct ftrace_event_call *call = &event_bprint;
1269 struct ring_buffer_event *event; 1360 struct ring_buffer_event *event;
1361 struct ring_buffer *buffer;
1270 struct trace_array *tr = &global_trace; 1362 struct trace_array *tr = &global_trace;
1271 struct trace_array_cpu *data; 1363 struct trace_array_cpu *data;
1272 struct bprint_entry *entry; 1364 struct bprint_entry *entry;
1273 unsigned long flags; 1365 unsigned long flags;
1274 int disable; 1366 int disable;
1275 int resched;
1276 int cpu, len = 0, size, pc; 1367 int cpu, len = 0, size, pc;
1277 1368
1278 if (unlikely(tracing_selftest_running || tracing_disabled)) 1369 if (unlikely(tracing_selftest_running || tracing_disabled))
@@ -1282,7 +1373,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1282 pause_graph_tracing(); 1373 pause_graph_tracing();
1283 1374
1284 pc = preempt_count(); 1375 pc = preempt_count();
1285 resched = ftrace_preempt_disable(); 1376 preempt_disable_notrace();
1286 cpu = raw_smp_processor_id(); 1377 cpu = raw_smp_processor_id();
1287 data = tr->data[cpu]; 1378 data = tr->data[cpu];
1288 1379
@@ -1292,14 +1383,16 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1292 1383
1293 /* Lockdep uses trace_printk for lock tracing */ 1384 /* Lockdep uses trace_printk for lock tracing */
1294 local_irq_save(flags); 1385 local_irq_save(flags);
1295 __raw_spin_lock(&trace_buf_lock); 1386 arch_spin_lock(&trace_buf_lock);
1296 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1387 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1297 1388
1298 if (len > TRACE_BUF_SIZE || len < 0) 1389 if (len > TRACE_BUF_SIZE || len < 0)
1299 goto out_unlock; 1390 goto out_unlock;
1300 1391
1301 size = sizeof(*entry) + sizeof(u32) * len; 1392 size = sizeof(*entry) + sizeof(u32) * len;
1302 event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc); 1393 buffer = tr->buffer;
1394 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
1395 flags, pc);
1303 if (!event) 1396 if (!event)
1304 goto out_unlock; 1397 goto out_unlock;
1305 entry = ring_buffer_event_data(event); 1398 entry = ring_buffer_event_data(event);
@@ -1307,30 +1400,48 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1307 entry->fmt = fmt; 1400 entry->fmt = fmt;
1308 1401
1309 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1402 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1310 if (!filter_check_discard(call, entry, tr->buffer, event)) 1403 if (!filter_check_discard(call, entry, buffer, event)) {
1311 ring_buffer_unlock_commit(tr->buffer, event); 1404 ring_buffer_unlock_commit(buffer, event);
1405 ftrace_trace_stack(buffer, flags, 6, pc);
1406 }
1312 1407
1313out_unlock: 1408out_unlock:
1314 __raw_spin_unlock(&trace_buf_lock); 1409 arch_spin_unlock(&trace_buf_lock);
1315 local_irq_restore(flags); 1410 local_irq_restore(flags);
1316 1411
1317out: 1412out:
1318 atomic_dec_return(&data->disabled); 1413 atomic_dec_return(&data->disabled);
1319 ftrace_preempt_enable(resched); 1414 preempt_enable_notrace();
1320 unpause_graph_tracing(); 1415 unpause_graph_tracing();
1321 1416
1322 return len; 1417 return len;
1323} 1418}
1324EXPORT_SYMBOL_GPL(trace_vbprintk); 1419EXPORT_SYMBOL_GPL(trace_vbprintk);
1325 1420
1326int trace_vprintk(unsigned long ip, const char *fmt, va_list args) 1421int trace_array_printk(struct trace_array *tr,
1422 unsigned long ip, const char *fmt, ...)
1327{ 1423{
1328 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1424 int ret;
1425 va_list ap;
1426
1427 if (!(trace_flags & TRACE_ITER_PRINTK))
1428 return 0;
1429
1430 va_start(ap, fmt);
1431 ret = trace_array_vprintk(tr, ip, fmt, ap);
1432 va_end(ap);
1433 return ret;
1434}
1435
1436int trace_array_vprintk(struct trace_array *tr,
1437 unsigned long ip, const char *fmt, va_list args)
1438{
1439 static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1329 static char trace_buf[TRACE_BUF_SIZE]; 1440 static char trace_buf[TRACE_BUF_SIZE];
1330 1441
1331 struct ftrace_event_call *call = &event_print; 1442 struct ftrace_event_call *call = &event_print;
1332 struct ring_buffer_event *event; 1443 struct ring_buffer_event *event;
1333 struct trace_array *tr = &global_trace; 1444 struct ring_buffer *buffer;
1334 struct trace_array_cpu *data; 1445 struct trace_array_cpu *data;
1335 int cpu, len = 0, size, pc; 1446 int cpu, len = 0, size, pc;
1336 struct print_entry *entry; 1447 struct print_entry *entry;
@@ -1351,26 +1462,27 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1351 1462
1352 pause_graph_tracing(); 1463 pause_graph_tracing();
1353 raw_local_irq_save(irq_flags); 1464 raw_local_irq_save(irq_flags);
1354 __raw_spin_lock(&trace_buf_lock); 1465 arch_spin_lock(&trace_buf_lock);
1355 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1466 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1356 1467
1357 len = min(len, TRACE_BUF_SIZE-1);
1358 trace_buf[len] = 0;
1359
1360 size = sizeof(*entry) + len + 1; 1468 size = sizeof(*entry) + len + 1;
1361 event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); 1469 buffer = tr->buffer;
1470 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
1471 irq_flags, pc);
1362 if (!event) 1472 if (!event)
1363 goto out_unlock; 1473 goto out_unlock;
1364 entry = ring_buffer_event_data(event); 1474 entry = ring_buffer_event_data(event);
1365 entry->ip = ip; 1475 entry->ip = ip;
1366 1476
1367 memcpy(&entry->buf, trace_buf, len); 1477 memcpy(&entry->buf, trace_buf, len);
1368 entry->buf[len] = 0; 1478 entry->buf[len] = '\0';
1369 if (!filter_check_discard(call, entry, tr->buffer, event)) 1479 if (!filter_check_discard(call, entry, buffer, event)) {
1370 ring_buffer_unlock_commit(tr->buffer, event); 1480 ring_buffer_unlock_commit(buffer, event);
1481 ftrace_trace_stack(buffer, irq_flags, 6, pc);
1482 }
1371 1483
1372 out_unlock: 1484 out_unlock:
1373 __raw_spin_unlock(&trace_buf_lock); 1485 arch_spin_unlock(&trace_buf_lock);
1374 raw_local_irq_restore(irq_flags); 1486 raw_local_irq_restore(irq_flags);
1375 unpause_graph_tracing(); 1487 unpause_graph_tracing();
1376 out: 1488 out:
@@ -1379,12 +1491,12 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1379 1491
1380 return len; 1492 return len;
1381} 1493}
1382EXPORT_SYMBOL_GPL(trace_vprintk);
1383 1494
1384enum trace_file_type { 1495int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1385 TRACE_FILE_LAT_FMT = 1, 1496{
1386 TRACE_FILE_ANNOTATE = 2, 1497 return trace_array_vprintk(&global_trace, ip, fmt, args);
1387}; 1498}
1499EXPORT_SYMBOL_GPL(trace_vprintk);
1388 1500
1389static void trace_iterator_increment(struct trace_iterator *iter) 1501static void trace_iterator_increment(struct trace_iterator *iter)
1390{ 1502{
@@ -1399,7 +1511,8 @@ static void trace_iterator_increment(struct trace_iterator *iter)
1399} 1511}
1400 1512
1401static struct trace_entry * 1513static struct trace_entry *
1402peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) 1514peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
1515 unsigned long *lost_events)
1403{ 1516{
1404 struct ring_buffer_event *event; 1517 struct ring_buffer_event *event;
1405 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; 1518 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
@@ -1410,7 +1523,8 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1410 if (buf_iter) 1523 if (buf_iter)
1411 event = ring_buffer_iter_peek(buf_iter, ts); 1524 event = ring_buffer_iter_peek(buf_iter, ts);
1412 else 1525 else
1413 event = ring_buffer_peek(iter->tr->buffer, cpu, ts); 1526 event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
1527 lost_events);
1414 1528
1415 ftrace_enable_cpu(); 1529 ftrace_enable_cpu();
1416 1530
@@ -1418,10 +1532,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1418} 1532}
1419 1533
1420static struct trace_entry * 1534static struct trace_entry *
1421__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) 1535__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
1536 unsigned long *missing_events, u64 *ent_ts)
1422{ 1537{
1423 struct ring_buffer *buffer = iter->tr->buffer; 1538 struct ring_buffer *buffer = iter->tr->buffer;
1424 struct trace_entry *ent, *next = NULL; 1539 struct trace_entry *ent, *next = NULL;
1540 unsigned long lost_events = 0, next_lost = 0;
1425 int cpu_file = iter->cpu_file; 1541 int cpu_file = iter->cpu_file;
1426 u64 next_ts = 0, ts; 1542 u64 next_ts = 0, ts;
1427 int next_cpu = -1; 1543 int next_cpu = -1;
@@ -1434,7 +1550,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1434 if (cpu_file > TRACE_PIPE_ALL_CPU) { 1550 if (cpu_file > TRACE_PIPE_ALL_CPU) {
1435 if (ring_buffer_empty_cpu(buffer, cpu_file)) 1551 if (ring_buffer_empty_cpu(buffer, cpu_file))
1436 return NULL; 1552 return NULL;
1437 ent = peek_next_entry(iter, cpu_file, ent_ts); 1553 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
1438 if (ent_cpu) 1554 if (ent_cpu)
1439 *ent_cpu = cpu_file; 1555 *ent_cpu = cpu_file;
1440 1556
@@ -1446,7 +1562,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1446 if (ring_buffer_empty_cpu(buffer, cpu)) 1562 if (ring_buffer_empty_cpu(buffer, cpu))
1447 continue; 1563 continue;
1448 1564
1449 ent = peek_next_entry(iter, cpu, &ts); 1565 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
1450 1566
1451 /* 1567 /*
1452 * Pick the entry with the smallest timestamp: 1568 * Pick the entry with the smallest timestamp:
@@ -1455,6 +1571,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1455 next = ent; 1571 next = ent;
1456 next_cpu = cpu; 1572 next_cpu = cpu;
1457 next_ts = ts; 1573 next_ts = ts;
1574 next_lost = lost_events;
1458 } 1575 }
1459 } 1576 }
1460 1577
@@ -1464,6 +1581,9 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1464 if (ent_ts) 1581 if (ent_ts)
1465 *ent_ts = next_ts; 1582 *ent_ts = next_ts;
1466 1583
1584 if (missing_events)
1585 *missing_events = next_lost;
1586
1467 return next; 1587 return next;
1468} 1588}
1469 1589
@@ -1471,13 +1591,14 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1471struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 1591struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1472 int *ent_cpu, u64 *ent_ts) 1592 int *ent_cpu, u64 *ent_ts)
1473{ 1593{
1474 return __find_next_entry(iter, ent_cpu, ent_ts); 1594 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
1475} 1595}
1476 1596
1477/* Find the next real entry, and increment the iterator to the next entry */ 1597/* Find the next real entry, and increment the iterator to the next entry */
1478static void *find_next_entry_inc(struct trace_iterator *iter) 1598void *trace_find_next_entry_inc(struct trace_iterator *iter)
1479{ 1599{
1480 iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts); 1600 iter->ent = __find_next_entry(iter, &iter->cpu,
1601 &iter->lost_events, &iter->ts);
1481 1602
1482 if (iter->ent) 1603 if (iter->ent)
1483 trace_iterator_increment(iter); 1604 trace_iterator_increment(iter);
@@ -1489,7 +1610,8 @@ static void trace_consume(struct trace_iterator *iter)
1489{ 1610{
1490 /* Don't allow ftrace to trace into the ring buffers */ 1611 /* Don't allow ftrace to trace into the ring buffers */
1491 ftrace_disable_cpu(); 1612 ftrace_disable_cpu();
1492 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts); 1613 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
1614 &iter->lost_events);
1493 ftrace_enable_cpu(); 1615 ftrace_enable_cpu();
1494} 1616}
1495 1617
@@ -1499,6 +1621,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1499 int i = (int)*pos; 1621 int i = (int)*pos;
1500 void *ent; 1622 void *ent;
1501 1623
1624 WARN_ON_ONCE(iter->leftover);
1625
1502 (*pos)++; 1626 (*pos)++;
1503 1627
1504 /* can't go backwards */ 1628 /* can't go backwards */
@@ -1506,25 +1630,50 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1506 return NULL; 1630 return NULL;
1507 1631
1508 if (iter->idx < 0) 1632 if (iter->idx < 0)
1509 ent = find_next_entry_inc(iter); 1633 ent = trace_find_next_entry_inc(iter);
1510 else 1634 else
1511 ent = iter; 1635 ent = iter;
1512 1636
1513 while (ent && iter->idx < i) 1637 while (ent && iter->idx < i)
1514 ent = find_next_entry_inc(iter); 1638 ent = trace_find_next_entry_inc(iter);
1515 1639
1516 iter->pos = *pos; 1640 iter->pos = *pos;
1517 1641
1518 return ent; 1642 return ent;
1519} 1643}
1520 1644
1645void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1646{
1647 struct trace_array *tr = iter->tr;
1648 struct ring_buffer_event *event;
1649 struct ring_buffer_iter *buf_iter;
1650 unsigned long entries = 0;
1651 u64 ts;
1652
1653 tr->data[cpu]->skipped_entries = 0;
1654
1655 if (!iter->buffer_iter[cpu])
1656 return;
1657
1658 buf_iter = iter->buffer_iter[cpu];
1659 ring_buffer_iter_reset(buf_iter);
1660
1661 /*
1662 * We could have the case with the max latency tracers
1663 * that a reset never took place on a cpu. This is evident
1664 * by the timestamp being before the start of the buffer.
1665 */
1666 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
1667 if (ts >= iter->tr->time_start)
1668 break;
1669 entries++;
1670 ring_buffer_read(buf_iter, NULL);
1671 }
1672
1673 tr->data[cpu]->skipped_entries = entries;
1674}
1675
1521/* 1676/*
1522 * No necessary locking here. The worst thing which can
1523 * happen is loosing events consumed at the same time
1524 * by a trace_pipe reader.
1525 * Other than that, we don't risk to crash the ring buffer
1526 * because it serializes the readers.
1527 *
1528 * The current tracer is copied to avoid a global locking 1677 * The current tracer is copied to avoid a global locking
1529 * all around. 1678 * all around.
1530 */ 1679 */
@@ -1556,28 +1705,40 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1556 1705
1557 if (cpu_file == TRACE_PIPE_ALL_CPU) { 1706 if (cpu_file == TRACE_PIPE_ALL_CPU) {
1558 for_each_tracing_cpu(cpu) 1707 for_each_tracing_cpu(cpu)
1559 ring_buffer_iter_reset(iter->buffer_iter[cpu]); 1708 tracing_iter_reset(iter, cpu);
1560 } else 1709 } else
1561 ring_buffer_iter_reset(iter->buffer_iter[cpu_file]); 1710 tracing_iter_reset(iter, cpu_file);
1562
1563 1711
1564 ftrace_enable_cpu(); 1712 ftrace_enable_cpu();
1565 1713
1714 iter->leftover = 0;
1566 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1715 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1567 ; 1716 ;
1568 1717
1569 } else { 1718 } else {
1570 l = *pos - 1; 1719 /*
1571 p = s_next(m, p, &l); 1720 * If we overflowed the seq_file before, then we want
1721 * to just reuse the trace_seq buffer again.
1722 */
1723 if (iter->leftover)
1724 p = iter;
1725 else {
1726 l = *pos - 1;
1727 p = s_next(m, p, &l);
1728 }
1572 } 1729 }
1573 1730
1574 trace_event_read_lock(); 1731 trace_event_read_lock();
1732 trace_access_lock(cpu_file);
1575 return p; 1733 return p;
1576} 1734}
1577 1735
1578static void s_stop(struct seq_file *m, void *p) 1736static void s_stop(struct seq_file *m, void *p)
1579{ 1737{
1738 struct trace_iterator *iter = m->private;
1739
1580 atomic_dec(&trace_record_cmdline_disabled); 1740 atomic_dec(&trace_record_cmdline_disabled);
1741 trace_access_unlock(iter->cpu_file);
1581 trace_event_read_unlock(); 1742 trace_event_read_unlock();
1582} 1743}
1583 1744
@@ -1588,10 +1749,10 @@ static void print_lat_help_header(struct seq_file *m)
1588 seq_puts(m, "# | / _----=> need-resched \n"); 1749 seq_puts(m, "# | / _----=> need-resched \n");
1589 seq_puts(m, "# || / _---=> hardirq/softirq \n"); 1750 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1590 seq_puts(m, "# ||| / _--=> preempt-depth \n"); 1751 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1591 seq_puts(m, "# |||| / \n"); 1752 seq_puts(m, "# |||| /_--=> lock-depth \n");
1592 seq_puts(m, "# ||||| delay \n"); 1753 seq_puts(m, "# |||||/ delay \n");
1593 seq_puts(m, "# cmd pid ||||| time | caller \n"); 1754 seq_puts(m, "# cmd pid |||||| time | caller \n");
1594 seq_puts(m, "# \\ / ||||| \\ | / \n"); 1755 seq_puts(m, "# \\ / |||||| \\ | / \n");
1595} 1756}
1596 1757
1597static void print_func_help_header(struct seq_file *m) 1758static void print_func_help_header(struct seq_file *m)
@@ -1601,23 +1762,39 @@ static void print_func_help_header(struct seq_file *m)
1601} 1762}
1602 1763
1603 1764
1604static void 1765void
1605print_trace_header(struct seq_file *m, struct trace_iterator *iter) 1766print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1606{ 1767{
1607 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1768 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1608 struct trace_array *tr = iter->tr; 1769 struct trace_array *tr = iter->tr;
1609 struct trace_array_cpu *data = tr->data[tr->cpu]; 1770 struct trace_array_cpu *data = tr->data[tr->cpu];
1610 struct tracer *type = current_trace; 1771 struct tracer *type = current_trace;
1611 unsigned long total; 1772 unsigned long entries = 0;
1612 unsigned long entries; 1773 unsigned long total = 0;
1774 unsigned long count;
1613 const char *name = "preemption"; 1775 const char *name = "preemption";
1776 int cpu;
1614 1777
1615 if (type) 1778 if (type)
1616 name = type->name; 1779 name = type->name;
1617 1780
1618 entries = ring_buffer_entries(iter->tr->buffer); 1781
1619 total = entries + 1782 for_each_tracing_cpu(cpu) {
1620 ring_buffer_overruns(iter->tr->buffer); 1783 count = ring_buffer_entries_cpu(tr->buffer, cpu);
1784 /*
1785 * If this buffer has skipped entries, then we hold all
1786 * entries for the trace and we need to ignore the
1787 * ones before the time stamp.
1788 */
1789 if (tr->data[cpu]->skipped_entries) {
1790 count -= tr->data[cpu]->skipped_entries;
1791 /* total is the same as the entries */
1792 total += count;
1793 } else
1794 total += count +
1795 ring_buffer_overrun_cpu(tr->buffer, cpu);
1796 entries += count;
1797 }
1621 1798
1622 seq_printf(m, "# %s latency trace v1.1.5 on %s\n", 1799 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
1623 name, UTS_RELEASE); 1800 name, UTS_RELEASE);
@@ -1659,7 +1836,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1659 seq_puts(m, "\n# => ended at: "); 1836 seq_puts(m, "\n# => ended at: ");
1660 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); 1837 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1661 trace_print_seq(m, &iter->seq); 1838 trace_print_seq(m, &iter->seq);
1662 seq_puts(m, "#\n"); 1839 seq_puts(m, "\n#\n");
1663 } 1840 }
1664 1841
1665 seq_puts(m, "#\n"); 1842 seq_puts(m, "#\n");
@@ -1678,6 +1855,9 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
1678 if (cpumask_test_cpu(iter->cpu, iter->started)) 1855 if (cpumask_test_cpu(iter->cpu, iter->started))
1679 return; 1856 return;
1680 1857
1858 if (iter->tr->data[iter->cpu]->skipped_entries)
1859 return;
1860
1681 cpumask_set_cpu(iter->cpu, iter->started); 1861 cpumask_set_cpu(iter->cpu, iter->started);
1682 1862
1683 /* Don't print started cpu buffer for the first entry of the trace */ 1863 /* Don't print started cpu buffer for the first entry of the trace */
@@ -1710,7 +1890,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1710 } 1890 }
1711 1891
1712 if (event) 1892 if (event)
1713 return event->trace(iter, sym_flags); 1893 return event->funcs->trace(iter, sym_flags, event);
1714 1894
1715 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) 1895 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
1716 goto partial; 1896 goto partial;
@@ -1736,7 +1916,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1736 1916
1737 event = ftrace_find_event(entry->type); 1917 event = ftrace_find_event(entry->type);
1738 if (event) 1918 if (event)
1739 return event->raw(iter, 0); 1919 return event->funcs->raw(iter, 0, event);
1740 1920
1741 if (!trace_seq_printf(s, "%d ?\n", entry->type)) 1921 if (!trace_seq_printf(s, "%d ?\n", entry->type))
1742 goto partial; 1922 goto partial;
@@ -1763,7 +1943,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1763 1943
1764 event = ftrace_find_event(entry->type); 1944 event = ftrace_find_event(entry->type);
1765 if (event) { 1945 if (event) {
1766 enum print_line_t ret = event->hex(iter, 0); 1946 enum print_line_t ret = event->funcs->hex(iter, 0, event);
1767 if (ret != TRACE_TYPE_HANDLED) 1947 if (ret != TRACE_TYPE_HANDLED)
1768 return ret; 1948 return ret;
1769 } 1949 }
@@ -1788,10 +1968,11 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1788 } 1968 }
1789 1969
1790 event = ftrace_find_event(entry->type); 1970 event = ftrace_find_event(entry->type);
1791 return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED; 1971 return event ? event->funcs->binary(iter, 0, event) :
1972 TRACE_TYPE_HANDLED;
1792} 1973}
1793 1974
1794static int trace_empty(struct trace_iterator *iter) 1975int trace_empty(struct trace_iterator *iter)
1795{ 1976{
1796 int cpu; 1977 int cpu;
1797 1978
@@ -1822,10 +2003,14 @@ static int trace_empty(struct trace_iterator *iter)
1822} 2003}
1823 2004
1824/* Called with trace_event_read_lock() held. */ 2005/* Called with trace_event_read_lock() held. */
1825static enum print_line_t print_trace_line(struct trace_iterator *iter) 2006enum print_line_t print_trace_line(struct trace_iterator *iter)
1826{ 2007{
1827 enum print_line_t ret; 2008 enum print_line_t ret;
1828 2009
2010 if (iter->lost_events)
2011 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2012 iter->cpu, iter->lost_events);
2013
1829 if (iter->trace && iter->trace->print_line) { 2014 if (iter->trace && iter->trace->print_line) {
1830 ret = iter->trace->print_line(iter); 2015 ret = iter->trace->print_line(iter);
1831 if (ret != TRACE_TYPE_UNHANDLED) 2016 if (ret != TRACE_TYPE_UNHANDLED)
@@ -1854,9 +2039,27 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1854 return print_trace_fmt(iter); 2039 return print_trace_fmt(iter);
1855} 2040}
1856 2041
2042void trace_default_header(struct seq_file *m)
2043{
2044 struct trace_iterator *iter = m->private;
2045
2046 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2047 /* print nothing if the buffers are empty */
2048 if (trace_empty(iter))
2049 return;
2050 print_trace_header(m, iter);
2051 if (!(trace_flags & TRACE_ITER_VERBOSE))
2052 print_lat_help_header(m);
2053 } else {
2054 if (!(trace_flags & TRACE_ITER_VERBOSE))
2055 print_func_help_header(m);
2056 }
2057}
2058
1857static int s_show(struct seq_file *m, void *v) 2059static int s_show(struct seq_file *m, void *v)
1858{ 2060{
1859 struct trace_iterator *iter = v; 2061 struct trace_iterator *iter = v;
2062 int ret;
1860 2063
1861 if (iter->ent == NULL) { 2064 if (iter->ent == NULL) {
1862 if (iter->tr) { 2065 if (iter->tr) {
@@ -1865,26 +2068,36 @@ static int s_show(struct seq_file *m, void *v)
1865 } 2068 }
1866 if (iter->trace && iter->trace->print_header) 2069 if (iter->trace && iter->trace->print_header)
1867 iter->trace->print_header(m); 2070 iter->trace->print_header(m);
1868 else if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 2071 else
1869 /* print nothing if the buffers are empty */ 2072 trace_default_header(m);
1870 if (trace_empty(iter)) 2073
1871 return 0; 2074 } else if (iter->leftover) {
1872 print_trace_header(m, iter); 2075 /*
1873 if (!(trace_flags & TRACE_ITER_VERBOSE)) 2076 * If we filled the seq_file buffer earlier, we
1874 print_lat_help_header(m); 2077 * want to just show it now.
1875 } else { 2078 */
1876 if (!(trace_flags & TRACE_ITER_VERBOSE)) 2079 ret = trace_print_seq(m, &iter->seq);
1877 print_func_help_header(m); 2080
1878 } 2081 /* ret should this time be zero, but you never know */
2082 iter->leftover = ret;
2083
1879 } else { 2084 } else {
1880 print_trace_line(iter); 2085 print_trace_line(iter);
1881 trace_print_seq(m, &iter->seq); 2086 ret = trace_print_seq(m, &iter->seq);
2087 /*
2088 * If we overflow the seq_file buffer, then it will
2089 * ask us for this data again at start up.
2090 * Use that instead.
2091 * ret is 0 if seq_file write succeeded.
2092 * -1 otherwise.
2093 */
2094 iter->leftover = ret;
1882 } 2095 }
1883 2096
1884 return 0; 2097 return 0;
1885} 2098}
1886 2099
1887static struct seq_operations tracer_seq_ops = { 2100static const struct seq_operations tracer_seq_ops = {
1888 .start = s_start, 2101 .start = s_start,
1889 .next = s_next, 2102 .next = s_next,
1890 .stop = s_stop, 2103 .stop = s_stop,
@@ -1919,11 +2132,9 @@ __tracing_open(struct inode *inode, struct file *file)
1919 if (current_trace) 2132 if (current_trace)
1920 *iter->trace = *current_trace; 2133 *iter->trace = *current_trace;
1921 2134
1922 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) 2135 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
1923 goto fail; 2136 goto fail;
1924 2137
1925 cpumask_clear(iter->started);
1926
1927 if (current_trace && current_trace->print_max) 2138 if (current_trace && current_trace->print_max)
1928 iter->tr = &max_tr; 2139 iter->tr = &max_tr;
1929 else 2140 else
@@ -1940,19 +2151,28 @@ __tracing_open(struct inode *inode, struct file *file)
1940 if (ring_buffer_overruns(iter->tr->buffer)) 2151 if (ring_buffer_overruns(iter->tr->buffer))
1941 iter->iter_flags |= TRACE_FILE_ANNOTATE; 2152 iter->iter_flags |= TRACE_FILE_ANNOTATE;
1942 2153
2154 /* stop the trace while dumping */
2155 tracing_stop();
2156
1943 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { 2157 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
1944 for_each_tracing_cpu(cpu) { 2158 for_each_tracing_cpu(cpu) {
1945
1946 iter->buffer_iter[cpu] = 2159 iter->buffer_iter[cpu] =
1947 ring_buffer_read_start(iter->tr->buffer, cpu); 2160 ring_buffer_read_prepare(iter->tr->buffer, cpu);
2161 }
2162 ring_buffer_read_prepare_sync();
2163 for_each_tracing_cpu(cpu) {
2164 ring_buffer_read_start(iter->buffer_iter[cpu]);
2165 tracing_iter_reset(iter, cpu);
1948 } 2166 }
1949 } else { 2167 } else {
1950 cpu = iter->cpu_file; 2168 cpu = iter->cpu_file;
1951 iter->buffer_iter[cpu] = 2169 iter->buffer_iter[cpu] =
1952 ring_buffer_read_start(iter->tr->buffer, cpu); 2170 ring_buffer_read_prepare(iter->tr->buffer, cpu);
2171 ring_buffer_read_prepare_sync();
2172 ring_buffer_read_start(iter->buffer_iter[cpu]);
2173 tracing_iter_reset(iter, cpu);
1953 } 2174 }
1954 2175
1955 /* TODO stop tracer */
1956 ret = seq_open(file, &tracer_seq_ops); 2176 ret = seq_open(file, &tracer_seq_ops);
1957 if (ret < 0) { 2177 if (ret < 0) {
1958 fail_ret = ERR_PTR(ret); 2178 fail_ret = ERR_PTR(ret);
@@ -1962,9 +2182,6 @@ __tracing_open(struct inode *inode, struct file *file)
1962 m = file->private_data; 2182 m = file->private_data;
1963 m->private = iter; 2183 m->private = iter;
1964 2184
1965 /* stop the trace while dumping */
1966 tracing_stop();
1967
1968 mutex_unlock(&trace_types_lock); 2185 mutex_unlock(&trace_types_lock);
1969 2186
1970 return iter; 2187 return iter;
@@ -1975,6 +2192,7 @@ __tracing_open(struct inode *inode, struct file *file)
1975 ring_buffer_read_finish(iter->buffer_iter[cpu]); 2192 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1976 } 2193 }
1977 free_cpumask_var(iter->started); 2194 free_cpumask_var(iter->started);
2195 tracing_start();
1978 fail: 2196 fail:
1979 mutex_unlock(&trace_types_lock); 2197 mutex_unlock(&trace_types_lock);
1980 kfree(iter->trace); 2198 kfree(iter->trace);
@@ -1994,7 +2212,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
1994 2212
1995static int tracing_release(struct inode *inode, struct file *file) 2213static int tracing_release(struct inode *inode, struct file *file)
1996{ 2214{
1997 struct seq_file *m = (struct seq_file *)file->private_data; 2215 struct seq_file *m = file->private_data;
1998 struct trace_iterator *iter; 2216 struct trace_iterator *iter;
1999 int cpu; 2217 int cpu;
2000 2218
@@ -2031,7 +2249,7 @@ static int tracing_open(struct inode *inode, struct file *file)
2031 2249
2032 /* If this file was open for write, then erase contents */ 2250 /* If this file was open for write, then erase contents */
2033 if ((file->f_mode & FMODE_WRITE) && 2251 if ((file->f_mode & FMODE_WRITE) &&
2034 !(file->f_flags & O_APPEND)) { 2252 (file->f_flags & O_TRUNC)) {
2035 long cpu = (long) inode->i_private; 2253 long cpu = (long) inode->i_private;
2036 2254
2037 if (cpu == TRACE_PIPE_ALL_CPU) 2255 if (cpu == TRACE_PIPE_ALL_CPU)
@@ -2053,25 +2271,23 @@ static int tracing_open(struct inode *inode, struct file *file)
2053static void * 2271static void *
2054t_next(struct seq_file *m, void *v, loff_t *pos) 2272t_next(struct seq_file *m, void *v, loff_t *pos)
2055{ 2273{
2056 struct tracer *t = m->private; 2274 struct tracer *t = v;
2057 2275
2058 (*pos)++; 2276 (*pos)++;
2059 2277
2060 if (t) 2278 if (t)
2061 t = t->next; 2279 t = t->next;
2062 2280
2063 m->private = t;
2064
2065 return t; 2281 return t;
2066} 2282}
2067 2283
2068static void *t_start(struct seq_file *m, loff_t *pos) 2284static void *t_start(struct seq_file *m, loff_t *pos)
2069{ 2285{
2070 struct tracer *t = m->private; 2286 struct tracer *t;
2071 loff_t l = 0; 2287 loff_t l = 0;
2072 2288
2073 mutex_lock(&trace_types_lock); 2289 mutex_lock(&trace_types_lock);
2074 for (; t && l < *pos; t = t_next(m, t, &l)) 2290 for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
2075 ; 2291 ;
2076 2292
2077 return t; 2293 return t;
@@ -2098,7 +2314,7 @@ static int t_show(struct seq_file *m, void *v)
2098 return 0; 2314 return 0;
2099} 2315}
2100 2316
2101static struct seq_operations show_traces_seq_ops = { 2317static const struct seq_operations show_traces_seq_ops = {
2102 .start = t_start, 2318 .start = t_start,
2103 .next = t_next, 2319 .next = t_next,
2104 .stop = t_stop, 2320 .stop = t_stop,
@@ -2107,18 +2323,10 @@ static struct seq_operations show_traces_seq_ops = {
2107 2323
2108static int show_traces_open(struct inode *inode, struct file *file) 2324static int show_traces_open(struct inode *inode, struct file *file)
2109{ 2325{
2110 int ret;
2111
2112 if (tracing_disabled) 2326 if (tracing_disabled)
2113 return -ENODEV; 2327 return -ENODEV;
2114 2328
2115 ret = seq_open(file, &show_traces_seq_ops); 2329 return seq_open(file, &show_traces_seq_ops);
2116 if (!ret) {
2117 struct seq_file *m = file->private_data;
2118 m->private = trace_types;
2119 }
2120
2121 return ret;
2122} 2330}
2123 2331
2124static ssize_t 2332static ssize_t
@@ -2128,11 +2336,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf,
2128 return count; 2336 return count;
2129} 2337}
2130 2338
2339static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
2340{
2341 if (file->f_mode & FMODE_READ)
2342 return seq_lseek(file, offset, origin);
2343 else
2344 return 0;
2345}
2346
2131static const struct file_operations tracing_fops = { 2347static const struct file_operations tracing_fops = {
2132 .open = tracing_open, 2348 .open = tracing_open,
2133 .read = seq_read, 2349 .read = seq_read,
2134 .write = tracing_write_stub, 2350 .write = tracing_write_stub,
2135 .llseek = seq_lseek, 2351 .llseek = tracing_seek,
2136 .release = tracing_release, 2352 .release = tracing_release,
2137}; 2353};
2138 2354
@@ -2140,6 +2356,7 @@ static const struct file_operations show_traces_fops = {
2140 .open = show_traces_open, 2356 .open = show_traces_open,
2141 .read = seq_read, 2357 .read = seq_read,
2142 .release = seq_release, 2358 .release = seq_release,
2359 .llseek = seq_lseek,
2143}; 2360};
2144 2361
2145/* 2362/*
@@ -2198,7 +2415,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2198 mutex_lock(&tracing_cpumask_update_lock); 2415 mutex_lock(&tracing_cpumask_update_lock);
2199 2416
2200 local_irq_disable(); 2417 local_irq_disable();
2201 __raw_spin_lock(&ftrace_max_lock); 2418 arch_spin_lock(&ftrace_max_lock);
2202 for_each_tracing_cpu(cpu) { 2419 for_each_tracing_cpu(cpu) {
2203 /* 2420 /*
2204 * Increase/decrease the disabled counter if we are 2421 * Increase/decrease the disabled counter if we are
@@ -2213,7 +2430,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2213 atomic_dec(&global_trace.data[cpu]->disabled); 2430 atomic_dec(&global_trace.data[cpu]->disabled);
2214 } 2431 }
2215 } 2432 }
2216 __raw_spin_unlock(&ftrace_max_lock); 2433 arch_spin_unlock(&ftrace_max_lock);
2217 local_irq_enable(); 2434 local_irq_enable();
2218 2435
2219 cpumask_copy(tracing_cpumask, tracing_cpumask_new); 2436 cpumask_copy(tracing_cpumask, tracing_cpumask_new);
@@ -2233,103 +2450,70 @@ static const struct file_operations tracing_cpumask_fops = {
2233 .open = tracing_open_generic, 2450 .open = tracing_open_generic,
2234 .read = tracing_cpumask_read, 2451 .read = tracing_cpumask_read,
2235 .write = tracing_cpumask_write, 2452 .write = tracing_cpumask_write,
2453 .llseek = generic_file_llseek,
2236}; 2454};
2237 2455
2238static ssize_t 2456static int tracing_trace_options_show(struct seq_file *m, void *v)
2239tracing_trace_options_read(struct file *filp, char __user *ubuf,
2240 size_t cnt, loff_t *ppos)
2241{ 2457{
2242 struct tracer_opt *trace_opts; 2458 struct tracer_opt *trace_opts;
2243 u32 tracer_flags; 2459 u32 tracer_flags;
2244 int len = 0;
2245 char *buf;
2246 int r = 0;
2247 int i; 2460 int i;
2248 2461
2249
2250 /* calculate max size */
2251 for (i = 0; trace_options[i]; i++) {
2252 len += strlen(trace_options[i]);
2253 len += 3; /* "no" and newline */
2254 }
2255
2256 mutex_lock(&trace_types_lock); 2462 mutex_lock(&trace_types_lock);
2257 tracer_flags = current_trace->flags->val; 2463 tracer_flags = current_trace->flags->val;
2258 trace_opts = current_trace->flags->opts; 2464 trace_opts = current_trace->flags->opts;
2259 2465
2260 /*
2261 * Increase the size with names of options specific
2262 * of the current tracer.
2263 */
2264 for (i = 0; trace_opts[i].name; i++) {
2265 len += strlen(trace_opts[i].name);
2266 len += 3; /* "no" and newline */
2267 }
2268
2269 /* +2 for \n and \0 */
2270 buf = kmalloc(len + 2, GFP_KERNEL);
2271 if (!buf) {
2272 mutex_unlock(&trace_types_lock);
2273 return -ENOMEM;
2274 }
2275
2276 for (i = 0; trace_options[i]; i++) { 2466 for (i = 0; trace_options[i]; i++) {
2277 if (trace_flags & (1 << i)) 2467 if (trace_flags & (1 << i))
2278 r += sprintf(buf + r, "%s\n", trace_options[i]); 2468 seq_printf(m, "%s\n", trace_options[i]);
2279 else 2469 else
2280 r += sprintf(buf + r, "no%s\n", trace_options[i]); 2470 seq_printf(m, "no%s\n", trace_options[i]);
2281 } 2471 }
2282 2472
2283 for (i = 0; trace_opts[i].name; i++) { 2473 for (i = 0; trace_opts[i].name; i++) {
2284 if (tracer_flags & trace_opts[i].bit) 2474 if (tracer_flags & trace_opts[i].bit)
2285 r += sprintf(buf + r, "%s\n", 2475 seq_printf(m, "%s\n", trace_opts[i].name);
2286 trace_opts[i].name);
2287 else 2476 else
2288 r += sprintf(buf + r, "no%s\n", 2477 seq_printf(m, "no%s\n", trace_opts[i].name);
2289 trace_opts[i].name);
2290 } 2478 }
2291 mutex_unlock(&trace_types_lock); 2479 mutex_unlock(&trace_types_lock);
2292 2480
2293 WARN_ON(r >= len + 2); 2481 return 0;
2482}
2294 2483
2295 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2484static int __set_tracer_option(struct tracer *trace,
2485 struct tracer_flags *tracer_flags,
2486 struct tracer_opt *opts, int neg)
2487{
2488 int ret;
2296 2489
2297 kfree(buf); 2490 ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
2298 return r; 2491 if (ret)
2492 return ret;
2493
2494 if (neg)
2495 tracer_flags->val &= ~opts->bit;
2496 else
2497 tracer_flags->val |= opts->bit;
2498 return 0;
2299} 2499}
2300 2500
2301/* Try to assign a tracer specific option */ 2501/* Try to assign a tracer specific option */
2302static int set_tracer_option(struct tracer *trace, char *cmp, int neg) 2502static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2303{ 2503{
2304 struct tracer_flags *trace_flags = trace->flags; 2504 struct tracer_flags *tracer_flags = trace->flags;
2305 struct tracer_opt *opts = NULL; 2505 struct tracer_opt *opts = NULL;
2306 int ret = 0, i = 0; 2506 int i;
2307 int len;
2308 2507
2309 for (i = 0; trace_flags->opts[i].name; i++) { 2508 for (i = 0; tracer_flags->opts[i].name; i++) {
2310 opts = &trace_flags->opts[i]; 2509 opts = &tracer_flags->opts[i];
2311 len = strlen(opts->name);
2312 2510
2313 if (strncmp(cmp, opts->name, len) == 0) { 2511 if (strcmp(cmp, opts->name) == 0)
2314 ret = trace->set_flag(trace_flags->val, 2512 return __set_tracer_option(trace, trace->flags,
2315 opts->bit, !neg); 2513 opts, neg);
2316 break;
2317 }
2318 } 2514 }
2319 /* Not found */
2320 if (!trace_flags->opts[i].name)
2321 return -EINVAL;
2322
2323 /* Refused to handle */
2324 if (ret)
2325 return ret;
2326 2515
2327 if (neg) 2516 return -EINVAL;
2328 trace_flags->val &= ~opts->bit;
2329 else
2330 trace_flags->val |= opts->bit;
2331
2332 return 0;
2333} 2517}
2334 2518
2335static void set_tracer_flags(unsigned int mask, int enabled) 2519static void set_tracer_flags(unsigned int mask, int enabled)
@@ -2343,21 +2527,8 @@ static void set_tracer_flags(unsigned int mask, int enabled)
2343 else 2527 else
2344 trace_flags &= ~mask; 2528 trace_flags &= ~mask;
2345 2529
2346 if (mask == TRACE_ITER_GLOBAL_CLK) { 2530 if (mask == TRACE_ITER_RECORD_CMD)
2347 u64 (*func)(void); 2531 trace_event_enable_cmd_record(enabled);
2348
2349 if (enabled)
2350 func = trace_clock_global;
2351 else
2352 func = trace_clock_local;
2353
2354 mutex_lock(&trace_types_lock);
2355 ring_buffer_set_clock(global_trace.buffer, func);
2356
2357 if (max_tr.buffer)
2358 ring_buffer_set_clock(max_tr.buffer, func);
2359 mutex_unlock(&trace_types_lock);
2360 }
2361} 2532}
2362 2533
2363static ssize_t 2534static ssize_t
@@ -2365,7 +2536,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2365 size_t cnt, loff_t *ppos) 2536 size_t cnt, loff_t *ppos)
2366{ 2537{
2367 char buf[64]; 2538 char buf[64];
2368 char *cmp = buf; 2539 char *cmp;
2369 int neg = 0; 2540 int neg = 0;
2370 int ret; 2541 int ret;
2371 int i; 2542 int i;
@@ -2377,16 +2548,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2377 return -EFAULT; 2548 return -EFAULT;
2378 2549
2379 buf[cnt] = 0; 2550 buf[cnt] = 0;
2551 cmp = strstrip(buf);
2380 2552
2381 if (strncmp(buf, "no", 2) == 0) { 2553 if (strncmp(cmp, "no", 2) == 0) {
2382 neg = 1; 2554 neg = 1;
2383 cmp += 2; 2555 cmp += 2;
2384 } 2556 }
2385 2557
2386 for (i = 0; trace_options[i]; i++) { 2558 for (i = 0; trace_options[i]; i++) {
2387 int len = strlen(trace_options[i]); 2559 if (strcmp(cmp, trace_options[i]) == 0) {
2388
2389 if (strncmp(cmp, trace_options[i], len) == 0) {
2390 set_tracer_flags(1 << i, !neg); 2560 set_tracer_flags(1 << i, !neg);
2391 break; 2561 break;
2392 } 2562 }
@@ -2401,14 +2571,23 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2401 return ret; 2571 return ret;
2402 } 2572 }
2403 2573
2404 filp->f_pos += cnt; 2574 *ppos += cnt;
2405 2575
2406 return cnt; 2576 return cnt;
2407} 2577}
2408 2578
2579static int tracing_trace_options_open(struct inode *inode, struct file *file)
2580{
2581 if (tracing_disabled)
2582 return -ENODEV;
2583 return single_open(file, tracing_trace_options_show, NULL);
2584}
2585
2409static const struct file_operations tracing_iter_fops = { 2586static const struct file_operations tracing_iter_fops = {
2410 .open = tracing_open_generic, 2587 .open = tracing_trace_options_open,
2411 .read = tracing_trace_options_read, 2588 .read = seq_read,
2589 .llseek = seq_lseek,
2590 .release = single_release,
2412 .write = tracing_trace_options_write, 2591 .write = tracing_trace_options_write,
2413}; 2592};
2414 2593
@@ -2441,6 +2620,7 @@ tracing_readme_read(struct file *filp, char __user *ubuf,
2441static const struct file_operations tracing_readme_fops = { 2620static const struct file_operations tracing_readme_fops = {
2442 .open = tracing_open_generic, 2621 .open = tracing_open_generic,
2443 .read = tracing_readme_read, 2622 .read = tracing_readme_read,
2623 .llseek = generic_file_llseek,
2444}; 2624};
2445 2625
2446static ssize_t 2626static ssize_t
@@ -2491,6 +2671,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
2491static const struct file_operations tracing_saved_cmdlines_fops = { 2671static const struct file_operations tracing_saved_cmdlines_fops = {
2492 .open = tracing_open_generic, 2672 .open = tracing_open_generic,
2493 .read = tracing_saved_cmdlines_read, 2673 .read = tracing_saved_cmdlines_read,
2674 .llseek = generic_file_llseek,
2494}; 2675};
2495 2676
2496static ssize_t 2677static ssize_t
@@ -2543,7 +2724,7 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2543 } 2724 }
2544 mutex_unlock(&trace_types_lock); 2725 mutex_unlock(&trace_types_lock);
2545 2726
2546 filp->f_pos += cnt; 2727 *ppos += cnt;
2547 2728
2548 return cnt; 2729 return cnt;
2549} 2730}
@@ -2552,7 +2733,7 @@ static ssize_t
2552tracing_set_trace_read(struct file *filp, char __user *ubuf, 2733tracing_set_trace_read(struct file *filp, char __user *ubuf,
2553 size_t cnt, loff_t *ppos) 2734 size_t cnt, loff_t *ppos)
2554{ 2735{
2555 char buf[max_tracer_type_len+2]; 2736 char buf[MAX_TRACER_SIZE+2];
2556 int r; 2737 int r;
2557 2738
2558 mutex_lock(&trace_types_lock); 2739 mutex_lock(&trace_types_lock);
@@ -2586,6 +2767,9 @@ static int tracing_resize_ring_buffer(unsigned long size)
2586 if (ret < 0) 2767 if (ret < 0)
2587 return ret; 2768 return ret;
2588 2769
2770 if (!current_trace->use_max_tr)
2771 goto out;
2772
2589 ret = ring_buffer_resize(max_tr.buffer, size); 2773 ret = ring_buffer_resize(max_tr.buffer, size);
2590 if (ret < 0) { 2774 if (ret < 0) {
2591 int r; 2775 int r;
@@ -2613,11 +2797,14 @@ static int tracing_resize_ring_buffer(unsigned long size)
2613 return ret; 2797 return ret;
2614 } 2798 }
2615 2799
2800 max_tr.entries = size;
2801 out:
2616 global_trace.entries = size; 2802 global_trace.entries = size;
2617 2803
2618 return ret; 2804 return ret;
2619} 2805}
2620 2806
2807
2621/** 2808/**
2622 * tracing_update_buffers - used by tracing facility to expand ring buffers 2809 * tracing_update_buffers - used by tracing facility to expand ring buffers
2623 * 2810 *
@@ -2678,12 +2865,26 @@ static int tracing_set_tracer(const char *buf)
2678 trace_branch_disable(); 2865 trace_branch_disable();
2679 if (current_trace && current_trace->reset) 2866 if (current_trace && current_trace->reset)
2680 current_trace->reset(tr); 2867 current_trace->reset(tr);
2681 2868 if (current_trace && current_trace->use_max_tr) {
2869 /*
2870 * We don't free the ring buffer. instead, resize it because
2871 * The max_tr ring buffer has some state (e.g. ring->clock) and
2872 * we want preserve it.
2873 */
2874 ring_buffer_resize(max_tr.buffer, 1);
2875 max_tr.entries = 1;
2876 }
2682 destroy_trace_option_files(topts); 2877 destroy_trace_option_files(topts);
2683 2878
2684 current_trace = t; 2879 current_trace = t;
2685 2880
2686 topts = create_trace_option_files(current_trace); 2881 topts = create_trace_option_files(current_trace);
2882 if (current_trace->use_max_tr) {
2883 ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
2884 if (ret < 0)
2885 goto out;
2886 max_tr.entries = global_trace.entries;
2887 }
2687 2888
2688 if (t->init) { 2889 if (t->init) {
2689 ret = tracer_init(t, tr); 2890 ret = tracer_init(t, tr);
@@ -2702,15 +2903,15 @@ static ssize_t
2702tracing_set_trace_write(struct file *filp, const char __user *ubuf, 2903tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2703 size_t cnt, loff_t *ppos) 2904 size_t cnt, loff_t *ppos)
2704{ 2905{
2705 char buf[max_tracer_type_len+1]; 2906 char buf[MAX_TRACER_SIZE+1];
2706 int i; 2907 int i;
2707 size_t ret; 2908 size_t ret;
2708 int err; 2909 int err;
2709 2910
2710 ret = cnt; 2911 ret = cnt;
2711 2912
2712 if (cnt > max_tracer_type_len) 2913 if (cnt > MAX_TRACER_SIZE)
2713 cnt = max_tracer_type_len; 2914 cnt = MAX_TRACER_SIZE;
2714 2915
2715 if (copy_from_user(&buf, ubuf, cnt)) 2916 if (copy_from_user(&buf, ubuf, cnt))
2716 return -EFAULT; 2917 return -EFAULT;
@@ -2725,7 +2926,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2725 if (err) 2926 if (err)
2726 return err; 2927 return err;
2727 2928
2728 filp->f_pos += ret; 2929 *ppos += ret;
2729 2930
2730 return ret; 2931 return ret;
2731} 2932}
@@ -2782,22 +2983,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2782 2983
2783 mutex_lock(&trace_types_lock); 2984 mutex_lock(&trace_types_lock);
2784 2985
2785 /* We only allow one reader per cpu */
2786 if (cpu_file == TRACE_PIPE_ALL_CPU) {
2787 if (!cpumask_empty(tracing_reader_cpumask)) {
2788 ret = -EBUSY;
2789 goto out;
2790 }
2791 cpumask_setall(tracing_reader_cpumask);
2792 } else {
2793 if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2794 cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2795 else {
2796 ret = -EBUSY;
2797 goto out;
2798 }
2799 }
2800
2801 /* create a buffer to store the information to pass to userspace */ 2986 /* create a buffer to store the information to pass to userspace */
2802 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 2987 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2803 if (!iter) { 2988 if (!iter) {
@@ -2836,6 +3021,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2836 if (iter->trace->pipe_open) 3021 if (iter->trace->pipe_open)
2837 iter->trace->pipe_open(iter); 3022 iter->trace->pipe_open(iter);
2838 3023
3024 nonseekable_open(inode, filp);
2839out: 3025out:
2840 mutex_unlock(&trace_types_lock); 3026 mutex_unlock(&trace_types_lock);
2841 return ret; 3027 return ret;
@@ -2853,10 +3039,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
2853 3039
2854 mutex_lock(&trace_types_lock); 3040 mutex_lock(&trace_types_lock);
2855 3041
2856 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) 3042 if (iter->trace->pipe_close)
2857 cpumask_clear(tracing_reader_cpumask); 3043 iter->trace->pipe_close(iter);
2858 else
2859 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2860 3044
2861 mutex_unlock(&trace_types_lock); 3045 mutex_unlock(&trace_types_lock);
2862 3046
@@ -3016,7 +3200,8 @@ waitagain:
3016 iter->pos = -1; 3200 iter->pos = -1;
3017 3201
3018 trace_event_read_lock(); 3202 trace_event_read_lock();
3019 while (find_next_entry_inc(iter) != NULL) { 3203 trace_access_lock(iter->cpu_file);
3204 while (trace_find_next_entry_inc(iter) != NULL) {
3020 enum print_line_t ret; 3205 enum print_line_t ret;
3021 int len = iter->seq.len; 3206 int len = iter->seq.len;
3022 3207
@@ -3032,6 +3217,7 @@ waitagain:
3032 if (iter->seq.len >= cnt) 3217 if (iter->seq.len >= cnt)
3033 break; 3218 break;
3034 } 3219 }
3220 trace_access_unlock(iter->cpu_file);
3035 trace_event_read_unlock(); 3221 trace_event_read_unlock();
3036 3222
3037 /* Now copy what we have to the user */ 3223 /* Now copy what we have to the user */
@@ -3064,7 +3250,7 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3064 __free_page(spd->pages[idx]); 3250 __free_page(spd->pages[idx]);
3065} 3251}
3066 3252
3067static struct pipe_buf_operations tracing_pipe_buf_ops = { 3253static const struct pipe_buf_operations tracing_pipe_buf_ops = {
3068 .can_merge = 0, 3254 .can_merge = 0,
3069 .map = generic_pipe_buf_map, 3255 .map = generic_pipe_buf_map,
3070 .unmap = generic_pipe_buf_unmap, 3256 .unmap = generic_pipe_buf_unmap,
@@ -3095,9 +3281,10 @@ tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
3095 break; 3281 break;
3096 } 3282 }
3097 3283
3098 trace_consume(iter); 3284 if (ret != TRACE_TYPE_NO_CONSUME)
3285 trace_consume(iter);
3099 rem -= count; 3286 rem -= count;
3100 if (!find_next_entry_inc(iter)) { 3287 if (!trace_find_next_entry_inc(iter)) {
3101 rem = 0; 3288 rem = 0;
3102 iter->ent = NULL; 3289 iter->ent = NULL;
3103 break; 3290 break;
@@ -3113,12 +3300,12 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3113 size_t len, 3300 size_t len,
3114 unsigned int flags) 3301 unsigned int flags)
3115{ 3302{
3116 struct page *pages[PIPE_BUFFERS]; 3303 struct page *pages_def[PIPE_DEF_BUFFERS];
3117 struct partial_page partial[PIPE_BUFFERS]; 3304 struct partial_page partial_def[PIPE_DEF_BUFFERS];
3118 struct trace_iterator *iter = filp->private_data; 3305 struct trace_iterator *iter = filp->private_data;
3119 struct splice_pipe_desc spd = { 3306 struct splice_pipe_desc spd = {
3120 .pages = pages, 3307 .pages = pages_def,
3121 .partial = partial, 3308 .partial = partial_def,
3122 .nr_pages = 0, /* This gets updated below. */ 3309 .nr_pages = 0, /* This gets updated below. */
3123 .flags = flags, 3310 .flags = flags,
3124 .ops = &tracing_pipe_buf_ops, 3311 .ops = &tracing_pipe_buf_ops,
@@ -3129,6 +3316,9 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3129 size_t rem; 3316 size_t rem;
3130 unsigned int i; 3317 unsigned int i;
3131 3318
3319 if (splice_grow_spd(pipe, &spd))
3320 return -ENOMEM;
3321
3132 /* copy the tracer to avoid using a global lock all around */ 3322 /* copy the tracer to avoid using a global lock all around */
3133 mutex_lock(&trace_types_lock); 3323 mutex_lock(&trace_types_lock);
3134 if (unlikely(old_tracer != current_trace && current_trace)) { 3324 if (unlikely(old_tracer != current_trace && current_trace)) {
@@ -3150,46 +3340,50 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3150 if (ret <= 0) 3340 if (ret <= 0)
3151 goto out_err; 3341 goto out_err;
3152 3342
3153 if (!iter->ent && !find_next_entry_inc(iter)) { 3343 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
3154 ret = -EFAULT; 3344 ret = -EFAULT;
3155 goto out_err; 3345 goto out_err;
3156 } 3346 }
3157 3347
3158 trace_event_read_lock(); 3348 trace_event_read_lock();
3349 trace_access_lock(iter->cpu_file);
3159 3350
3160 /* Fill as many pages as possible. */ 3351 /* Fill as many pages as possible. */
3161 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { 3352 for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
3162 pages[i] = alloc_page(GFP_KERNEL); 3353 spd.pages[i] = alloc_page(GFP_KERNEL);
3163 if (!pages[i]) 3354 if (!spd.pages[i])
3164 break; 3355 break;
3165 3356
3166 rem = tracing_fill_pipe_page(rem, iter); 3357 rem = tracing_fill_pipe_page(rem, iter);
3167 3358
3168 /* Copy the data into the page, so we can start over. */ 3359 /* Copy the data into the page, so we can start over. */
3169 ret = trace_seq_to_buffer(&iter->seq, 3360 ret = trace_seq_to_buffer(&iter->seq,
3170 page_address(pages[i]), 3361 page_address(spd.pages[i]),
3171 iter->seq.len); 3362 iter->seq.len);
3172 if (ret < 0) { 3363 if (ret < 0) {
3173 __free_page(pages[i]); 3364 __free_page(spd.pages[i]);
3174 break; 3365 break;
3175 } 3366 }
3176 partial[i].offset = 0; 3367 spd.partial[i].offset = 0;
3177 partial[i].len = iter->seq.len; 3368 spd.partial[i].len = iter->seq.len;
3178 3369
3179 trace_seq_init(&iter->seq); 3370 trace_seq_init(&iter->seq);
3180 } 3371 }
3181 3372
3373 trace_access_unlock(iter->cpu_file);
3182 trace_event_read_unlock(); 3374 trace_event_read_unlock();
3183 mutex_unlock(&iter->mutex); 3375 mutex_unlock(&iter->mutex);
3184 3376
3185 spd.nr_pages = i; 3377 spd.nr_pages = i;
3186 3378
3187 return splice_to_pipe(pipe, &spd); 3379 ret = splice_to_pipe(pipe, &spd);
3380out:
3381 splice_shrink_spd(pipe, &spd);
3382 return ret;
3188 3383
3189out_err: 3384out_err:
3190 mutex_unlock(&iter->mutex); 3385 mutex_unlock(&iter->mutex);
3191 3386 goto out;
3192 return ret;
3193} 3387}
3194 3388
3195static ssize_t 3389static ssize_t
@@ -3259,7 +3453,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3259 } 3453 }
3260 } 3454 }
3261 3455
3262 filp->f_pos += cnt; 3456 *ppos += cnt;
3263 3457
3264 /* If check pages failed, return ENOMEM */ 3458 /* If check pages failed, return ENOMEM */
3265 if (tracing_disabled) 3459 if (tracing_disabled)
@@ -3273,7 +3467,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3273 } 3467 }
3274 3468
3275 tracing_start(); 3469 tracing_start();
3276 max_tr.entries = global_trace.entries;
3277 mutex_unlock(&trace_types_lock); 3470 mutex_unlock(&trace_types_lock);
3278 3471
3279 return cnt; 3472 return cnt;
@@ -3294,7 +3487,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3294 size_t cnt, loff_t *fpos) 3487 size_t cnt, loff_t *fpos)
3295{ 3488{
3296 char *buf; 3489 char *buf;
3297 char *end; 3490 size_t written;
3298 3491
3299 if (tracing_disabled) 3492 if (tracing_disabled)
3300 return -EINVAL; 3493 return -EINVAL;
@@ -3302,7 +3495,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3302 if (cnt > TRACE_BUF_SIZE) 3495 if (cnt > TRACE_BUF_SIZE)
3303 cnt = TRACE_BUF_SIZE; 3496 cnt = TRACE_BUF_SIZE;
3304 3497
3305 buf = kmalloc(cnt + 1, GFP_KERNEL); 3498 buf = kmalloc(cnt + 2, GFP_KERNEL);
3306 if (buf == NULL) 3499 if (buf == NULL)
3307 return -ENOMEM; 3500 return -ENOMEM;
3308 3501
@@ -3310,36 +3503,102 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3310 kfree(buf); 3503 kfree(buf);
3311 return -EFAULT; 3504 return -EFAULT;
3312 } 3505 }
3506 if (buf[cnt-1] != '\n') {
3507 buf[cnt] = '\n';
3508 buf[cnt+1] = '\0';
3509 } else
3510 buf[cnt] = '\0';
3313 3511
3314 /* Cut from the first nil or newline. */ 3512 written = mark_printk("%s", buf);
3315 buf[cnt] = '\0';
3316 end = strchr(buf, '\n');
3317 if (end)
3318 *end = '\0';
3319
3320 cnt = mark_printk("%s\n", buf);
3321 kfree(buf); 3513 kfree(buf);
3514 *fpos += written;
3515
3516 /* don't tell userspace we wrote more - it might confuse them */
3517 if (written > cnt)
3518 written = cnt;
3519
3520 return written;
3521}
3522
3523static int tracing_clock_show(struct seq_file *m, void *v)
3524{
3525 int i;
3526
3527 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3528 seq_printf(m,
3529 "%s%s%s%s", i ? " " : "",
3530 i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3531 i == trace_clock_id ? "]" : "");
3532 seq_putc(m, '\n');
3533
3534 return 0;
3535}
3536
3537static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3538 size_t cnt, loff_t *fpos)
3539{
3540 char buf[64];
3541 const char *clockstr;
3542 int i;
3543
3544 if (cnt >= sizeof(buf))
3545 return -EINVAL;
3546
3547 if (copy_from_user(&buf, ubuf, cnt))
3548 return -EFAULT;
3549
3550 buf[cnt] = 0;
3551
3552 clockstr = strstrip(buf);
3553
3554 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
3555 if (strcmp(trace_clocks[i].name, clockstr) == 0)
3556 break;
3557 }
3558 if (i == ARRAY_SIZE(trace_clocks))
3559 return -EINVAL;
3560
3561 trace_clock_id = i;
3562
3563 mutex_lock(&trace_types_lock);
3564
3565 ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func);
3566 if (max_tr.buffer)
3567 ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
3568
3569 mutex_unlock(&trace_types_lock);
3570
3322 *fpos += cnt; 3571 *fpos += cnt;
3323 3572
3324 return cnt; 3573 return cnt;
3325} 3574}
3326 3575
3576static int tracing_clock_open(struct inode *inode, struct file *file)
3577{
3578 if (tracing_disabled)
3579 return -ENODEV;
3580 return single_open(file, tracing_clock_show, NULL);
3581}
3582
3327static const struct file_operations tracing_max_lat_fops = { 3583static const struct file_operations tracing_max_lat_fops = {
3328 .open = tracing_open_generic, 3584 .open = tracing_open_generic,
3329 .read = tracing_max_lat_read, 3585 .read = tracing_max_lat_read,
3330 .write = tracing_max_lat_write, 3586 .write = tracing_max_lat_write,
3587 .llseek = generic_file_llseek,
3331}; 3588};
3332 3589
3333static const struct file_operations tracing_ctrl_fops = { 3590static const struct file_operations tracing_ctrl_fops = {
3334 .open = tracing_open_generic, 3591 .open = tracing_open_generic,
3335 .read = tracing_ctrl_read, 3592 .read = tracing_ctrl_read,
3336 .write = tracing_ctrl_write, 3593 .write = tracing_ctrl_write,
3594 .llseek = generic_file_llseek,
3337}; 3595};
3338 3596
3339static const struct file_operations set_tracer_fops = { 3597static const struct file_operations set_tracer_fops = {
3340 .open = tracing_open_generic, 3598 .open = tracing_open_generic,
3341 .read = tracing_set_trace_read, 3599 .read = tracing_set_trace_read,
3342 .write = tracing_set_trace_write, 3600 .write = tracing_set_trace_write,
3601 .llseek = generic_file_llseek,
3343}; 3602};
3344 3603
3345static const struct file_operations tracing_pipe_fops = { 3604static const struct file_operations tracing_pipe_fops = {
@@ -3348,17 +3607,28 @@ static const struct file_operations tracing_pipe_fops = {
3348 .read = tracing_read_pipe, 3607 .read = tracing_read_pipe,
3349 .splice_read = tracing_splice_read_pipe, 3608 .splice_read = tracing_splice_read_pipe,
3350 .release = tracing_release_pipe, 3609 .release = tracing_release_pipe,
3610 .llseek = no_llseek,
3351}; 3611};
3352 3612
3353static const struct file_operations tracing_entries_fops = { 3613static const struct file_operations tracing_entries_fops = {
3354 .open = tracing_open_generic, 3614 .open = tracing_open_generic,
3355 .read = tracing_entries_read, 3615 .read = tracing_entries_read,
3356 .write = tracing_entries_write, 3616 .write = tracing_entries_write,
3617 .llseek = generic_file_llseek,
3357}; 3618};
3358 3619
3359static const struct file_operations tracing_mark_fops = { 3620static const struct file_operations tracing_mark_fops = {
3360 .open = tracing_open_generic, 3621 .open = tracing_open_generic,
3361 .write = tracing_mark_write, 3622 .write = tracing_mark_write,
3623 .llseek = generic_file_llseek,
3624};
3625
3626static const struct file_operations trace_clock_fops = {
3627 .open = tracing_clock_open,
3628 .read = seq_read,
3629 .llseek = seq_lseek,
3630 .release = single_release,
3631 .write = tracing_clock_write,
3362}; 3632};
3363 3633
3364struct ftrace_buffer_info { 3634struct ftrace_buffer_info {
@@ -3396,7 +3666,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3396 size_t count, loff_t *ppos) 3666 size_t count, loff_t *ppos)
3397{ 3667{
3398 struct ftrace_buffer_info *info = filp->private_data; 3668 struct ftrace_buffer_info *info = filp->private_data;
3399 unsigned int pos;
3400 ssize_t ret; 3669 ssize_t ret;
3401 size_t size; 3670 size_t size;
3402 3671
@@ -3414,18 +3683,15 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3414 3683
3415 info->read = 0; 3684 info->read = 0;
3416 3685
3686 trace_access_lock(info->cpu);
3417 ret = ring_buffer_read_page(info->tr->buffer, 3687 ret = ring_buffer_read_page(info->tr->buffer,
3418 &info->spare, 3688 &info->spare,
3419 count, 3689 count,
3420 info->cpu, 0); 3690 info->cpu, 0);
3691 trace_access_unlock(info->cpu);
3421 if (ret < 0) 3692 if (ret < 0)
3422 return 0; 3693 return 0;
3423 3694
3424 pos = ring_buffer_page_len(info->spare);
3425
3426 if (pos < PAGE_SIZE)
3427 memset(info->spare + pos, 0, PAGE_SIZE - pos);
3428
3429read: 3695read:
3430 size = PAGE_SIZE - info->read; 3696 size = PAGE_SIZE - info->read;
3431 if (size > count) 3697 if (size > count)
@@ -3487,7 +3753,7 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3487} 3753}
3488 3754
3489/* Pipe buffer operations for a buffer. */ 3755/* Pipe buffer operations for a buffer. */
3490static struct pipe_buf_operations buffer_pipe_buf_ops = { 3756static const struct pipe_buf_operations buffer_pipe_buf_ops = {
3491 .can_merge = 0, 3757 .can_merge = 0,
3492 .map = generic_pipe_buf_map, 3758 .map = generic_pipe_buf_map,
3493 .unmap = generic_pipe_buf_unmap, 3759 .unmap = generic_pipe_buf_unmap,
@@ -3520,11 +3786,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3520 unsigned int flags) 3786 unsigned int flags)
3521{ 3787{
3522 struct ftrace_buffer_info *info = file->private_data; 3788 struct ftrace_buffer_info *info = file->private_data;
3523 struct partial_page partial[PIPE_BUFFERS]; 3789 struct partial_page partial_def[PIPE_DEF_BUFFERS];
3524 struct page *pages[PIPE_BUFFERS]; 3790 struct page *pages_def[PIPE_DEF_BUFFERS];
3525 struct splice_pipe_desc spd = { 3791 struct splice_pipe_desc spd = {
3526 .pages = pages, 3792 .pages = pages_def,
3527 .partial = partial, 3793 .partial = partial_def,
3528 .flags = flags, 3794 .flags = flags,
3529 .ops = &buffer_pipe_buf_ops, 3795 .ops = &buffer_pipe_buf_ops,
3530 .spd_release = buffer_spd_release, 3796 .spd_release = buffer_spd_release,
@@ -3533,21 +3799,28 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3533 int entries, size, i; 3799 int entries, size, i;
3534 size_t ret; 3800 size_t ret;
3535 3801
3802 if (splice_grow_spd(pipe, &spd))
3803 return -ENOMEM;
3804
3536 if (*ppos & (PAGE_SIZE - 1)) { 3805 if (*ppos & (PAGE_SIZE - 1)) {
3537 WARN_ONCE(1, "Ftrace: previous read must page-align\n"); 3806 WARN_ONCE(1, "Ftrace: previous read must page-align\n");
3538 return -EINVAL; 3807 ret = -EINVAL;
3808 goto out;
3539 } 3809 }
3540 3810
3541 if (len & (PAGE_SIZE - 1)) { 3811 if (len & (PAGE_SIZE - 1)) {
3542 WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); 3812 WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
3543 if (len < PAGE_SIZE) 3813 if (len < PAGE_SIZE) {
3544 return -EINVAL; 3814 ret = -EINVAL;
3815 goto out;
3816 }
3545 len &= PAGE_MASK; 3817 len &= PAGE_MASK;
3546 } 3818 }
3547 3819
3820 trace_access_lock(info->cpu);
3548 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3821 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3549 3822
3550 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { 3823 for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
3551 struct page *page; 3824 struct page *page;
3552 int r; 3825 int r;
3553 3826
@@ -3592,6 +3865,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3592 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3865 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3593 } 3866 }
3594 3867
3868 trace_access_unlock(info->cpu);
3595 spd.nr_pages = i; 3869 spd.nr_pages = i;
3596 3870
3597 /* did we read anything? */ 3871 /* did we read anything? */
@@ -3601,11 +3875,12 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3601 else 3875 else
3602 ret = 0; 3876 ret = 0;
3603 /* TODO: block */ 3877 /* TODO: block */
3604 return ret; 3878 goto out;
3605 } 3879 }
3606 3880
3607 ret = splice_to_pipe(pipe, &spd); 3881 ret = splice_to_pipe(pipe, &spd);
3608 3882 splice_shrink_spd(pipe, &spd);
3883out:
3609 return ret; 3884 return ret;
3610} 3885}
3611 3886
@@ -3628,7 +3903,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3628 3903
3629 s = kmalloc(sizeof(*s), GFP_KERNEL); 3904 s = kmalloc(sizeof(*s), GFP_KERNEL);
3630 if (!s) 3905 if (!s)
3631 return ENOMEM; 3906 return -ENOMEM;
3632 3907
3633 trace_seq_init(s); 3908 trace_seq_init(s);
3634 3909
@@ -3641,9 +3916,6 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3641 cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); 3916 cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
3642 trace_seq_printf(s, "commit overrun: %ld\n", cnt); 3917 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
3643 3918
3644 cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
3645 trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
3646
3647 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); 3919 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
3648 3920
3649 kfree(s); 3921 kfree(s);
@@ -3654,6 +3926,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3654static const struct file_operations tracing_stats_fops = { 3926static const struct file_operations tracing_stats_fops = {
3655 .open = tracing_open_generic, 3927 .open = tracing_open_generic,
3656 .read = tracing_stats_read, 3928 .read = tracing_stats_read,
3929 .llseek = generic_file_llseek,
3657}; 3930};
3658 3931
3659#ifdef CONFIG_DYNAMIC_FTRACE 3932#ifdef CONFIG_DYNAMIC_FTRACE
@@ -3690,6 +3963,7 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf,
3690static const struct file_operations tracing_dyn_info_fops = { 3963static const struct file_operations tracing_dyn_info_fops = {
3691 .open = tracing_open_generic, 3964 .open = tracing_open_generic,
3692 .read = tracing_read_dyn_info, 3965 .read = tracing_read_dyn_info,
3966 .llseek = generic_file_llseek,
3693}; 3967};
3694#endif 3968#endif
3695 3969
@@ -3746,13 +4020,9 @@ static void tracing_init_debugfs_percpu(long cpu)
3746{ 4020{
3747 struct dentry *d_percpu = tracing_dentry_percpu(); 4021 struct dentry *d_percpu = tracing_dentry_percpu();
3748 struct dentry *d_cpu; 4022 struct dentry *d_cpu;
3749 /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ 4023 char cpu_dir[30]; /* 30 characters should be more than enough */
3750 char cpu_dir[7];
3751
3752 if (cpu > 999 || cpu < 0)
3753 return;
3754 4024
3755 sprintf(cpu_dir, "cpu%ld", cpu); 4025 snprintf(cpu_dir, 30, "cpu%ld", cpu);
3756 d_cpu = debugfs_create_dir(cpu_dir, d_percpu); 4026 d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
3757 if (!d_cpu) { 4027 if (!d_cpu) {
3758 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); 4028 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
@@ -3821,39 +4091,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3821 if (ret < 0) 4091 if (ret < 0)
3822 return ret; 4092 return ret;
3823 4093
3824 ret = 0; 4094 if (val != 0 && val != 1)
3825 switch (val) { 4095 return -EINVAL;
3826 case 0:
3827 /* do nothing if already cleared */
3828 if (!(topt->flags->val & topt->opt->bit))
3829 break;
3830
3831 mutex_lock(&trace_types_lock);
3832 if (current_trace->set_flag)
3833 ret = current_trace->set_flag(topt->flags->val,
3834 topt->opt->bit, 0);
3835 mutex_unlock(&trace_types_lock);
3836 if (ret)
3837 return ret;
3838 topt->flags->val &= ~topt->opt->bit;
3839 break;
3840 case 1:
3841 /* do nothing if already set */
3842 if (topt->flags->val & topt->opt->bit)
3843 break;
3844 4096
4097 if (!!(topt->flags->val & topt->opt->bit) != val) {
3845 mutex_lock(&trace_types_lock); 4098 mutex_lock(&trace_types_lock);
3846 if (current_trace->set_flag) 4099 ret = __set_tracer_option(current_trace, topt->flags,
3847 ret = current_trace->set_flag(topt->flags->val, 4100 topt->opt, !val);
3848 topt->opt->bit, 1);
3849 mutex_unlock(&trace_types_lock); 4101 mutex_unlock(&trace_types_lock);
3850 if (ret) 4102 if (ret)
3851 return ret; 4103 return ret;
3852 topt->flags->val |= topt->opt->bit;
3853 break;
3854
3855 default:
3856 return -EINVAL;
3857 } 4104 }
3858 4105
3859 *ppos += cnt; 4106 *ppos += cnt;
@@ -3866,6 +4113,7 @@ static const struct file_operations trace_options_fops = {
3866 .open = tracing_open_generic, 4113 .open = tracing_open_generic,
3867 .read = trace_options_read, 4114 .read = trace_options_read,
3868 .write = trace_options_write, 4115 .write = trace_options_write,
4116 .llseek = generic_file_llseek,
3869}; 4117};
3870 4118
3871static ssize_t 4119static ssize_t
@@ -3904,17 +4152,9 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
3904 if (ret < 0) 4152 if (ret < 0)
3905 return ret; 4153 return ret;
3906 4154
3907 switch (val) { 4155 if (val != 0 && val != 1)
3908 case 0:
3909 trace_flags &= ~(1 << index);
3910 break;
3911 case 1:
3912 trace_flags |= 1 << index;
3913 break;
3914
3915 default:
3916 return -EINVAL; 4156 return -EINVAL;
3917 } 4157 set_tracer_flags(1 << index, val);
3918 4158
3919 *ppos += cnt; 4159 *ppos += cnt;
3920 4160
@@ -3925,6 +4165,7 @@ static const struct file_operations trace_options_core_fops = {
3925 .open = tracing_open_generic, 4165 .open = tracing_open_generic,
3926 .read = trace_options_core_read, 4166 .read = trace_options_core_read,
3927 .write = trace_options_core_write, 4167 .write = trace_options_core_write,
4168 .llseek = generic_file_llseek,
3928}; 4169};
3929 4170
3930struct dentry *trace_create_file(const char *name, 4171struct dentry *trace_create_file(const char *name,
@@ -4062,6 +4303,8 @@ static __init int tracer_init_debugfs(void)
4062 struct dentry *d_tracer; 4303 struct dentry *d_tracer;
4063 int cpu; 4304 int cpu;
4064 4305
4306 trace_access_lock_init();
4307
4065 d_tracer = tracing_init_dentry(); 4308 d_tracer = tracing_init_dentry();
4066 4309
4067 trace_create_file("tracing_enabled", 0644, d_tracer, 4310 trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4082,8 +4325,10 @@ static __init int tracer_init_debugfs(void)
4082 trace_create_file("current_tracer", 0644, d_tracer, 4325 trace_create_file("current_tracer", 0644, d_tracer,
4083 &global_trace, &set_tracer_fops); 4326 &global_trace, &set_tracer_fops);
4084 4327
4328#ifdef CONFIG_TRACER_MAX_TRACE
4085 trace_create_file("tracing_max_latency", 0644, d_tracer, 4329 trace_create_file("tracing_max_latency", 0644, d_tracer,
4086 &tracing_max_latency, &tracing_max_lat_fops); 4330 &tracing_max_latency, &tracing_max_lat_fops);
4331#endif
4087 4332
4088 trace_create_file("tracing_thresh", 0644, d_tracer, 4333 trace_create_file("tracing_thresh", 0644, d_tracer,
4089 &tracing_thresh, &tracing_max_lat_fops); 4334 &tracing_thresh, &tracing_max_lat_fops);
@@ -4103,13 +4348,13 @@ static __init int tracer_init_debugfs(void)
4103 trace_create_file("saved_cmdlines", 0444, d_tracer, 4348 trace_create_file("saved_cmdlines", 0444, d_tracer,
4104 NULL, &tracing_saved_cmdlines_fops); 4349 NULL, &tracing_saved_cmdlines_fops);
4105 4350
4351 trace_create_file("trace_clock", 0644, d_tracer, NULL,
4352 &trace_clock_fops);
4353
4106#ifdef CONFIG_DYNAMIC_FTRACE 4354#ifdef CONFIG_DYNAMIC_FTRACE
4107 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, 4355 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
4108 &ftrace_update_tot_cnt, &tracing_dyn_info_fops); 4356 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
4109#endif 4357#endif
4110#ifdef CONFIG_SYSPROF_TRACER
4111 init_tracer_sysprof_debugfs(d_tracer);
4112#endif
4113 4358
4114 create_trace_options_dir(); 4359 create_trace_options_dir();
4115 4360
@@ -4123,7 +4368,7 @@ static int trace_panic_handler(struct notifier_block *this,
4123 unsigned long event, void *unused) 4368 unsigned long event, void *unused)
4124{ 4369{
4125 if (ftrace_dump_on_oops) 4370 if (ftrace_dump_on_oops)
4126 ftrace_dump(); 4371 ftrace_dump(ftrace_dump_on_oops);
4127 return NOTIFY_OK; 4372 return NOTIFY_OK;
4128} 4373}
4129 4374
@@ -4140,7 +4385,7 @@ static int trace_die_handler(struct notifier_block *self,
4140 switch (val) { 4385 switch (val) {
4141 case DIE_OOPS: 4386 case DIE_OOPS:
4142 if (ftrace_dump_on_oops) 4387 if (ftrace_dump_on_oops)
4143 ftrace_dump(); 4388 ftrace_dump(ftrace_dump_on_oops);
4144 break; 4389 break;
4145 default: 4390 default:
4146 break; 4391 break;
@@ -4166,7 +4411,7 @@ static struct notifier_block trace_die_notifier = {
4166 */ 4411 */
4167#define KERN_TRACE KERN_EMERG 4412#define KERN_TRACE KERN_EMERG
4168 4413
4169static void 4414void
4170trace_printk_seq(struct trace_seq *s) 4415trace_printk_seq(struct trace_seq *s)
4171{ 4416{
4172 /* Probably should print a warning here. */ 4417 /* Probably should print a warning here. */
@@ -4181,10 +4426,18 @@ trace_printk_seq(struct trace_seq *s)
4181 trace_seq_init(s); 4426 trace_seq_init(s);
4182} 4427}
4183 4428
4184static void __ftrace_dump(bool disable_tracing) 4429void trace_init_global_iter(struct trace_iterator *iter)
4430{
4431 iter->tr = &global_trace;
4432 iter->trace = current_trace;
4433 iter->cpu_file = TRACE_PIPE_ALL_CPU;
4434}
4435
4436static void
4437__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
4185{ 4438{
4186 static raw_spinlock_t ftrace_dump_lock = 4439 static arch_spinlock_t ftrace_dump_lock =
4187 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 4440 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
4188 /* use static because iter can be a bit big for the stack */ 4441 /* use static because iter can be a bit big for the stack */
4189 static struct trace_iterator iter; 4442 static struct trace_iterator iter;
4190 unsigned int old_userobj; 4443 unsigned int old_userobj;
@@ -4194,7 +4447,7 @@ static void __ftrace_dump(bool disable_tracing)
4194 4447
4195 /* only one dump */ 4448 /* only one dump */
4196 local_irq_save(flags); 4449 local_irq_save(flags);
4197 __raw_spin_lock(&ftrace_dump_lock); 4450 arch_spin_lock(&ftrace_dump_lock);
4198 if (dump_ran) 4451 if (dump_ran)
4199 goto out; 4452 goto out;
4200 4453
@@ -4205,8 +4458,10 @@ static void __ftrace_dump(bool disable_tracing)
4205 if (disable_tracing) 4458 if (disable_tracing)
4206 ftrace_kill(); 4459 ftrace_kill();
4207 4460
4461 trace_init_global_iter(&iter);
4462
4208 for_each_tracing_cpu(cpu) { 4463 for_each_tracing_cpu(cpu) {
4209 atomic_inc(&global_trace.data[cpu]->disabled); 4464 atomic_inc(&iter.tr->data[cpu]->disabled);
4210 } 4465 }
4211 4466
4212 old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ; 4467 old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
@@ -4214,12 +4469,25 @@ static void __ftrace_dump(bool disable_tracing)
4214 /* don't look at user memory in panic mode */ 4469 /* don't look at user memory in panic mode */
4215 trace_flags &= ~TRACE_ITER_SYM_USEROBJ; 4470 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
4216 4471
4217 printk(KERN_TRACE "Dumping ftrace buffer:\n");
4218
4219 /* Simulate the iterator */ 4472 /* Simulate the iterator */
4220 iter.tr = &global_trace; 4473 iter.tr = &global_trace;
4221 iter.trace = current_trace; 4474 iter.trace = current_trace;
4222 iter.cpu_file = TRACE_PIPE_ALL_CPU; 4475
4476 switch (oops_dump_mode) {
4477 case DUMP_ALL:
4478 iter.cpu_file = TRACE_PIPE_ALL_CPU;
4479 break;
4480 case DUMP_ORIG:
4481 iter.cpu_file = raw_smp_processor_id();
4482 break;
4483 case DUMP_NONE:
4484 goto out_enable;
4485 default:
4486 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
4487 iter.cpu_file = TRACE_PIPE_ALL_CPU;
4488 }
4489
4490 printk(KERN_TRACE "Dumping ftrace buffer:\n");
4223 4491
4224 /* 4492 /*
4225 * We need to stop all tracing on all CPUS to read the 4493 * We need to stop all tracing on all CPUS to read the
@@ -4242,9 +4510,12 @@ static void __ftrace_dump(bool disable_tracing)
4242 iter.iter_flags |= TRACE_FILE_LAT_FMT; 4510 iter.iter_flags |= TRACE_FILE_LAT_FMT;
4243 iter.pos = -1; 4511 iter.pos = -1;
4244 4512
4245 if (find_next_entry_inc(&iter) != NULL) { 4513 if (trace_find_next_entry_inc(&iter) != NULL) {
4246 print_trace_line(&iter); 4514 int ret;
4247 trace_consume(&iter); 4515
4516 ret = print_trace_line(&iter);
4517 if (ret != TRACE_TYPE_NO_CONSUME)
4518 trace_consume(&iter);
4248 } 4519 }
4249 4520
4250 trace_printk_seq(&iter.seq); 4521 trace_printk_seq(&iter.seq);
@@ -4255,30 +4526,30 @@ static void __ftrace_dump(bool disable_tracing)
4255 else 4526 else
4256 printk(KERN_TRACE "---------------------------------\n"); 4527 printk(KERN_TRACE "---------------------------------\n");
4257 4528
4529 out_enable:
4258 /* Re-enable tracing if requested */ 4530 /* Re-enable tracing if requested */
4259 if (!disable_tracing) { 4531 if (!disable_tracing) {
4260 trace_flags |= old_userobj; 4532 trace_flags |= old_userobj;
4261 4533
4262 for_each_tracing_cpu(cpu) { 4534 for_each_tracing_cpu(cpu) {
4263 atomic_dec(&global_trace.data[cpu]->disabled); 4535 atomic_dec(&iter.tr->data[cpu]->disabled);
4264 } 4536 }
4265 tracing_on(); 4537 tracing_on();
4266 } 4538 }
4267 4539
4268 out: 4540 out:
4269 __raw_spin_unlock(&ftrace_dump_lock); 4541 arch_spin_unlock(&ftrace_dump_lock);
4270 local_irq_restore(flags); 4542 local_irq_restore(flags);
4271} 4543}
4272 4544
4273/* By default: disable tracing after the dump */ 4545/* By default: disable tracing after the dump */
4274void ftrace_dump(void) 4546void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
4275{ 4547{
4276 __ftrace_dump(true); 4548 __ftrace_dump(true, oops_dump_mode);
4277} 4549}
4278 4550
4279__init static int tracer_alloc_buffers(void) 4551__init static int tracer_alloc_buffers(void)
4280{ 4552{
4281 struct trace_array_cpu *data;
4282 int ring_buf_size; 4553 int ring_buf_size;
4283 int i; 4554 int i;
4284 int ret = -ENOMEM; 4555 int ret = -ENOMEM;
@@ -4289,9 +4560,6 @@ __init static int tracer_alloc_buffers(void)
4289 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) 4560 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
4290 goto out_free_buffer_mask; 4561 goto out_free_buffer_mask;
4291 4562
4292 if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
4293 goto out_free_tracing_cpumask;
4294
4295 /* To save memory, keep the ring buffer size to its minimum */ 4563 /* To save memory, keep the ring buffer size to its minimum */
4296 if (ring_buffer_expanded) 4564 if (ring_buffer_expanded)
4297 ring_buf_size = trace_buf_size; 4565 ring_buf_size = trace_buf_size;
@@ -4300,7 +4568,6 @@ __init static int tracer_alloc_buffers(void)
4300 4568
4301 cpumask_copy(tracing_buffer_mask, cpu_possible_mask); 4569 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
4302 cpumask_copy(tracing_cpumask, cpu_all_mask); 4570 cpumask_copy(tracing_cpumask, cpu_all_mask);
4303 cpumask_clear(tracing_reader_cpumask);
4304 4571
4305 /* TODO: make the number of buffers hot pluggable with CPUS */ 4572 /* TODO: make the number of buffers hot pluggable with CPUS */
4306 global_trace.buffer = ring_buffer_alloc(ring_buf_size, 4573 global_trace.buffer = ring_buffer_alloc(ring_buf_size,
@@ -4314,31 +4581,26 @@ __init static int tracer_alloc_buffers(void)
4314 4581
4315 4582
4316#ifdef CONFIG_TRACER_MAX_TRACE 4583#ifdef CONFIG_TRACER_MAX_TRACE
4317 max_tr.buffer = ring_buffer_alloc(ring_buf_size, 4584 max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS);
4318 TRACE_BUFFER_FLAGS);
4319 if (!max_tr.buffer) { 4585 if (!max_tr.buffer) {
4320 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); 4586 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
4321 WARN_ON(1); 4587 WARN_ON(1);
4322 ring_buffer_free(global_trace.buffer); 4588 ring_buffer_free(global_trace.buffer);
4323 goto out_free_cpumask; 4589 goto out_free_cpumask;
4324 } 4590 }
4325 max_tr.entries = ring_buffer_size(max_tr.buffer); 4591 max_tr.entries = 1;
4326 WARN_ON(max_tr.entries != global_trace.entries);
4327#endif 4592#endif
4328 4593
4329 /* Allocate the first page for all buffers */ 4594 /* Allocate the first page for all buffers */
4330 for_each_tracing_cpu(i) { 4595 for_each_tracing_cpu(i) {
4331 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); 4596 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4332 max_tr.data[i] = &per_cpu(max_data, i); 4597 max_tr.data[i] = &per_cpu(max_tr_data, i);
4333 } 4598 }
4334 4599
4335 trace_init_cmdlines(); 4600 trace_init_cmdlines();
4336 4601
4337 register_tracer(&nop_trace); 4602 register_tracer(&nop_trace);
4338 current_trace = &nop_trace; 4603 current_trace = &nop_trace;
4339#ifdef CONFIG_BOOT_TRACER
4340 register_tracer(&boot_tracer);
4341#endif
4342 /* All seems OK, enable tracing */ 4604 /* All seems OK, enable tracing */
4343 tracing_disabled = 0; 4605 tracing_disabled = 0;
4344 4606
@@ -4350,8 +4612,6 @@ __init static int tracer_alloc_buffers(void)
4350 return 0; 4612 return 0;
4351 4613
4352out_free_cpumask: 4614out_free_cpumask:
4353 free_cpumask_var(tracing_reader_cpumask);
4354out_free_tracing_cpumask:
4355 free_cpumask_var(tracing_cpumask); 4615 free_cpumask_var(tracing_cpumask);
4356out_free_buffer_mask: 4616out_free_buffer_mask:
4357 free_cpumask_var(tracing_buffer_mask); 4617 free_cpumask_var(tracing_buffer_mask);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6e735d4771f8..9021f8c0c0c3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -7,11 +7,9 @@
7#include <linux/clocksource.h> 7#include <linux/clocksource.h>
8#include <linux/ring_buffer.h> 8#include <linux/ring_buffer.h>
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/tracepoint.h>
10#include <linux/ftrace.h> 11#include <linux/ftrace.h>
11#include <trace/boot.h> 12#include <linux/hw_breakpoint.h>
12#include <linux/kmemtrace.h>
13#include <trace/power.h>
14
15#include <linux/trace_seq.h> 13#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 14#include <linux/ftrace_event.h>
17 15
@@ -24,177 +22,58 @@ enum trace_type {
24 TRACE_STACK, 22 TRACE_STACK,
25 TRACE_PRINT, 23 TRACE_PRINT,
26 TRACE_BPRINT, 24 TRACE_BPRINT,
27 TRACE_SPECIAL,
28 TRACE_MMIO_RW, 25 TRACE_MMIO_RW,
29 TRACE_MMIO_MAP, 26 TRACE_MMIO_MAP,
30 TRACE_BRANCH, 27 TRACE_BRANCH,
31 TRACE_BOOT_CALL,
32 TRACE_BOOT_RET,
33 TRACE_GRAPH_RET, 28 TRACE_GRAPH_RET,
34 TRACE_GRAPH_ENT, 29 TRACE_GRAPH_ENT,
35 TRACE_USER_STACK, 30 TRACE_USER_STACK,
36 TRACE_HW_BRANCHES,
37 TRACE_SYSCALL_ENTER,
38 TRACE_SYSCALL_EXIT,
39 TRACE_KMEM_ALLOC,
40 TRACE_KMEM_FREE,
41 TRACE_POWER,
42 TRACE_BLK, 31 TRACE_BLK,
43 32
44 __TRACE_LAST_TYPE, 33 __TRACE_LAST_TYPE,
45}; 34};
46 35
47/*
48 * Function trace entry - function address and parent function addres:
49 */
50struct ftrace_entry {
51 struct trace_entry ent;
52 unsigned long ip;
53 unsigned long parent_ip;
54};
55
56/* Function call entry */
57struct ftrace_graph_ent_entry {
58 struct trace_entry ent;
59 struct ftrace_graph_ent graph_ent;
60};
61
62/* Function return entry */
63struct ftrace_graph_ret_entry {
64 struct trace_entry ent;
65 struct ftrace_graph_ret ret;
66};
67extern struct tracer boot_tracer;
68
69/*
70 * Context switch trace entry - which task (and prio) we switched from/to:
71 */
72struct ctx_switch_entry {
73 struct trace_entry ent;
74 unsigned int prev_pid;
75 unsigned char prev_prio;
76 unsigned char prev_state;
77 unsigned int next_pid;
78 unsigned char next_prio;
79 unsigned char next_state;
80 unsigned int next_cpu;
81};
82
83/*
84 * Special (free-form) trace entry:
85 */
86struct special_entry {
87 struct trace_entry ent;
88 unsigned long arg1;
89 unsigned long arg2;
90 unsigned long arg3;
91};
92
93/*
94 * Stack-trace entry:
95 */
96
97#define FTRACE_STACK_ENTRIES 8
98
99struct stack_entry {
100 struct trace_entry ent;
101 unsigned long caller[FTRACE_STACK_ENTRIES];
102};
103
104struct userstack_entry {
105 struct trace_entry ent;
106 unsigned long caller[FTRACE_STACK_ENTRIES];
107};
108
109/*
110 * trace_printk entry:
111 */
112struct bprint_entry {
113 struct trace_entry ent;
114 unsigned long ip;
115 const char *fmt;
116 u32 buf[];
117};
118
119struct print_entry {
120 struct trace_entry ent;
121 unsigned long ip;
122 char buf[];
123};
124
125#define TRACE_OLD_SIZE 88
126 36
127struct trace_field_cont { 37#undef __field
128 unsigned char type; 38#define __field(type, item) type item;
129 /* Temporary till we get rid of this completely */
130 char buf[TRACE_OLD_SIZE - 1];
131};
132 39
133struct trace_mmiotrace_rw { 40#undef __field_struct
134 struct trace_entry ent; 41#define __field_struct(type, item) __field(type, item)
135 struct mmiotrace_rw rw;
136};
137 42
138struct trace_mmiotrace_map { 43#undef __field_desc
139 struct trace_entry ent; 44#define __field_desc(type, container, item)
140 struct mmiotrace_map map;
141};
142 45
143struct trace_boot_call { 46#undef __array
144 struct trace_entry ent; 47#define __array(type, item, size) type item[size];
145 struct boot_trace_call boot_call;
146};
147 48
148struct trace_boot_ret { 49#undef __array_desc
149 struct trace_entry ent; 50#define __array_desc(type, container, item, size)
150 struct boot_trace_ret boot_ret;
151};
152 51
153#define TRACE_FUNC_SIZE 30 52#undef __dynamic_array
154#define TRACE_FILE_SIZE 20 53#define __dynamic_array(type, item) type item[];
155struct trace_branch {
156 struct trace_entry ent;
157 unsigned line;
158 char func[TRACE_FUNC_SIZE+1];
159 char file[TRACE_FILE_SIZE+1];
160 char correct;
161};
162 54
163struct hw_branch_entry { 55#undef F_STRUCT
164 struct trace_entry ent; 56#define F_STRUCT(args...) args
165 u64 from;
166 u64 to;
167};
168 57
169struct trace_power { 58#undef FTRACE_ENTRY
170 struct trace_entry ent; 59#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
171 struct power_trace state_data; 60 struct struct_name { \
172}; 61 struct trace_entry ent; \
62 tstruct \
63 }
173 64
174enum kmemtrace_type_id { 65#undef TP_ARGS
175 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ 66#define TP_ARGS(args...) args
176 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
177 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
178};
179 67
180struct kmemtrace_alloc_entry { 68#undef FTRACE_ENTRY_DUP
181 struct trace_entry ent; 69#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk)
182 enum kmemtrace_type_id type_id;
183 unsigned long call_site;
184 const void *ptr;
185 size_t bytes_req;
186 size_t bytes_alloc;
187 gfp_t gfp_flags;
188 int node;
189};
190 70
191struct kmemtrace_free_entry { 71#include "trace_entries.h"
192 struct trace_entry ent;
193 enum kmemtrace_type_id type_id;
194 unsigned long call_site;
195 const void *ptr;
196};
197 72
73/*
74 * syscalls are special, and need special handling, this is why
75 * they are not included in trace_entries.h
76 */
198struct syscall_trace_enter { 77struct syscall_trace_enter {
199 struct trace_entry ent; 78 struct trace_entry ent;
200 int nr; 79 int nr;
@@ -204,16 +83,26 @@ struct syscall_trace_enter {
204struct syscall_trace_exit { 83struct syscall_trace_exit {
205 struct trace_entry ent; 84 struct trace_entry ent;
206 int nr; 85 int nr;
207 unsigned long ret; 86 long ret;
208}; 87};
209 88
89struct kprobe_trace_entry_head {
90 struct trace_entry ent;
91 unsigned long ip;
92};
93
94struct kretprobe_trace_entry_head {
95 struct trace_entry ent;
96 unsigned long func;
97 unsigned long ret_ip;
98};
210 99
211/* 100/*
212 * trace_flag_type is an enumeration that holds different 101 * trace_flag_type is an enumeration that holds different
213 * states when a trace occurs. These are: 102 * states when a trace occurs. These are:
214 * IRQS_OFF - interrupts were disabled 103 * IRQS_OFF - interrupts were disabled
215 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags 104 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
216 * NEED_RESCED - reschedule is requested 105 * NEED_RESCHED - reschedule is requested
217 * HARDIRQ - inside an interrupt handler 106 * HARDIRQ - inside an interrupt handler
218 * SOFTIRQ - inside a softirq handler 107 * SOFTIRQ - inside a softirq handler
219 */ 108 */
@@ -236,9 +125,6 @@ struct trace_array_cpu {
236 atomic_t disabled; 125 atomic_t disabled;
237 void *buffer_page; /* ring buffer spare */ 126 void *buffer_page; /* ring buffer spare */
238 127
239 /* these fields get copied into max-trace: */
240 unsigned long trace_idx;
241 unsigned long overrun;
242 unsigned long saved_latency; 128 unsigned long saved_latency;
243 unsigned long critical_start; 129 unsigned long critical_start;
244 unsigned long critical_end; 130 unsigned long critical_end;
@@ -246,6 +132,7 @@ struct trace_array_cpu {
246 unsigned long nice; 132 unsigned long nice;
247 unsigned long policy; 133 unsigned long policy;
248 unsigned long rt_priority; 134 unsigned long rt_priority;
135 unsigned long skipped_entries;
249 cycle_t preempt_timestamp; 136 cycle_t preempt_timestamp;
250 pid_t pid; 137 pid_t pid;
251 uid_t uid; 138 uid_t uid;
@@ -301,28 +188,15 @@ extern void __ftrace_bad_type(void);
301 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ 188 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
302 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ 189 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
303 IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ 190 IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
304 IF_ASSIGN(var, ent, struct special_entry, 0); \
305 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ 191 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
306 TRACE_MMIO_RW); \ 192 TRACE_MMIO_RW); \
307 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ 193 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
308 TRACE_MMIO_MAP); \ 194 TRACE_MMIO_MAP); \
309 IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
310 IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
311 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ 195 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
312 IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ 196 IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \
313 TRACE_GRAPH_ENT); \ 197 TRACE_GRAPH_ENT); \
314 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ 198 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
315 TRACE_GRAPH_RET); \ 199 TRACE_GRAPH_RET); \
316 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
317 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
318 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
319 TRACE_KMEM_ALLOC); \
320 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
321 TRACE_KMEM_FREE); \
322 IF_ASSIGN(var, ent, struct syscall_trace_enter, \
323 TRACE_SYSCALL_ENTER); \
324 IF_ASSIGN(var, ent, struct syscall_trace_exit, \
325 TRACE_SYSCALL_EXIT); \
326 __ftrace_bad_type(); \ 200 __ftrace_bad_type(); \
327 } while (0) 201 } while (0)
328 202
@@ -360,6 +234,7 @@ struct tracer_flags {
360 * @pipe_open: called when the trace_pipe file is opened 234 * @pipe_open: called when the trace_pipe file is opened
361 * @wait_pipe: override how the user waits for traces on trace_pipe 235 * @wait_pipe: override how the user waits for traces on trace_pipe
362 * @close: called when the trace file is released 236 * @close: called when the trace file is released
237 * @pipe_close: called when the trace_pipe file is released
363 * @read: override the default read callback on trace_pipe 238 * @read: override the default read callback on trace_pipe
364 * @splice_read: override the default splice_read callback on trace_pipe 239 * @splice_read: override the default splice_read callback on trace_pipe
365 * @selftest: selftest to run on boot (see trace_selftest.c) 240 * @selftest: selftest to run on boot (see trace_selftest.c)
@@ -378,6 +253,7 @@ struct tracer {
378 void (*pipe_open)(struct trace_iterator *iter); 253 void (*pipe_open)(struct trace_iterator *iter);
379 void (*wait_pipe)(struct trace_iterator *iter); 254 void (*wait_pipe)(struct trace_iterator *iter);
380 void (*close)(struct trace_iterator *iter); 255 void (*close)(struct trace_iterator *iter);
256 void (*pipe_close)(struct trace_iterator *iter);
381 ssize_t (*read)(struct trace_iterator *iter, 257 ssize_t (*read)(struct trace_iterator *iter,
382 struct file *filp, char __user *ubuf, 258 struct file *filp, char __user *ubuf,
383 size_t cnt, loff_t *ppos); 259 size_t cnt, loff_t *ppos);
@@ -398,7 +274,7 @@ struct tracer {
398 struct tracer *next; 274 struct tracer *next;
399 int print_max; 275 int print_max;
400 struct tracer_flags *flags; 276 struct tracer_flags *flags;
401 struct tracer_stat *stats; 277 int use_max_tr;
402}; 278};
403 279
404 280
@@ -419,16 +295,16 @@ struct dentry *trace_create_file(const char *name,
419 const struct file_operations *fops); 295 const struct file_operations *fops);
420 296
421struct dentry *tracing_init_dentry(void); 297struct dentry *tracing_init_dentry(void);
422void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
423 298
424struct ring_buffer_event; 299struct ring_buffer_event;
425 300
426struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, 301struct ring_buffer_event *
427 int type, 302trace_buffer_lock_reserve(struct ring_buffer *buffer,
428 unsigned long len, 303 int type,
429 unsigned long flags, 304 unsigned long len,
430 int pc); 305 unsigned long flags,
431void trace_buffer_unlock_commit(struct trace_array *tr, 306 int pc);
307void trace_buffer_unlock_commit(struct ring_buffer *buffer,
432 struct ring_buffer_event *event, 308 struct ring_buffer_event *event,
433 unsigned long flags, int pc); 309 unsigned long flags, int pc);
434 310
@@ -438,9 +314,13 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
438struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 314struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
439 int *ent_cpu, u64 *ent_ts); 315 int *ent_cpu, u64 *ent_ts);
440 316
441void tracing_generic_entry_update(struct trace_entry *entry, 317int trace_empty(struct trace_iterator *iter);
442 unsigned long flags, 318
443 int pc); 319void *trace_find_next_entry_inc(struct trace_iterator *iter);
320
321void trace_init_global_iter(struct trace_iterator *iter);
322
323void tracing_iter_reset(struct trace_iterator *iter, int cpu);
444 324
445void default_wait_pipe(struct trace_iterator *iter); 325void default_wait_pipe(struct trace_iterator *iter);
446void poll_wait_pipe(struct trace_iterator *iter); 326void poll_wait_pipe(struct trace_iterator *iter);
@@ -459,18 +339,21 @@ void tracing_sched_wakeup_trace(struct trace_array *tr,
459 struct task_struct *wakee, 339 struct task_struct *wakee,
460 struct task_struct *cur, 340 struct task_struct *cur,
461 unsigned long flags, int pc); 341 unsigned long flags, int pc);
462void trace_special(struct trace_array *tr,
463 struct trace_array_cpu *data,
464 unsigned long arg1,
465 unsigned long arg2,
466 unsigned long arg3, int pc);
467void trace_function(struct trace_array *tr, 342void trace_function(struct trace_array *tr,
468 unsigned long ip, 343 unsigned long ip,
469 unsigned long parent_ip, 344 unsigned long parent_ip,
470 unsigned long flags, int pc); 345 unsigned long flags, int pc);
346void trace_graph_function(struct trace_array *tr,
347 unsigned long ip,
348 unsigned long parent_ip,
349 unsigned long flags, int pc);
350void trace_default_header(struct seq_file *m);
351void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
352int trace_empty(struct trace_iterator *iter);
471 353
472void trace_graph_return(struct ftrace_graph_ret *trace); 354void trace_graph_return(struct ftrace_graph_ret *trace);
473int trace_graph_entry(struct ftrace_graph_ent *trace); 355int trace_graph_entry(struct ftrace_graph_ent *trace);
356void set_graph_array(struct trace_array *tr);
474 357
475void tracing_start_cmdline_record(void); 358void tracing_start_cmdline_record(void);
476void tracing_stop_cmdline_record(void); 359void tracing_stop_cmdline_record(void);
@@ -479,35 +362,56 @@ void tracing_stop_sched_switch_record(void);
479void tracing_start_sched_switch_record(void); 362void tracing_start_sched_switch_record(void);
480int register_tracer(struct tracer *type); 363int register_tracer(struct tracer *type);
481void unregister_tracer(struct tracer *type); 364void unregister_tracer(struct tracer *type);
365int is_tracing_stopped(void);
366enum trace_file_type {
367 TRACE_FILE_LAT_FMT = 1,
368 TRACE_FILE_ANNOTATE = 2,
369};
370
371extern cpumask_var_t __read_mostly tracing_buffer_mask;
372
373#define for_each_tracing_cpu(cpu) \
374 for_each_cpu(cpu, tracing_buffer_mask)
482 375
483extern unsigned long nsecs_to_usecs(unsigned long nsecs); 376extern unsigned long nsecs_to_usecs(unsigned long nsecs);
484 377
485extern unsigned long tracing_max_latency;
486extern unsigned long tracing_thresh; 378extern unsigned long tracing_thresh;
487 379
380#ifdef CONFIG_TRACER_MAX_TRACE
381extern unsigned long tracing_max_latency;
382
488void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); 383void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
489void update_max_tr_single(struct trace_array *tr, 384void update_max_tr_single(struct trace_array *tr,
490 struct task_struct *tsk, int cpu); 385 struct task_struct *tsk, int cpu);
386#endif /* CONFIG_TRACER_MAX_TRACE */
491 387
492void __trace_stack(struct trace_array *tr, 388#ifdef CONFIG_STACKTRACE
493 unsigned long flags, 389void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
494 int skip, int pc); 390 int skip, int pc);
495 391
496extern cycle_t ftrace_now(int cpu); 392void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
393 int pc);
497 394
498#ifdef CONFIG_CONTEXT_SWITCH_TRACER 395void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
499typedef void 396 int pc);
500(*tracer_switch_func_t)(void *private, 397#else
501 void *__rq, 398static inline void ftrace_trace_stack(struct ring_buffer *buffer,
502 struct task_struct *prev, 399 unsigned long flags, int skip, int pc)
503 struct task_struct *next); 400{
504 401}
505struct tracer_switch_ops { 402
506 tracer_switch_func_t func; 403static inline void ftrace_trace_userstack(struct ring_buffer *buffer,
507 void *private; 404 unsigned long flags, int pc)
508 struct tracer_switch_ops *next; 405{
509}; 406}
510#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 407
408static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
409 int skip, int pc)
410{
411}
412#endif /* CONFIG_STACKTRACE */
413
414extern cycle_t ftrace_now(int cpu);
511 415
512extern void trace_find_cmdline(int pid, char comm[]); 416extern void trace_find_cmdline(int pid, char comm[]);
513 417
@@ -517,6 +421,10 @@ extern unsigned long ftrace_update_tot_cnt;
517extern int DYN_FTRACE_TEST_NAME(void); 421extern int DYN_FTRACE_TEST_NAME(void);
518#endif 422#endif
519 423
424extern int ring_buffer_expanded;
425extern bool tracing_selftest_disabled;
426DECLARE_PER_CPU(int, ftrace_cpu_disabled);
427
520#ifdef CONFIG_FTRACE_STARTUP_TEST 428#ifdef CONFIG_FTRACE_STARTUP_TEST
521extern int trace_selftest_startup_function(struct tracer *trace, 429extern int trace_selftest_startup_function(struct tracer *trace,
522 struct trace_array *tr); 430 struct trace_array *tr);
@@ -534,12 +442,8 @@ extern int trace_selftest_startup_nop(struct tracer *trace,
534 struct trace_array *tr); 442 struct trace_array *tr);
535extern int trace_selftest_startup_sched_switch(struct tracer *trace, 443extern int trace_selftest_startup_sched_switch(struct tracer *trace,
536 struct trace_array *tr); 444 struct trace_array *tr);
537extern int trace_selftest_startup_sysprof(struct tracer *trace,
538 struct trace_array *tr);
539extern int trace_selftest_startup_branch(struct tracer *trace, 445extern int trace_selftest_startup_branch(struct tracer *trace,
540 struct trace_array *tr); 446 struct trace_array *tr);
541extern int trace_selftest_startup_hw_branches(struct tracer *trace,
542 struct trace_array *tr);
543#endif /* CONFIG_FTRACE_STARTUP_TEST */ 447#endif /* CONFIG_FTRACE_STARTUP_TEST */
544 448
545extern void *head_page(struct trace_array_cpu *data); 449extern void *head_page(struct trace_array_cpu *data);
@@ -548,18 +452,48 @@ extern int
548trace_vbprintk(unsigned long ip, const char *fmt, va_list args); 452trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
549extern int 453extern int
550trace_vprintk(unsigned long ip, const char *fmt, va_list args); 454trace_vprintk(unsigned long ip, const char *fmt, va_list args);
455extern int
456trace_array_vprintk(struct trace_array *tr,
457 unsigned long ip, const char *fmt, va_list args);
458int trace_array_printk(struct trace_array *tr,
459 unsigned long ip, const char *fmt, ...);
460void trace_printk_seq(struct trace_seq *s);
461enum print_line_t print_trace_line(struct trace_iterator *iter);
551 462
552extern unsigned long trace_flags; 463extern unsigned long trace_flags;
553 464
465extern int trace_clock_id;
466
554/* Standard output formatting function used for function return traces */ 467/* Standard output formatting function used for function return traces */
555#ifdef CONFIG_FUNCTION_GRAPH_TRACER 468#ifdef CONFIG_FUNCTION_GRAPH_TRACER
556extern enum print_line_t print_graph_function(struct trace_iterator *iter); 469
470/* Flag options */
471#define TRACE_GRAPH_PRINT_OVERRUN 0x1
472#define TRACE_GRAPH_PRINT_CPU 0x2
473#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
474#define TRACE_GRAPH_PRINT_PROC 0x8
475#define TRACE_GRAPH_PRINT_DURATION 0x10
476#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
477
478extern enum print_line_t
479print_graph_function_flags(struct trace_iterator *iter, u32 flags);
480extern void print_graph_headers_flags(struct seq_file *s, u32 flags);
557extern enum print_line_t 481extern enum print_line_t
558trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); 482trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
483extern void graph_trace_open(struct trace_iterator *iter);
484extern void graph_trace_close(struct trace_iterator *iter);
485extern int __trace_graph_entry(struct trace_array *tr,
486 struct ftrace_graph_ent *trace,
487 unsigned long flags, int pc);
488extern void __trace_graph_return(struct trace_array *tr,
489 struct ftrace_graph_ret *trace,
490 unsigned long flags, int pc);
491
559 492
560#ifdef CONFIG_DYNAMIC_FTRACE 493#ifdef CONFIG_DYNAMIC_FTRACE
561/* TODO: make this variable */ 494/* TODO: make this variable */
562#define FTRACE_GRAPH_MAX_FUNCS 32 495#define FTRACE_GRAPH_MAX_FUNCS 32
496extern int ftrace_graph_filter_enabled;
563extern int ftrace_graph_count; 497extern int ftrace_graph_count;
564extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; 498extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
565 499
@@ -567,7 +501,7 @@ static inline int ftrace_graph_addr(unsigned long addr)
567{ 501{
568 int i; 502 int i;
569 503
570 if (!ftrace_graph_count || test_tsk_trace_graph(current)) 504 if (!ftrace_graph_filter_enabled)
571 return 1; 505 return 1;
572 506
573 for (i = 0; i < ftrace_graph_count; i++) { 507 for (i = 0; i < ftrace_graph_count; i++) {
@@ -578,10 +512,6 @@ static inline int ftrace_graph_addr(unsigned long addr)
578 return 0; 512 return 0;
579} 513}
580#else 514#else
581static inline int ftrace_trace_addr(unsigned long addr)
582{
583 return 1;
584}
585static inline int ftrace_graph_addr(unsigned long addr) 515static inline int ftrace_graph_addr(unsigned long addr)
586{ 516{
587 return 1; 517 return 1;
@@ -589,21 +519,63 @@ static inline int ftrace_graph_addr(unsigned long addr)
589#endif /* CONFIG_DYNAMIC_FTRACE */ 519#endif /* CONFIG_DYNAMIC_FTRACE */
590#else /* CONFIG_FUNCTION_GRAPH_TRACER */ 520#else /* CONFIG_FUNCTION_GRAPH_TRACER */
591static inline enum print_line_t 521static inline enum print_line_t
592print_graph_function(struct trace_iterator *iter) 522print_graph_function_flags(struct trace_iterator *iter, u32 flags)
593{ 523{
594 return TRACE_TYPE_UNHANDLED; 524 return TRACE_TYPE_UNHANDLED;
595} 525}
596#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 526#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
597 527
598extern struct pid *ftrace_pid_trace; 528extern struct list_head ftrace_pids;
599 529
530#ifdef CONFIG_FUNCTION_TRACER
600static inline int ftrace_trace_task(struct task_struct *task) 531static inline int ftrace_trace_task(struct task_struct *task)
601{ 532{
602 if (!ftrace_pid_trace) 533 if (list_empty(&ftrace_pids))
603 return 1; 534 return 1;
604 535
605 return test_tsk_trace_trace(task); 536 return test_tsk_trace_trace(task);
606} 537}
538#else
539static inline int ftrace_trace_task(struct task_struct *task)
540{
541 return 1;
542}
543#endif
544
545/*
546 * struct trace_parser - servers for reading the user input separated by spaces
547 * @cont: set if the input is not complete - no final space char was found
548 * @buffer: holds the parsed user input
549 * @idx: user input length
550 * @size: buffer size
551 */
552struct trace_parser {
553 bool cont;
554 char *buffer;
555 unsigned idx;
556 unsigned size;
557};
558
559static inline bool trace_parser_loaded(struct trace_parser *parser)
560{
561 return (parser->idx != 0);
562}
563
564static inline bool trace_parser_cont(struct trace_parser *parser)
565{
566 return parser->cont;
567}
568
569static inline void trace_parser_clear(struct trace_parser *parser)
570{
571 parser->cont = false;
572 parser->idx = 0;
573}
574
575extern int trace_parser_get_init(struct trace_parser *parser, int size);
576extern void trace_parser_put(struct trace_parser *parser);
577extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
578 size_t cnt, loff_t *ppos);
607 579
608/* 580/*
609 * trace_iterator_flags is an enumeration that defines bit 581 * trace_iterator_flags is an enumeration that defines bit
@@ -622,19 +594,18 @@ enum trace_iterator_flags {
622 TRACE_ITER_BIN = 0x40, 594 TRACE_ITER_BIN = 0x40,
623 TRACE_ITER_BLOCK = 0x80, 595 TRACE_ITER_BLOCK = 0x80,
624 TRACE_ITER_STACKTRACE = 0x100, 596 TRACE_ITER_STACKTRACE = 0x100,
625 TRACE_ITER_SCHED_TREE = 0x200, 597 TRACE_ITER_PRINTK = 0x200,
626 TRACE_ITER_PRINTK = 0x400, 598 TRACE_ITER_PREEMPTONLY = 0x400,
627 TRACE_ITER_PREEMPTONLY = 0x800, 599 TRACE_ITER_BRANCH = 0x800,
628 TRACE_ITER_BRANCH = 0x1000, 600 TRACE_ITER_ANNOTATE = 0x1000,
629 TRACE_ITER_ANNOTATE = 0x2000, 601 TRACE_ITER_USERSTACKTRACE = 0x2000,
630 TRACE_ITER_USERSTACKTRACE = 0x4000, 602 TRACE_ITER_SYM_USEROBJ = 0x4000,
631 TRACE_ITER_SYM_USEROBJ = 0x8000, 603 TRACE_ITER_PRINTK_MSGONLY = 0x8000,
632 TRACE_ITER_PRINTK_MSGONLY = 0x10000, 604 TRACE_ITER_CONTEXT_INFO = 0x10000, /* Print pid/cpu/time */
633 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ 605 TRACE_ITER_LATENCY_FMT = 0x20000,
634 TRACE_ITER_LATENCY_FMT = 0x40000, 606 TRACE_ITER_SLEEP_TIME = 0x40000,
635 TRACE_ITER_GLOBAL_CLK = 0x80000, 607 TRACE_ITER_GRAPH_TIME = 0x80000,
636 TRACE_ITER_SLEEP_TIME = 0x100000, 608 TRACE_ITER_RECORD_CMD = 0x100000,
637 TRACE_ITER_GRAPH_TIME = 0x200000,
638}; 609};
639 610
640/* 611/*
@@ -646,54 +617,6 @@ enum trace_iterator_flags {
646 617
647extern struct tracer nop_trace; 618extern struct tracer nop_trace;
648 619
649/**
650 * ftrace_preempt_disable - disable preemption scheduler safe
651 *
652 * When tracing can happen inside the scheduler, there exists
653 * cases that the tracing might happen before the need_resched
654 * flag is checked. If this happens and the tracer calls
655 * preempt_enable (after a disable), a schedule might take place
656 * causing an infinite recursion.
657 *
658 * To prevent this, we read the need_resched flag before
659 * disabling preemption. When we want to enable preemption we
660 * check the flag, if it is set, then we call preempt_enable_no_resched.
661 * Otherwise, we call preempt_enable.
662 *
663 * The rational for doing the above is that if need_resched is set
664 * and we have yet to reschedule, we are either in an atomic location
665 * (where we do not need to check for scheduling) or we are inside
666 * the scheduler and do not want to resched.
667 */
668static inline int ftrace_preempt_disable(void)
669{
670 int resched;
671
672 resched = need_resched();
673 preempt_disable_notrace();
674
675 return resched;
676}
677
678/**
679 * ftrace_preempt_enable - enable preemption scheduler safe
680 * @resched: the return value from ftrace_preempt_disable
681 *
682 * This is a scheduler safe way to enable preemption and not miss
683 * any preemption checks. The disabled saved the state of preemption.
684 * If resched is set, then we are either inside an atomic or
685 * are inside the scheduler (we would have already scheduled
686 * otherwise). In this case, we do not want to call normal
687 * preempt_enable, but preempt_enable_no_resched instead.
688 */
689static inline void ftrace_preempt_enable(int resched)
690{
691 if (resched)
692 preempt_enable_no_resched_notrace();
693 else
694 preempt_enable_notrace();
695}
696
697#ifdef CONFIG_BRANCH_TRACER 620#ifdef CONFIG_BRANCH_TRACER
698extern int enable_branch_tracing(struct trace_array *tr); 621extern int enable_branch_tracing(struct trace_array *tr);
699extern void disable_branch_tracing(void); 622extern void disable_branch_tracing(void);
@@ -731,6 +654,7 @@ struct ftrace_event_field {
731 struct list_head link; 654 struct list_head link;
732 char *name; 655 char *name;
733 char *type; 656 char *type;
657 int filter_type;
734 int offset; 658 int offset;
735 int size; 659 int size;
736 int is_signed; 660 int is_signed;
@@ -746,26 +670,47 @@ struct event_subsystem {
746 struct list_head list; 670 struct list_head list;
747 const char *name; 671 const char *name;
748 struct dentry *entry; 672 struct dentry *entry;
749 void *filter; 673 struct event_filter *filter;
674 int nr_events;
750}; 675};
751 676
752struct filter_pred; 677struct filter_pred;
678struct regex;
753 679
754typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, 680typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
755 int val1, int val2); 681 int val1, int val2);
756 682
683typedef int (*regex_match_func)(char *str, struct regex *r, int len);
684
685enum regex_type {
686 MATCH_FULL = 0,
687 MATCH_FRONT_ONLY,
688 MATCH_MIDDLE_ONLY,
689 MATCH_END_ONLY,
690};
691
692struct regex {
693 char pattern[MAX_FILTER_STR_VAL];
694 int len;
695 int field_len;
696 regex_match_func match;
697};
698
757struct filter_pred { 699struct filter_pred {
758 filter_pred_fn_t fn; 700 filter_pred_fn_t fn;
759 u64 val; 701 u64 val;
760 char str_val[MAX_FILTER_STR_VAL]; 702 struct regex regex;
761 int str_len; 703 char *field_name;
762 char *field_name; 704 int offset;
763 int offset; 705 int not;
764 int not; 706 int op;
765 int op; 707 int pop_n;
766 int pop_n;
767}; 708};
768 709
710extern struct list_head ftrace_common_fields;
711
712extern enum regex_type
713filter_parse_regex(char *buff, int len, char **search, int *not);
769extern void print_event_filter(struct ftrace_event_call *call, 714extern void print_event_filter(struct ftrace_event_call *call,
770 struct trace_seq *s); 715 struct trace_seq *s);
771extern int apply_event_filter(struct ftrace_event_call *call, 716extern int apply_event_filter(struct ftrace_event_call *call,
@@ -774,13 +719,18 @@ extern int apply_subsystem_event_filter(struct event_subsystem *system,
774 char *filter_string); 719 char *filter_string);
775extern void print_subsystem_event_filter(struct event_subsystem *system, 720extern void print_subsystem_event_filter(struct event_subsystem *system,
776 struct trace_seq *s); 721 struct trace_seq *s);
722extern int filter_assign_type(const char *type);
723
724struct list_head *
725trace_get_fields(struct ftrace_event_call *event_call);
777 726
778static inline int 727static inline int
779filter_check_discard(struct ftrace_event_call *call, void *rec, 728filter_check_discard(struct ftrace_event_call *call, void *rec,
780 struct ring_buffer *buffer, 729 struct ring_buffer *buffer,
781 struct ring_buffer_event *event) 730 struct ring_buffer_event *event)
782{ 731{
783 if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { 732 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
733 !filter_match_preds(call->filter, rec)) {
784 ring_buffer_discard_commit(buffer, event); 734 ring_buffer_discard_commit(buffer, event);
785 return 1; 735 return 1;
786 } 736 }
@@ -788,46 +738,7 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
788 return 0; 738 return 0;
789} 739}
790 740
791#define DEFINE_COMPARISON_PRED(type) \ 741extern void trace_event_enable_cmd_record(bool enable);
792static int filter_pred_##type(struct filter_pred *pred, void *event, \
793 int val1, int val2) \
794{ \
795 type *addr = (type *)(event + pred->offset); \
796 type val = (type)pred->val; \
797 int match = 0; \
798 \
799 switch (pred->op) { \
800 case OP_LT: \
801 match = (*addr < val); \
802 break; \
803 case OP_LE: \
804 match = (*addr <= val); \
805 break; \
806 case OP_GT: \
807 match = (*addr > val); \
808 break; \
809 case OP_GE: \
810 match = (*addr >= val); \
811 break; \
812 default: \
813 break; \
814 } \
815 \
816 return match; \
817}
818
819#define DEFINE_EQUALITY_PRED(size) \
820static int filter_pred_##size(struct filter_pred *pred, void *event, \
821 int val1, int val2) \
822{ \
823 u##size *addr = (u##size *)(event + pred->offset); \
824 u##size val = (u##size)pred->val; \
825 int match; \
826 \
827 match = (val == *addr) ^ pred->not; \
828 \
829 return match; \
830}
831 742
832extern struct mutex event_mutex; 743extern struct mutex event_mutex;
833extern struct list_head ftrace_events; 744extern struct list_head ftrace_events;
@@ -835,11 +746,13 @@ extern struct list_head ftrace_events;
835extern const char *__start___trace_bprintk_fmt[]; 746extern const char *__start___trace_bprintk_fmt[];
836extern const char *__stop___trace_bprintk_fmt[]; 747extern const char *__stop___trace_bprintk_fmt[];
837 748
838#undef TRACE_EVENT_FORMAT 749#undef FTRACE_ENTRY
839#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 750#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
840 extern struct ftrace_event_call event_##call; 751 extern struct ftrace_event_call \
841#undef TRACE_EVENT_FORMAT_NOFILTER 752 __attribute__((__aligned__(4))) event_##call;
842#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt) 753#undef FTRACE_ENTRY_DUP
843#include "trace_event_types.h" 754#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
755 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
756#include "trace_entries.h"
844 757
845#endif /* _LINUX_KERNEL_TRACE_H */ 758#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
deleted file mode 100644
index a29ef23ffb47..000000000000
--- a/kernel/trace/trace_boot.c
+++ /dev/null
@@ -1,179 +0,0 @@
1/*
2 * ring buffer based initcalls tracer
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8#include <linux/init.h>
9#include <linux/debugfs.h>
10#include <linux/ftrace.h>
11#include <linux/kallsyms.h>
12#include <linux/time.h>
13
14#include "trace.h"
15#include "trace_output.h"
16
17static struct trace_array *boot_trace;
18static bool pre_initcalls_finished;
19
20/* Tells the boot tracer that the pre_smp_initcalls are finished.
21 * So we are ready .
22 * It doesn't enable sched events tracing however.
23 * You have to call enable_boot_trace to do so.
24 */
25void start_boot_trace(void)
26{
27 pre_initcalls_finished = true;
28}
29
30void enable_boot_trace(void)
31{
32 if (boot_trace && pre_initcalls_finished)
33 tracing_start_sched_switch_record();
34}
35
36void disable_boot_trace(void)
37{
38 if (boot_trace && pre_initcalls_finished)
39 tracing_stop_sched_switch_record();
40}
41
42static int boot_trace_init(struct trace_array *tr)
43{
44 int cpu;
45 boot_trace = tr;
46
47 if (!tr)
48 return 0;
49
50 for_each_cpu(cpu, cpu_possible_mask)
51 tracing_reset(tr, cpu);
52
53 tracing_sched_switch_assign_trace(tr);
54 return 0;
55}
56
57static enum print_line_t
58initcall_call_print_line(struct trace_iterator *iter)
59{
60 struct trace_entry *entry = iter->ent;
61 struct trace_seq *s = &iter->seq;
62 struct trace_boot_call *field;
63 struct boot_trace_call *call;
64 u64 ts;
65 unsigned long nsec_rem;
66 int ret;
67
68 trace_assign_type(field, entry);
69 call = &field->boot_call;
70 ts = iter->ts;
71 nsec_rem = do_div(ts, NSEC_PER_SEC);
72
73 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
74 (unsigned long)ts, nsec_rem, call->func, call->caller);
75
76 if (!ret)
77 return TRACE_TYPE_PARTIAL_LINE;
78 else
79 return TRACE_TYPE_HANDLED;
80}
81
82static enum print_line_t
83initcall_ret_print_line(struct trace_iterator *iter)
84{
85 struct trace_entry *entry = iter->ent;
86 struct trace_seq *s = &iter->seq;
87 struct trace_boot_ret *field;
88 struct boot_trace_ret *init_ret;
89 u64 ts;
90 unsigned long nsec_rem;
91 int ret;
92
93 trace_assign_type(field, entry);
94 init_ret = &field->boot_ret;
95 ts = iter->ts;
96 nsec_rem = do_div(ts, NSEC_PER_SEC);
97
98 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
99 "returned %d after %llu msecs\n",
100 (unsigned long) ts,
101 nsec_rem,
102 init_ret->func, init_ret->result, init_ret->duration);
103
104 if (!ret)
105 return TRACE_TYPE_PARTIAL_LINE;
106 else
107 return TRACE_TYPE_HANDLED;
108}
109
110static enum print_line_t initcall_print_line(struct trace_iterator *iter)
111{
112 struct trace_entry *entry = iter->ent;
113
114 switch (entry->type) {
115 case TRACE_BOOT_CALL:
116 return initcall_call_print_line(iter);
117 case TRACE_BOOT_RET:
118 return initcall_ret_print_line(iter);
119 default:
120 return TRACE_TYPE_UNHANDLED;
121 }
122}
123
124struct tracer boot_tracer __read_mostly =
125{
126 .name = "initcall",
127 .init = boot_trace_init,
128 .reset = tracing_reset_online_cpus,
129 .print_line = initcall_print_line,
130};
131
132void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
133{
134 struct ring_buffer_event *event;
135 struct trace_boot_call *entry;
136 struct trace_array *tr = boot_trace;
137
138 if (!tr || !pre_initcalls_finished)
139 return;
140
141 /* Get its name now since this function could
142 * disappear because it is in the .init section.
143 */
144 sprint_symbol(bt->func, (unsigned long)fn);
145 preempt_disable();
146
147 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL,
148 sizeof(*entry), 0, 0);
149 if (!event)
150 goto out;
151 entry = ring_buffer_event_data(event);
152 entry->boot_call = *bt;
153 trace_buffer_unlock_commit(tr, event, 0, 0);
154 out:
155 preempt_enable();
156}
157
158void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
159{
160 struct ring_buffer_event *event;
161 struct trace_boot_ret *entry;
162 struct trace_array *tr = boot_trace;
163
164 if (!tr || !pre_initcalls_finished)
165 return;
166
167 sprint_symbol(bt->func, (unsigned long)fn);
168 preempt_disable();
169
170 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET,
171 sizeof(*entry), 0, 0);
172 if (!event)
173 goto out;
174 entry = ring_buffer_event_data(event);
175 entry->boot_ret = *bt;
176 trace_buffer_unlock_commit(tr, event, 0, 0);
177 out:
178 preempt_enable();
179}
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 7a7a9fd249a9..8d3538b4ea5f 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -34,6 +34,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
34 struct trace_array *tr = branch_tracer; 34 struct trace_array *tr = branch_tracer;
35 struct ring_buffer_event *event; 35 struct ring_buffer_event *event;
36 struct trace_branch *entry; 36 struct trace_branch *entry;
37 struct ring_buffer *buffer;
37 unsigned long flags; 38 unsigned long flags;
38 int cpu, pc; 39 int cpu, pc;
39 const char *p; 40 const char *p;
@@ -54,7 +55,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
54 goto out; 55 goto out;
55 56
56 pc = preempt_count(); 57 pc = preempt_count();
57 event = trace_buffer_lock_reserve(tr, TRACE_BRANCH, 58 buffer = tr->buffer;
59 event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH,
58 sizeof(*entry), flags, pc); 60 sizeof(*entry), flags, pc);
59 if (!event) 61 if (!event)
60 goto out; 62 goto out;
@@ -74,8 +76,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
74 entry->line = f->line; 76 entry->line = f->line;
75 entry->correct = val == expect; 77 entry->correct = val == expect;
76 78
77 if (!filter_check_discard(call, entry, tr->buffer, event)) 79 if (!filter_check_discard(call, entry, buffer, event))
78 ring_buffer_unlock_commit(tr->buffer, event); 80 ring_buffer_unlock_commit(buffer, event);
79 81
80 out: 82 out:
81 atomic_dec(&tr->data[cpu]->disabled); 83 atomic_dec(&tr->data[cpu]->disabled);
@@ -141,7 +143,7 @@ static void branch_trace_reset(struct trace_array *tr)
141} 143}
142 144
143static enum print_line_t trace_branch_print(struct trace_iterator *iter, 145static enum print_line_t trace_branch_print(struct trace_iterator *iter,
144 int flags) 146 int flags, struct trace_event *event)
145{ 147{
146 struct trace_branch *field; 148 struct trace_branch *field;
147 149
@@ -165,9 +167,13 @@ static void branch_print_header(struct seq_file *s)
165 " |\n"); 167 " |\n");
166} 168}
167 169
170static struct trace_event_functions trace_branch_funcs = {
171 .trace = trace_branch_print,
172};
173
168static struct trace_event trace_branch_event = { 174static struct trace_event trace_branch_event = {
169 .type = TRACE_BRANCH, 175 .type = TRACE_BRANCH,
170 .trace = trace_branch_print, 176 .funcs = &trace_branch_funcs,
171}; 177};
172 178
173static struct tracer branch_trace __read_mostly = 179static struct tracer branch_trace __read_mostly =
@@ -305,8 +311,23 @@ static int annotated_branch_stat_cmp(void *p1, void *p2)
305 return -1; 311 return -1;
306 if (percent_a > percent_b) 312 if (percent_a > percent_b)
307 return 1; 313 return 1;
308 else 314
309 return 0; 315 if (a->incorrect < b->incorrect)
316 return -1;
317 if (a->incorrect > b->incorrect)
318 return 1;
319
320 /*
321 * Since the above shows worse (incorrect) cases
322 * first, we continue that by showing best (correct)
323 * cases last.
324 */
325 if (a->correct > b->correct)
326 return -1;
327 if (a->correct < b->correct)
328 return 1;
329
330 return 0;
310} 331}
311 332
312static struct tracer_stat annotated_branch_stats = { 333static struct tracer_stat annotated_branch_stats = {
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index b588fd81f7f9..685a67d55db0 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -13,6 +13,7 @@
13 * Tracer plugins will chose a default from these clocks. 13 * Tracer plugins will chose a default from these clocks.
14 */ 14 */
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/irqflags.h>
16#include <linux/hardirq.h> 17#include <linux/hardirq.h>
17#include <linux/module.h> 18#include <linux/module.h>
18#include <linux/percpu.h> 19#include <linux/percpu.h>
@@ -20,6 +21,8 @@
20#include <linux/ktime.h> 21#include <linux/ktime.h>
21#include <linux/trace_clock.h> 22#include <linux/trace_clock.h>
22 23
24#include "trace.h"
25
23/* 26/*
24 * trace_clock_local(): the simplest and least coherent tracing clock. 27 * trace_clock_local(): the simplest and least coherent tracing clock.
25 * 28 *
@@ -28,7 +31,6 @@
28 */ 31 */
29u64 notrace trace_clock_local(void) 32u64 notrace trace_clock_local(void)
30{ 33{
31 unsigned long flags;
32 u64 clock; 34 u64 clock;
33 35
34 /* 36 /*
@@ -36,9 +38,9 @@ u64 notrace trace_clock_local(void)
36 * lockless clock. It is not guaranteed to be coherent across 38 * lockless clock. It is not guaranteed to be coherent across
37 * CPUs, nor across CPU idle events. 39 * CPUs, nor across CPU idle events.
38 */ 40 */
39 raw_local_irq_save(flags); 41 preempt_disable_notrace();
40 clock = sched_clock(); 42 clock = sched_clock();
41 raw_local_irq_restore(flags); 43 preempt_enable_notrace();
42 44
43 return clock; 45 return clock;
44} 46}
@@ -53,7 +55,7 @@ u64 notrace trace_clock_local(void)
53 */ 55 */
54u64 notrace trace_clock(void) 56u64 notrace trace_clock(void)
55{ 57{
56 return cpu_clock(raw_smp_processor_id()); 58 return local_clock();
57} 59}
58 60
59 61
@@ -66,10 +68,14 @@ u64 notrace trace_clock(void)
66 * Used by plugins that need globally coherent timestamps. 68 * Used by plugins that need globally coherent timestamps.
67 */ 69 */
68 70
69static u64 prev_trace_clock_time; 71/* keep prev_time and lock in the same cacheline. */
70 72static struct {
71static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp = 73 u64 prev_time;
72 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 74 arch_spinlock_t lock;
75} trace_clock_struct ____cacheline_aligned_in_smp =
76 {
77 .lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
78 };
73 79
74u64 notrace trace_clock_global(void) 80u64 notrace trace_clock_global(void)
75{ 81{
@@ -77,7 +83,7 @@ u64 notrace trace_clock_global(void)
77 int this_cpu; 83 int this_cpu;
78 u64 now; 84 u64 now;
79 85
80 raw_local_irq_save(flags); 86 local_irq_save(flags);
81 87
82 this_cpu = raw_smp_processor_id(); 88 this_cpu = raw_smp_processor_id();
83 now = cpu_clock(this_cpu); 89 now = cpu_clock(this_cpu);
@@ -88,22 +94,22 @@ u64 notrace trace_clock_global(void)
88 if (unlikely(in_nmi())) 94 if (unlikely(in_nmi()))
89 goto out; 95 goto out;
90 96
91 __raw_spin_lock(&trace_clock_lock); 97 arch_spin_lock(&trace_clock_struct.lock);
92 98
93 /* 99 /*
94 * TODO: if this happens often then maybe we should reset 100 * TODO: if this happens often then maybe we should reset
95 * my_scd->clock to prev_trace_clock_time+1, to make sure 101 * my_scd->clock to prev_time+1, to make sure
96 * we start ticking with the local clock from now on? 102 * we start ticking with the local clock from now on?
97 */ 103 */
98 if ((s64)(now - prev_trace_clock_time) < 0) 104 if ((s64)(now - trace_clock_struct.prev_time) < 0)
99 now = prev_trace_clock_time + 1; 105 now = trace_clock_struct.prev_time + 1;
100 106
101 prev_trace_clock_time = now; 107 trace_clock_struct.prev_time = now;
102 108
103 __raw_spin_unlock(&trace_clock_lock); 109 arch_spin_unlock(&trace_clock_struct.lock);
104 110
105 out: 111 out:
106 raw_local_irq_restore(flags); 112 local_irq_restore(flags);
107 113
108 return now; 114 return now;
109} 115}
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
new file mode 100644
index 000000000000..e3dfecaf13e6
--- /dev/null
+++ b/kernel/trace/trace_entries.h
@@ -0,0 +1,276 @@
1/*
2 * This file defines the trace event structures that go into the ring
3 * buffer directly. They are created via macros so that changes for them
4 * appear in the format file. Using macros will automate this process.
5 *
6 * The macro used to create a ftrace data structure is:
7 *
8 * FTRACE_ENTRY( name, struct_name, id, structure, print )
9 *
10 * @name: the name used the event name, as well as the name of
11 * the directory that holds the format file.
12 *
13 * @struct_name: the name of the structure that is created.
14 *
15 * @id: The event identifier that is used to detect what event
16 * this is from the ring buffer.
17 *
18 * @structure: the structure layout
19 *
20 * - __field( type, item )
21 * This is equivalent to declaring
22 * type item;
23 * in the structure.
24 * - __array( type, item, size )
25 * This is equivalent to declaring
26 * type item[size];
27 * in the structure.
28 *
29 * * for structures within structures, the format of the internal
30 * structure is layed out. This allows the internal structure
31 * to be deciphered for the format file. Although these macros
32 * may become out of sync with the internal structure, they
33 * will create a compile error if it happens. Since the
34 * internel structures are just tracing helpers, this is not
35 * an issue.
36 *
37 * When an internal structure is used, it should use:
38 *
39 * __field_struct( type, item )
40 *
41 * instead of __field. This will prevent it from being shown in
42 * the output file. The fields in the structure should use.
43 *
44 * __field_desc( type, container, item )
45 * __array_desc( type, container, item, len )
46 *
47 * type, item and len are the same as __field and __array, but
48 * container is added. This is the name of the item in
49 * __field_struct that this is describing.
50 *
51 *
52 * @print: the print format shown to users in the format file.
53 */
54
55/*
56 * Function trace entry - function address and parent function addres:
57 */
58FTRACE_ENTRY(function, ftrace_entry,
59
60 TRACE_FN,
61
62 F_STRUCT(
63 __field( unsigned long, ip )
64 __field( unsigned long, parent_ip )
65 ),
66
67 F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip)
68);
69
70/* Function call entry */
71FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
72
73 TRACE_GRAPH_ENT,
74
75 F_STRUCT(
76 __field_struct( struct ftrace_graph_ent, graph_ent )
77 __field_desc( unsigned long, graph_ent, func )
78 __field_desc( int, graph_ent, depth )
79 ),
80
81 F_printk("--> %lx (%d)", __entry->func, __entry->depth)
82);
83
84/* Function return entry */
85FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
86
87 TRACE_GRAPH_RET,
88
89 F_STRUCT(
90 __field_struct( struct ftrace_graph_ret, ret )
91 __field_desc( unsigned long, ret, func )
92 __field_desc( unsigned long long, ret, calltime)
93 __field_desc( unsigned long long, ret, rettime )
94 __field_desc( unsigned long, ret, overrun )
95 __field_desc( int, ret, depth )
96 ),
97
98 F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d",
99 __entry->func, __entry->depth,
100 __entry->calltime, __entry->rettime,
101 __entry->depth)
102);
103
104/*
105 * Context switch trace entry - which task (and prio) we switched from/to:
106 *
107 * This is used for both wakeup and context switches. We only want
108 * to create one structure, but we need two outputs for it.
109 */
110#define FTRACE_CTX_FIELDS \
111 __field( unsigned int, prev_pid ) \
112 __field( unsigned char, prev_prio ) \
113 __field( unsigned char, prev_state ) \
114 __field( unsigned int, next_pid ) \
115 __field( unsigned char, next_prio ) \
116 __field( unsigned char, next_state ) \
117 __field( unsigned int, next_cpu )
118
119FTRACE_ENTRY(context_switch, ctx_switch_entry,
120
121 TRACE_CTX,
122
123 F_STRUCT(
124 FTRACE_CTX_FIELDS
125 ),
126
127 F_printk("%u:%u:%u ==> %u:%u:%u [%03u]",
128 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
129 __entry->next_pid, __entry->next_prio, __entry->next_state,
130 __entry->next_cpu
131 )
132);
133
134/*
135 * FTRACE_ENTRY_DUP only creates the format file, it will not
136 * create another structure.
137 */
138FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
139
140 TRACE_WAKE,
141
142 F_STRUCT(
143 FTRACE_CTX_FIELDS
144 ),
145
146 F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]",
147 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
148 __entry->next_pid, __entry->next_prio, __entry->next_state,
149 __entry->next_cpu
150 )
151);
152
153/*
154 * Stack-trace entry:
155 */
156
157#define FTRACE_STACK_ENTRIES 8
158
159FTRACE_ENTRY(kernel_stack, stack_entry,
160
161 TRACE_STACK,
162
163 F_STRUCT(
164 __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
165 ),
166
167 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
168 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
169 __entry->caller[0], __entry->caller[1], __entry->caller[2],
170 __entry->caller[3], __entry->caller[4], __entry->caller[5],
171 __entry->caller[6], __entry->caller[7])
172);
173
174FTRACE_ENTRY(user_stack, userstack_entry,
175
176 TRACE_USER_STACK,
177
178 F_STRUCT(
179 __field( unsigned int, tgid )
180 __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
181 ),
182
183 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
184 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
185 __entry->caller[0], __entry->caller[1], __entry->caller[2],
186 __entry->caller[3], __entry->caller[4], __entry->caller[5],
187 __entry->caller[6], __entry->caller[7])
188);
189
190/*
191 * trace_printk entry:
192 */
193FTRACE_ENTRY(bprint, bprint_entry,
194
195 TRACE_BPRINT,
196
197 F_STRUCT(
198 __field( unsigned long, ip )
199 __field( const char *, fmt )
200 __dynamic_array( u32, buf )
201 ),
202
203 F_printk("%08lx fmt:%p",
204 __entry->ip, __entry->fmt)
205);
206
207FTRACE_ENTRY(print, print_entry,
208
209 TRACE_PRINT,
210
211 F_STRUCT(
212 __field( unsigned long, ip )
213 __dynamic_array( char, buf )
214 ),
215
216 F_printk("%08lx %s",
217 __entry->ip, __entry->buf)
218);
219
220FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
221
222 TRACE_MMIO_RW,
223
224 F_STRUCT(
225 __field_struct( struct mmiotrace_rw, rw )
226 __field_desc( resource_size_t, rw, phys )
227 __field_desc( unsigned long, rw, value )
228 __field_desc( unsigned long, rw, pc )
229 __field_desc( int, rw, map_id )
230 __field_desc( unsigned char, rw, opcode )
231 __field_desc( unsigned char, rw, width )
232 ),
233
234 F_printk("%lx %lx %lx %d %x %x",
235 (unsigned long)__entry->phys, __entry->value, __entry->pc,
236 __entry->map_id, __entry->opcode, __entry->width)
237);
238
239FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
240
241 TRACE_MMIO_MAP,
242
243 F_STRUCT(
244 __field_struct( struct mmiotrace_map, map )
245 __field_desc( resource_size_t, map, phys )
246 __field_desc( unsigned long, map, virt )
247 __field_desc( unsigned long, map, len )
248 __field_desc( int, map, map_id )
249 __field_desc( unsigned char, map, opcode )
250 ),
251
252 F_printk("%lx %lx %lx %d %x",
253 (unsigned long)__entry->phys, __entry->virt, __entry->len,
254 __entry->map_id, __entry->opcode)
255);
256
257
258#define TRACE_FUNC_SIZE 30
259#define TRACE_FILE_SIZE 20
260
261FTRACE_ENTRY(branch, trace_branch,
262
263 TRACE_BRANCH,
264
265 F_STRUCT(
266 __field( unsigned int, line )
267 __array( char, func, TRACE_FUNC_SIZE+1 )
268 __array( char, file, TRACE_FILE_SIZE+1 )
269 __field( char, correct )
270 ),
271
272 F_printk("%u:%s:%s (%u)",
273 __entry->line,
274 __entry->func, __entry->file, __entry->correct)
275);
276
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
new file mode 100644
index 000000000000..19a359d5e6d5
--- /dev/null
+++ b/kernel/trace/trace_event_perf.c
@@ -0,0 +1,216 @@
1/*
2 * trace event based perf event profiling/tracing
3 *
4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
5 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
6 */
7
8#include <linux/module.h>
9#include <linux/kprobes.h>
10#include "trace.h"
11
12static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
13
14/*
15 * Force it to be aligned to unsigned long to avoid misaligned accesses
16 * suprises
17 */
18typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
19 perf_trace_t;
20
21/* Count the events in use (per event id, not per instance) */
22static int total_ref_count;
23
24static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event)
26{
27 /* No tracing, just counting, so no obvious leak */
28 if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
29 return 0;
30
31 /* Some events are ok to be traced by non-root users... */
32 if (p_event->attach_state == PERF_ATTACH_TASK) {
33 if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
34 return 0;
35 }
36
37 /*
38 * ...otherwise raw tracepoint data can be a severe data leak,
39 * only allow root to have these.
40 */
41 if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
42 return -EPERM;
43
44 return 0;
45}
46
47static int perf_trace_event_init(struct ftrace_event_call *tp_event,
48 struct perf_event *p_event)
49{
50 struct hlist_head __percpu *list;
51 int ret;
52 int cpu;
53
54 ret = perf_trace_event_perm(tp_event, p_event);
55 if (ret)
56 return ret;
57
58 p_event->tp_event = tp_event;
59 if (tp_event->perf_refcount++ > 0)
60 return 0;
61
62 ret = -ENOMEM;
63
64 list = alloc_percpu(struct hlist_head);
65 if (!list)
66 goto fail;
67
68 for_each_possible_cpu(cpu)
69 INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));
70
71 tp_event->perf_events = list;
72
73 if (!total_ref_count) {
74 char __percpu *buf;
75 int i;
76
77 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
78 buf = (char __percpu *)alloc_percpu(perf_trace_t);
79 if (!buf)
80 goto fail;
81
82 perf_trace_buf[i] = buf;
83 }
84 }
85
86 ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
87 if (ret)
88 goto fail;
89
90 total_ref_count++;
91 return 0;
92
93fail:
94 if (!total_ref_count) {
95 int i;
96
97 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
98 free_percpu(perf_trace_buf[i]);
99 perf_trace_buf[i] = NULL;
100 }
101 }
102
103 if (!--tp_event->perf_refcount) {
104 free_percpu(tp_event->perf_events);
105 tp_event->perf_events = NULL;
106 }
107
108 return ret;
109}
110
111int perf_trace_init(struct perf_event *p_event)
112{
113 struct ftrace_event_call *tp_event;
114 int event_id = p_event->attr.config;
115 int ret = -EINVAL;
116
117 mutex_lock(&event_mutex);
118 list_for_each_entry(tp_event, &ftrace_events, list) {
119 if (tp_event->event.type == event_id &&
120 tp_event->class && tp_event->class->reg &&
121 try_module_get(tp_event->mod)) {
122 ret = perf_trace_event_init(tp_event, p_event);
123 if (ret)
124 module_put(tp_event->mod);
125 break;
126 }
127 }
128 mutex_unlock(&event_mutex);
129
130 return ret;
131}
132
133int perf_trace_add(struct perf_event *p_event, int flags)
134{
135 struct ftrace_event_call *tp_event = p_event->tp_event;
136 struct hlist_head __percpu *pcpu_list;
137 struct hlist_head *list;
138
139 pcpu_list = tp_event->perf_events;
140 if (WARN_ON_ONCE(!pcpu_list))
141 return -EINVAL;
142
143 if (!(flags & PERF_EF_START))
144 p_event->hw.state = PERF_HES_STOPPED;
145
146 list = this_cpu_ptr(pcpu_list);
147 hlist_add_head_rcu(&p_event->hlist_entry, list);
148
149 return 0;
150}
151
152void perf_trace_del(struct perf_event *p_event, int flags)
153{
154 hlist_del_rcu(&p_event->hlist_entry);
155}
156
157void perf_trace_destroy(struct perf_event *p_event)
158{
159 struct ftrace_event_call *tp_event = p_event->tp_event;
160 int i;
161
162 mutex_lock(&event_mutex);
163 if (--tp_event->perf_refcount > 0)
164 goto out;
165
166 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
167
168 /*
169 * Ensure our callback won't be called anymore. The buffers
170 * will be freed after that.
171 */
172 tracepoint_synchronize_unregister();
173
174 free_percpu(tp_event->perf_events);
175 tp_event->perf_events = NULL;
176
177 if (!--total_ref_count) {
178 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
179 free_percpu(perf_trace_buf[i]);
180 perf_trace_buf[i] = NULL;
181 }
182 }
183out:
184 module_put(tp_event->mod);
185 mutex_unlock(&event_mutex);
186}
187
188__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
189 struct pt_regs *regs, int *rctxp)
190{
191 struct trace_entry *entry;
192 unsigned long flags;
193 char *raw_data;
194 int pc;
195
196 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
197
198 pc = preempt_count();
199
200 *rctxp = perf_swevent_get_recursion_context();
201 if (*rctxp < 0)
202 return NULL;
203
204 raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);
205
206 /* zero the dead bytes from align to not leak stack to user */
207 memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
208
209 entry = (struct trace_entry *)raw_data;
210 local_save_flags(flags);
211 tracing_generic_entry_update(entry, flags, pc);
212 entry->type = type;
213
214 return raw_data;
215}
216EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
deleted file mode 100644
index 5b5895afecfe..000000000000
--- a/kernel/trace/trace_event_profile.c
+++ /dev/null
@@ -1,39 +0,0 @@
1/*
2 * trace event based perf counter profiling
3 *
4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
5 *
6 */
7
8#include "trace.h"
9
10int ftrace_profile_enable(int event_id)
11{
12 struct ftrace_event_call *event;
13 int ret = -EINVAL;
14
15 mutex_lock(&event_mutex);
16 list_for_each_entry(event, &ftrace_events, list) {
17 if (event->id == event_id) {
18 ret = event->profile_enable(event);
19 break;
20 }
21 }
22 mutex_unlock(&event_mutex);
23
24 return ret;
25}
26
27void ftrace_profile_disable(int event_id)
28{
29 struct ftrace_event_call *event;
30
31 mutex_lock(&event_mutex);
32 list_for_each_entry(event, &ftrace_events, list) {
33 if (event->id == event_id) {
34 event->profile_disable(event);
35 break;
36 }
37 }
38 mutex_unlock(&event_mutex);
39}
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
deleted file mode 100644
index 5e32e375134d..000000000000
--- a/kernel/trace/trace_event_types.h
+++ /dev/null
@@ -1,175 +0,0 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM ftrace
3
4/*
5 * We cheat and use the proto type field as the ID
6 * and args as the entry type (minus 'struct')
7 */
8TRACE_EVENT_FORMAT(function, TRACE_FN, ftrace_entry, ignore,
9 TRACE_STRUCT(
10 TRACE_FIELD(unsigned long, ip, ip)
11 TRACE_FIELD(unsigned long, parent_ip, parent_ip)
12 ),
13 TP_RAW_FMT(" %lx <-- %lx")
14);
15
16TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT,
17 ftrace_graph_ent_entry, ignore,
18 TRACE_STRUCT(
19 TRACE_FIELD(unsigned long, graph_ent.func, func)
20 TRACE_FIELD(int, graph_ent.depth, depth)
21 ),
22 TP_RAW_FMT("--> %lx (%d)")
23);
24
25TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
26 ftrace_graph_ret_entry, ignore,
27 TRACE_STRUCT(
28 TRACE_FIELD(unsigned long, ret.func, func)
29 TRACE_FIELD(int, ret.depth, depth)
30 ),
31 TP_RAW_FMT("<-- %lx (%d)")
32);
33
34TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore,
35 TRACE_STRUCT(
36 TRACE_FIELD(unsigned int, prev_pid, prev_pid)
37 TRACE_FIELD(unsigned char, prev_prio, prev_prio)
38 TRACE_FIELD(unsigned char, prev_state, prev_state)
39 TRACE_FIELD(unsigned int, next_pid, next_pid)
40 TRACE_FIELD(unsigned char, next_prio, next_prio)
41 TRACE_FIELD(unsigned char, next_state, next_state)
42 TRACE_FIELD(unsigned int, next_cpu, next_cpu)
43 ),
44 TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]")
45);
46
47TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore,
48 TRACE_STRUCT(
49 TRACE_FIELD(unsigned int, prev_pid, prev_pid)
50 TRACE_FIELD(unsigned char, prev_prio, prev_prio)
51 TRACE_FIELD(unsigned char, prev_state, prev_state)
52 TRACE_FIELD(unsigned int, next_pid, next_pid)
53 TRACE_FIELD(unsigned char, next_prio, next_prio)
54 TRACE_FIELD(unsigned char, next_state, next_state)
55 TRACE_FIELD(unsigned int, next_cpu, next_cpu)
56 ),
57 TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]")
58);
59
60TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore,
61 TRACE_STRUCT(
62 TRACE_FIELD(unsigned long, arg1, arg1)
63 TRACE_FIELD(unsigned long, arg2, arg2)
64 TRACE_FIELD(unsigned long, arg3, arg3)
65 ),
66 TP_RAW_FMT("(%08lx) (%08lx) (%08lx)")
67);
68
69/*
70 * Stack-trace entry:
71 */
72
73/* #define FTRACE_STACK_ENTRIES 8 */
74
75TRACE_EVENT_FORMAT(kernel_stack, TRACE_STACK, stack_entry, ignore,
76 TRACE_STRUCT(
77 TRACE_FIELD(unsigned long, caller[0], stack0)
78 TRACE_FIELD(unsigned long, caller[1], stack1)
79 TRACE_FIELD(unsigned long, caller[2], stack2)
80 TRACE_FIELD(unsigned long, caller[3], stack3)
81 TRACE_FIELD(unsigned long, caller[4], stack4)
82 TRACE_FIELD(unsigned long, caller[5], stack5)
83 TRACE_FIELD(unsigned long, caller[6], stack6)
84 TRACE_FIELD(unsigned long, caller[7], stack7)
85 ),
86 TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
87 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n")
88);
89
90TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore,
91 TRACE_STRUCT(
92 TRACE_FIELD(unsigned long, caller[0], stack0)
93 TRACE_FIELD(unsigned long, caller[1], stack1)
94 TRACE_FIELD(unsigned long, caller[2], stack2)
95 TRACE_FIELD(unsigned long, caller[3], stack3)
96 TRACE_FIELD(unsigned long, caller[4], stack4)
97 TRACE_FIELD(unsigned long, caller[5], stack5)
98 TRACE_FIELD(unsigned long, caller[6], stack6)
99 TRACE_FIELD(unsigned long, caller[7], stack7)
100 ),
101 TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
102 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n")
103);
104
105TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore,
106 TRACE_STRUCT(
107 TRACE_FIELD(unsigned long, ip, ip)
108 TRACE_FIELD(char *, fmt, fmt)
109 TRACE_FIELD_ZERO_CHAR(buf)
110 ),
111 TP_RAW_FMT("%08lx (%d) fmt:%p %s")
112);
113
114TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore,
115 TRACE_STRUCT(
116 TRACE_FIELD(unsigned long, ip, ip)
117 TRACE_FIELD_ZERO_CHAR(buf)
118 ),
119 TP_RAW_FMT("%08lx (%d) fmt:%p %s")
120);
121
122TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore,
123 TRACE_STRUCT(
124 TRACE_FIELD(unsigned int, line, line)
125 TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func,
126 TRACE_FUNC_SIZE+1, func)
127 TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file,
128 TRACE_FUNC_SIZE+1, file)
129 TRACE_FIELD(char, correct, correct)
130 ),
131 TP_RAW_FMT("%u:%s:%s (%u)")
132);
133
134TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore,
135 TRACE_STRUCT(
136 TRACE_FIELD(u64, from, from)
137 TRACE_FIELD(u64, to, to)
138 ),
139 TP_RAW_FMT("from: %llx to: %llx")
140);
141
142TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore,
143 TRACE_STRUCT(
144 TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1)
145 TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1)
146 TRACE_FIELD(int, state_data.type, type)
147 TRACE_FIELD(int, state_data.state, state)
148 ),
149 TP_RAW_FMT("%llx->%llx type:%u state:%u")
150);
151
152TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore,
153 TRACE_STRUCT(
154 TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id)
155 TRACE_FIELD(unsigned long, call_site, call_site)
156 TRACE_FIELD(const void *, ptr, ptr)
157 TRACE_FIELD(size_t, bytes_req, bytes_req)
158 TRACE_FIELD(size_t, bytes_alloc, bytes_alloc)
159 TRACE_FIELD(gfp_t, gfp_flags, gfp_flags)
160 TRACE_FIELD(int, node, node)
161 ),
162 TP_RAW_FMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu"
163 " flags:%x node:%d")
164);
165
166TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore,
167 TRACE_STRUCT(
168 TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id)
169 TRACE_FIELD(unsigned long, call_site, call_site)
170 TRACE_FIELD(const void *, ptr, ptr)
171 ),
172 TP_RAW_FMT("type:%u call_site:%lx ptr:%p")
173);
174
175#undef TRACE_SYSTEM
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index aa08be69a1b6..35fde09b81de 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -15,18 +15,38 @@
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/slab.h>
18#include <linux/delay.h> 19#include <linux/delay.h>
19 20
21#include <asm/setup.h>
22
20#include "trace_output.h" 23#include "trace_output.h"
21 24
25#undef TRACE_SYSTEM
22#define TRACE_SYSTEM "TRACE_SYSTEM" 26#define TRACE_SYSTEM "TRACE_SYSTEM"
23 27
24DEFINE_MUTEX(event_mutex); 28DEFINE_MUTEX(event_mutex);
25 29
30DEFINE_MUTEX(event_storage_mutex);
31EXPORT_SYMBOL_GPL(event_storage_mutex);
32
33char event_storage[EVENT_STORAGE_SIZE];
34EXPORT_SYMBOL_GPL(event_storage);
35
26LIST_HEAD(ftrace_events); 36LIST_HEAD(ftrace_events);
37LIST_HEAD(ftrace_common_fields);
27 38
28int trace_define_field(struct ftrace_event_call *call, char *type, 39struct list_head *
29 char *name, int offset, int size, int is_signed) 40trace_get_fields(struct ftrace_event_call *event_call)
41{
42 if (!event_call->class->get_fields)
43 return &event_call->class->fields;
44 return event_call->class->get_fields(event_call);
45}
46
47static int __trace_define_field(struct list_head *head, const char *type,
48 const char *name, int offset, int size,
49 int is_signed, int filter_type)
30{ 50{
31 struct ftrace_event_field *field; 51 struct ftrace_event_field *field;
32 52
@@ -42,31 +62,72 @@ int trace_define_field(struct ftrace_event_call *call, char *type,
42 if (!field->type) 62 if (!field->type)
43 goto err; 63 goto err;
44 64
65 if (filter_type == FILTER_OTHER)
66 field->filter_type = filter_assign_type(type);
67 else
68 field->filter_type = filter_type;
69
45 field->offset = offset; 70 field->offset = offset;
46 field->size = size; 71 field->size = size;
47 field->is_signed = is_signed; 72 field->is_signed = is_signed;
48 list_add(&field->link, &call->fields); 73
74 list_add(&field->link, head);
49 75
50 return 0; 76 return 0;
51 77
52err: 78err:
53 if (field) { 79 if (field)
54 kfree(field->name); 80 kfree(field->name);
55 kfree(field->type);
56 }
57 kfree(field); 81 kfree(field);
58 82
59 return -ENOMEM; 83 return -ENOMEM;
60} 84}
85
86int trace_define_field(struct ftrace_event_call *call, const char *type,
87 const char *name, int offset, int size, int is_signed,
88 int filter_type)
89{
90 struct list_head *head;
91
92 if (WARN_ON(!call->class))
93 return 0;
94
95 head = trace_get_fields(call);
96 return __trace_define_field(head, type, name, offset, size,
97 is_signed, filter_type);
98}
61EXPORT_SYMBOL_GPL(trace_define_field); 99EXPORT_SYMBOL_GPL(trace_define_field);
62 100
63#ifdef CONFIG_MODULES 101#define __common_field(type, item) \
102 ret = __trace_define_field(&ftrace_common_fields, #type, \
103 "common_" #item, \
104 offsetof(typeof(ent), item), \
105 sizeof(ent.item), \
106 is_signed_type(type), FILTER_OTHER); \
107 if (ret) \
108 return ret;
109
110static int trace_define_common_fields(void)
111{
112 int ret;
113 struct trace_entry ent;
114
115 __common_field(unsigned short, type);
116 __common_field(unsigned char, flags);
117 __common_field(unsigned char, preempt_count);
118 __common_field(int, pid);
119 __common_field(int, lock_depth);
120
121 return ret;
122}
64 123
65static void trace_destroy_fields(struct ftrace_event_call *call) 124void trace_destroy_fields(struct ftrace_event_call *call)
66{ 125{
67 struct ftrace_event_field *field, *next; 126 struct ftrace_event_field *field, *next;
127 struct list_head *head;
68 128
69 list_for_each_entry_safe(field, next, &call->fields, link) { 129 head = trace_get_fields(call);
130 list_for_each_entry_safe(field, next, head, link) {
70 list_del(&field->link); 131 list_del(&field->link);
71 kfree(field->type); 132 kfree(field->type);
72 kfree(field->name); 133 kfree(field->name);
@@ -74,27 +135,102 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
74 } 135 }
75} 136}
76 137
77#endif /* CONFIG_MODULES */ 138int trace_event_raw_init(struct ftrace_event_call *call)
139{
140 int id;
141
142 id = register_ftrace_event(&call->event);
143 if (!id)
144 return -ENODEV;
145
146 return 0;
147}
148EXPORT_SYMBOL_GPL(trace_event_raw_init);
149
150int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
151{
152 switch (type) {
153 case TRACE_REG_REGISTER:
154 return tracepoint_probe_register(call->name,
155 call->class->probe,
156 call);
157 case TRACE_REG_UNREGISTER:
158 tracepoint_probe_unregister(call->name,
159 call->class->probe,
160 call);
161 return 0;
162
163#ifdef CONFIG_PERF_EVENTS
164 case TRACE_REG_PERF_REGISTER:
165 return tracepoint_probe_register(call->name,
166 call->class->perf_probe,
167 call);
168 case TRACE_REG_PERF_UNREGISTER:
169 tracepoint_probe_unregister(call->name,
170 call->class->perf_probe,
171 call);
172 return 0;
173#endif
174 }
175 return 0;
176}
177EXPORT_SYMBOL_GPL(ftrace_event_reg);
178
179void trace_event_enable_cmd_record(bool enable)
180{
181 struct ftrace_event_call *call;
182
183 mutex_lock(&event_mutex);
184 list_for_each_entry(call, &ftrace_events, list) {
185 if (!(call->flags & TRACE_EVENT_FL_ENABLED))
186 continue;
187
188 if (enable) {
189 tracing_start_cmdline_record();
190 call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
191 } else {
192 tracing_stop_cmdline_record();
193 call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
194 }
195 }
196 mutex_unlock(&event_mutex);
197}
78 198
79static void ftrace_event_enable_disable(struct ftrace_event_call *call, 199static int ftrace_event_enable_disable(struct ftrace_event_call *call,
80 int enable) 200 int enable)
81{ 201{
202 int ret = 0;
203
82 switch (enable) { 204 switch (enable) {
83 case 0: 205 case 0:
84 if (call->enabled) { 206 if (call->flags & TRACE_EVENT_FL_ENABLED) {
85 call->enabled = 0; 207 call->flags &= ~TRACE_EVENT_FL_ENABLED;
86 tracing_stop_cmdline_record(); 208 if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) {
87 call->unregfunc(); 209 tracing_stop_cmdline_record();
210 call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
211 }
212 call->class->reg(call, TRACE_REG_UNREGISTER);
88 } 213 }
89 break; 214 break;
90 case 1: 215 case 1:
91 if (!call->enabled) { 216 if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
92 call->enabled = 1; 217 if (trace_flags & TRACE_ITER_RECORD_CMD) {
93 tracing_start_cmdline_record(); 218 tracing_start_cmdline_record();
94 call->regfunc(); 219 call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
220 }
221 ret = call->class->reg(call, TRACE_REG_REGISTER);
222 if (ret) {
223 tracing_stop_cmdline_record();
224 pr_info("event trace: Could not enable event "
225 "%s\n", call->name);
226 break;
227 }
228 call->flags |= TRACE_EVENT_FL_ENABLED;
95 } 229 }
96 break; 230 break;
97 } 231 }
232
233 return ret;
98} 234}
99 235
100static void ftrace_clear_events(void) 236static void ftrace_clear_events(void)
@@ -120,15 +256,15 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
120 mutex_lock(&event_mutex); 256 mutex_lock(&event_mutex);
121 list_for_each_entry(call, &ftrace_events, list) { 257 list_for_each_entry(call, &ftrace_events, list) {
122 258
123 if (!call->name || !call->regfunc) 259 if (!call->name || !call->class || !call->class->reg)
124 continue; 260 continue;
125 261
126 if (match && 262 if (match &&
127 strcmp(match, call->name) != 0 && 263 strcmp(match, call->name) != 0 &&
128 strcmp(match, call->system) != 0) 264 strcmp(match, call->class->system) != 0)
129 continue; 265 continue;
130 266
131 if (sub && strcmp(sub, call->system) != 0) 267 if (sub && strcmp(sub, call->class->system) != 0)
132 continue; 268 continue;
133 269
134 if (event && strcmp(event, call->name) != 0) 270 if (event && strcmp(event, call->name) != 0)
@@ -198,73 +334,38 @@ static ssize_t
198ftrace_event_write(struct file *file, const char __user *ubuf, 334ftrace_event_write(struct file *file, const char __user *ubuf,
199 size_t cnt, loff_t *ppos) 335 size_t cnt, loff_t *ppos)
200{ 336{
201 size_t read = 0; 337 struct trace_parser parser;
202 int i, set = 1; 338 ssize_t read, ret;
203 ssize_t ret;
204 char *buf;
205 char ch;
206 339
207 if (!cnt || cnt < 0) 340 if (!cnt)
208 return 0; 341 return 0;
209 342
210 ret = tracing_update_buffers(); 343 ret = tracing_update_buffers();
211 if (ret < 0) 344 if (ret < 0)
212 return ret; 345 return ret;
213 346
214 ret = get_user(ch, ubuf++); 347 if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
215 if (ret)
216 return ret;
217 read++;
218 cnt--;
219
220 /* skip white space */
221 while (cnt && isspace(ch)) {
222 ret = get_user(ch, ubuf++);
223 if (ret)
224 return ret;
225 read++;
226 cnt--;
227 }
228
229 /* Only white space found? */
230 if (isspace(ch)) {
231 file->f_pos += read;
232 ret = read;
233 return ret;
234 }
235
236 buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL);
237 if (!buf)
238 return -ENOMEM; 348 return -ENOMEM;
239 349
240 if (cnt > EVENT_BUF_SIZE) 350 read = trace_get_user(&parser, ubuf, cnt, ppos);
241 cnt = EVENT_BUF_SIZE;
242 351
243 i = 0; 352 if (read >= 0 && trace_parser_loaded((&parser))) {
244 while (cnt && !isspace(ch)) { 353 int set = 1;
245 if (!i && ch == '!') 354
355 if (*parser.buffer == '!')
246 set = 0; 356 set = 0;
247 else
248 buf[i++] = ch;
249 357
250 ret = get_user(ch, ubuf++); 358 parser.buffer[parser.idx] = 0;
359
360 ret = ftrace_set_clr_event(parser.buffer + !set, set);
251 if (ret) 361 if (ret)
252 goto out_free; 362 goto out_put;
253 read++;
254 cnt--;
255 } 363 }
256 buf[i] = 0;
257
258 file->f_pos += read;
259
260 ret = ftrace_set_clr_event(buf, set);
261 if (ret)
262 goto out_free;
263 364
264 ret = read; 365 ret = read;
265 366
266 out_free: 367 out_put:
267 kfree(buf); 368 trace_parser_put(&parser);
268 369
269 return ret; 370 return ret;
270} 371}
@@ -272,78 +373,75 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
272static void * 373static void *
273t_next(struct seq_file *m, void *v, loff_t *pos) 374t_next(struct seq_file *m, void *v, loff_t *pos)
274{ 375{
275 struct list_head *list = m->private; 376 struct ftrace_event_call *call = v;
276 struct ftrace_event_call *call;
277 377
278 (*pos)++; 378 (*pos)++;
279 379
280 for (;;) { 380 list_for_each_entry_continue(call, &ftrace_events, list) {
281 if (list == &ftrace_events)
282 return NULL;
283
284 call = list_entry(list, struct ftrace_event_call, list);
285
286 /* 381 /*
287 * The ftrace subsystem is for showing formats only. 382 * The ftrace subsystem is for showing formats only.
288 * They can not be enabled or disabled via the event files. 383 * They can not be enabled or disabled via the event files.
289 */ 384 */
290 if (call->regfunc) 385 if (call->class && call->class->reg)
291 break; 386 return call;
292
293 list = list->next;
294 } 387 }
295 388
296 m->private = list->next; 389 return NULL;
297
298 return call;
299} 390}
300 391
301static void *t_start(struct seq_file *m, loff_t *pos) 392static void *t_start(struct seq_file *m, loff_t *pos)
302{ 393{
394 struct ftrace_event_call *call;
395 loff_t l;
396
303 mutex_lock(&event_mutex); 397 mutex_lock(&event_mutex);
304 if (*pos == 0) 398
305 m->private = ftrace_events.next; 399 call = list_entry(&ftrace_events, struct ftrace_event_call, list);
306 return t_next(m, NULL, pos); 400 for (l = 0; l <= *pos; ) {
401 call = t_next(m, call, &l);
402 if (!call)
403 break;
404 }
405 return call;
307} 406}
308 407
309static void * 408static void *
310s_next(struct seq_file *m, void *v, loff_t *pos) 409s_next(struct seq_file *m, void *v, loff_t *pos)
311{ 410{
312 struct list_head *list = m->private; 411 struct ftrace_event_call *call = v;
313 struct ftrace_event_call *call;
314 412
315 (*pos)++; 413 (*pos)++;
316 414
317 retry: 415 list_for_each_entry_continue(call, &ftrace_events, list) {
318 if (list == &ftrace_events) 416 if (call->flags & TRACE_EVENT_FL_ENABLED)
319 return NULL; 417 return call;
320
321 call = list_entry(list, struct ftrace_event_call, list);
322
323 if (!call->enabled) {
324 list = list->next;
325 goto retry;
326 } 418 }
327 419
328 m->private = list->next; 420 return NULL;
329
330 return call;
331} 421}
332 422
333static void *s_start(struct seq_file *m, loff_t *pos) 423static void *s_start(struct seq_file *m, loff_t *pos)
334{ 424{
425 struct ftrace_event_call *call;
426 loff_t l;
427
335 mutex_lock(&event_mutex); 428 mutex_lock(&event_mutex);
336 if (*pos == 0) 429
337 m->private = ftrace_events.next; 430 call = list_entry(&ftrace_events, struct ftrace_event_call, list);
338 return s_next(m, NULL, pos); 431 for (l = 0; l <= *pos; ) {
432 call = s_next(m, call, &l);
433 if (!call)
434 break;
435 }
436 return call;
339} 437}
340 438
341static int t_show(struct seq_file *m, void *v) 439static int t_show(struct seq_file *m, void *v)
342{ 440{
343 struct ftrace_event_call *call = v; 441 struct ftrace_event_call *call = v;
344 442
345 if (strcmp(call->system, TRACE_SYSTEM) != 0) 443 if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
346 seq_printf(m, "%s:", call->system); 444 seq_printf(m, "%s:", call->class->system);
347 seq_printf(m, "%s\n", call->name); 445 seq_printf(m, "%s\n", call->name);
348 446
349 return 0; 447 return 0;
@@ -360,7 +458,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file)
360 const struct seq_operations *seq_ops; 458 const struct seq_operations *seq_ops;
361 459
362 if ((file->f_mode & FMODE_WRITE) && 460 if ((file->f_mode & FMODE_WRITE) &&
363 !(file->f_flags & O_APPEND)) 461 (file->f_flags & O_TRUNC))
364 ftrace_clear_events(); 462 ftrace_clear_events();
365 463
366 seq_ops = inode->i_private; 464 seq_ops = inode->i_private;
@@ -374,7 +472,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
374 struct ftrace_event_call *call = filp->private_data; 472 struct ftrace_event_call *call = filp->private_data;
375 char *buf; 473 char *buf;
376 474
377 if (call->enabled) 475 if (call->flags & TRACE_EVENT_FL_ENABLED)
378 buf = "1\n"; 476 buf = "1\n";
379 else 477 else
380 buf = "0\n"; 478 buf = "0\n";
@@ -411,7 +509,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
411 case 0: 509 case 0:
412 case 1: 510 case 1:
413 mutex_lock(&event_mutex); 511 mutex_lock(&event_mutex);
414 ftrace_event_enable_disable(call, val); 512 ret = ftrace_event_enable_disable(call, val);
415 mutex_unlock(&event_mutex); 513 mutex_unlock(&event_mutex);
416 break; 514 break;
417 515
@@ -421,7 +519,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
421 519
422 *ppos += cnt; 520 *ppos += cnt;
423 521
424 return cnt; 522 return ret ? ret : cnt;
425} 523}
426 524
427static ssize_t 525static ssize_t
@@ -437,10 +535,10 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
437 535
438 mutex_lock(&event_mutex); 536 mutex_lock(&event_mutex);
439 list_for_each_entry(call, &ftrace_events, list) { 537 list_for_each_entry(call, &ftrace_events, list) {
440 if (!call->name || !call->regfunc) 538 if (!call->name || !call->class || !call->class->reg)
441 continue; 539 continue;
442 540
443 if (system && strcmp(call->system, system) != 0) 541 if (system && strcmp(call->class->system, system) != 0)
444 continue; 542 continue;
445 543
446 /* 544 /*
@@ -448,7 +546,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
448 * or if all events or cleared, or if we have 546 * or if all events or cleared, or if we have
449 * a mixture. 547 * a mixture.
450 */ 548 */
451 set |= (1 << !!call->enabled); 549 set |= (1 << !!(call->flags & TRACE_EVENT_FL_ENABLED));
452 550
453 /* 551 /*
454 * If we have a mixture, no need to look further. 552 * If we have a mixture, no need to look further.
@@ -506,74 +604,146 @@ out:
506 return ret; 604 return ret;
507} 605}
508 606
509extern char *__bad_type_size(void); 607enum {
608 FORMAT_HEADER = 1,
609 FORMAT_FIELD_SEPERATOR = 2,
610 FORMAT_PRINTFMT = 3,
611};
612
613static void *f_next(struct seq_file *m, void *v, loff_t *pos)
614{
615 struct ftrace_event_call *call = m->private;
616 struct ftrace_event_field *field;
617 struct list_head *common_head = &ftrace_common_fields;
618 struct list_head *head = trace_get_fields(call);
619
620 (*pos)++;
621
622 switch ((unsigned long)v) {
623 case FORMAT_HEADER:
624 if (unlikely(list_empty(common_head)))
625 return NULL;
626
627 field = list_entry(common_head->prev,
628 struct ftrace_event_field, link);
629 return field;
630
631 case FORMAT_FIELD_SEPERATOR:
632 if (unlikely(list_empty(head)))
633 return NULL;
634
635 field = list_entry(head->prev, struct ftrace_event_field, link);
636 return field;
510 637
511#undef FIELD 638 case FORMAT_PRINTFMT:
512#define FIELD(type, name) \ 639 /* all done */
513 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ 640 return NULL;
514 #type, "common_" #name, offsetof(typeof(field), name), \ 641 }
515 sizeof(field.name) 642
643 field = v;
644 if (field->link.prev == common_head)
645 return (void *)FORMAT_FIELD_SEPERATOR;
646 else if (field->link.prev == head)
647 return (void *)FORMAT_PRINTFMT;
516 648
517static int trace_write_header(struct trace_seq *s) 649 field = list_entry(field->link.prev, struct ftrace_event_field, link);
650
651 return field;
652}
653
654static void *f_start(struct seq_file *m, loff_t *pos)
518{ 655{
519 struct trace_entry field; 656 loff_t l = 0;
520 657 void *p;
521 /* struct trace_entry */ 658
522 return trace_seq_printf(s, 659 /* Start by showing the header */
523 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 660 if (!*pos)
524 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 661 return (void *)FORMAT_HEADER;
525 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 662
526 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 663 p = (void *)FORMAT_HEADER;
527 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 664 do {
528 "\n", 665 p = f_next(m, p, &l);
529 FIELD(unsigned short, type), 666 } while (p && l < *pos);
530 FIELD(unsigned char, flags), 667
531 FIELD(unsigned char, preempt_count), 668 return p;
532 FIELD(int, pid),
533 FIELD(int, tgid));
534} 669}
535 670
536static ssize_t 671static int f_show(struct seq_file *m, void *v)
537event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
538 loff_t *ppos)
539{ 672{
540 struct ftrace_event_call *call = filp->private_data; 673 struct ftrace_event_call *call = m->private;
541 struct trace_seq *s; 674 struct ftrace_event_field *field;
542 char *buf; 675 const char *array_descriptor;
543 int r;
544 676
545 if (*ppos) 677 switch ((unsigned long)v) {
678 case FORMAT_HEADER:
679 seq_printf(m, "name: %s\n", call->name);
680 seq_printf(m, "ID: %d\n", call->event.type);
681 seq_printf(m, "format:\n");
546 return 0; 682 return 0;
547 683
548 s = kmalloc(sizeof(*s), GFP_KERNEL); 684 case FORMAT_FIELD_SEPERATOR:
549 if (!s) 685 seq_putc(m, '\n');
550 return -ENOMEM; 686 return 0;
551 687
552 trace_seq_init(s); 688 case FORMAT_PRINTFMT:
689 seq_printf(m, "\nprint fmt: %s\n",
690 call->print_fmt);
691 return 0;
692 }
553 693
554 /* If any of the first writes fail, so will the show_format. */ 694 field = v;
555 695
556 trace_seq_printf(s, "name: %s\n", call->name); 696 /*
557 trace_seq_printf(s, "ID: %d\n", call->id); 697 * Smartly shows the array type(except dynamic array).
558 trace_seq_printf(s, "format:\n"); 698 * Normal:
559 trace_write_header(s); 699 * field:TYPE VAR
700 * If TYPE := TYPE[LEN], it is shown:
701 * field:TYPE VAR[LEN]
702 */
703 array_descriptor = strchr(field->type, '[');
560 704
561 r = call->show_format(s); 705 if (!strncmp(field->type, "__data_loc", 10))
562 if (!r) { 706 array_descriptor = NULL;
563 /*
564 * ug! The format output is bigger than a PAGE!!
565 */
566 buf = "FORMAT TOO BIG\n";
567 r = simple_read_from_buffer(ubuf, cnt, ppos,
568 buf, strlen(buf));
569 goto out;
570 }
571 707
572 r = simple_read_from_buffer(ubuf, cnt, ppos, 708 if (!array_descriptor)
573 s->buffer, s->len); 709 seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
574 out: 710 field->type, field->name, field->offset,
575 kfree(s); 711 field->size, !!field->is_signed);
576 return r; 712 else
713 seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
714 (int)(array_descriptor - field->type),
715 field->type, field->name,
716 array_descriptor, field->offset,
717 field->size, !!field->is_signed);
718
719 return 0;
720}
721
722static void f_stop(struct seq_file *m, void *p)
723{
724}
725
726static const struct seq_operations trace_format_seq_ops = {
727 .start = f_start,
728 .next = f_next,
729 .stop = f_stop,
730 .show = f_show,
731};
732
733static int trace_format_open(struct inode *inode, struct file *file)
734{
735 struct ftrace_event_call *call = inode->i_private;
736 struct seq_file *m;
737 int ret;
738
739 ret = seq_open(file, &trace_format_seq_ops);
740 if (ret < 0)
741 return ret;
742
743 m = file->private_data;
744 m->private = call;
745
746 return 0;
577} 747}
578 748
579static ssize_t 749static ssize_t
@@ -591,7 +761,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
591 return -ENOMEM; 761 return -ENOMEM;
592 762
593 trace_seq_init(s); 763 trace_seq_init(s);
594 trace_seq_printf(s, "%d\n", call->id); 764 trace_seq_printf(s, "%d\n", call->event.type);
595 765
596 r = simple_read_from_buffer(ubuf, cnt, ppos, 766 r = simple_read_from_buffer(ubuf, cnt, ppos,
597 s->buffer, s->len); 767 s->buffer, s->len);
@@ -768,39 +938,47 @@ static const struct file_operations ftrace_enable_fops = {
768 .open = tracing_open_generic, 938 .open = tracing_open_generic,
769 .read = event_enable_read, 939 .read = event_enable_read,
770 .write = event_enable_write, 940 .write = event_enable_write,
941 .llseek = default_llseek,
771}; 942};
772 943
773static const struct file_operations ftrace_event_format_fops = { 944static const struct file_operations ftrace_event_format_fops = {
774 .open = tracing_open_generic, 945 .open = trace_format_open,
775 .read = event_format_read, 946 .read = seq_read,
947 .llseek = seq_lseek,
948 .release = seq_release,
776}; 949};
777 950
778static const struct file_operations ftrace_event_id_fops = { 951static const struct file_operations ftrace_event_id_fops = {
779 .open = tracing_open_generic, 952 .open = tracing_open_generic,
780 .read = event_id_read, 953 .read = event_id_read,
954 .llseek = default_llseek,
781}; 955};
782 956
783static const struct file_operations ftrace_event_filter_fops = { 957static const struct file_operations ftrace_event_filter_fops = {
784 .open = tracing_open_generic, 958 .open = tracing_open_generic,
785 .read = event_filter_read, 959 .read = event_filter_read,
786 .write = event_filter_write, 960 .write = event_filter_write,
961 .llseek = default_llseek,
787}; 962};
788 963
789static const struct file_operations ftrace_subsystem_filter_fops = { 964static const struct file_operations ftrace_subsystem_filter_fops = {
790 .open = tracing_open_generic, 965 .open = tracing_open_generic,
791 .read = subsystem_filter_read, 966 .read = subsystem_filter_read,
792 .write = subsystem_filter_write, 967 .write = subsystem_filter_write,
968 .llseek = default_llseek,
793}; 969};
794 970
795static const struct file_operations ftrace_system_enable_fops = { 971static const struct file_operations ftrace_system_enable_fops = {
796 .open = tracing_open_generic, 972 .open = tracing_open_generic,
797 .read = system_enable_read, 973 .read = system_enable_read,
798 .write = system_enable_write, 974 .write = system_enable_write,
975 .llseek = default_llseek,
799}; 976};
800 977
801static const struct file_operations ftrace_show_header_fops = { 978static const struct file_operations ftrace_show_header_fops = {
802 .open = tracing_open_generic, 979 .open = tracing_open_generic,
803 .read = show_header, 980 .read = show_header,
981 .llseek = default_llseek,
804}; 982};
805 983
806static struct dentry *event_trace_events_dir(void) 984static struct dentry *event_trace_events_dir(void)
@@ -833,8 +1011,10 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
833 1011
834 /* First see if we did not already create this dir */ 1012 /* First see if we did not already create this dir */
835 list_for_each_entry(system, &event_subsystems, list) { 1013 list_for_each_entry(system, &event_subsystems, list) {
836 if (strcmp(system->name, name) == 0) 1014 if (strcmp(system->name, name) == 0) {
1015 system->nr_events++;
837 return system->entry; 1016 return system->entry;
1017 }
838 } 1018 }
839 1019
840 /* need to create new entry */ 1020 /* need to create new entry */
@@ -853,6 +1033,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
853 return d_events; 1033 return d_events;
854 } 1034 }
855 1035
1036 system->nr_events = 1;
856 system->name = kstrdup(name, GFP_KERNEL); 1037 system->name = kstrdup(name, GFP_KERNEL);
857 if (!system->name) { 1038 if (!system->name) {
858 debugfs_remove(system->entry); 1039 debugfs_remove(system->entry);
@@ -880,9 +1061,9 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
880 "'%s/filter' entry\n", name); 1061 "'%s/filter' entry\n", name);
881 } 1062 }
882 1063
883 entry = trace_create_file("enable", 0644, system->entry, 1064 trace_create_file("enable", 0644, system->entry,
884 (void *)system->name, 1065 (void *)system->name,
885 &ftrace_system_enable_fops); 1066 &ftrace_system_enable_fops);
886 1067
887 return system->entry; 1068 return system->entry;
888} 1069}
@@ -894,24 +1075,15 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
894 const struct file_operations *filter, 1075 const struct file_operations *filter,
895 const struct file_operations *format) 1076 const struct file_operations *format)
896{ 1077{
897 struct dentry *entry; 1078 struct list_head *head;
898 int ret; 1079 int ret;
899 1080
900 /* 1081 /*
901 * If the trace point header did not define TRACE_SYSTEM 1082 * If the trace point header did not define TRACE_SYSTEM
902 * then the system would be called "TRACE_SYSTEM". 1083 * then the system would be called "TRACE_SYSTEM".
903 */ 1084 */
904 if (strcmp(call->system, TRACE_SYSTEM) != 0) 1085 if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
905 d_events = event_subsystem_dir(call->system, d_events); 1086 d_events = event_subsystem_dir(call->class->system, d_events);
906
907 if (call->raw_init) {
908 ret = call->raw_init();
909 if (ret < 0) {
910 pr_warning("Could not initialize trace point"
911 " events/%s\n", call->name);
912 return ret;
913 }
914 }
915 1087
916 call->dir = debugfs_create_dir(call->name, d_events); 1088 call->dir = debugfs_create_dir(call->name, d_events);
917 if (!call->dir) { 1089 if (!call->dir) {
@@ -920,35 +1092,138 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
920 return -1; 1092 return -1;
921 } 1093 }
922 1094
923 if (call->regfunc) 1095 if (call->class->reg)
924 entry = trace_create_file("enable", 0644, call->dir, call, 1096 trace_create_file("enable", 0644, call->dir, call,
925 enable); 1097 enable);
926 1098
927 if (call->id) 1099#ifdef CONFIG_PERF_EVENTS
928 entry = trace_create_file("id", 0444, call->dir, call, 1100 if (call->event.type && call->class->reg)
929 id); 1101 trace_create_file("id", 0444, call->dir, call,
1102 id);
1103#endif
930 1104
931 if (call->define_fields) { 1105 /*
932 ret = call->define_fields(); 1106 * Other events may have the same class. Only update
1107 * the fields if they are not already defined.
1108 */
1109 head = trace_get_fields(call);
1110 if (list_empty(head)) {
1111 ret = call->class->define_fields(call);
933 if (ret < 0) { 1112 if (ret < 0) {
934 pr_warning("Could not initialize trace point" 1113 pr_warning("Could not initialize trace point"
935 " events/%s\n", call->name); 1114 " events/%s\n", call->name);
936 return ret; 1115 return ret;
937 } 1116 }
938 entry = trace_create_file("filter", 0644, call->dir, call,
939 filter);
940 } 1117 }
1118 trace_create_file("filter", 0644, call->dir, call,
1119 filter);
941 1120
942 /* A trace may not want to export its format */ 1121 trace_create_file("format", 0444, call->dir, call,
943 if (!call->show_format) 1122 format);
944 return 0;
945
946 entry = trace_create_file("format", 0444, call->dir, call,
947 format);
948 1123
949 return 0; 1124 return 0;
950} 1125}
951 1126
1127static int
1128__trace_add_event_call(struct ftrace_event_call *call, struct module *mod,
1129 const struct file_operations *id,
1130 const struct file_operations *enable,
1131 const struct file_operations *filter,
1132 const struct file_operations *format)
1133{
1134 struct dentry *d_events;
1135 int ret;
1136
1137 /* The linker may leave blanks */
1138 if (!call->name)
1139 return -EINVAL;
1140
1141 if (call->class->raw_init) {
1142 ret = call->class->raw_init(call);
1143 if (ret < 0) {
1144 if (ret != -ENOSYS)
1145 pr_warning("Could not initialize trace events/%s\n",
1146 call->name);
1147 return ret;
1148 }
1149 }
1150
1151 d_events = event_trace_events_dir();
1152 if (!d_events)
1153 return -ENOENT;
1154
1155 ret = event_create_dir(call, d_events, id, enable, filter, format);
1156 if (!ret)
1157 list_add(&call->list, &ftrace_events);
1158 call->mod = mod;
1159
1160 return ret;
1161}
1162
1163/* Add an additional event_call dynamically */
1164int trace_add_event_call(struct ftrace_event_call *call)
1165{
1166 int ret;
1167 mutex_lock(&event_mutex);
1168 ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
1169 &ftrace_enable_fops,
1170 &ftrace_event_filter_fops,
1171 &ftrace_event_format_fops);
1172 mutex_unlock(&event_mutex);
1173 return ret;
1174}
1175
1176static void remove_subsystem_dir(const char *name)
1177{
1178 struct event_subsystem *system;
1179
1180 if (strcmp(name, TRACE_SYSTEM) == 0)
1181 return;
1182
1183 list_for_each_entry(system, &event_subsystems, list) {
1184 if (strcmp(system->name, name) == 0) {
1185 if (!--system->nr_events) {
1186 struct event_filter *filter = system->filter;
1187
1188 debugfs_remove_recursive(system->entry);
1189 list_del(&system->list);
1190 if (filter) {
1191 kfree(filter->filter_string);
1192 kfree(filter);
1193 }
1194 kfree(system->name);
1195 kfree(system);
1196 }
1197 break;
1198 }
1199 }
1200}
1201
1202/*
1203 * Must be called under locking both of event_mutex and trace_event_mutex.
1204 */
1205static void __trace_remove_event_call(struct ftrace_event_call *call)
1206{
1207 ftrace_event_enable_disable(call, 0);
1208 if (call->event.funcs)
1209 __unregister_ftrace_event(&call->event);
1210 debugfs_remove_recursive(call->dir);
1211 list_del(&call->list);
1212 trace_destroy_fields(call);
1213 destroy_preds(call);
1214 remove_subsystem_dir(call->class->system);
1215}
1216
1217/* Remove an event_call */
1218void trace_remove_event_call(struct ftrace_event_call *call)
1219{
1220 mutex_lock(&event_mutex);
1221 down_write(&trace_event_mutex);
1222 __trace_remove_event_call(call);
1223 up_write(&trace_event_mutex);
1224 mutex_unlock(&event_mutex);
1225}
1226
952#define for_each_event(event, start, end) \ 1227#define for_each_event(event, start, end) \
953 for (event = start; \ 1228 for (event = start; \
954 (unsigned long)event < (unsigned long)end; \ 1229 (unsigned long)event < (unsigned long)end; \
@@ -1010,7 +1285,6 @@ static void trace_module_add_events(struct module *mod)
1010{ 1285{
1011 struct ftrace_module_file_ops *file_ops = NULL; 1286 struct ftrace_module_file_ops *file_ops = NULL;
1012 struct ftrace_event_call *call, *start, *end; 1287 struct ftrace_event_call *call, *start, *end;
1013 struct dentry *d_events;
1014 1288
1015 start = mod->trace_events; 1289 start = mod->trace_events;
1016 end = mod->trace_events + mod->num_trace_events; 1290 end = mod->trace_events + mod->num_trace_events;
@@ -1018,29 +1292,14 @@ static void trace_module_add_events(struct module *mod)
1018 if (start == end) 1292 if (start == end)
1019 return; 1293 return;
1020 1294
1021 d_events = event_trace_events_dir(); 1295 file_ops = trace_create_file_ops(mod);
1022 if (!d_events) 1296 if (!file_ops)
1023 return; 1297 return;
1024 1298
1025 for_each_event(call, start, end) { 1299 for_each_event(call, start, end) {
1026 /* The linker may leave blanks */ 1300 __trace_add_event_call(call, mod,
1027 if (!call->name) 1301 &file_ops->id, &file_ops->enable,
1028 continue; 1302 &file_ops->filter, &file_ops->format);
1029
1030 /*
1031 * This module has events, create file ops for this module
1032 * if not already done.
1033 */
1034 if (!file_ops) {
1035 file_ops = trace_create_file_ops(mod);
1036 if (!file_ops)
1037 return;
1038 }
1039 call->mod = mod;
1040 list_add(&call->list, &ftrace_events);
1041 event_create_dir(call, d_events,
1042 &file_ops->id, &file_ops->enable,
1043 &file_ops->filter, &file_ops->format);
1044 } 1303 }
1045} 1304}
1046 1305
@@ -1054,13 +1313,7 @@ static void trace_module_remove_events(struct module *mod)
1054 list_for_each_entry_safe(call, p, &ftrace_events, list) { 1313 list_for_each_entry_safe(call, p, &ftrace_events, list) {
1055 if (call->mod == mod) { 1314 if (call->mod == mod) {
1056 found = true; 1315 found = true;
1057 ftrace_event_enable_disable(call, 0); 1316 __trace_remove_event_call(call);
1058 if (call->event)
1059 __unregister_ftrace_event(call->event);
1060 debugfs_remove_recursive(call->dir);
1061 list_del(&call->list);
1062 trace_destroy_fields(call);
1063 destroy_preds(call);
1064 } 1317 }
1065 } 1318 }
1066 1319
@@ -1109,7 +1362,7 @@ static int trace_module_notify(struct notifier_block *self,
1109} 1362}
1110#endif /* CONFIG_MODULES */ 1363#endif /* CONFIG_MODULES */
1111 1364
1112struct notifier_block trace_module_nb = { 1365static struct notifier_block trace_module_nb = {
1113 .notifier_call = trace_module_notify, 1366 .notifier_call = trace_module_notify,
1114 .priority = 0, 1367 .priority = 0,
1115}; 1368};
@@ -1117,6 +1370,18 @@ struct notifier_block trace_module_nb = {
1117extern struct ftrace_event_call __start_ftrace_events[]; 1370extern struct ftrace_event_call __start_ftrace_events[];
1118extern struct ftrace_event_call __stop_ftrace_events[]; 1371extern struct ftrace_event_call __stop_ftrace_events[];
1119 1372
1373static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1374
1375static __init int setup_trace_event(char *str)
1376{
1377 strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
1378 ring_buffer_expanded = 1;
1379 tracing_selftest_disabled = 1;
1380
1381 return 1;
1382}
1383__setup("trace_event=", setup_trace_event);
1384
1120static __init int event_trace_init(void) 1385static __init int event_trace_init(void)
1121{ 1386{
1122 struct ftrace_event_call *call; 1387 struct ftrace_event_call *call;
@@ -1124,6 +1389,8 @@ static __init int event_trace_init(void)
1124 struct dentry *entry; 1389 struct dentry *entry;
1125 struct dentry *d_events; 1390 struct dentry *d_events;
1126 int ret; 1391 int ret;
1392 char *buf = bootup_event_buf;
1393 char *token;
1127 1394
1128 d_tracer = tracing_init_dentry(); 1395 d_tracer = tracing_init_dentry();
1129 if (!d_tracer) 1396 if (!d_tracer)
@@ -1159,14 +1426,27 @@ static __init int event_trace_init(void)
1159 trace_create_file("enable", 0644, d_events, 1426 trace_create_file("enable", 0644, d_events,
1160 NULL, &ftrace_system_enable_fops); 1427 NULL, &ftrace_system_enable_fops);
1161 1428
1429 if (trace_define_common_fields())
1430 pr_warning("tracing: Failed to allocate common fields");
1431
1162 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { 1432 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1163 /* The linker may leave blanks */ 1433 __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
1164 if (!call->name) 1434 &ftrace_enable_fops,
1435 &ftrace_event_filter_fops,
1436 &ftrace_event_format_fops);
1437 }
1438
1439 while (true) {
1440 token = strsep(&buf, ",");
1441
1442 if (!token)
1443 break;
1444 if (!*token)
1165 continue; 1445 continue;
1166 list_add(&call->list, &ftrace_events); 1446
1167 event_create_dir(call, d_events, &ftrace_event_id_fops, 1447 ret = ftrace_set_clr_event(token, 1);
1168 &ftrace_enable_fops, &ftrace_event_filter_fops, 1448 if (ret)
1169 &ftrace_event_format_fops); 1449 pr_warning("Failed to enable trace event: %s\n", token);
1170 } 1450 }
1171 1451
1172 ret = register_module_notifier(&trace_module_nb); 1452 ret = register_module_notifier(&trace_module_nb);
@@ -1241,17 +1521,29 @@ static __init void event_trace_self_tests(void)
1241 1521
1242 list_for_each_entry(call, &ftrace_events, list) { 1522 list_for_each_entry(call, &ftrace_events, list) {
1243 1523
1244 /* Only test those that have a regfunc */ 1524 /* Only test those that have a probe */
1245 if (!call->regfunc) 1525 if (!call->class || !call->class->probe)
1246 continue; 1526 continue;
1247 1527
1528/*
1529 * Testing syscall events here is pretty useless, but
1530 * we still do it if configured. But this is time consuming.
1531 * What we really need is a user thread to perform the
1532 * syscalls as we test.
1533 */
1534#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
1535 if (call->class->system &&
1536 strcmp(call->class->system, "syscalls") == 0)
1537 continue;
1538#endif
1539
1248 pr_info("Testing event %s: ", call->name); 1540 pr_info("Testing event %s: ", call->name);
1249 1541
1250 /* 1542 /*
1251 * If an event is already enabled, someone is using 1543 * If an event is already enabled, someone is using
1252 * it and the self test should not be on. 1544 * it and the self test should not be on.
1253 */ 1545 */
1254 if (call->enabled) { 1546 if (call->flags & TRACE_EVENT_FL_ENABLED) {
1255 pr_warning("Enabled event during self test!\n"); 1547 pr_warning("Enabled event during self test!\n");
1256 WARN_ON_ONCE(1); 1548 WARN_ON_ONCE(1);
1257 continue; 1549 continue;
@@ -1318,30 +1610,31 @@ static __init void event_trace_self_tests(void)
1318 1610
1319#ifdef CONFIG_FUNCTION_TRACER 1611#ifdef CONFIG_FUNCTION_TRACER
1320 1612
1321static DEFINE_PER_CPU(atomic_t, test_event_disable); 1613static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
1322 1614
1323static void 1615static void
1324function_test_events_call(unsigned long ip, unsigned long parent_ip) 1616function_test_events_call(unsigned long ip, unsigned long parent_ip)
1325{ 1617{
1326 struct ring_buffer_event *event; 1618 struct ring_buffer_event *event;
1619 struct ring_buffer *buffer;
1327 struct ftrace_entry *entry; 1620 struct ftrace_entry *entry;
1328 unsigned long flags; 1621 unsigned long flags;
1329 long disabled; 1622 long disabled;
1330 int resched;
1331 int cpu; 1623 int cpu;
1332 int pc; 1624 int pc;
1333 1625
1334 pc = preempt_count(); 1626 pc = preempt_count();
1335 resched = ftrace_preempt_disable(); 1627 preempt_disable_notrace();
1336 cpu = raw_smp_processor_id(); 1628 cpu = raw_smp_processor_id();
1337 disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu)); 1629 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1338 1630
1339 if (disabled != 1) 1631 if (disabled != 1)
1340 goto out; 1632 goto out;
1341 1633
1342 local_save_flags(flags); 1634 local_save_flags(flags);
1343 1635
1344 event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry), 1636 event = trace_current_buffer_lock_reserve(&buffer,
1637 TRACE_FN, sizeof(*entry),
1345 flags, pc); 1638 flags, pc);
1346 if (!event) 1639 if (!event)
1347 goto out; 1640 goto out;
@@ -1349,11 +1642,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1349 entry->ip = ip; 1642 entry->ip = ip;
1350 entry->parent_ip = parent_ip; 1643 entry->parent_ip = parent_ip;
1351 1644
1352 trace_nowake_buffer_unlock_commit(event, flags, pc); 1645 trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
1353 1646
1354 out: 1647 out:
1355 atomic_dec(&per_cpu(test_event_disable, cpu)); 1648 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1356 ftrace_preempt_enable(resched); 1649 preempt_enable_notrace();
1357} 1650}
1358 1651
1359static struct ftrace_ops trace_ops __initdata = 1652static struct ftrace_ops trace_ops __initdata =
@@ -1376,10 +1669,10 @@ static __init void event_trace_self_test_with_function(void)
1376 1669
1377static __init int event_trace_self_tests_init(void) 1670static __init int event_trace_self_tests_init(void)
1378{ 1671{
1379 1672 if (!tracing_selftest_disabled) {
1380 event_trace_self_tests(); 1673 event_trace_self_tests();
1381 1674 event_trace_self_test_with_function();
1382 event_trace_self_test_with_function(); 1675 }
1383 1676
1384 return 0; 1677 return 0;
1385} 1678}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 936c621bbf46..36d40104b17f 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -18,11 +18,11 @@
18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> 18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
19 */ 19 */
20 20
21#include <linux/debugfs.h>
22#include <linux/uaccess.h>
23#include <linux/module.h> 21#include <linux/module.h>
24#include <linux/ctype.h> 22#include <linux/ctype.h>
25#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/perf_event.h>
25#include <linux/slab.h>
26 26
27#include "trace.h" 27#include "trace.h"
28#include "trace_output.h" 28#include "trace_output.h"
@@ -31,6 +31,7 @@ enum filter_op_ids
31{ 31{
32 OP_OR, 32 OP_OR,
33 OP_AND, 33 OP_AND,
34 OP_GLOB,
34 OP_NE, 35 OP_NE,
35 OP_EQ, 36 OP_EQ,
36 OP_LT, 37 OP_LT,
@@ -48,16 +49,17 @@ struct filter_op {
48}; 49};
49 50
50static struct filter_op filter_ops[] = { 51static struct filter_op filter_ops[] = {
51 { OP_OR, "||", 1 }, 52 { OP_OR, "||", 1 },
52 { OP_AND, "&&", 2 }, 53 { OP_AND, "&&", 2 },
53 { OP_NE, "!=", 4 }, 54 { OP_GLOB, "~", 4 },
54 { OP_EQ, "==", 4 }, 55 { OP_NE, "!=", 4 },
55 { OP_LT, "<", 5 }, 56 { OP_EQ, "==", 4 },
56 { OP_LE, "<=", 5 }, 57 { OP_LT, "<", 5 },
57 { OP_GT, ">", 5 }, 58 { OP_LE, "<=", 5 },
58 { OP_GE, ">=", 5 }, 59 { OP_GT, ">", 5 },
59 { OP_NONE, "OP_NONE", 0 }, 60 { OP_GE, ">=", 5 },
60 { OP_OPEN_PAREN, "(", 0 }, 61 { OP_NONE, "OP_NONE", 0 },
62 { OP_OPEN_PAREN, "(", 0 },
61}; 63};
62 64
63enum { 65enum {
@@ -121,6 +123,47 @@ struct filter_parse_state {
121 } operand; 123 } operand;
122}; 124};
123 125
126#define DEFINE_COMPARISON_PRED(type) \
127static int filter_pred_##type(struct filter_pred *pred, void *event, \
128 int val1, int val2) \
129{ \
130 type *addr = (type *)(event + pred->offset); \
131 type val = (type)pred->val; \
132 int match = 0; \
133 \
134 switch (pred->op) { \
135 case OP_LT: \
136 match = (*addr < val); \
137 break; \
138 case OP_LE: \
139 match = (*addr <= val); \
140 break; \
141 case OP_GT: \
142 match = (*addr > val); \
143 break; \
144 case OP_GE: \
145 match = (*addr >= val); \
146 break; \
147 default: \
148 break; \
149 } \
150 \
151 return match; \
152}
153
154#define DEFINE_EQUALITY_PRED(size) \
155static int filter_pred_##size(struct filter_pred *pred, void *event, \
156 int val1, int val2) \
157{ \
158 u##size *addr = (u##size *)(event + pred->offset); \
159 u##size val = (u##size)pred->val; \
160 int match; \
161 \
162 match = (val == *addr) ^ pred->not; \
163 \
164 return match; \
165}
166
124DEFINE_COMPARISON_PRED(s64); 167DEFINE_COMPARISON_PRED(s64);
125DEFINE_COMPARISON_PRED(u64); 168DEFINE_COMPARISON_PRED(u64);
126DEFINE_COMPARISON_PRED(s32); 169DEFINE_COMPARISON_PRED(s32);
@@ -156,9 +199,24 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
156 char *addr = (char *)(event + pred->offset); 199 char *addr = (char *)(event + pred->offset);
157 int cmp, match; 200 int cmp, match;
158 201
159 cmp = strncmp(addr, pred->str_val, pred->str_len); 202 cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len);
160 203
161 match = (!cmp) ^ pred->not; 204 match = cmp ^ pred->not;
205
206 return match;
207}
208
209/* Filter predicate for char * pointers */
210static int filter_pred_pchar(struct filter_pred *pred, void *event,
211 int val1, int val2)
212{
213 char **addr = (char **)(event + pred->offset);
214 int cmp, match;
215 int len = strlen(*addr) + 1; /* including tailing '\0' */
216
217 cmp = pred->regex.match(*addr, &pred->regex, len);
218
219 match = cmp ^ pred->not;
162 220
163 return match; 221 return match;
164} 222}
@@ -176,13 +234,15 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
176static int filter_pred_strloc(struct filter_pred *pred, void *event, 234static int filter_pred_strloc(struct filter_pred *pred, void *event,
177 int val1, int val2) 235 int val1, int val2)
178{ 236{
179 unsigned short str_loc = *(unsigned short *)(event + pred->offset); 237 u32 str_item = *(u32 *)(event + pred->offset);
238 int str_loc = str_item & 0xffff;
239 int str_len = str_item >> 16;
180 char *addr = (char *)(event + str_loc); 240 char *addr = (char *)(event + str_loc);
181 int cmp, match; 241 int cmp, match;
182 242
183 cmp = strncmp(addr, pred->str_val, pred->str_len); 243 cmp = pred->regex.match(addr, &pred->regex, str_len);
184 244
185 match = (!cmp) ^ pred->not; 245 match = cmp ^ pred->not;
186 246
187 return match; 247 return match;
188} 248}
@@ -193,10 +253,133 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
193 return 0; 253 return 0;
194} 254}
195 255
256/*
257 * regex_match_foo - Basic regex callbacks
258 *
259 * @str: the string to be searched
260 * @r: the regex structure containing the pattern string
261 * @len: the length of the string to be searched (including '\0')
262 *
263 * Note:
264 * - @str might not be NULL-terminated if it's of type DYN_STRING
265 * or STATIC_STRING
266 */
267
268static int regex_match_full(char *str, struct regex *r, int len)
269{
270 if (strncmp(str, r->pattern, len) == 0)
271 return 1;
272 return 0;
273}
274
275static int regex_match_front(char *str, struct regex *r, int len)
276{
277 if (strncmp(str, r->pattern, r->len) == 0)
278 return 1;
279 return 0;
280}
281
282static int regex_match_middle(char *str, struct regex *r, int len)
283{
284 if (strnstr(str, r->pattern, len))
285 return 1;
286 return 0;
287}
288
289static int regex_match_end(char *str, struct regex *r, int len)
290{
291 int strlen = len - 1;
292
293 if (strlen >= r->len &&
294 memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
295 return 1;
296 return 0;
297}
298
299/**
300 * filter_parse_regex - parse a basic regex
301 * @buff: the raw regex
302 * @len: length of the regex
303 * @search: will point to the beginning of the string to compare
304 * @not: tell whether the match will have to be inverted
305 *
306 * This passes in a buffer containing a regex and this function will
307 * set search to point to the search part of the buffer and
308 * return the type of search it is (see enum above).
309 * This does modify buff.
310 *
311 * Returns enum type.
312 * search returns the pointer to use for comparison.
313 * not returns 1 if buff started with a '!'
314 * 0 otherwise.
315 */
316enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
317{
318 int type = MATCH_FULL;
319 int i;
320
321 if (buff[0] == '!') {
322 *not = 1;
323 buff++;
324 len--;
325 } else
326 *not = 0;
327
328 *search = buff;
329
330 for (i = 0; i < len; i++) {
331 if (buff[i] == '*') {
332 if (!i) {
333 *search = buff + 1;
334 type = MATCH_END_ONLY;
335 } else {
336 if (type == MATCH_END_ONLY)
337 type = MATCH_MIDDLE_ONLY;
338 else
339 type = MATCH_FRONT_ONLY;
340 buff[i] = 0;
341 break;
342 }
343 }
344 }
345
346 return type;
347}
348
349static void filter_build_regex(struct filter_pred *pred)
350{
351 struct regex *r = &pred->regex;
352 char *search;
353 enum regex_type type = MATCH_FULL;
354 int not = 0;
355
356 if (pred->op == OP_GLOB) {
357 type = filter_parse_regex(r->pattern, r->len, &search, &not);
358 r->len = strlen(search);
359 memmove(r->pattern, search, r->len+1);
360 }
361
362 switch (type) {
363 case MATCH_FULL:
364 r->match = regex_match_full;
365 break;
366 case MATCH_FRONT_ONLY:
367 r->match = regex_match_front;
368 break;
369 case MATCH_MIDDLE_ONLY:
370 r->match = regex_match_middle;
371 break;
372 case MATCH_END_ONLY:
373 r->match = regex_match_end;
374 break;
375 }
376
377 pred->not ^= not;
378}
379
196/* return 1 if event matches, 0 otherwise (discard) */ 380/* return 1 if event matches, 0 otherwise (discard) */
197int filter_match_preds(struct ftrace_event_call *call, void *rec) 381int filter_match_preds(struct event_filter *filter, void *rec)
198{ 382{
199 struct event_filter *filter = call->filter;
200 int match, top = 0, val1 = 0, val2 = 0; 383 int match, top = 0, val1 = 0, val2 = 0;
201 int stack[MAX_FILTER_PRED]; 384 int stack[MAX_FILTER_PRED];
202 struct filter_pred *pred; 385 struct filter_pred *pred;
@@ -293,7 +476,7 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
293 struct event_filter *filter = call->filter; 476 struct event_filter *filter = call->filter;
294 477
295 mutex_lock(&event_mutex); 478 mutex_lock(&event_mutex);
296 if (filter->filter_string) 479 if (filter && filter->filter_string)
297 trace_seq_printf(s, "%s\n", filter->filter_string); 480 trace_seq_printf(s, "%s\n", filter->filter_string);
298 else 481 else
299 trace_seq_printf(s, "none\n"); 482 trace_seq_printf(s, "none\n");
@@ -306,7 +489,7 @@ void print_subsystem_event_filter(struct event_subsystem *system,
306 struct event_filter *filter = system->filter; 489 struct event_filter *filter = system->filter;
307 490
308 mutex_lock(&event_mutex); 491 mutex_lock(&event_mutex);
309 if (filter->filter_string) 492 if (filter && filter->filter_string)
310 trace_seq_printf(s, "%s\n", filter->filter_string); 493 trace_seq_printf(s, "%s\n", filter->filter_string);
311 else 494 else
312 trace_seq_printf(s, "none\n"); 495 trace_seq_printf(s, "none\n");
@@ -314,11 +497,11 @@ void print_subsystem_event_filter(struct event_subsystem *system,
314} 497}
315 498
316static struct ftrace_event_field * 499static struct ftrace_event_field *
317find_event_field(struct ftrace_event_call *call, char *name) 500__find_event_field(struct list_head *head, char *name)
318{ 501{
319 struct ftrace_event_field *field; 502 struct ftrace_event_field *field;
320 503
321 list_for_each_entry(field, &call->fields, link) { 504 list_for_each_entry(field, head, link) {
322 if (!strcmp(field->name, name)) 505 if (!strcmp(field->name, name))
323 return field; 506 return field;
324 } 507 }
@@ -326,6 +509,20 @@ find_event_field(struct ftrace_event_call *call, char *name)
326 return NULL; 509 return NULL;
327} 510}
328 511
512static struct ftrace_event_field *
513find_event_field(struct ftrace_event_call *call, char *name)
514{
515 struct ftrace_event_field *field;
516 struct list_head *head;
517
518 field = __find_event_field(&ftrace_common_fields, name);
519 if (field)
520 return field;
521
522 head = trace_get_fields(call);
523 return __find_event_field(head, name);
524}
525
329static void filter_free_pred(struct filter_pred *pred) 526static void filter_free_pred(struct filter_pred *pred)
330{ 527{
331 if (!pred) 528 if (!pred)
@@ -339,7 +536,7 @@ static void filter_clear_pred(struct filter_pred *pred)
339{ 536{
340 kfree(pred->field_name); 537 kfree(pred->field_name);
341 pred->field_name = NULL; 538 pred->field_name = NULL;
342 pred->str_len = 0; 539 pred->regex.len = 0;
343} 540}
344 541
345static int filter_set_pred(struct filter_pred *dest, 542static int filter_set_pred(struct filter_pred *dest,
@@ -362,18 +559,20 @@ static void filter_disable_preds(struct ftrace_event_call *call)
362 struct event_filter *filter = call->filter; 559 struct event_filter *filter = call->filter;
363 int i; 560 int i;
364 561
365 call->filter_active = 0; 562 call->flags &= ~TRACE_EVENT_FL_FILTERED;
366 filter->n_preds = 0; 563 filter->n_preds = 0;
367 564
368 for (i = 0; i < MAX_FILTER_PRED; i++) 565 for (i = 0; i < MAX_FILTER_PRED; i++)
369 filter->preds[i]->fn = filter_pred_none; 566 filter->preds[i]->fn = filter_pred_none;
370} 567}
371 568
372void destroy_preds(struct ftrace_event_call *call) 569static void __free_preds(struct event_filter *filter)
373{ 570{
374 struct event_filter *filter = call->filter;
375 int i; 571 int i;
376 572
573 if (!filter)
574 return;
575
377 for (i = 0; i < MAX_FILTER_PRED; i++) { 576 for (i = 0; i < MAX_FILTER_PRED; i++) {
378 if (filter->preds[i]) 577 if (filter->preds[i])
379 filter_free_pred(filter->preds[i]); 578 filter_free_pred(filter->preds[i]);
@@ -381,20 +580,25 @@ void destroy_preds(struct ftrace_event_call *call)
381 kfree(filter->preds); 580 kfree(filter->preds);
382 kfree(filter->filter_string); 581 kfree(filter->filter_string);
383 kfree(filter); 582 kfree(filter);
583}
584
585void destroy_preds(struct ftrace_event_call *call)
586{
587 __free_preds(call->filter);
384 call->filter = NULL; 588 call->filter = NULL;
589 call->flags &= ~TRACE_EVENT_FL_FILTERED;
385} 590}
386 591
387int init_preds(struct ftrace_event_call *call) 592static struct event_filter *__alloc_preds(void)
388{ 593{
389 struct event_filter *filter; 594 struct event_filter *filter;
390 struct filter_pred *pred; 595 struct filter_pred *pred;
391 int i; 596 int i;
392 597
393 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL); 598 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
394 if (!call->filter) 599 if (!filter)
395 return -ENOMEM; 600 return ERR_PTR(-ENOMEM);
396 601
397 call->filter_active = 0;
398 filter->n_preds = 0; 602 filter->n_preds = 0;
399 603
400 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); 604 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
@@ -409,46 +613,62 @@ int init_preds(struct ftrace_event_call *call)
409 filter->preds[i] = pred; 613 filter->preds[i] = pred;
410 } 614 }
411 615
412 return 0; 616 return filter;
413 617
414oom: 618oom:
415 destroy_preds(call); 619 __free_preds(filter);
620 return ERR_PTR(-ENOMEM);
621}
622
623static int init_preds(struct ftrace_event_call *call)
624{
625 if (call->filter)
626 return 0;
627
628 call->flags &= ~TRACE_EVENT_FL_FILTERED;
629 call->filter = __alloc_preds();
630 if (IS_ERR(call->filter))
631 return PTR_ERR(call->filter);
416 632
417 return -ENOMEM; 633 return 0;
418} 634}
419EXPORT_SYMBOL_GPL(init_preds);
420 635
421static void filter_free_subsystem_preds(struct event_subsystem *system) 636static int init_subsystem_preds(struct event_subsystem *system)
422{ 637{
423 struct event_filter *filter = system->filter;
424 struct ftrace_event_call *call; 638 struct ftrace_event_call *call;
425 int i; 639 int err;
426 640
427 if (filter->n_preds) { 641 list_for_each_entry(call, &ftrace_events, list) {
428 for (i = 0; i < filter->n_preds; i++) 642 if (strcmp(call->class->system, system->name) != 0)
429 filter_free_pred(filter->preds[i]); 643 continue;
430 kfree(filter->preds); 644
431 filter->preds = NULL; 645 err = init_preds(call);
432 filter->n_preds = 0; 646 if (err)
647 return err;
433 } 648 }
434 649
650 return 0;
651}
652
653static void filter_free_subsystem_preds(struct event_subsystem *system)
654{
655 struct ftrace_event_call *call;
656
435 list_for_each_entry(call, &ftrace_events, list) { 657 list_for_each_entry(call, &ftrace_events, list) {
436 if (!call->define_fields) 658 if (strcmp(call->class->system, system->name) != 0)
437 continue; 659 continue;
438 660
439 if (!strcmp(call->system, system->name)) { 661 filter_disable_preds(call);
440 filter_disable_preds(call); 662 remove_filter_string(call->filter);
441 remove_filter_string(call->filter);
442 }
443 } 663 }
444} 664}
445 665
446static int filter_add_pred_fn(struct filter_parse_state *ps, 666static int filter_add_pred_fn(struct filter_parse_state *ps,
447 struct ftrace_event_call *call, 667 struct ftrace_event_call *call,
668 struct event_filter *filter,
448 struct filter_pred *pred, 669 struct filter_pred *pred,
449 filter_pred_fn_t fn) 670 filter_pred_fn_t fn)
450{ 671{
451 struct event_filter *filter = call->filter;
452 int idx, err; 672 int idx, err;
453 673
454 if (filter->n_preds == MAX_FILTER_PRED) { 674 if (filter->n_preds == MAX_FILTER_PRED) {
@@ -463,17 +683,11 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
463 return err; 683 return err;
464 684
465 filter->n_preds++; 685 filter->n_preds++;
466 call->filter_active = 1;
467 686
468 return 0; 687 return 0;
469} 688}
470 689
471enum { 690int filter_assign_type(const char *type)
472 FILTER_STATIC_STRING = 1,
473 FILTER_DYN_STRING
474};
475
476static int is_string_field(const char *type)
477{ 691{
478 if (strstr(type, "__data_loc") && strstr(type, "char")) 692 if (strstr(type, "__data_loc") && strstr(type, "char"))
479 return FILTER_DYN_STRING; 693 return FILTER_DYN_STRING;
@@ -481,12 +695,22 @@ static int is_string_field(const char *type)
481 if (strchr(type, '[') && strstr(type, "char")) 695 if (strchr(type, '[') && strstr(type, "char"))
482 return FILTER_STATIC_STRING; 696 return FILTER_STATIC_STRING;
483 697
484 return 0; 698 return FILTER_OTHER;
699}
700
701static bool is_string_field(struct ftrace_event_field *field)
702{
703 return field->filter_type == FILTER_DYN_STRING ||
704 field->filter_type == FILTER_STATIC_STRING ||
705 field->filter_type == FILTER_PTR_STRING;
485} 706}
486 707
487static int is_legal_op(struct ftrace_event_field *field, int op) 708static int is_legal_op(struct ftrace_event_field *field, int op)
488{ 709{
489 if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE)) 710 if (is_string_field(field) &&
711 (op != OP_EQ && op != OP_NE && op != OP_GLOB))
712 return 0;
713 if (!is_string_field(field) && op == OP_GLOB)
490 return 0; 714 return 0;
491 715
492 return 1; 716 return 1;
@@ -537,22 +761,25 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
537 761
538static int filter_add_pred(struct filter_parse_state *ps, 762static int filter_add_pred(struct filter_parse_state *ps,
539 struct ftrace_event_call *call, 763 struct ftrace_event_call *call,
540 struct filter_pred *pred) 764 struct event_filter *filter,
765 struct filter_pred *pred,
766 bool dry_run)
541{ 767{
542 struct ftrace_event_field *field; 768 struct ftrace_event_field *field;
543 filter_pred_fn_t fn; 769 filter_pred_fn_t fn;
544 unsigned long long val; 770 unsigned long long val;
545 int string_type;
546 int ret; 771 int ret;
547 772
548 pred->fn = filter_pred_none; 773 pred->fn = filter_pred_none;
549 774
550 if (pred->op == OP_AND) { 775 if (pred->op == OP_AND) {
551 pred->pop_n = 2; 776 pred->pop_n = 2;
552 return filter_add_pred_fn(ps, call, pred, filter_pred_and); 777 fn = filter_pred_and;
778 goto add_pred_fn;
553 } else if (pred->op == OP_OR) { 779 } else if (pred->op == OP_OR) {
554 pred->pop_n = 2; 780 pred->pop_n = 2;
555 return filter_add_pred_fn(ps, call, pred, filter_pred_or); 781 fn = filter_pred_or;
782 goto add_pred_fn;
556 } 783 }
557 784
558 field = find_event_field(call, pred->field_name); 785 field = find_event_field(call, pred->field_name);
@@ -568,83 +795,42 @@ static int filter_add_pred(struct filter_parse_state *ps,
568 return -EINVAL; 795 return -EINVAL;
569 } 796 }
570 797
571 string_type = is_string_field(field->type); 798 if (is_string_field(field)) {
572 if (string_type) { 799 filter_build_regex(pred);
573 if (string_type == FILTER_STATIC_STRING) 800
801 if (field->filter_type == FILTER_STATIC_STRING) {
574 fn = filter_pred_string; 802 fn = filter_pred_string;
575 else 803 pred->regex.field_len = field->size;
804 } else if (field->filter_type == FILTER_DYN_STRING)
576 fn = filter_pred_strloc; 805 fn = filter_pred_strloc;
577 pred->str_len = field->size; 806 else
578 if (pred->op == OP_NE) 807 fn = filter_pred_pchar;
579 pred->not = 1;
580 return filter_add_pred_fn(ps, call, pred, fn);
581 } else { 808 } else {
582 if (field->is_signed) 809 if (field->is_signed)
583 ret = strict_strtoll(pred->str_val, 0, &val); 810 ret = strict_strtoll(pred->regex.pattern, 0, &val);
584 else 811 else
585 ret = strict_strtoull(pred->str_val, 0, &val); 812 ret = strict_strtoull(pred->regex.pattern, 0, &val);
586 if (ret) { 813 if (ret) {
587 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); 814 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
588 return -EINVAL; 815 return -EINVAL;
589 } 816 }
590 pred->val = val; 817 pred->val = val;
591 }
592 818
593 fn = select_comparison_fn(pred->op, field->size, field->is_signed); 819 fn = select_comparison_fn(pred->op, field->size,
594 if (!fn) { 820 field->is_signed);
595 parse_error(ps, FILT_ERR_INVALID_OP, 0); 821 if (!fn) {
596 return -EINVAL; 822 parse_error(ps, FILT_ERR_INVALID_OP, 0);
823 return -EINVAL;
824 }
597 } 825 }
598 826
599 if (pred->op == OP_NE) 827 if (pred->op == OP_NE)
600 pred->not = 1; 828 pred->not = 1;
601 829
602 return filter_add_pred_fn(ps, call, pred, fn); 830add_pred_fn:
603} 831 if (!dry_run)
604 832 return filter_add_pred_fn(ps, call, filter, pred, fn);
605static int filter_add_subsystem_pred(struct filter_parse_state *ps, 833 return 0;
606 struct event_subsystem *system,
607 struct filter_pred *pred,
608 char *filter_string)
609{
610 struct event_filter *filter = system->filter;
611 struct ftrace_event_call *call;
612 int err = 0;
613
614 if (!filter->preds) {
615 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
616 GFP_KERNEL);
617
618 if (!filter->preds)
619 return -ENOMEM;
620 }
621
622 if (filter->n_preds == MAX_FILTER_PRED) {
623 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
624 return -ENOSPC;
625 }
626
627 filter->preds[filter->n_preds] = pred;
628 filter->n_preds++;
629
630 list_for_each_entry(call, &ftrace_events, list) {
631
632 if (!call->define_fields)
633 continue;
634
635 if (strcmp(call->system, system->name))
636 continue;
637
638 err = filter_add_pred(ps, call, pred);
639 if (err) {
640 filter_free_subsystem_preds(system);
641 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
642 goto out;
643 }
644 replace_filter_string(call->filter, filter_string);
645 }
646out:
647 return err;
648} 834}
649 835
650static void parse_init(struct filter_parse_state *ps, 836static void parse_init(struct filter_parse_state *ps,
@@ -844,8 +1030,9 @@ static void postfix_clear(struct filter_parse_state *ps)
844 1030
845 while (!list_empty(&ps->postfix)) { 1031 while (!list_empty(&ps->postfix)) {
846 elt = list_first_entry(&ps->postfix, struct postfix_elt, list); 1032 elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
847 kfree(elt->operand);
848 list_del(&elt->list); 1033 list_del(&elt->list);
1034 kfree(elt->operand);
1035 kfree(elt);
849 } 1036 }
850} 1037}
851 1038
@@ -955,8 +1142,8 @@ static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
955 return NULL; 1142 return NULL;
956 } 1143 }
957 1144
958 strcpy(pred->str_val, operand2); 1145 strcpy(pred->regex.pattern, operand2);
959 pred->str_len = strlen(operand2); 1146 pred->regex.len = strlen(pred->regex.pattern);
960 1147
961 pred->op = op; 1148 pred->op = op;
962 1149
@@ -1000,15 +1187,17 @@ static int check_preds(struct filter_parse_state *ps)
1000 return 0; 1187 return 0;
1001} 1188}
1002 1189
1003static int replace_preds(struct event_subsystem *system, 1190static int replace_preds(struct ftrace_event_call *call,
1004 struct ftrace_event_call *call, 1191 struct event_filter *filter,
1005 struct filter_parse_state *ps, 1192 struct filter_parse_state *ps,
1006 char *filter_string) 1193 char *filter_string,
1194 bool dry_run)
1007{ 1195{
1008 char *operand1 = NULL, *operand2 = NULL; 1196 char *operand1 = NULL, *operand2 = NULL;
1009 struct filter_pred *pred; 1197 struct filter_pred *pred;
1010 struct postfix_elt *elt; 1198 struct postfix_elt *elt;
1011 int err; 1199 int err;
1200 int n_preds = 0;
1012 1201
1013 err = check_preds(ps); 1202 err = check_preds(ps);
1014 if (err) 1203 if (err)
@@ -1027,19 +1216,14 @@ static int replace_preds(struct event_subsystem *system,
1027 continue; 1216 continue;
1028 } 1217 }
1029 1218
1219 if (n_preds++ == MAX_FILTER_PRED) {
1220 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
1221 return -ENOSPC;
1222 }
1223
1030 if (elt->op == OP_AND || elt->op == OP_OR) { 1224 if (elt->op == OP_AND || elt->op == OP_OR) {
1031 pred = create_logical_pred(elt->op); 1225 pred = create_logical_pred(elt->op);
1032 if (call) { 1226 goto add_pred;
1033 err = filter_add_pred(ps, call, pred);
1034 filter_free_pred(pred);
1035 } else
1036 err = filter_add_subsystem_pred(ps, system,
1037 pred, filter_string);
1038 if (err)
1039 return err;
1040
1041 operand1 = operand2 = NULL;
1042 continue;
1043 } 1227 }
1044 1228
1045 if (!operand1 || !operand2) { 1229 if (!operand1 || !operand2) {
@@ -1048,12 +1232,11 @@ static int replace_preds(struct event_subsystem *system,
1048 } 1232 }
1049 1233
1050 pred = create_pred(elt->op, operand1, operand2); 1234 pred = create_pred(elt->op, operand1, operand2);
1051 if (call) { 1235add_pred:
1052 err = filter_add_pred(ps, call, pred); 1236 if (!pred)
1053 filter_free_pred(pred); 1237 return -ENOMEM;
1054 } else 1238 err = filter_add_pred(ps, call, filter, pred, dry_run);
1055 err = filter_add_subsystem_pred(ps, system, pred, 1239 filter_free_pred(pred);
1056 filter_string);
1057 if (err) 1240 if (err)
1058 return err; 1241 return err;
1059 1242
@@ -1063,19 +1246,59 @@ static int replace_preds(struct event_subsystem *system,
1063 return 0; 1246 return 0;
1064} 1247}
1065 1248
1066int apply_event_filter(struct ftrace_event_call *call, char *filter_string) 1249static int replace_system_preds(struct event_subsystem *system,
1250 struct filter_parse_state *ps,
1251 char *filter_string)
1067{ 1252{
1253 struct ftrace_event_call *call;
1254 bool fail = true;
1068 int err; 1255 int err;
1069 1256
1257 list_for_each_entry(call, &ftrace_events, list) {
1258 struct event_filter *filter = call->filter;
1259
1260 if (strcmp(call->class->system, system->name) != 0)
1261 continue;
1262
1263 /* try to see if the filter can be applied */
1264 err = replace_preds(call, filter, ps, filter_string, true);
1265 if (err)
1266 continue;
1267
1268 /* really apply the filter */
1269 filter_disable_preds(call);
1270 err = replace_preds(call, filter, ps, filter_string, false);
1271 if (err)
1272 filter_disable_preds(call);
1273 else {
1274 call->flags |= TRACE_EVENT_FL_FILTERED;
1275 replace_filter_string(filter, filter_string);
1276 }
1277 fail = false;
1278 }
1279
1280 if (fail) {
1281 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
1282 return -EINVAL;
1283 }
1284 return 0;
1285}
1286
1287int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1288{
1289 int err;
1070 struct filter_parse_state *ps; 1290 struct filter_parse_state *ps;
1071 1291
1072 mutex_lock(&event_mutex); 1292 mutex_lock(&event_mutex);
1073 1293
1294 err = init_preds(call);
1295 if (err)
1296 goto out_unlock;
1297
1074 if (!strcmp(strstrip(filter_string), "0")) { 1298 if (!strcmp(strstrip(filter_string), "0")) {
1075 filter_disable_preds(call); 1299 filter_disable_preds(call);
1076 remove_filter_string(call->filter); 1300 remove_filter_string(call->filter);
1077 mutex_unlock(&event_mutex); 1301 goto out_unlock;
1078 return 0;
1079 } 1302 }
1080 1303
1081 err = -ENOMEM; 1304 err = -ENOMEM;
@@ -1093,10 +1316,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1093 goto out; 1316 goto out;
1094 } 1317 }
1095 1318
1096 err = replace_preds(NULL, call, ps, filter_string); 1319 err = replace_preds(call, call->filter, ps, filter_string, false);
1097 if (err) 1320 if (err)
1098 append_filter_err(ps, call->filter); 1321 append_filter_err(ps, call->filter);
1099 1322 else
1323 call->flags |= TRACE_EVENT_FL_FILTERED;
1100out: 1324out:
1101 filter_opstack_clear(ps); 1325 filter_opstack_clear(ps);
1102 postfix_clear(ps); 1326 postfix_clear(ps);
@@ -1111,16 +1335,18 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1111 char *filter_string) 1335 char *filter_string)
1112{ 1336{
1113 int err; 1337 int err;
1114
1115 struct filter_parse_state *ps; 1338 struct filter_parse_state *ps;
1116 1339
1117 mutex_lock(&event_mutex); 1340 mutex_lock(&event_mutex);
1118 1341
1342 err = init_subsystem_preds(system);
1343 if (err)
1344 goto out_unlock;
1345
1119 if (!strcmp(strstrip(filter_string), "0")) { 1346 if (!strcmp(strstrip(filter_string), "0")) {
1120 filter_free_subsystem_preds(system); 1347 filter_free_subsystem_preds(system);
1121 remove_filter_string(system->filter); 1348 remove_filter_string(system->filter);
1122 mutex_unlock(&event_mutex); 1349 goto out_unlock;
1123 return 0;
1124 } 1350 }
1125 1351
1126 err = -ENOMEM; 1352 err = -ENOMEM;
@@ -1128,7 +1354,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1128 if (!ps) 1354 if (!ps)
1129 goto out_unlock; 1355 goto out_unlock;
1130 1356
1131 filter_free_subsystem_preds(system);
1132 replace_filter_string(system->filter, filter_string); 1357 replace_filter_string(system->filter, filter_string);
1133 1358
1134 parse_init(ps, filter_ops, filter_string); 1359 parse_init(ps, filter_ops, filter_string);
@@ -1138,7 +1363,7 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1138 goto out; 1363 goto out;
1139 } 1364 }
1140 1365
1141 err = replace_preds(system, NULL, ps, filter_string); 1366 err = replace_system_preds(system, ps, filter_string);
1142 if (err) 1367 if (err)
1143 append_filter_err(ps, system->filter); 1368 append_filter_err(ps, system->filter);
1144 1369
@@ -1152,3 +1377,73 @@ out_unlock:
1152 return err; 1377 return err;
1153} 1378}
1154 1379
1380#ifdef CONFIG_PERF_EVENTS
1381
1382void ftrace_profile_free_filter(struct perf_event *event)
1383{
1384 struct event_filter *filter = event->filter;
1385
1386 event->filter = NULL;
1387 __free_preds(filter);
1388}
1389
1390int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1391 char *filter_str)
1392{
1393 int err;
1394 struct event_filter *filter;
1395 struct filter_parse_state *ps;
1396 struct ftrace_event_call *call = NULL;
1397
1398 mutex_lock(&event_mutex);
1399
1400 list_for_each_entry(call, &ftrace_events, list) {
1401 if (call->event.type == event_id)
1402 break;
1403 }
1404
1405 err = -EINVAL;
1406 if (&call->list == &ftrace_events)
1407 goto out_unlock;
1408
1409 err = -EEXIST;
1410 if (event->filter)
1411 goto out_unlock;
1412
1413 filter = __alloc_preds();
1414 if (IS_ERR(filter)) {
1415 err = PTR_ERR(filter);
1416 goto out_unlock;
1417 }
1418
1419 err = -ENOMEM;
1420 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1421 if (!ps)
1422 goto free_preds;
1423
1424 parse_init(ps, filter_ops, filter_str);
1425 err = filter_parse(ps);
1426 if (err)
1427 goto free_ps;
1428
1429 err = replace_preds(call, filter, ps, filter_str, false);
1430 if (!err)
1431 event->filter = filter;
1432
1433free_ps:
1434 filter_opstack_clear(ps);
1435 postfix_clear(ps);
1436 kfree(ps);
1437
1438free_preds:
1439 if (err)
1440 __free_preds(filter);
1441
1442out_unlock:
1443 mutex_unlock(&event_mutex);
1444
1445 return err;
1446}
1447
1448#endif /* CONFIG_PERF_EVENTS */
1449
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d06cf898dc86..4b74d71705c0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -15,192 +15,159 @@
15 15
16#include "trace_output.h" 16#include "trace_output.h"
17 17
18#undef TRACE_SYSTEM
19#define TRACE_SYSTEM ftrace
18 20
19#undef TRACE_STRUCT 21/* not needed for this file */
20#define TRACE_STRUCT(args...) args 22#undef __field_struct
21 23#define __field_struct(type, item)
22extern void __bad_type_size(void);
23
24#undef TRACE_FIELD
25#define TRACE_FIELD(type, item, assign) \
26 if (sizeof(type) != sizeof(field.item)) \
27 __bad_type_size(); \
28 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
29 "offset:%u;\tsize:%u;\n", \
30 (unsigned int)offsetof(typeof(field), item), \
31 (unsigned int)sizeof(field.item)); \
32 if (!ret) \
33 return 0;
34
35
36#undef TRACE_FIELD_SPECIAL
37#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \
38 ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \
39 "offset:%u;\tsize:%u;\n", \
40 (unsigned int)offsetof(typeof(field), item), \
41 (unsigned int)sizeof(field.item)); \
42 if (!ret) \
43 return 0;
44
45#undef TRACE_FIELD_ZERO_CHAR
46#define TRACE_FIELD_ZERO_CHAR(item) \
47 ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \
48 "offset:%u;\tsize:0;\n", \
49 (unsigned int)offsetof(typeof(field), item)); \
50 if (!ret) \
51 return 0;
52
53#undef TRACE_FIELD_SIGN
54#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
55 TRACE_FIELD(type, item, assign)
56
57#undef TP_RAW_FMT
58#define TP_RAW_FMT(args...) args
59
60#undef TRACE_EVENT_FORMAT
61#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
62static int \
63ftrace_format_##call(struct trace_seq *s) \
64{ \
65 struct args field; \
66 int ret; \
67 \
68 tstruct; \
69 \
70 trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \
71 \
72 return ret; \
73}
74 24
75#undef TRACE_EVENT_FORMAT_NOFILTER 25#undef __field
76#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ 26#define __field(type, item) type item;
77 tpfmt) \
78static int \
79ftrace_format_##call(struct trace_seq *s) \
80{ \
81 struct args field; \
82 int ret; \
83 \
84 tstruct; \
85 \
86 trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \
87 \
88 return ret; \
89}
90 27
91#include "trace_event_types.h" 28#undef __field_desc
29#define __field_desc(type, container, item) type item;
92 30
93#undef TRACE_ZERO_CHAR 31#undef __array
94#define TRACE_ZERO_CHAR(arg) 32#define __array(type, item, size) type item[size];
95 33
96#undef TRACE_FIELD 34#undef __array_desc
97#define TRACE_FIELD(type, item, assign)\ 35#define __array_desc(type, container, item, size) type item[size];
98 entry->item = assign;
99 36
100#undef TRACE_FIELD 37#undef __dynamic_array
101#define TRACE_FIELD(type, item, assign)\ 38#define __dynamic_array(type, item) type item[];
102 entry->item = assign;
103 39
104#undef TRACE_FIELD_SIGN 40#undef F_STRUCT
105#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ 41#define F_STRUCT(args...) args
106 TRACE_FIELD(type, item, assign)
107 42
108#undef TP_CMD 43#undef F_printk
109#define TP_CMD(cmd...) cmd 44#define F_printk(fmt, args...) fmt, args
110 45
111#undef TRACE_ENTRY 46#undef FTRACE_ENTRY
112#define TRACE_ENTRY entry 47#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
48struct ____ftrace_##name { \
49 tstruct \
50}; \
51static void __always_unused ____ftrace_check_##name(void) \
52{ \
53 struct ____ftrace_##name *__entry = NULL; \
54 \
55 /* force compile-time check on F_printk() */ \
56 printk(print); \
57}
113 58
114#undef TRACE_FIELD_SPECIAL 59#undef FTRACE_ENTRY_DUP
115#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ 60#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \
116 cmd; 61 FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
117 62
118#undef TRACE_EVENT_FORMAT 63#include "trace_entries.h"
119#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
120int ftrace_define_fields_##call(void); \
121static int ftrace_raw_init_event_##call(void); \
122 \
123struct ftrace_event_call __used \
124__attribute__((__aligned__(4))) \
125__attribute__((section("_ftrace_events"))) event_##call = { \
126 .name = #call, \
127 .id = proto, \
128 .system = __stringify(TRACE_SYSTEM), \
129 .raw_init = ftrace_raw_init_event_##call, \
130 .show_format = ftrace_format_##call, \
131 .define_fields = ftrace_define_fields_##call, \
132}; \
133static int ftrace_raw_init_event_##call(void) \
134{ \
135 INIT_LIST_HEAD(&event_##call.fields); \
136 init_preds(&event_##call); \
137 return 0; \
138} \
139
140#undef TRACE_EVENT_FORMAT_NOFILTER
141#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
142 tpfmt) \
143 \
144struct ftrace_event_call __used \
145__attribute__((__aligned__(4))) \
146__attribute__((section("_ftrace_events"))) event_##call = { \
147 .name = #call, \
148 .id = proto, \
149 .system = __stringify(TRACE_SYSTEM), \
150 .show_format = ftrace_format_##call, \
151};
152 64
153#include "trace_event_types.h" 65#undef __field
154 66#define __field(type, item) \
155#undef TRACE_FIELD
156#define TRACE_FIELD(type, item, assign) \
157 ret = trace_define_field(event_call, #type, #item, \ 67 ret = trace_define_field(event_call, #type, #item, \
158 offsetof(typeof(field), item), \ 68 offsetof(typeof(field), item), \
159 sizeof(field.item), is_signed_type(type)); \ 69 sizeof(field.item), \
70 is_signed_type(type), FILTER_OTHER); \
160 if (ret) \ 71 if (ret) \
161 return ret; 72 return ret;
162 73
163#undef TRACE_FIELD_SPECIAL 74#undef __field_desc
164#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ 75#define __field_desc(type, container, item) \
165 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 76 ret = trace_define_field(event_call, #type, #item, \
77 offsetof(typeof(field), \
78 container.item), \
79 sizeof(field.container.item), \
80 is_signed_type(type), FILTER_OTHER); \
81 if (ret) \
82 return ret;
83
84#undef __array
85#define __array(type, item, len) \
86 do { \
87 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
88 mutex_lock(&event_storage_mutex); \
89 snprintf(event_storage, sizeof(event_storage), \
90 "%s[%d]", #type, len); \
91 ret = trace_define_field(event_call, event_storage, #item, \
166 offsetof(typeof(field), item), \ 92 offsetof(typeof(field), item), \
167 sizeof(field.item), 0); \ 93 sizeof(field.item), \
94 is_signed_type(type), FILTER_OTHER); \
95 mutex_unlock(&event_storage_mutex); \
96 if (ret) \
97 return ret; \
98 } while (0);
99
100#undef __array_desc
101#define __array_desc(type, container, item, len) \
102 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
103 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
104 offsetof(typeof(field), \
105 container.item), \
106 sizeof(field.container.item), \
107 is_signed_type(type), FILTER_OTHER); \
168 if (ret) \ 108 if (ret) \
169 return ret; 109 return ret;
170 110
171#undef TRACE_FIELD_SIGN 111#undef __dynamic_array
172#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ 112#define __dynamic_array(type, item) \
173 ret = trace_define_field(event_call, #type, #item, \ 113 ret = trace_define_field(event_call, #type, #item, \
174 offsetof(typeof(field), item), \ 114 offsetof(typeof(field), item), \
175 sizeof(field.item), is_signed); \ 115 0, is_signed_type(type), FILTER_OTHER);\
176 if (ret) \ 116 if (ret) \
177 return ret; 117 return ret;
178 118
179#undef TRACE_FIELD_ZERO_CHAR 119#undef FTRACE_ENTRY
180#define TRACE_FIELD_ZERO_CHAR(item) 120#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
181
182#undef TRACE_EVENT_FORMAT
183#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
184int \ 121int \
185ftrace_define_fields_##call(void) \ 122ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
186{ \ 123{ \
187 struct ftrace_event_call *event_call = &event_##call; \ 124 struct struct_name field; \
188 struct args field; \
189 int ret; \ 125 int ret; \
190 \ 126 \
191 __common_field(unsigned char, type, 0); \
192 __common_field(unsigned char, flags, 0); \
193 __common_field(unsigned char, preempt_count, 0); \
194 __common_field(int, pid, 1); \
195 __common_field(int, tgid, 1); \
196 \
197 tstruct; \ 127 tstruct; \
198 \ 128 \
199 return ret; \ 129 return ret; \
200} 130}
201 131
202#undef TRACE_EVENT_FORMAT_NOFILTER 132#include "trace_entries.h"
203#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ 133
204 tpfmt) 134#undef __entry
135#define __entry REC
136
137#undef __field
138#define __field(type, item)
139
140#undef __field_desc
141#define __field_desc(type, container, item)
142
143#undef __array
144#define __array(type, item, len)
145
146#undef __array_desc
147#define __array_desc(type, container, item, len)
148
149#undef __dynamic_array
150#define __dynamic_array(type, item)
151
152#undef F_printk
153#define F_printk(fmt, args...) #fmt ", " __stringify(args)
154
155#undef FTRACE_ENTRY
156#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \
157 \
158struct ftrace_event_class event_class_ftrace_##call = { \
159 .system = __stringify(TRACE_SYSTEM), \
160 .define_fields = ftrace_define_fields_##call, \
161 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
162}; \
163 \
164struct ftrace_event_call __used \
165__attribute__((__aligned__(4))) \
166__attribute__((section("_ftrace_events"))) event_##call = { \
167 .name = #call, \
168 .event.type = etype, \
169 .class = &event_class_ftrace_##call, \
170 .print_fmt = print, \
171}; \
205 172
206#include "trace_event_types.h" 173#include "trace_entries.h"
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 90f134764837..16aee4d44e8f 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -54,14 +54,14 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
54 struct trace_array_cpu *data; 54 struct trace_array_cpu *data;
55 unsigned long flags; 55 unsigned long flags;
56 long disabled; 56 long disabled;
57 int cpu, resched; 57 int cpu;
58 int pc; 58 int pc;
59 59
60 if (unlikely(!ftrace_function_enabled)) 60 if (unlikely(!ftrace_function_enabled))
61 return; 61 return;
62 62
63 pc = preempt_count(); 63 pc = preempt_count();
64 resched = ftrace_preempt_disable(); 64 preempt_disable_notrace();
65 local_save_flags(flags); 65 local_save_flags(flags);
66 cpu = raw_smp_processor_id(); 66 cpu = raw_smp_processor_id();
67 data = tr->data[cpu]; 67 data = tr->data[cpu];
@@ -71,7 +71,7 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
71 trace_function(tr, ip, parent_ip, flags, pc); 71 trace_function(tr, ip, parent_ip, flags, pc);
72 72
73 atomic_dec(&data->disabled); 73 atomic_dec(&data->disabled);
74 ftrace_preempt_enable(resched); 74 preempt_enable_notrace();
75} 75}
76 76
77static void 77static void
@@ -288,11 +288,9 @@ static int
288ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, 288ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
289 struct ftrace_probe_ops *ops, void *data) 289 struct ftrace_probe_ops *ops, void *data)
290{ 290{
291 char str[KSYM_SYMBOL_LEN];
292 long count = (long)data; 291 long count = (long)data;
293 292
294 kallsyms_lookup(ip, NULL, NULL, NULL, str); 293 seq_printf(m, "%ps:", (void *)ip);
295 seq_printf(m, "%s:", str);
296 294
297 if (ops == &traceon_probe_ops) 295 if (ops == &traceon_probe_ops)
298 seq_printf(m, "traceon"); 296 seq_printf(m, "traceon");
@@ -302,8 +300,7 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
302 if (count == -1) 300 if (count == -1)
303 seq_printf(m, ":unlimited\n"); 301 seq_printf(m, ":unlimited\n");
304 else 302 else
305 seq_printf(m, ":count=%ld", count); 303 seq_printf(m, ":count=%ld\n", count);
306 seq_putc(m, '\n');
307 304
308 return 0; 305 return 0;
309} 306}
@@ -364,7 +361,7 @@ ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
364 out_reg: 361 out_reg:
365 ret = register_ftrace_function_probe(glob, ops, count); 362 ret = register_ftrace_function_probe(glob, ops, count);
366 363
367 return ret; 364 return ret < 0 ? ret : 0;
368} 365}
369 366
370static struct ftrace_func_command ftrace_traceon_cmd = { 367static struct ftrace_func_command ftrace_traceon_cmd = {
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index d2249abafb53..76b05980225c 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -9,14 +9,31 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <linux/slab.h>
12#include <linux/fs.h> 13#include <linux/fs.h>
13 14
14#include "trace.h" 15#include "trace.h"
15#include "trace_output.h" 16#include "trace_output.h"
16 17
17struct fgraph_data { 18/* When set, irq functions will be ignored */
19static int ftrace_graph_skip_irqs;
20
21struct fgraph_cpu_data {
18 pid_t last_pid; 22 pid_t last_pid;
19 int depth; 23 int depth;
24 int depth_irq;
25 int ignore;
26 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
27};
28
29struct fgraph_data {
30 struct fgraph_cpu_data __percpu *cpu_data;
31
32 /* Place to preserve last processed entry. */
33 struct ftrace_graph_ent_entry ent;
34 struct ftrace_graph_ret_entry ret;
35 int failed;
36 int cpu;
20}; 37};
21 38
22#define TRACE_GRAPH_INDENT 2 39#define TRACE_GRAPH_INDENT 2
@@ -27,7 +44,8 @@ struct fgraph_data {
27#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 44#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
28#define TRACE_GRAPH_PRINT_PROC 0x8 45#define TRACE_GRAPH_PRINT_PROC 0x8
29#define TRACE_GRAPH_PRINT_DURATION 0x10 46#define TRACE_GRAPH_PRINT_DURATION 0x10
30#define TRACE_GRAPH_PRINT_ABS_TIME 0X20 47#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
48#define TRACE_GRAPH_PRINT_IRQS 0x40
31 49
32static struct tracer_opt trace_opts[] = { 50static struct tracer_opt trace_opts[] = {
33 /* Display overruns? (for self-debug purpose) */ 51 /* Display overruns? (for self-debug purpose) */
@@ -42,17 +60,19 @@ static struct tracer_opt trace_opts[] = {
42 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, 60 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
43 /* Display absolute time of an entry */ 61 /* Display absolute time of an entry */
44 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, 62 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
63 /* Display interrupts */
64 { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
45 { } /* Empty entry */ 65 { } /* Empty entry */
46}; 66};
47 67
48static struct tracer_flags tracer_flags = { 68static struct tracer_flags tracer_flags = {
49 /* Don't display overruns and proc by default */ 69 /* Don't display overruns and proc by default */
50 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | 70 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
51 TRACE_GRAPH_PRINT_DURATION, 71 TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
52 .opts = trace_opts 72 .opts = trace_opts
53}; 73};
54 74
55/* pid on the last trace processed */ 75static struct trace_array *graph_array;
56 76
57 77
58/* Add a function return address to the trace stack on thread info.*/ 78/* Add a function return address to the trace stack on thread info.*/
@@ -124,7 +144,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
124 if (unlikely(current->ret_stack[index].fp != frame_pointer)) { 144 if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
125 ftrace_graph_stop(); 145 ftrace_graph_stop();
126 WARN(1, "Bad frame pointer: expected %lx, received %lx\n" 146 WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
127 " from func %pF return to %lx\n", 147 " from func %ps return to %lx\n",
128 current->ret_stack[index].fp, 148 current->ret_stack[index].fp,
129 frame_pointer, 149 frame_pointer,
130 (void *)current->ret_stack[index].func, 150 (void *)current->ret_stack[index].func,
@@ -166,10 +186,183 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
166 return ret; 186 return ret;
167} 187}
168 188
189int __trace_graph_entry(struct trace_array *tr,
190 struct ftrace_graph_ent *trace,
191 unsigned long flags,
192 int pc)
193{
194 struct ftrace_event_call *call = &event_funcgraph_entry;
195 struct ring_buffer_event *event;
196 struct ring_buffer *buffer = tr->buffer;
197 struct ftrace_graph_ent_entry *entry;
198
199 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
200 return 0;
201
202 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
203 sizeof(*entry), flags, pc);
204 if (!event)
205 return 0;
206 entry = ring_buffer_event_data(event);
207 entry->graph_ent = *trace;
208 if (!filter_current_check_discard(buffer, call, entry, event))
209 ring_buffer_unlock_commit(buffer, event);
210
211 return 1;
212}
213
214static inline int ftrace_graph_ignore_irqs(void)
215{
216 if (!ftrace_graph_skip_irqs)
217 return 0;
218
219 return in_irq();
220}
221
222int trace_graph_entry(struct ftrace_graph_ent *trace)
223{
224 struct trace_array *tr = graph_array;
225 struct trace_array_cpu *data;
226 unsigned long flags;
227 long disabled;
228 int ret;
229 int cpu;
230 int pc;
231
232 if (!ftrace_trace_task(current))
233 return 0;
234
235 /* trace it when it is-nested-in or is a function enabled. */
236 if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
237 ftrace_graph_ignore_irqs())
238 return 0;
239
240 local_irq_save(flags);
241 cpu = raw_smp_processor_id();
242 data = tr->data[cpu];
243 disabled = atomic_inc_return(&data->disabled);
244 if (likely(disabled == 1)) {
245 pc = preempt_count();
246 ret = __trace_graph_entry(tr, trace, flags, pc);
247 } else {
248 ret = 0;
249 }
250
251 atomic_dec(&data->disabled);
252 local_irq_restore(flags);
253
254 return ret;
255}
256
257int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
258{
259 if (tracing_thresh)
260 return 1;
261 else
262 return trace_graph_entry(trace);
263}
264
265static void
266__trace_graph_function(struct trace_array *tr,
267 unsigned long ip, unsigned long flags, int pc)
268{
269 u64 time = trace_clock_local();
270 struct ftrace_graph_ent ent = {
271 .func = ip,
272 .depth = 0,
273 };
274 struct ftrace_graph_ret ret = {
275 .func = ip,
276 .depth = 0,
277 .calltime = time,
278 .rettime = time,
279 };
280
281 __trace_graph_entry(tr, &ent, flags, pc);
282 __trace_graph_return(tr, &ret, flags, pc);
283}
284
285void
286trace_graph_function(struct trace_array *tr,
287 unsigned long ip, unsigned long parent_ip,
288 unsigned long flags, int pc)
289{
290 __trace_graph_function(tr, ip, flags, pc);
291}
292
293void __trace_graph_return(struct trace_array *tr,
294 struct ftrace_graph_ret *trace,
295 unsigned long flags,
296 int pc)
297{
298 struct ftrace_event_call *call = &event_funcgraph_exit;
299 struct ring_buffer_event *event;
300 struct ring_buffer *buffer = tr->buffer;
301 struct ftrace_graph_ret_entry *entry;
302
303 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
304 return;
305
306 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
307 sizeof(*entry), flags, pc);
308 if (!event)
309 return;
310 entry = ring_buffer_event_data(event);
311 entry->ret = *trace;
312 if (!filter_current_check_discard(buffer, call, entry, event))
313 ring_buffer_unlock_commit(buffer, event);
314}
315
316void trace_graph_return(struct ftrace_graph_ret *trace)
317{
318 struct trace_array *tr = graph_array;
319 struct trace_array_cpu *data;
320 unsigned long flags;
321 long disabled;
322 int cpu;
323 int pc;
324
325 local_irq_save(flags);
326 cpu = raw_smp_processor_id();
327 data = tr->data[cpu];
328 disabled = atomic_inc_return(&data->disabled);
329 if (likely(disabled == 1)) {
330 pc = preempt_count();
331 __trace_graph_return(tr, trace, flags, pc);
332 }
333 atomic_dec(&data->disabled);
334 local_irq_restore(flags);
335}
336
337void set_graph_array(struct trace_array *tr)
338{
339 graph_array = tr;
340
341 /* Make graph_array visible before we start tracing */
342
343 smp_mb();
344}
345
346void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
347{
348 if (tracing_thresh &&
349 (trace->rettime - trace->calltime < tracing_thresh))
350 return;
351 else
352 trace_graph_return(trace);
353}
354
169static int graph_trace_init(struct trace_array *tr) 355static int graph_trace_init(struct trace_array *tr)
170{ 356{
171 int ret = register_ftrace_graph(&trace_graph_return, 357 int ret;
172 &trace_graph_entry); 358
359 set_graph_array(tr);
360 if (tracing_thresh)
361 ret = register_ftrace_graph(&trace_graph_thresh_return,
362 &trace_graph_thresh_entry);
363 else
364 ret = register_ftrace_graph(&trace_graph_return,
365 &trace_graph_entry);
173 if (ret) 366 if (ret)
174 return ret; 367 return ret;
175 tracing_start_cmdline_record(); 368 tracing_start_cmdline_record();
@@ -183,43 +376,19 @@ static void graph_trace_reset(struct trace_array *tr)
183 unregister_ftrace_graph(); 376 unregister_ftrace_graph();
184} 377}
185 378
186static inline int log10_cpu(int nb) 379static int max_bytes_for_cpu;
187{
188 if (nb / 100)
189 return 3;
190 if (nb / 10)
191 return 2;
192 return 1;
193}
194 380
195static enum print_line_t 381static enum print_line_t
196print_graph_cpu(struct trace_seq *s, int cpu) 382print_graph_cpu(struct trace_seq *s, int cpu)
197{ 383{
198 int i;
199 int ret; 384 int ret;
200 int log10_this = log10_cpu(cpu);
201 int log10_all = log10_cpu(cpumask_weight(cpu_online_mask));
202
203 385
204 /* 386 /*
205 * Start with a space character - to make it stand out 387 * Start with a space character - to make it stand out
206 * to the right a bit when trace output is pasted into 388 * to the right a bit when trace output is pasted into
207 * email: 389 * email:
208 */ 390 */
209 ret = trace_seq_printf(s, " "); 391 ret = trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu);
210
211 /*
212 * Tricky - we space the CPU field according to the max
213 * number of online CPUs. On a 2-cpu system it would take
214 * a maximum of 1 digit - on a 128 cpu system it would
215 * take up to 3 digits:
216 */
217 for (i = 0; i < log10_all - log10_this; i++) {
218 ret = trace_seq_printf(s, " ");
219 if (!ret)
220 return TRACE_TYPE_PARTIAL_LINE;
221 }
222 ret = trace_seq_printf(s, "%d) ", cpu);
223 if (!ret) 392 if (!ret)
224 return TRACE_TYPE_PARTIAL_LINE; 393 return TRACE_TYPE_PARTIAL_LINE;
225 394
@@ -270,6 +439,15 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
270} 439}
271 440
272 441
442static enum print_line_t
443print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
444{
445 if (!trace_seq_putc(s, ' '))
446 return 0;
447
448 return trace_print_lat_fmt(s, entry);
449}
450
273/* If the pid changed since the last trace, output this event */ 451/* If the pid changed since the last trace, output this event */
274static enum print_line_t 452static enum print_line_t
275verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) 453verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
@@ -281,7 +459,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
281 if (!data) 459 if (!data)
282 return TRACE_TYPE_HANDLED; 460 return TRACE_TYPE_HANDLED;
283 461
284 last_pid = &(per_cpu_ptr(data, cpu)->last_pid); 462 last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
285 463
286 if (*last_pid == pid) 464 if (*last_pid == pid)
287 return TRACE_TYPE_HANDLED; 465 return TRACE_TYPE_HANDLED;
@@ -332,27 +510,59 @@ static struct ftrace_graph_ret_entry *
332get_return_for_leaf(struct trace_iterator *iter, 510get_return_for_leaf(struct trace_iterator *iter,
333 struct ftrace_graph_ent_entry *curr) 511 struct ftrace_graph_ent_entry *curr)
334{ 512{
335 struct ring_buffer_iter *ring_iter; 513 struct fgraph_data *data = iter->private;
514 struct ring_buffer_iter *ring_iter = NULL;
336 struct ring_buffer_event *event; 515 struct ring_buffer_event *event;
337 struct ftrace_graph_ret_entry *next; 516 struct ftrace_graph_ret_entry *next;
338 517
339 ring_iter = iter->buffer_iter[iter->cpu]; 518 /*
519 * If the previous output failed to write to the seq buffer,
520 * then we just reuse the data from before.
521 */
522 if (data && data->failed) {
523 curr = &data->ent;
524 next = &data->ret;
525 } else {
526
527 ring_iter = iter->buffer_iter[iter->cpu];
528
529 /* First peek to compare current entry and the next one */
530 if (ring_iter)
531 event = ring_buffer_iter_peek(ring_iter, NULL);
532 else {
533 /*
534 * We need to consume the current entry to see
535 * the next one.
536 */
537 ring_buffer_consume(iter->tr->buffer, iter->cpu,
538 NULL, NULL);
539 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
540 NULL, NULL);
541 }
340 542
341 /* First peek to compare current entry and the next one */ 543 if (!event)
342 if (ring_iter) 544 return NULL;
343 event = ring_buffer_iter_peek(ring_iter, NULL); 545
344 else { 546 next = ring_buffer_event_data(event);
345 /* We need to consume the current entry to see the next one */ 547
346 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); 548 if (data) {
347 event = ring_buffer_peek(iter->tr->buffer, iter->cpu, 549 /*
348 NULL); 550 * Save current and next entries for later reference
551 * if the output fails.
552 */
553 data->ent = *curr;
554 /*
555 * If the next event is not a return type, then
556 * we only care about what type it is. Otherwise we can
557 * safely copy the entire event.
558 */
559 if (next->ent.type == TRACE_GRAPH_RET)
560 data->ret = *next;
561 else
562 data->ret.ent.type = next->ent.type;
563 }
349 } 564 }
350 565
351 if (!event)
352 return NULL;
353
354 next = ring_buffer_event_data(event);
355
356 if (next->ent.type != TRACE_GRAPH_RET) 566 if (next->ent.type != TRACE_GRAPH_RET)
357 return NULL; 567 return NULL;
358 568
@@ -369,17 +579,18 @@ get_return_for_leaf(struct trace_iterator *iter,
369 579
370/* Signal a overhead of time execution to the output */ 580/* Signal a overhead of time execution to the output */
371static int 581static int
372print_graph_overhead(unsigned long long duration, struct trace_seq *s) 582print_graph_overhead(unsigned long long duration, struct trace_seq *s,
583 u32 flags)
373{ 584{
374 /* If duration disappear, we don't need anything */ 585 /* If duration disappear, we don't need anything */
375 if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)) 586 if (!(flags & TRACE_GRAPH_PRINT_DURATION))
376 return 1; 587 return 1;
377 588
378 /* Non nested entry or return */ 589 /* Non nested entry or return */
379 if (duration == -1) 590 if (duration == -1)
380 return trace_seq_printf(s, " "); 591 return trace_seq_printf(s, " ");
381 592
382 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 593 if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
383 /* Duration exceeded 100 msecs */ 594 /* Duration exceeded 100 msecs */
384 if (duration > 100000ULL) 595 if (duration > 100000ULL)
385 return trace_seq_printf(s, "! "); 596 return trace_seq_printf(s, "! ");
@@ -405,7 +616,7 @@ static int print_graph_abs_time(u64 t, struct trace_seq *s)
405 616
406static enum print_line_t 617static enum print_line_t
407print_graph_irq(struct trace_iterator *iter, unsigned long addr, 618print_graph_irq(struct trace_iterator *iter, unsigned long addr,
408 enum trace_type type, int cpu, pid_t pid) 619 enum trace_type type, int cpu, pid_t pid, u32 flags)
409{ 620{
410 int ret; 621 int ret;
411 struct trace_seq *s = &iter->seq; 622 struct trace_seq *s = &iter->seq;
@@ -415,20 +626,21 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
415 return TRACE_TYPE_UNHANDLED; 626 return TRACE_TYPE_UNHANDLED;
416 627
417 /* Absolute time */ 628 /* Absolute time */
418 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { 629 if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
419 ret = print_graph_abs_time(iter->ts, s); 630 ret = print_graph_abs_time(iter->ts, s);
420 if (!ret) 631 if (!ret)
421 return TRACE_TYPE_PARTIAL_LINE; 632 return TRACE_TYPE_PARTIAL_LINE;
422 } 633 }
423 634
424 /* Cpu */ 635 /* Cpu */
425 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 636 if (flags & TRACE_GRAPH_PRINT_CPU) {
426 ret = print_graph_cpu(s, cpu); 637 ret = print_graph_cpu(s, cpu);
427 if (ret == TRACE_TYPE_PARTIAL_LINE) 638 if (ret == TRACE_TYPE_PARTIAL_LINE)
428 return TRACE_TYPE_PARTIAL_LINE; 639 return TRACE_TYPE_PARTIAL_LINE;
429 } 640 }
641
430 /* Proc */ 642 /* Proc */
431 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { 643 if (flags & TRACE_GRAPH_PRINT_PROC) {
432 ret = print_graph_proc(s, pid); 644 ret = print_graph_proc(s, pid);
433 if (ret == TRACE_TYPE_PARTIAL_LINE) 645 if (ret == TRACE_TYPE_PARTIAL_LINE)
434 return TRACE_TYPE_PARTIAL_LINE; 646 return TRACE_TYPE_PARTIAL_LINE;
@@ -438,7 +650,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
438 } 650 }
439 651
440 /* No overhead */ 652 /* No overhead */
441 ret = print_graph_overhead(-1, s); 653 ret = print_graph_overhead(-1, s, flags);
442 if (!ret) 654 if (!ret)
443 return TRACE_TYPE_PARTIAL_LINE; 655 return TRACE_TYPE_PARTIAL_LINE;
444 656
@@ -451,7 +663,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
451 return TRACE_TYPE_PARTIAL_LINE; 663 return TRACE_TYPE_PARTIAL_LINE;
452 664
453 /* Don't close the duration column if haven't one */ 665 /* Don't close the duration column if haven't one */
454 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) 666 if (flags & TRACE_GRAPH_PRINT_DURATION)
455 trace_seq_printf(s, " |"); 667 trace_seq_printf(s, " |");
456 ret = trace_seq_printf(s, "\n"); 668 ret = trace_seq_printf(s, "\n");
457 669
@@ -481,7 +693,9 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
481 693
482 /* Print nsecs (we don't want to exceed 7 numbers) */ 694 /* Print nsecs (we don't want to exceed 7 numbers) */
483 if (len < 7) { 695 if (len < 7) {
484 snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem); 696 size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len);
697
698 snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
485 ret = trace_seq_printf(s, ".%s", nsecs_str); 699 ret = trace_seq_printf(s, ".%s", nsecs_str);
486 if (!ret) 700 if (!ret)
487 return TRACE_TYPE_PARTIAL_LINE; 701 return TRACE_TYPE_PARTIAL_LINE;
@@ -521,7 +735,8 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
521static enum print_line_t 735static enum print_line_t
522print_graph_entry_leaf(struct trace_iterator *iter, 736print_graph_entry_leaf(struct trace_iterator *iter,
523 struct ftrace_graph_ent_entry *entry, 737 struct ftrace_graph_ent_entry *entry,
524 struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s) 738 struct ftrace_graph_ret_entry *ret_entry,
739 struct trace_seq *s, u32 flags)
525{ 740{
526 struct fgraph_data *data = iter->private; 741 struct fgraph_data *data = iter->private;
527 struct ftrace_graph_ret *graph_ret; 742 struct ftrace_graph_ret *graph_ret;
@@ -535,24 +750,30 @@ print_graph_entry_leaf(struct trace_iterator *iter,
535 duration = graph_ret->rettime - graph_ret->calltime; 750 duration = graph_ret->rettime - graph_ret->calltime;
536 751
537 if (data) { 752 if (data) {
753 struct fgraph_cpu_data *cpu_data;
538 int cpu = iter->cpu; 754 int cpu = iter->cpu;
539 int *depth = &(per_cpu_ptr(data, cpu)->depth); 755
756 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
540 757
541 /* 758 /*
542 * Comments display at + 1 to depth. Since 759 * Comments display at + 1 to depth. Since
543 * this is a leaf function, keep the comments 760 * this is a leaf function, keep the comments
544 * equal to this depth. 761 * equal to this depth.
545 */ 762 */
546 *depth = call->depth - 1; 763 cpu_data->depth = call->depth - 1;
764
765 /* No need to keep this function around for this depth */
766 if (call->depth < FTRACE_RETFUNC_DEPTH)
767 cpu_data->enter_funcs[call->depth] = 0;
547 } 768 }
548 769
549 /* Overhead */ 770 /* Overhead */
550 ret = print_graph_overhead(duration, s); 771 ret = print_graph_overhead(duration, s, flags);
551 if (!ret) 772 if (!ret)
552 return TRACE_TYPE_PARTIAL_LINE; 773 return TRACE_TYPE_PARTIAL_LINE;
553 774
554 /* Duration */ 775 /* Duration */
555 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { 776 if (flags & TRACE_GRAPH_PRINT_DURATION) {
556 ret = print_graph_duration(duration, s); 777 ret = print_graph_duration(duration, s);
557 if (ret == TRACE_TYPE_PARTIAL_LINE) 778 if (ret == TRACE_TYPE_PARTIAL_LINE)
558 return TRACE_TYPE_PARTIAL_LINE; 779 return TRACE_TYPE_PARTIAL_LINE;
@@ -565,11 +786,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
565 return TRACE_TYPE_PARTIAL_LINE; 786 return TRACE_TYPE_PARTIAL_LINE;
566 } 787 }
567 788
568 ret = seq_print_ip_sym(s, call->func, 0); 789 ret = trace_seq_printf(s, "%ps();\n", (void *)call->func);
569 if (!ret)
570 return TRACE_TYPE_PARTIAL_LINE;
571
572 ret = trace_seq_printf(s, "();\n");
573 if (!ret) 790 if (!ret)
574 return TRACE_TYPE_PARTIAL_LINE; 791 return TRACE_TYPE_PARTIAL_LINE;
575 792
@@ -579,7 +796,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
579static enum print_line_t 796static enum print_line_t
580print_graph_entry_nested(struct trace_iterator *iter, 797print_graph_entry_nested(struct trace_iterator *iter,
581 struct ftrace_graph_ent_entry *entry, 798 struct ftrace_graph_ent_entry *entry,
582 struct trace_seq *s, int cpu) 799 struct trace_seq *s, int cpu, u32 flags)
583{ 800{
584 struct ftrace_graph_ent *call = &entry->graph_ent; 801 struct ftrace_graph_ent *call = &entry->graph_ent;
585 struct fgraph_data *data = iter->private; 802 struct fgraph_data *data = iter->private;
@@ -587,19 +804,24 @@ print_graph_entry_nested(struct trace_iterator *iter,
587 int i; 804 int i;
588 805
589 if (data) { 806 if (data) {
807 struct fgraph_cpu_data *cpu_data;
590 int cpu = iter->cpu; 808 int cpu = iter->cpu;
591 int *depth = &(per_cpu_ptr(data, cpu)->depth);
592 809
593 *depth = call->depth; 810 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
811 cpu_data->depth = call->depth;
812
813 /* Save this function pointer to see if the exit matches */
814 if (call->depth < FTRACE_RETFUNC_DEPTH)
815 cpu_data->enter_funcs[call->depth] = call->func;
594 } 816 }
595 817
596 /* No overhead */ 818 /* No overhead */
597 ret = print_graph_overhead(-1, s); 819 ret = print_graph_overhead(-1, s, flags);
598 if (!ret) 820 if (!ret)
599 return TRACE_TYPE_PARTIAL_LINE; 821 return TRACE_TYPE_PARTIAL_LINE;
600 822
601 /* No time */ 823 /* No time */
602 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { 824 if (flags & TRACE_GRAPH_PRINT_DURATION) {
603 ret = trace_seq_printf(s, " | "); 825 ret = trace_seq_printf(s, " | ");
604 if (!ret) 826 if (!ret)
605 return TRACE_TYPE_PARTIAL_LINE; 827 return TRACE_TYPE_PARTIAL_LINE;
@@ -612,11 +834,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
612 return TRACE_TYPE_PARTIAL_LINE; 834 return TRACE_TYPE_PARTIAL_LINE;
613 } 835 }
614 836
615 ret = seq_print_ip_sym(s, call->func, 0); 837 ret = trace_seq_printf(s, "%ps() {\n", (void *)call->func);
616 if (!ret)
617 return TRACE_TYPE_PARTIAL_LINE;
618
619 ret = trace_seq_printf(s, "() {\n");
620 if (!ret) 838 if (!ret)
621 return TRACE_TYPE_PARTIAL_LINE; 839 return TRACE_TYPE_PARTIAL_LINE;
622 840
@@ -629,7 +847,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
629 847
630static enum print_line_t 848static enum print_line_t
631print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, 849print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
632 int type, unsigned long addr) 850 int type, unsigned long addr, u32 flags)
633{ 851{
634 struct fgraph_data *data = iter->private; 852 struct fgraph_data *data = iter->private;
635 struct trace_entry *ent = iter->ent; 853 struct trace_entry *ent = iter->ent;
@@ -642,27 +860,27 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
642 860
643 if (type) { 861 if (type) {
644 /* Interrupt */ 862 /* Interrupt */
645 ret = print_graph_irq(iter, addr, type, cpu, ent->pid); 863 ret = print_graph_irq(iter, addr, type, cpu, ent->pid, flags);
646 if (ret == TRACE_TYPE_PARTIAL_LINE) 864 if (ret == TRACE_TYPE_PARTIAL_LINE)
647 return TRACE_TYPE_PARTIAL_LINE; 865 return TRACE_TYPE_PARTIAL_LINE;
648 } 866 }
649 867
650 /* Absolute time */ 868 /* Absolute time */
651 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { 869 if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
652 ret = print_graph_abs_time(iter->ts, s); 870 ret = print_graph_abs_time(iter->ts, s);
653 if (!ret) 871 if (!ret)
654 return TRACE_TYPE_PARTIAL_LINE; 872 return TRACE_TYPE_PARTIAL_LINE;
655 } 873 }
656 874
657 /* Cpu */ 875 /* Cpu */
658 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 876 if (flags & TRACE_GRAPH_PRINT_CPU) {
659 ret = print_graph_cpu(s, cpu); 877 ret = print_graph_cpu(s, cpu);
660 if (ret == TRACE_TYPE_PARTIAL_LINE) 878 if (ret == TRACE_TYPE_PARTIAL_LINE)
661 return TRACE_TYPE_PARTIAL_LINE; 879 return TRACE_TYPE_PARTIAL_LINE;
662 } 880 }
663 881
664 /* Proc */ 882 /* Proc */
665 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { 883 if (flags & TRACE_GRAPH_PRINT_PROC) {
666 ret = print_graph_proc(s, ent->pid); 884 ret = print_graph_proc(s, ent->pid);
667 if (ret == TRACE_TYPE_PARTIAL_LINE) 885 if (ret == TRACE_TYPE_PARTIAL_LINE)
668 return TRACE_TYPE_PARTIAL_LINE; 886 return TRACE_TYPE_PARTIAL_LINE;
@@ -672,61 +890,201 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
672 return TRACE_TYPE_PARTIAL_LINE; 890 return TRACE_TYPE_PARTIAL_LINE;
673 } 891 }
674 892
893 /* Latency format */
894 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
895 ret = print_graph_lat_fmt(s, ent);
896 if (ret == TRACE_TYPE_PARTIAL_LINE)
897 return TRACE_TYPE_PARTIAL_LINE;
898 }
899
675 return 0; 900 return 0;
676} 901}
677 902
903/*
904 * Entry check for irq code
905 *
906 * returns 1 if
907 * - we are inside irq code
908 * - we just extered irq code
909 *
910 * retunns 0 if
911 * - funcgraph-interrupts option is set
912 * - we are not inside irq code
913 */
914static int
915check_irq_entry(struct trace_iterator *iter, u32 flags,
916 unsigned long addr, int depth)
917{
918 int cpu = iter->cpu;
919 int *depth_irq;
920 struct fgraph_data *data = iter->private;
921
922 /*
923 * If we are either displaying irqs, or we got called as
924 * a graph event and private data does not exist,
925 * then we bypass the irq check.
926 */
927 if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
928 (!data))
929 return 0;
930
931 depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
932
933 /*
934 * We are inside the irq code
935 */
936 if (*depth_irq >= 0)
937 return 1;
938
939 if ((addr < (unsigned long)__irqentry_text_start) ||
940 (addr >= (unsigned long)__irqentry_text_end))
941 return 0;
942
943 /*
944 * We are entering irq code.
945 */
946 *depth_irq = depth;
947 return 1;
948}
949
950/*
951 * Return check for irq code
952 *
953 * returns 1 if
954 * - we are inside irq code
955 * - we just left irq code
956 *
957 * returns 0 if
958 * - funcgraph-interrupts option is set
959 * - we are not inside irq code
960 */
961static int
962check_irq_return(struct trace_iterator *iter, u32 flags, int depth)
963{
964 int cpu = iter->cpu;
965 int *depth_irq;
966 struct fgraph_data *data = iter->private;
967
968 /*
969 * If we are either displaying irqs, or we got called as
970 * a graph event and private data does not exist,
971 * then we bypass the irq check.
972 */
973 if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
974 (!data))
975 return 0;
976
977 depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
978
979 /*
980 * We are not inside the irq code.
981 */
982 if (*depth_irq == -1)
983 return 0;
984
985 /*
986 * We are inside the irq code, and this is returning entry.
987 * Let's not trace it and clear the entry depth, since
988 * we are out of irq code.
989 *
990 * This condition ensures that we 'leave the irq code' once
991 * we are out of the entry depth. Thus protecting us from
992 * the RETURN entry loss.
993 */
994 if (*depth_irq >= depth) {
995 *depth_irq = -1;
996 return 1;
997 }
998
999 /*
1000 * We are inside the irq code, and this is not the entry.
1001 */
1002 return 1;
1003}
1004
678static enum print_line_t 1005static enum print_line_t
679print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 1006print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
680 struct trace_iterator *iter) 1007 struct trace_iterator *iter, u32 flags)
681{ 1008{
682 int cpu = iter->cpu; 1009 struct fgraph_data *data = iter->private;
683 struct ftrace_graph_ent *call = &field->graph_ent; 1010 struct ftrace_graph_ent *call = &field->graph_ent;
684 struct ftrace_graph_ret_entry *leaf_ret; 1011 struct ftrace_graph_ret_entry *leaf_ret;
1012 static enum print_line_t ret;
1013 int cpu = iter->cpu;
1014
1015 if (check_irq_entry(iter, flags, call->func, call->depth))
1016 return TRACE_TYPE_HANDLED;
685 1017
686 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) 1018 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags))
687 return TRACE_TYPE_PARTIAL_LINE; 1019 return TRACE_TYPE_PARTIAL_LINE;
688 1020
689 leaf_ret = get_return_for_leaf(iter, field); 1021 leaf_ret = get_return_for_leaf(iter, field);
690 if (leaf_ret) 1022 if (leaf_ret)
691 return print_graph_entry_leaf(iter, field, leaf_ret, s); 1023 ret = print_graph_entry_leaf(iter, field, leaf_ret, s, flags);
692 else 1024 else
693 return print_graph_entry_nested(iter, field, s, cpu); 1025 ret = print_graph_entry_nested(iter, field, s, cpu, flags);
1026
1027 if (data) {
1028 /*
1029 * If we failed to write our output, then we need to make
1030 * note of it. Because we already consumed our entry.
1031 */
1032 if (s->full) {
1033 data->failed = 1;
1034 data->cpu = cpu;
1035 } else
1036 data->failed = 0;
1037 }
694 1038
1039 return ret;
695} 1040}
696 1041
697static enum print_line_t 1042static enum print_line_t
698print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, 1043print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
699 struct trace_entry *ent, struct trace_iterator *iter) 1044 struct trace_entry *ent, struct trace_iterator *iter,
1045 u32 flags)
700{ 1046{
701 unsigned long long duration = trace->rettime - trace->calltime; 1047 unsigned long long duration = trace->rettime - trace->calltime;
702 struct fgraph_data *data = iter->private; 1048 struct fgraph_data *data = iter->private;
703 pid_t pid = ent->pid; 1049 pid_t pid = ent->pid;
704 int cpu = iter->cpu; 1050 int cpu = iter->cpu;
1051 int func_match = 1;
705 int ret; 1052 int ret;
706 int i; 1053 int i;
707 1054
1055 if (check_irq_return(iter, flags, trace->depth))
1056 return TRACE_TYPE_HANDLED;
1057
708 if (data) { 1058 if (data) {
1059 struct fgraph_cpu_data *cpu_data;
709 int cpu = iter->cpu; 1060 int cpu = iter->cpu;
710 int *depth = &(per_cpu_ptr(data, cpu)->depth); 1061
1062 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
711 1063
712 /* 1064 /*
713 * Comments display at + 1 to depth. This is the 1065 * Comments display at + 1 to depth. This is the
714 * return from a function, we now want the comments 1066 * return from a function, we now want the comments
715 * to display at the same level of the bracket. 1067 * to display at the same level of the bracket.
716 */ 1068 */
717 *depth = trace->depth - 1; 1069 cpu_data->depth = trace->depth - 1;
1070
1071 if (trace->depth < FTRACE_RETFUNC_DEPTH) {
1072 if (cpu_data->enter_funcs[trace->depth] != trace->func)
1073 func_match = 0;
1074 cpu_data->enter_funcs[trace->depth] = 0;
1075 }
718 } 1076 }
719 1077
720 if (print_graph_prologue(iter, s, 0, 0)) 1078 if (print_graph_prologue(iter, s, 0, 0, flags))
721 return TRACE_TYPE_PARTIAL_LINE; 1079 return TRACE_TYPE_PARTIAL_LINE;
722 1080
723 /* Overhead */ 1081 /* Overhead */
724 ret = print_graph_overhead(duration, s); 1082 ret = print_graph_overhead(duration, s, flags);
725 if (!ret) 1083 if (!ret)
726 return TRACE_TYPE_PARTIAL_LINE; 1084 return TRACE_TYPE_PARTIAL_LINE;
727 1085
728 /* Duration */ 1086 /* Duration */
729 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { 1087 if (flags & TRACE_GRAPH_PRINT_DURATION) {
730 ret = print_graph_duration(duration, s); 1088 ret = print_graph_duration(duration, s);
731 if (ret == TRACE_TYPE_PARTIAL_LINE) 1089 if (ret == TRACE_TYPE_PARTIAL_LINE)
732 return TRACE_TYPE_PARTIAL_LINE; 1090 return TRACE_TYPE_PARTIAL_LINE;
@@ -739,19 +1097,32 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
739 return TRACE_TYPE_PARTIAL_LINE; 1097 return TRACE_TYPE_PARTIAL_LINE;
740 } 1098 }
741 1099
742 ret = trace_seq_printf(s, "}\n"); 1100 /*
743 if (!ret) 1101 * If the return function does not have a matching entry,
744 return TRACE_TYPE_PARTIAL_LINE; 1102 * then the entry was lost. Instead of just printing
1103 * the '}' and letting the user guess what function this
1104 * belongs to, write out the function name.
1105 */
1106 if (func_match) {
1107 ret = trace_seq_printf(s, "}\n");
1108 if (!ret)
1109 return TRACE_TYPE_PARTIAL_LINE;
1110 } else {
1111 ret = trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
1112 if (!ret)
1113 return TRACE_TYPE_PARTIAL_LINE;
1114 }
745 1115
746 /* Overrun */ 1116 /* Overrun */
747 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { 1117 if (flags & TRACE_GRAPH_PRINT_OVERRUN) {
748 ret = trace_seq_printf(s, " (Overruns: %lu)\n", 1118 ret = trace_seq_printf(s, " (Overruns: %lu)\n",
749 trace->overrun); 1119 trace->overrun);
750 if (!ret) 1120 if (!ret)
751 return TRACE_TYPE_PARTIAL_LINE; 1121 return TRACE_TYPE_PARTIAL_LINE;
752 } 1122 }
753 1123
754 ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, cpu, pid); 1124 ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET,
1125 cpu, pid, flags);
755 if (ret == TRACE_TYPE_PARTIAL_LINE) 1126 if (ret == TRACE_TYPE_PARTIAL_LINE)
756 return TRACE_TYPE_PARTIAL_LINE; 1127 return TRACE_TYPE_PARTIAL_LINE;
757 1128
@@ -759,8 +1130,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
759} 1130}
760 1131
761static enum print_line_t 1132static enum print_line_t
762print_graph_comment(struct trace_seq *s, struct trace_entry *ent, 1133print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
763 struct trace_iterator *iter) 1134 struct trace_iterator *iter, u32 flags)
764{ 1135{
765 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1136 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
766 struct fgraph_data *data = iter->private; 1137 struct fgraph_data *data = iter->private;
@@ -770,18 +1141,18 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
770 int i; 1141 int i;
771 1142
772 if (data) 1143 if (data)
773 depth = per_cpu_ptr(data, iter->cpu)->depth; 1144 depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
774 1145
775 if (print_graph_prologue(iter, s, 0, 0)) 1146 if (print_graph_prologue(iter, s, 0, 0, flags))
776 return TRACE_TYPE_PARTIAL_LINE; 1147 return TRACE_TYPE_PARTIAL_LINE;
777 1148
778 /* No overhead */ 1149 /* No overhead */
779 ret = print_graph_overhead(-1, s); 1150 ret = print_graph_overhead(-1, s, flags);
780 if (!ret) 1151 if (!ret)
781 return TRACE_TYPE_PARTIAL_LINE; 1152 return TRACE_TYPE_PARTIAL_LINE;
782 1153
783 /* No time */ 1154 /* No time */
784 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { 1155 if (flags & TRACE_GRAPH_PRINT_DURATION) {
785 ret = trace_seq_printf(s, " | "); 1156 ret = trace_seq_printf(s, " | ");
786 if (!ret) 1157 if (!ret)
787 return TRACE_TYPE_PARTIAL_LINE; 1158 return TRACE_TYPE_PARTIAL_LINE;
@@ -816,7 +1187,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
816 if (!event) 1187 if (!event)
817 return TRACE_TYPE_UNHANDLED; 1188 return TRACE_TYPE_UNHANDLED;
818 1189
819 ret = event->trace(iter, sym_flags); 1190 ret = event->funcs->trace(iter, sym_flags, event);
820 if (ret != TRACE_TYPE_HANDLED) 1191 if (ret != TRACE_TYPE_HANDLED)
821 return ret; 1192 return ret;
822 } 1193 }
@@ -836,90 +1207,253 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
836 1207
837 1208
838enum print_line_t 1209enum print_line_t
839print_graph_function(struct trace_iterator *iter) 1210__print_graph_function_flags(struct trace_iterator *iter, u32 flags)
840{ 1211{
1212 struct ftrace_graph_ent_entry *field;
1213 struct fgraph_data *data = iter->private;
841 struct trace_entry *entry = iter->ent; 1214 struct trace_entry *entry = iter->ent;
842 struct trace_seq *s = &iter->seq; 1215 struct trace_seq *s = &iter->seq;
1216 int cpu = iter->cpu;
1217 int ret;
1218
1219 if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
1220 per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
1221 return TRACE_TYPE_HANDLED;
1222 }
1223
1224 /*
1225 * If the last output failed, there's a possibility we need
1226 * to print out the missing entry which would never go out.
1227 */
1228 if (data && data->failed) {
1229 field = &data->ent;
1230 iter->cpu = data->cpu;
1231 ret = print_graph_entry(field, s, iter, flags);
1232 if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
1233 per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
1234 ret = TRACE_TYPE_NO_CONSUME;
1235 }
1236 iter->cpu = cpu;
1237 return ret;
1238 }
843 1239
844 switch (entry->type) { 1240 switch (entry->type) {
845 case TRACE_GRAPH_ENT: { 1241 case TRACE_GRAPH_ENT: {
846 struct ftrace_graph_ent_entry *field; 1242 /*
1243 * print_graph_entry() may consume the current event,
1244 * thus @field may become invalid, so we need to save it.
1245 * sizeof(struct ftrace_graph_ent_entry) is very small,
1246 * it can be safely saved at the stack.
1247 */
1248 struct ftrace_graph_ent_entry saved;
847 trace_assign_type(field, entry); 1249 trace_assign_type(field, entry);
848 return print_graph_entry(field, s, iter); 1250 saved = *field;
1251 return print_graph_entry(&saved, s, iter, flags);
849 } 1252 }
850 case TRACE_GRAPH_RET: { 1253 case TRACE_GRAPH_RET: {
851 struct ftrace_graph_ret_entry *field; 1254 struct ftrace_graph_ret_entry *field;
852 trace_assign_type(field, entry); 1255 trace_assign_type(field, entry);
853 return print_graph_return(&field->ret, s, entry, iter); 1256 return print_graph_return(&field->ret, s, entry, iter, flags);
854 } 1257 }
1258 case TRACE_STACK:
1259 case TRACE_FN:
1260 /* dont trace stack and functions as comments */
1261 return TRACE_TYPE_UNHANDLED;
1262
855 default: 1263 default:
856 return print_graph_comment(s, entry, iter); 1264 return print_graph_comment(s, entry, iter, flags);
857 } 1265 }
858 1266
859 return TRACE_TYPE_HANDLED; 1267 return TRACE_TYPE_HANDLED;
860} 1268}
861 1269
862static void print_graph_headers(struct seq_file *s) 1270static enum print_line_t
1271print_graph_function(struct trace_iterator *iter)
1272{
1273 return __print_graph_function_flags(iter, tracer_flags.val);
1274}
1275
1276enum print_line_t print_graph_function_flags(struct trace_iterator *iter,
1277 u32 flags)
1278{
1279 if (trace_flags & TRACE_ITER_LATENCY_FMT)
1280 flags |= TRACE_GRAPH_PRINT_DURATION;
1281 else
1282 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1283
1284 return __print_graph_function_flags(iter, flags);
1285}
1286
1287static enum print_line_t
1288print_graph_function_event(struct trace_iterator *iter, int flags,
1289 struct trace_event *event)
1290{
1291 return print_graph_function(iter);
1292}
1293
1294static void print_lat_header(struct seq_file *s, u32 flags)
1295{
1296 static const char spaces[] = " " /* 16 spaces */
1297 " " /* 4 spaces */
1298 " "; /* 17 spaces */
1299 int size = 0;
1300
1301 if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
1302 size += 16;
1303 if (flags & TRACE_GRAPH_PRINT_CPU)
1304 size += 4;
1305 if (flags & TRACE_GRAPH_PRINT_PROC)
1306 size += 17;
1307
1308 seq_printf(s, "#%.*s _-----=> irqs-off \n", size, spaces);
1309 seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces);
1310 seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces);
1311 seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces);
1312 seq_printf(s, "#%.*s||| / _-=> lock-depth \n", size, spaces);
1313 seq_printf(s, "#%.*s|||| / \n", size, spaces);
1314}
1315
1316static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
863{ 1317{
1318 int lat = trace_flags & TRACE_ITER_LATENCY_FMT;
1319
1320 if (lat)
1321 print_lat_header(s, flags);
1322
864 /* 1st line */ 1323 /* 1st line */
865 seq_printf(s, "# "); 1324 seq_printf(s, "#");
866 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) 1325 if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
867 seq_printf(s, " TIME "); 1326 seq_printf(s, " TIME ");
868 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 1327 if (flags & TRACE_GRAPH_PRINT_CPU)
869 seq_printf(s, "CPU"); 1328 seq_printf(s, " CPU");
870 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 1329 if (flags & TRACE_GRAPH_PRINT_PROC)
871 seq_printf(s, " TASK/PID "); 1330 seq_printf(s, " TASK/PID ");
872 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) 1331 if (lat)
1332 seq_printf(s, "|||||");
1333 if (flags & TRACE_GRAPH_PRINT_DURATION)
873 seq_printf(s, " DURATION "); 1334 seq_printf(s, " DURATION ");
874 seq_printf(s, " FUNCTION CALLS\n"); 1335 seq_printf(s, " FUNCTION CALLS\n");
875 1336
876 /* 2nd line */ 1337 /* 2nd line */
877 seq_printf(s, "# "); 1338 seq_printf(s, "#");
878 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) 1339 if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
879 seq_printf(s, " | "); 1340 seq_printf(s, " | ");
880 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 1341 if (flags & TRACE_GRAPH_PRINT_CPU)
881 seq_printf(s, "| "); 1342 seq_printf(s, " | ");
882 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 1343 if (flags & TRACE_GRAPH_PRINT_PROC)
883 seq_printf(s, " | | "); 1344 seq_printf(s, " | | ");
884 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) 1345 if (lat)
1346 seq_printf(s, "|||||");
1347 if (flags & TRACE_GRAPH_PRINT_DURATION)
885 seq_printf(s, " | | "); 1348 seq_printf(s, " | | ");
886 seq_printf(s, " | | | |\n"); 1349 seq_printf(s, " | | | |\n");
887} 1350}
888 1351
889static void graph_trace_open(struct trace_iterator *iter) 1352void print_graph_headers(struct seq_file *s)
1353{
1354 print_graph_headers_flags(s, tracer_flags.val);
1355}
1356
1357void print_graph_headers_flags(struct seq_file *s, u32 flags)
1358{
1359 struct trace_iterator *iter = s->private;
1360
1361 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
1362 /* print nothing if the buffers are empty */
1363 if (trace_empty(iter))
1364 return;
1365
1366 print_trace_header(s, iter);
1367 flags |= TRACE_GRAPH_PRINT_DURATION;
1368 } else
1369 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1370
1371 __print_graph_headers_flags(s, flags);
1372}
1373
1374void graph_trace_open(struct trace_iterator *iter)
890{ 1375{
891 /* pid and depth on the last trace processed */ 1376 /* pid and depth on the last trace processed */
892 struct fgraph_data *data = alloc_percpu(struct fgraph_data); 1377 struct fgraph_data *data;
893 int cpu; 1378 int cpu;
894 1379
1380 iter->private = NULL;
1381
1382 data = kzalloc(sizeof(*data), GFP_KERNEL);
895 if (!data) 1383 if (!data)
896 pr_warning("function graph tracer: not enough memory\n"); 1384 goto out_err;
897 else 1385
898 for_each_possible_cpu(cpu) { 1386 data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
899 pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid); 1387 if (!data->cpu_data)
900 int *depth = &(per_cpu_ptr(data, cpu)->depth); 1388 goto out_err_free;
901 *pid = -1; 1389
902 *depth = 0; 1390 for_each_possible_cpu(cpu) {
903 } 1391 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1392 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1393 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1394 int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
1395
1396 *pid = -1;
1397 *depth = 0;
1398 *ignore = 0;
1399 *depth_irq = -1;
1400 }
904 1401
905 iter->private = data; 1402 iter->private = data;
1403
1404 return;
1405
1406 out_err_free:
1407 kfree(data);
1408 out_err:
1409 pr_warning("function graph tracer: not enough memory\n");
1410}
1411
1412void graph_trace_close(struct trace_iterator *iter)
1413{
1414 struct fgraph_data *data = iter->private;
1415
1416 if (data) {
1417 free_percpu(data->cpu_data);
1418 kfree(data);
1419 }
906} 1420}
907 1421
908static void graph_trace_close(struct trace_iterator *iter) 1422static int func_graph_set_flag(u32 old_flags, u32 bit, int set)
909{ 1423{
910 free_percpu(iter->private); 1424 if (bit == TRACE_GRAPH_PRINT_IRQS)
1425 ftrace_graph_skip_irqs = !set;
1426
1427 return 0;
911} 1428}
912 1429
1430static struct trace_event_functions graph_functions = {
1431 .trace = print_graph_function_event,
1432};
1433
1434static struct trace_event graph_trace_entry_event = {
1435 .type = TRACE_GRAPH_ENT,
1436 .funcs = &graph_functions,
1437};
1438
1439static struct trace_event graph_trace_ret_event = {
1440 .type = TRACE_GRAPH_RET,
1441 .funcs = &graph_functions
1442};
1443
913static struct tracer graph_trace __read_mostly = { 1444static struct tracer graph_trace __read_mostly = {
914 .name = "function_graph", 1445 .name = "function_graph",
915 .open = graph_trace_open, 1446 .open = graph_trace_open,
1447 .pipe_open = graph_trace_open,
916 .close = graph_trace_close, 1448 .close = graph_trace_close,
1449 .pipe_close = graph_trace_close,
917 .wait_pipe = poll_wait_pipe, 1450 .wait_pipe = poll_wait_pipe,
918 .init = graph_trace_init, 1451 .init = graph_trace_init,
919 .reset = graph_trace_reset, 1452 .reset = graph_trace_reset,
920 .print_line = print_graph_function, 1453 .print_line = print_graph_function,
921 .print_header = print_graph_headers, 1454 .print_header = print_graph_headers,
922 .flags = &tracer_flags, 1455 .flags = &tracer_flags,
1456 .set_flag = func_graph_set_flag,
923#ifdef CONFIG_FTRACE_SELFTEST 1457#ifdef CONFIG_FTRACE_SELFTEST
924 .selftest = trace_selftest_startup_function_graph, 1458 .selftest = trace_selftest_startup_function_graph,
925#endif 1459#endif
@@ -927,6 +1461,18 @@ static struct tracer graph_trace __read_mostly = {
927 1461
928static __init int init_graph_trace(void) 1462static __init int init_graph_trace(void)
929{ 1463{
1464 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
1465
1466 if (!register_ftrace_event(&graph_trace_entry_event)) {
1467 pr_warning("Warning: could not register graph trace events\n");
1468 return 1;
1469 }
1470
1471 if (!register_ftrace_event(&graph_trace_ret_event)) {
1472 pr_warning("Warning: could not register graph trace events\n");
1473 return 1;
1474 }
1475
930 return register_tracer(&graph_trace); 1476 return register_tracer(&graph_trace);
931} 1477}
932 1478
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
deleted file mode 100644
index ca7d7c4d0c2a..000000000000
--- a/kernel/trace/trace_hw_branches.c
+++ /dev/null
@@ -1,309 +0,0 @@
1/*
2 * h/w branch tracer for x86 based on BTS
3 *
4 * Copyright (C) 2008-2009 Intel Corporation.
5 * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
6 */
7#include <linux/kallsyms.h>
8#include <linux/debugfs.h>
9#include <linux/ftrace.h>
10#include <linux/module.h>
11#include <linux/cpu.h>
12#include <linux/smp.h>
13#include <linux/fs.h>
14
15#include <asm/ds.h>
16
17#include "trace_output.h"
18#include "trace.h"
19
20
21#define BTS_BUFFER_SIZE (1 << 13)
22
23static DEFINE_PER_CPU(struct bts_tracer *, tracer);
24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer);
25
26#define this_tracer per_cpu(tracer, smp_processor_id())
27
28static int trace_hw_branches_enabled __read_mostly;
29static int trace_hw_branches_suspended __read_mostly;
30static struct trace_array *hw_branch_trace __read_mostly;
31
32
33static void bts_trace_init_cpu(int cpu)
34{
35 per_cpu(tracer, cpu) =
36 ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE,
37 NULL, (size_t)-1, BTS_KERNEL);
38
39 if (IS_ERR(per_cpu(tracer, cpu)))
40 per_cpu(tracer, cpu) = NULL;
41}
42
43static int bts_trace_init(struct trace_array *tr)
44{
45 int cpu;
46
47 hw_branch_trace = tr;
48 trace_hw_branches_enabled = 0;
49
50 get_online_cpus();
51 for_each_online_cpu(cpu) {
52 bts_trace_init_cpu(cpu);
53
54 if (likely(per_cpu(tracer, cpu)))
55 trace_hw_branches_enabled = 1;
56 }
57 trace_hw_branches_suspended = 0;
58 put_online_cpus();
59
60 /* If we could not enable tracing on a single cpu, we fail. */
61 return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
62}
63
64static void bts_trace_reset(struct trace_array *tr)
65{
66 int cpu;
67
68 get_online_cpus();
69 for_each_online_cpu(cpu) {
70 if (likely(per_cpu(tracer, cpu))) {
71 ds_release_bts(per_cpu(tracer, cpu));
72 per_cpu(tracer, cpu) = NULL;
73 }
74 }
75 trace_hw_branches_enabled = 0;
76 trace_hw_branches_suspended = 0;
77 put_online_cpus();
78}
79
80static void bts_trace_start(struct trace_array *tr)
81{
82 int cpu;
83
84 get_online_cpus();
85 for_each_online_cpu(cpu)
86 if (likely(per_cpu(tracer, cpu)))
87 ds_resume_bts(per_cpu(tracer, cpu));
88 trace_hw_branches_suspended = 0;
89 put_online_cpus();
90}
91
92static void bts_trace_stop(struct trace_array *tr)
93{
94 int cpu;
95
96 get_online_cpus();
97 for_each_online_cpu(cpu)
98 if (likely(per_cpu(tracer, cpu)))
99 ds_suspend_bts(per_cpu(tracer, cpu));
100 trace_hw_branches_suspended = 1;
101 put_online_cpus();
102}
103
104static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
105 unsigned long action, void *hcpu)
106{
107 int cpu = (long)hcpu;
108
109 switch (action) {
110 case CPU_ONLINE:
111 case CPU_DOWN_FAILED:
112 /* The notification is sent with interrupts enabled. */
113 if (trace_hw_branches_enabled) {
114 bts_trace_init_cpu(cpu);
115
116 if (trace_hw_branches_suspended &&
117 likely(per_cpu(tracer, cpu)))
118 ds_suspend_bts(per_cpu(tracer, cpu));
119 }
120 break;
121
122 case CPU_DOWN_PREPARE:
123 /* The notification is sent with interrupts enabled. */
124 if (likely(per_cpu(tracer, cpu))) {
125 ds_release_bts(per_cpu(tracer, cpu));
126 per_cpu(tracer, cpu) = NULL;
127 }
128 }
129
130 return NOTIFY_DONE;
131}
132
133static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
134 .notifier_call = bts_hotcpu_handler
135};
136
137static void bts_trace_print_header(struct seq_file *m)
138{
139 seq_puts(m, "# CPU# TO <- FROM\n");
140}
141
142static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
143{
144 unsigned long symflags = TRACE_ITER_SYM_OFFSET;
145 struct trace_entry *entry = iter->ent;
146 struct trace_seq *seq = &iter->seq;
147 struct hw_branch_entry *it;
148
149 trace_assign_type(it, entry);
150
151 if (entry->type == TRACE_HW_BRANCHES) {
152 if (trace_seq_printf(seq, "%4d ", iter->cpu) &&
153 seq_print_ip_sym(seq, it->to, symflags) &&
154 trace_seq_printf(seq, "\t <- ") &&
155 seq_print_ip_sym(seq, it->from, symflags) &&
156 trace_seq_printf(seq, "\n"))
157 return TRACE_TYPE_HANDLED;
158 return TRACE_TYPE_PARTIAL_LINE;;
159 }
160 return TRACE_TYPE_UNHANDLED;
161}
162
163void trace_hw_branch(u64 from, u64 to)
164{
165 struct ftrace_event_call *call = &event_hw_branch;
166 struct trace_array *tr = hw_branch_trace;
167 struct ring_buffer_event *event;
168 struct hw_branch_entry *entry;
169 unsigned long irq1;
170 int cpu;
171
172 if (unlikely(!tr))
173 return;
174
175 if (unlikely(!trace_hw_branches_enabled))
176 return;
177
178 local_irq_save(irq1);
179 cpu = raw_smp_processor_id();
180 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
181 goto out;
182
183 event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES,
184 sizeof(*entry), 0, 0);
185 if (!event)
186 goto out;
187 entry = ring_buffer_event_data(event);
188 tracing_generic_entry_update(&entry->ent, 0, from);
189 entry->ent.type = TRACE_HW_BRANCHES;
190 entry->from = from;
191 entry->to = to;
192 if (!filter_check_discard(call, entry, tr->buffer, event))
193 trace_buffer_unlock_commit(tr, event, 0, 0);
194
195 out:
196 atomic_dec(&tr->data[cpu]->disabled);
197 local_irq_restore(irq1);
198}
199
200static void trace_bts_at(const struct bts_trace *trace, void *at)
201{
202 struct bts_struct bts;
203 int err = 0;
204
205 WARN_ON_ONCE(!trace->read);
206 if (!trace->read)
207 return;
208
209 err = trace->read(this_tracer, at, &bts);
210 if (err < 0)
211 return;
212
213 switch (bts.qualifier) {
214 case BTS_BRANCH:
215 trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to);
216 break;
217 }
218}
219
220/*
221 * Collect the trace on the current cpu and write it into the ftrace buffer.
222 *
223 * pre: tracing must be suspended on the current cpu
224 */
225static void trace_bts_cpu(void *arg)
226{
227 struct trace_array *tr = (struct trace_array *)arg;
228 const struct bts_trace *trace;
229 unsigned char *at;
230
231 if (unlikely(!tr))
232 return;
233
234 if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled)))
235 return;
236
237 if (unlikely(!this_tracer))
238 return;
239
240 trace = ds_read_bts(this_tracer);
241 if (!trace)
242 return;
243
244 for (at = trace->ds.top; (void *)at < trace->ds.end;
245 at += trace->ds.size)
246 trace_bts_at(trace, at);
247
248 for (at = trace->ds.begin; (void *)at < trace->ds.top;
249 at += trace->ds.size)
250 trace_bts_at(trace, at);
251}
252
253static void trace_bts_prepare(struct trace_iterator *iter)
254{
255 int cpu;
256
257 get_online_cpus();
258 for_each_online_cpu(cpu)
259 if (likely(per_cpu(tracer, cpu)))
260 ds_suspend_bts(per_cpu(tracer, cpu));
261 /*
262 * We need to collect the trace on the respective cpu since ftrace
263 * implicitly adds the record for the current cpu.
264 * Once that is more flexible, we could collect the data from any cpu.
265 */
266 on_each_cpu(trace_bts_cpu, iter->tr, 1);
267
268 for_each_online_cpu(cpu)
269 if (likely(per_cpu(tracer, cpu)))
270 ds_resume_bts(per_cpu(tracer, cpu));
271 put_online_cpus();
272}
273
274static void trace_bts_close(struct trace_iterator *iter)
275{
276 tracing_reset_online_cpus(iter->tr);
277}
278
279void trace_hw_branch_oops(void)
280{
281 if (this_tracer) {
282 ds_suspend_bts_noirq(this_tracer);
283 trace_bts_cpu(hw_branch_trace);
284 ds_resume_bts_noirq(this_tracer);
285 }
286}
287
288struct tracer bts_tracer __read_mostly =
289{
290 .name = "hw-branch-tracer",
291 .init = bts_trace_init,
292 .reset = bts_trace_reset,
293 .print_header = bts_trace_print_header,
294 .print_line = bts_trace_print_line,
295 .start = bts_trace_start,
296 .stop = bts_trace_stop,
297 .open = trace_bts_prepare,
298 .close = trace_bts_close,
299#ifdef CONFIG_FTRACE_SELFTEST
300 .selftest = trace_selftest_startup_hw_branches,
301#endif /* CONFIG_FTRACE_SELFTEST */
302};
303
304__init static int init_bts_trace(void)
305{
306 register_hotcpu_notifier(&bts_hotcpu_notifier);
307 return register_tracer(&bts_tracer);
308}
309device_initcall(init_bts_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index b923d13e2fad..5cf8c602b880 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -34,6 +34,9 @@ static int trace_type __read_mostly;
34 34
35static int save_lat_flag; 35static int save_lat_flag;
36 36
37static void stop_irqsoff_tracer(struct trace_array *tr, int graph);
38static int start_irqsoff_tracer(struct trace_array *tr, int graph);
39
37#ifdef CONFIG_PREEMPT_TRACER 40#ifdef CONFIG_PREEMPT_TRACER
38static inline int 41static inline int
39preempt_trace(void) 42preempt_trace(void)
@@ -55,6 +58,23 @@ irq_trace(void)
55# define irq_trace() (0) 58# define irq_trace() (0)
56#endif 59#endif
57 60
61#define TRACE_DISPLAY_GRAPH 1
62
63static struct tracer_opt trace_opts[] = {
64#ifdef CONFIG_FUNCTION_GRAPH_TRACER
65 /* display latency trace as call graph */
66 { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
67#endif
68 { } /* Empty entry */
69};
70
71static struct tracer_flags tracer_flags = {
72 .val = 0,
73 .opts = trace_opts,
74};
75
76#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
77
58/* 78/*
59 * Sequence count - we record it when starting a measurement and 79 * Sequence count - we record it when starting a measurement and
60 * skip the latency if the sequence has changed - some other section 80 * skip the latency if the sequence has changed - some other section
@@ -67,14 +87,22 @@ static __cacheline_aligned_in_smp unsigned long max_sequence;
67 87
68#ifdef CONFIG_FUNCTION_TRACER 88#ifdef CONFIG_FUNCTION_TRACER
69/* 89/*
70 * irqsoff uses its own tracer function to keep the overhead down: 90 * Prologue for the preempt and irqs off function tracers.
91 *
92 * Returns 1 if it is OK to continue, and data->disabled is
93 * incremented.
94 * 0 if the trace is to be ignored, and data->disabled
95 * is kept the same.
96 *
97 * Note, this function is also used outside this ifdef but
98 * inside the #ifdef of the function graph tracer below.
99 * This is OK, since the function graph tracer is
100 * dependent on the function tracer.
71 */ 101 */
72static void 102static int func_prolog_dec(struct trace_array *tr,
73irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) 103 struct trace_array_cpu **data,
104 unsigned long *flags)
74{ 105{
75 struct trace_array *tr = irqsoff_trace;
76 struct trace_array_cpu *data;
77 unsigned long flags;
78 long disabled; 106 long disabled;
79 int cpu; 107 int cpu;
80 108
@@ -86,18 +114,38 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
86 */ 114 */
87 cpu = raw_smp_processor_id(); 115 cpu = raw_smp_processor_id();
88 if (likely(!per_cpu(tracing_cpu, cpu))) 116 if (likely(!per_cpu(tracing_cpu, cpu)))
89 return; 117 return 0;
90 118
91 local_save_flags(flags); 119 local_save_flags(*flags);
92 /* slight chance to get a false positive on tracing_cpu */ 120 /* slight chance to get a false positive on tracing_cpu */
93 if (!irqs_disabled_flags(flags)) 121 if (!irqs_disabled_flags(*flags))
94 return; 122 return 0;
95 123
96 data = tr->data[cpu]; 124 *data = tr->data[cpu];
97 disabled = atomic_inc_return(&data->disabled); 125 disabled = atomic_inc_return(&(*data)->disabled);
98 126
99 if (likely(disabled == 1)) 127 if (likely(disabled == 1))
100 trace_function(tr, ip, parent_ip, flags, preempt_count()); 128 return 1;
129
130 atomic_dec(&(*data)->disabled);
131
132 return 0;
133}
134
135/*
136 * irqsoff uses its own tracer function to keep the overhead down:
137 */
138static void
139irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
140{
141 struct trace_array *tr = irqsoff_trace;
142 struct trace_array_cpu *data;
143 unsigned long flags;
144
145 if (!func_prolog_dec(tr, &data, &flags))
146 return;
147
148 trace_function(tr, ip, parent_ip, flags, preempt_count());
101 149
102 atomic_dec(&data->disabled); 150 atomic_dec(&data->disabled);
103} 151}
@@ -108,6 +156,132 @@ static struct ftrace_ops trace_ops __read_mostly =
108}; 156};
109#endif /* CONFIG_FUNCTION_TRACER */ 157#endif /* CONFIG_FUNCTION_TRACER */
110 158
159#ifdef CONFIG_FUNCTION_GRAPH_TRACER
160static int irqsoff_set_flag(u32 old_flags, u32 bit, int set)
161{
162 int cpu;
163
164 if (!(bit & TRACE_DISPLAY_GRAPH))
165 return -EINVAL;
166
167 if (!(is_graph() ^ set))
168 return 0;
169
170 stop_irqsoff_tracer(irqsoff_trace, !set);
171
172 for_each_possible_cpu(cpu)
173 per_cpu(tracing_cpu, cpu) = 0;
174
175 tracing_max_latency = 0;
176 tracing_reset_online_cpus(irqsoff_trace);
177
178 return start_irqsoff_tracer(irqsoff_trace, set);
179}
180
181static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
182{
183 struct trace_array *tr = irqsoff_trace;
184 struct trace_array_cpu *data;
185 unsigned long flags;
186 int ret;
187 int pc;
188
189 if (!func_prolog_dec(tr, &data, &flags))
190 return 0;
191
192 pc = preempt_count();
193 ret = __trace_graph_entry(tr, trace, flags, pc);
194 atomic_dec(&data->disabled);
195
196 return ret;
197}
198
199static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
200{
201 struct trace_array *tr = irqsoff_trace;
202 struct trace_array_cpu *data;
203 unsigned long flags;
204 int pc;
205
206 if (!func_prolog_dec(tr, &data, &flags))
207 return;
208
209 pc = preempt_count();
210 __trace_graph_return(tr, trace, flags, pc);
211 atomic_dec(&data->disabled);
212}
213
214static void irqsoff_trace_open(struct trace_iterator *iter)
215{
216 if (is_graph())
217 graph_trace_open(iter);
218
219}
220
221static void irqsoff_trace_close(struct trace_iterator *iter)
222{
223 if (iter->private)
224 graph_trace_close(iter);
225}
226
227#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \
228 TRACE_GRAPH_PRINT_PROC)
229
230static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
231{
232 /*
233 * In graph mode call the graph tracer output function,
234 * otherwise go with the TRACE_FN event handler
235 */
236 if (is_graph())
237 return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
238
239 return TRACE_TYPE_UNHANDLED;
240}
241
242static void irqsoff_print_header(struct seq_file *s)
243{
244 if (is_graph())
245 print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
246 else
247 trace_default_header(s);
248}
249
250static void
251__trace_function(struct trace_array *tr,
252 unsigned long ip, unsigned long parent_ip,
253 unsigned long flags, int pc)
254{
255 if (is_graph())
256 trace_graph_function(tr, ip, parent_ip, flags, pc);
257 else
258 trace_function(tr, ip, parent_ip, flags, pc);
259}
260
261#else
262#define __trace_function trace_function
263
264static int irqsoff_set_flag(u32 old_flags, u32 bit, int set)
265{
266 return -EINVAL;
267}
268
269static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
270{
271 return -1;
272}
273
274static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
275{
276 return TRACE_TYPE_UNHANDLED;
277}
278
279static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { }
280static void irqsoff_print_header(struct seq_file *s) { }
281static void irqsoff_trace_open(struct trace_iterator *iter) { }
282static void irqsoff_trace_close(struct trace_iterator *iter) { }
283#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
284
111/* 285/*
112 * Should this new latency be reported/recorded? 286 * Should this new latency be reported/recorded?
113 */ 287 */
@@ -129,15 +303,10 @@ check_critical_timing(struct trace_array *tr,
129 unsigned long parent_ip, 303 unsigned long parent_ip,
130 int cpu) 304 int cpu)
131{ 305{
132 unsigned long latency, t0, t1;
133 cycle_t T0, T1, delta; 306 cycle_t T0, T1, delta;
134 unsigned long flags; 307 unsigned long flags;
135 int pc; 308 int pc;
136 309
137 /*
138 * usecs conversion is slow so we try to delay the conversion
139 * as long as possible:
140 */
141 T0 = data->preempt_timestamp; 310 T0 = data->preempt_timestamp;
142 T1 = ftrace_now(cpu); 311 T1 = ftrace_now(cpu);
143 delta = T1-T0; 312 delta = T1-T0;
@@ -155,20 +324,19 @@ check_critical_timing(struct trace_array *tr,
155 if (!report_latency(delta)) 324 if (!report_latency(delta))
156 goto out_unlock; 325 goto out_unlock;
157 326
158 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 327 __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
159 328 /* Skip 5 functions to get to the irq/preempt enable function */
160 latency = nsecs_to_usecs(delta); 329 __trace_stack(tr, flags, 5, pc);
161 330
162 if (data->critical_sequence != max_sequence) 331 if (data->critical_sequence != max_sequence)
163 goto out_unlock; 332 goto out_unlock;
164 333
165 tracing_max_latency = delta;
166 t0 = nsecs_to_usecs(T0);
167 t1 = nsecs_to_usecs(T1);
168
169 data->critical_end = parent_ip; 334 data->critical_end = parent_ip;
170 335
171 update_max_tr_single(tr, current, cpu); 336 if (likely(!is_tracing_stopped())) {
337 tracing_max_latency = delta;
338 update_max_tr_single(tr, current, cpu);
339 }
172 340
173 max_sequence++; 341 max_sequence++;
174 342
@@ -178,8 +346,7 @@ out_unlock:
178out: 346out:
179 data->critical_sequence = max_sequence; 347 data->critical_sequence = max_sequence;
180 data->preempt_timestamp = ftrace_now(cpu); 348 data->preempt_timestamp = ftrace_now(cpu);
181 tracing_reset(tr, cpu); 349 __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
182 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
183} 350}
184 351
185static inline void 352static inline void
@@ -208,11 +375,10 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
208 data->critical_sequence = max_sequence; 375 data->critical_sequence = max_sequence;
209 data->preempt_timestamp = ftrace_now(cpu); 376 data->preempt_timestamp = ftrace_now(cpu);
210 data->critical_start = parent_ip ? : ip; 377 data->critical_start = parent_ip ? : ip;
211 tracing_reset(tr, cpu);
212 378
213 local_save_flags(flags); 379 local_save_flags(flags);
214 380
215 trace_function(tr, ip, parent_ip, flags, preempt_count()); 381 __trace_function(tr, ip, parent_ip, flags, preempt_count());
216 382
217 per_cpu(tracing_cpu, cpu) = 1; 383 per_cpu(tracing_cpu, cpu) = 1;
218 384
@@ -246,7 +412,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
246 atomic_inc(&data->disabled); 412 atomic_inc(&data->disabled);
247 413
248 local_save_flags(flags); 414 local_save_flags(flags);
249 trace_function(tr, ip, parent_ip, flags, preempt_count()); 415 __trace_function(tr, ip, parent_ip, flags, preempt_count());
250 check_critical_timing(tr, data, parent_ip ? : ip, cpu); 416 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
251 data->critical_start = 0; 417 data->critical_start = 0;
252 atomic_dec(&data->disabled); 418 atomic_dec(&data->disabled);
@@ -355,19 +521,32 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
355} 521}
356#endif /* CONFIG_PREEMPT_TRACER */ 522#endif /* CONFIG_PREEMPT_TRACER */
357 523
358static void start_irqsoff_tracer(struct trace_array *tr) 524static int start_irqsoff_tracer(struct trace_array *tr, int graph)
359{ 525{
360 register_ftrace_function(&trace_ops); 526 int ret = 0;
361 if (tracing_is_enabled()) 527
528 if (!graph)
529 ret = register_ftrace_function(&trace_ops);
530 else
531 ret = register_ftrace_graph(&irqsoff_graph_return,
532 &irqsoff_graph_entry);
533
534 if (!ret && tracing_is_enabled())
362 tracer_enabled = 1; 535 tracer_enabled = 1;
363 else 536 else
364 tracer_enabled = 0; 537 tracer_enabled = 0;
538
539 return ret;
365} 540}
366 541
367static void stop_irqsoff_tracer(struct trace_array *tr) 542static void stop_irqsoff_tracer(struct trace_array *tr, int graph)
368{ 543{
369 tracer_enabled = 0; 544 tracer_enabled = 0;
370 unregister_ftrace_function(&trace_ops); 545
546 if (!graph)
547 unregister_ftrace_function(&trace_ops);
548 else
549 unregister_ftrace_graph();
371} 550}
372 551
373static void __irqsoff_tracer_init(struct trace_array *tr) 552static void __irqsoff_tracer_init(struct trace_array *tr)
@@ -379,12 +558,15 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
379 irqsoff_trace = tr; 558 irqsoff_trace = tr;
380 /* make sure that the tracer is visible */ 559 /* make sure that the tracer is visible */
381 smp_wmb(); 560 smp_wmb();
382 start_irqsoff_tracer(tr); 561 tracing_reset_online_cpus(tr);
562
563 if (start_irqsoff_tracer(tr, is_graph()))
564 printk(KERN_ERR "failed to start irqsoff tracer\n");
383} 565}
384 566
385static void irqsoff_tracer_reset(struct trace_array *tr) 567static void irqsoff_tracer_reset(struct trace_array *tr)
386{ 568{
387 stop_irqsoff_tracer(tr); 569 stop_irqsoff_tracer(tr, is_graph());
388 570
389 if (!save_lat_flag) 571 if (!save_lat_flag)
390 trace_flags &= ~TRACE_ITER_LATENCY_FMT; 572 trace_flags &= ~TRACE_ITER_LATENCY_FMT;
@@ -416,9 +598,16 @@ static struct tracer irqsoff_tracer __read_mostly =
416 .start = irqsoff_tracer_start, 598 .start = irqsoff_tracer_start,
417 .stop = irqsoff_tracer_stop, 599 .stop = irqsoff_tracer_stop,
418 .print_max = 1, 600 .print_max = 1,
601 .print_header = irqsoff_print_header,
602 .print_line = irqsoff_print_line,
603 .flags = &tracer_flags,
604 .set_flag = irqsoff_set_flag,
419#ifdef CONFIG_FTRACE_SELFTEST 605#ifdef CONFIG_FTRACE_SELFTEST
420 .selftest = trace_selftest_startup_irqsoff, 606 .selftest = trace_selftest_startup_irqsoff,
421#endif 607#endif
608 .open = irqsoff_trace_open,
609 .close = irqsoff_trace_close,
610 .use_max_tr = 1,
422}; 611};
423# define register_irqsoff(trace) register_tracer(&trace) 612# define register_irqsoff(trace) register_tracer(&trace)
424#else 613#else
@@ -442,9 +631,16 @@ static struct tracer preemptoff_tracer __read_mostly =
442 .start = irqsoff_tracer_start, 631 .start = irqsoff_tracer_start,
443 .stop = irqsoff_tracer_stop, 632 .stop = irqsoff_tracer_stop,
444 .print_max = 1, 633 .print_max = 1,
634 .print_header = irqsoff_print_header,
635 .print_line = irqsoff_print_line,
636 .flags = &tracer_flags,
637 .set_flag = irqsoff_set_flag,
445#ifdef CONFIG_FTRACE_SELFTEST 638#ifdef CONFIG_FTRACE_SELFTEST
446 .selftest = trace_selftest_startup_preemptoff, 639 .selftest = trace_selftest_startup_preemptoff,
447#endif 640#endif
641 .open = irqsoff_trace_open,
642 .close = irqsoff_trace_close,
643 .use_max_tr = 1,
448}; 644};
449# define register_preemptoff(trace) register_tracer(&trace) 645# define register_preemptoff(trace) register_tracer(&trace)
450#else 646#else
@@ -470,9 +666,16 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
470 .start = irqsoff_tracer_start, 666 .start = irqsoff_tracer_start,
471 .stop = irqsoff_tracer_stop, 667 .stop = irqsoff_tracer_stop,
472 .print_max = 1, 668 .print_max = 1,
669 .print_header = irqsoff_print_header,
670 .print_line = irqsoff_print_line,
671 .flags = &tracer_flags,
672 .set_flag = irqsoff_set_flag,
473#ifdef CONFIG_FTRACE_SELFTEST 673#ifdef CONFIG_FTRACE_SELFTEST
474 .selftest = trace_selftest_startup_preemptirqsoff, 674 .selftest = trace_selftest_startup_preemptirqsoff,
475#endif 675#endif
676 .open = irqsoff_trace_open,
677 .close = irqsoff_trace_close,
678 .use_max_tr = 1,
476}; 679};
477 680
478# define register_preemptirqsoff(trace) register_tracer(&trace) 681# define register_preemptirqsoff(trace) register_tracer(&trace)
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
new file mode 100644
index 000000000000..3c5c5dfea0b3
--- /dev/null
+++ b/kernel/trace/trace_kdb.c
@@ -0,0 +1,135 @@
1/*
2 * kdb helper for dumping the ftrace buffer
3 *
4 * Copyright (C) 2010 Jason Wessel <jason.wessel@windriver.com>
5 *
6 * ftrace_dump_buf based on ftrace_dump:
7 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
8 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
9 *
10 */
11#include <linux/init.h>
12#include <linux/kgdb.h>
13#include <linux/kdb.h>
14#include <linux/ftrace.h>
15
16#include "trace.h"
17#include "trace_output.h"
18
/*
 * ftrace_dump_buf - dump the ftrace ring buffer to the kdb console
 * @skip_lines:	number of leading trace lines to consume without printing
 * @cpu_file:	a single CPU number, or TRACE_PIPE_ALL_CPU for every CPU
 *
 * Tracing is quiesced for the duration of the dump by bumping each
 * per-CPU ->disabled count; the TRACE_ITER_SYM_USEROBJ flag is cleared
 * so no user memory is touched while the system may be in a bad state.
 */
static void ftrace_dump_buf(int skip_lines, long cpu_file)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	unsigned int old_userobj;
	int cnt = 0, cpu;

	trace_init_global_iter(&iter);

	/* Keep tracers from writing to the buffers while we read them. */
	for_each_tracing_cpu(cpu) {
		atomic_inc(&iter.tr->data[cpu]->disabled);
	}

	old_userobj = trace_flags;

	/* don't look at user memory in panic mode */
	trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	kdb_printf("Dumping ftrace buffer:\n");

	/* reset all but tr, trace, and overruns */
	memset(&iter.seq, 0,
		   sizeof(struct trace_iterator) -
		   offsetof(struct trace_iterator, seq));
	iter.iter_flags |= TRACE_FILE_LAT_FMT;
	iter.pos = -1;

	/* Set up a ring-buffer read iterator per requested CPU. */
	if (cpu_file == TRACE_PIPE_ALL_CPU) {
		for_each_tracing_cpu(cpu) {
			iter.buffer_iter[cpu] =
			ring_buffer_read_prepare(iter.tr->buffer, cpu);
			ring_buffer_read_start(iter.buffer_iter[cpu]);
			tracing_iter_reset(&iter, cpu);
		}
	} else {
		iter.cpu_file = cpu_file;
		iter.buffer_iter[cpu_file] =
			ring_buffer_read_prepare(iter.tr->buffer, cpu_file);
		ring_buffer_read_start(iter.buffer_iter[cpu_file]);
		tracing_iter_reset(&iter, cpu_file);
	}
	if (!trace_empty(&iter))
		trace_find_next_entry_inc(&iter);
	while (!trace_empty(&iter)) {
		if (!cnt)
			kdb_printf("---------------------------------\n");
		cnt++;

		/* Skipped entries are consumed but never formatted. */
		if (trace_find_next_entry_inc(&iter) != NULL && !skip_lines)
			print_trace_line(&iter);
		if (!skip_lines)
			trace_printk_seq(&iter.seq);
		else
			skip_lines--;
		/* Let the operator abort a long dump from the keyboard. */
		if (KDB_FLAG(CMD_INTERRUPT))
			goto out;
	}

	if (!cnt)
		kdb_printf(" (ftrace buffer empty)\n");
	else
		kdb_printf("---------------------------------\n");

out:
	/* Restore flags, re-enable tracing, and release the iterators. */
	trace_flags = old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&iter.tr->data[cpu]->disabled);
	}

	for_each_tracing_cpu(cpu)
		if (iter.buffer_iter[cpu])
			ring_buffer_read_finish(iter.buffer_iter[cpu]);
}
93
/*
 * kdb_ftdump - Dump the ftrace log buffer
 *
 * Usage: ftdump [skip_#lines] [cpu]
 * argv[1] (optional): number of leading trace lines to skip.
 * argv[2] (optional): CPU whose buffer to dump; defaults to all CPUs.
 */
static int kdb_ftdump(int argc, const char **argv)
{
	int skip_lines = 0;
	long cpu_file;
	char *cp;

	if (argc > 2)
		return KDB_ARGCOUNT;

	if (argc) {
		skip_lines = simple_strtol(argv[1], &cp, 0);
		/* A malformed count silently means "skip nothing". */
		if (*cp)
			skip_lines = 0;
	}

	if (argc == 2) {
		cpu_file = simple_strtol(argv[2], &cp, 0);
		/* Only a fully-numeric, in-range, online CPU id is valid. */
		if (*cp || cpu_file >= NR_CPUS || cpu_file < 0 ||
		    !cpu_online(cpu_file))
			return KDB_BADINT;
	} else {
		cpu_file = TRACE_PIPE_ALL_CPU;
	}

	/* Trap printk output through kdb for the duration of the dump. */
	kdb_trap_printk++;
	ftrace_dump_buf(skip_lines, cpu_file);
	kdb_trap_printk--;

	return 0;
}
127
128static __init int kdb_ftrace_register(void)
129{
130 kdb_register_repeat("ftdump", kdb_ftdump, "[skip_#lines] [cpu]",
131 "Dump ftrace log", 0, KDB_REPEAT_NONE);
132 return 0;
133}
134
135late_initcall(kdb_ftrace_register);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
new file mode 100644
index 000000000000..2dec9bcde8b4
--- /dev/null
+++ b/kernel/trace/trace_kprobe.c
@@ -0,0 +1,1847 @@
1/*
2 * Kprobes-based tracing events
3 *
4 * Created by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/module.h>
21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32#include <linux/stringify.h>
33#include <linux/limits.h>
34#include <asm/bitsperlong.h>
35
36#include "trace.h"
37#include "trace_output.h"
38
39#define MAX_TRACE_ARGS 128
40#define MAX_ARGSTR_LEN 63
41#define MAX_EVENT_NAME_LEN 64
42#define MAX_STRING_SIZE PATH_MAX
43#define KPROBE_EVENT_SYSTEM "kprobes"
44
45/* Reserved field names */
46#define FIELD_STRING_IP "__probe_ip"
47#define FIELD_STRING_RETIP "__probe_ret_ip"
48#define FIELD_STRING_FUNC "__probe_func"
49
50const char *reserved_field_names[] = {
51 "common_type",
52 "common_flags",
53 "common_preempt_count",
54 "common_pid",
55 "common_tgid",
56 "common_lock_depth",
57 FIELD_STRING_IP,
58 FIELD_STRING_RETIP,
59 FIELD_STRING_FUNC,
60};
61
62/* Printing function type */
63typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *,
64 void *);
65#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
66#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
67
68/* Printing in basic type function template */
69#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
70static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
71 const char *name, \
72 void *data, void *ent)\
73{ \
74 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
75} \
76static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
77
78DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
79DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
80DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
81DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
82DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
83DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
84DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
85DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
86
87/* data_rloc: data relative location, compatible with u32 */
88#define make_data_rloc(len, roffs) \
89 (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
90#define get_rloc_len(dl) ((u32)(dl) >> 16)
91#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
92
93static inline void *get_rloc_data(u32 *dl)
94{
95 return (u8 *)dl + get_rloc_offs(*dl);
96}
97
98/* For data_loc conversion */
99static inline void *get_loc_data(u32 *dl, void *ent)
100{
101 return (u8 *)ent + get_rloc_offs(*dl);
102}
103
104/*
105 * Convert data_rloc to data_loc:
106 * data_rloc stores the offset from data_rloc itself, but data_loc
107 * stores the offset from event entry.
108 */
109#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
110
111/* For defining macros, define string/string_size types */
112typedef u32 string;
113typedef u32 string_size;
114
115/* Print type function for string type */
116static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
117 const char *name,
118 void *data, void *ent)
119{
120 int len = *(u32 *)data >> 16;
121
122 if (!len)
123 return trace_seq_printf(s, " %s=(fault)", name);
124 else
125 return trace_seq_printf(s, " %s=\"%s\"", name,
126 (const char *)get_loc_data(data, ent));
127}
128static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
129
130/* Data fetch function type */
131typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
132
133struct fetch_param {
134 fetch_func_t fn;
135 void *data;
136};
137
138static __kprobes void call_fetch(struct fetch_param *fprm,
139 struct pt_regs *regs, void *dest)
140{
141 return fprm->fn(regs, fprm->data, dest);
142}
143
144#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
145/*
146 * Define macro for basic types - we don't need to define s* types, because
147 * we have to care only about bitwidth at recording time.
148 */
149#define DEFINE_BASIC_FETCH_FUNCS(method) \
150DEFINE_FETCH_##method(u8) \
151DEFINE_FETCH_##method(u16) \
152DEFINE_FETCH_##method(u32) \
153DEFINE_FETCH_##method(u64)
154
155#define CHECK_FETCH_FUNCS(method, fn) \
156 (((FETCH_FUNC_NAME(method, u8) == fn) || \
157 (FETCH_FUNC_NAME(method, u16) == fn) || \
158 (FETCH_FUNC_NAME(method, u32) == fn) || \
159 (FETCH_FUNC_NAME(method, u64) == fn) || \
160 (FETCH_FUNC_NAME(method, string) == fn) || \
161 (FETCH_FUNC_NAME(method, string_size) == fn)) \
162 && (fn != NULL))
163
164/* Data fetch function templates */
165#define DEFINE_FETCH_reg(type) \
166static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
167 void *offset, void *dest) \
168{ \
169 *(type *)dest = (type)regs_get_register(regs, \
170 (unsigned int)((unsigned long)offset)); \
171}
172DEFINE_BASIC_FETCH_FUNCS(reg)
173/* No string on the register */
174#define fetch_reg_string NULL
175#define fetch_reg_string_size NULL
176
177#define DEFINE_FETCH_stack(type) \
178static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
179 void *offset, void *dest) \
180{ \
181 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
182 (unsigned int)((unsigned long)offset)); \
183}
184DEFINE_BASIC_FETCH_FUNCS(stack)
185/* No string on the stack entry */
186#define fetch_stack_string NULL
187#define fetch_stack_string_size NULL
188
189#define DEFINE_FETCH_retval(type) \
190static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
191 void *dummy, void *dest) \
192{ \
193 *(type *)dest = (type)regs_return_value(regs); \
194}
195DEFINE_BASIC_FETCH_FUNCS(retval)
196/* No string on the retval */
197#define fetch_retval_string NULL
198#define fetch_retval_string_size NULL
199
200#define DEFINE_FETCH_memory(type) \
201static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
202 void *addr, void *dest) \
203{ \
204 type retval; \
205 if (probe_kernel_address(addr, retval)) \
206 *(type *)dest = 0; \
207 else \
208 *(type *)dest = retval; \
209}
210DEFINE_BASIC_FETCH_FUNCS(memory)
/*
 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
 * length and relative data location.
 */
static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
						      void *addr, void *dest)
{
	long ret;
	int maxlen = get_rloc_len(*(u32 *)dest);
	u8 *dst = get_rloc_data(dest);
	u8 *src = addr;
	mm_segment_t old_fs = get_fs();
	if (!maxlen)
		return;
	/*
	 * Try to get string again, since the string can be changed while
	 * probing.
	 */
	set_fs(KERNEL_DS);
	pagefault_disable();
	/* Copy one byte at a time; a fault anywhere aborts the loop. */
	do
		ret = __copy_from_user_inatomic(dst++, src++, 1);
	while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
	/* Force termination even if we stopped at maxlen mid-string. */
	dst[-1] = '\0';
	pagefault_enable();
	set_fs(old_fs);

	if (ret < 0) {	/* Failed to fetch string */
		((u8 *)get_rloc_data(dest))[0] = '\0';
		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
	} else
		/* Record the number of bytes actually copied (incl. NUL). */
		*(u32 *)dest = make_data_rloc(src - (u8 *)addr,
					      get_rloc_offs(*(u32 *)dest));
}
/* Return the length of string -- including null terminal byte */
static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
							   void *addr, void *dest)
{
	int ret, len = 0;
	u8 c;
	mm_segment_t old_fs = get_fs();

	set_fs(KERNEL_DS);
	pagefault_disable();
	/* Walk the string one byte at a time until NUL, fault, or cap. */
	do {
		ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
		len++;
	} while (c && ret == 0 && len < MAX_STRING_SIZE);
	pagefault_enable();
	set_fs(old_fs);

	if (ret < 0)	/* Failed to check the length */
		*(u32 *)dest = 0;
	else
		*(u32 *)dest = len;
}
267
268/* Memory fetching by symbol */
269struct symbol_cache {
270 char *symbol;
271 long offset;
272 unsigned long addr;
273};
274
275static unsigned long update_symbol_cache(struct symbol_cache *sc)
276{
277 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
278 if (sc->addr)
279 sc->addr += sc->offset;
280 return sc->addr;
281}
282
/* Free a symbol_cache and the symbol string it owns. */
static void free_symbol_cache(struct symbol_cache *sc)
{
	kfree(sc->symbol);
	kfree(sc);
}
288
289static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
290{
291 struct symbol_cache *sc;
292
293 if (!sym || strlen(sym) == 0)
294 return NULL;
295 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
296 if (!sc)
297 return NULL;
298
299 sc->symbol = kstrdup(sym, GFP_KERNEL);
300 if (!sc->symbol) {
301 kfree(sc);
302 return NULL;
303 }
304 sc->offset = offset;
305
306 update_symbol_cache(sc);
307 return sc;
308}
309
310#define DEFINE_FETCH_symbol(type) \
311static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
312 void *data, void *dest) \
313{ \
314 struct symbol_cache *sc = data; \
315 if (sc->addr) \
316 fetch_memory_##type(regs, (void *)sc->addr, dest); \
317 else \
318 *(type *)dest = 0; \
319}
320DEFINE_BASIC_FETCH_FUNCS(symbol)
321DEFINE_FETCH_symbol(string)
322DEFINE_FETCH_symbol(string_size)
323
324/* Dereference memory access function */
325struct deref_fetch_param {
326 struct fetch_param orig;
327 long offset;
328};
329
330#define DEFINE_FETCH_deref(type) \
331static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
332 void *data, void *dest) \
333{ \
334 struct deref_fetch_param *dprm = data; \
335 unsigned long addr; \
336 call_fetch(&dprm->orig, regs, &addr); \
337 if (addr) { \
338 addr += dprm->offset; \
339 fetch_memory_##type(regs, (void *)addr, dest); \
340 } else \
341 *(type *)dest = 0; \
342}
343DEFINE_BASIC_FETCH_FUNCS(deref)
344DEFINE_FETCH_deref(string)
345DEFINE_FETCH_deref(string_size)
346
347static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
348{
349 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
350 free_deref_fetch_param(data->orig.data);
351 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
352 free_symbol_cache(data->orig.data);
353 kfree(data);
354}
355
356/* Default (unsigned long) fetch type */
357#define __DEFAULT_FETCH_TYPE(t) u##t
358#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
359#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
360#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
361
362/* Fetch types */
363enum {
364 FETCH_MTD_reg = 0,
365 FETCH_MTD_stack,
366 FETCH_MTD_retval,
367 FETCH_MTD_memory,
368 FETCH_MTD_symbol,
369 FETCH_MTD_deref,
370 FETCH_MTD_END,
371};
372
373#define ASSIGN_FETCH_FUNC(method, type) \
374 [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
375
376#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
377 {.name = _name, \
378 .size = _size, \
379 .is_signed = sign, \
380 .print = PRINT_TYPE_FUNC_NAME(ptype), \
381 .fmt = PRINT_TYPE_FMT_NAME(ptype), \
382 .fmttype = _fmttype, \
383 .fetch = { \
384ASSIGN_FETCH_FUNC(reg, ftype), \
385ASSIGN_FETCH_FUNC(stack, ftype), \
386ASSIGN_FETCH_FUNC(retval, ftype), \
387ASSIGN_FETCH_FUNC(memory, ftype), \
388ASSIGN_FETCH_FUNC(symbol, ftype), \
389ASSIGN_FETCH_FUNC(deref, ftype), \
390 } \
391 }
392
393#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
394 __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
395
396#define FETCH_TYPE_STRING 0
397#define FETCH_TYPE_STRSIZE 1
398
399/* Fetch type information table */
400static const struct fetch_type {
401 const char *name; /* Name of type */
402 size_t size; /* Byte size of type */
403 int is_signed; /* Signed flag */
404 print_type_func_t print; /* Print functions */
405 const char *fmt; /* Fromat string */
406 const char *fmttype; /* Name in format file */
407 /* Fetch functions */
408 fetch_func_t fetch[FETCH_MTD_END];
409} fetch_type_table[] = {
410 /* Special types */
411 [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
412 sizeof(u32), 1, "__data_loc char[]"),
413 [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
414 string_size, sizeof(u32), 0, "u32"),
415 /* Basic types */
416 ASSIGN_FETCH_TYPE(u8, u8, 0),
417 ASSIGN_FETCH_TYPE(u16, u16, 0),
418 ASSIGN_FETCH_TYPE(u32, u32, 0),
419 ASSIGN_FETCH_TYPE(u64, u64, 0),
420 ASSIGN_FETCH_TYPE(s8, u8, 1),
421 ASSIGN_FETCH_TYPE(s16, u16, 1),
422 ASSIGN_FETCH_TYPE(s32, u32, 1),
423 ASSIGN_FETCH_TYPE(s64, u64, 1),
424};
425
426static const struct fetch_type *find_fetch_type(const char *type)
427{
428 int i;
429
430 if (!type)
431 type = DEFAULT_FETCH_TYPE_STR;
432
433 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
434 if (strcmp(type, fetch_type_table[i].name) == 0)
435 return &fetch_type_table[i];
436 return NULL;
437}
438
/* Special function : only accept unsigned long */
static __kprobes void FETCH_FUNC_NAME(stack, address)(struct pt_regs *regs,
					void *dummy, void *dest)
{
	/* Store the probed context's kernel stack pointer into *dest. */
	*(unsigned long *)dest = kernel_stack_pointer(regs);
}
445
446static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
447 fetch_func_t orig_fn)
448{
449 int i;
450
451 if (type != &fetch_type_table[FETCH_TYPE_STRING])
452 return NULL; /* Only string type needs size function */
453 for (i = 0; i < FETCH_MTD_END; i++)
454 if (type->fetch[i] == orig_fn)
455 return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
456
457 WARN_ON(1); /* This should not happen */
458 return NULL;
459}
460
461/**
462 * Kprobe event core functions
463 */
464
465struct probe_arg {
466 struct fetch_param fetch;
467 struct fetch_param fetch_size;
468 unsigned int offset; /* Offset from argument entry */
469 const char *name; /* Name of this argument */
470 const char *comm; /* Command of this argument */
471 const struct fetch_type *type; /* Type of this argument */
472};
473
474/* Flags for trace_probe */
475#define TP_FLAG_TRACE 1
476#define TP_FLAG_PROFILE 2
477
478struct trace_probe {
479 struct list_head list;
480 struct kretprobe rp; /* Use rp.kp for kprobe use */
481 unsigned long nhit;
482 unsigned int flags; /* For TP_FLAG_* */
483 const char *symbol; /* symbol name */
484 struct ftrace_event_class class;
485 struct ftrace_event_call call;
486 ssize_t size; /* trace entry size */
487 unsigned int nr_args;
488 struct probe_arg args[];
489};
490
491#define SIZEOF_TRACE_PROBE(n) \
492 (offsetof(struct trace_probe, args) + \
493 (sizeof(struct probe_arg) * (n)))
494
495
496static __kprobes int probe_is_return(struct trace_probe *tp)
497{
498 return tp->rp.handler != NULL;
499}
500
501static __kprobes const char *probe_symbol(struct trace_probe *tp)
502{
503 return tp->symbol ? tp->symbol : "unknown";
504}
505
506static int register_probe_event(struct trace_probe *tp);
507static void unregister_probe_event(struct trace_probe *tp);
508
509static DEFINE_MUTEX(probe_lock);
510static LIST_HEAD(probe_list);
511
512static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
513static int kretprobe_dispatcher(struct kretprobe_instance *ri,
514 struct pt_regs *regs);
515
/*
 * Check the name is good for event/group/fields: it must look like a C
 * identifier, [A-Za-z_][A-Za-z0-9_]*.
 *
 * Returns 1 if @name is acceptable, 0 otherwise (including "").
 *
 * The bytes come from a user-supplied string, so they are cast to
 * unsigned char before the ctype classifiers: this keeps the result
 * well-defined regardless of the signedness of plain char (the standard
 * ctype contract requires unsigned char / EOF arguments).
 */
static int is_good_name(const char *name)
{
	if (!isalpha((unsigned char)*name) && *name != '_')
		return 0;
	while (*++name != '\0') {
		if (!isalpha((unsigned char)*name) &&
		    !isdigit((unsigned char)*name) && *name != '_')
			return 0;
	}
	return 1;
}
527
/*
 * Allocate new trace_probe and initialize it (including kprobes).
 *
 * @group/@event name the ftrace event; @symbol(+@offs) or @addr locate
 * the probe point; @nargs sizes the trailing args[] array; @is_return
 * selects a kretprobe handler instead of a kprobe pre_handler.
 *
 * Returns the new probe or an ERR_PTR(-ENOMEM / -EINVAL); everything
 * allocated so far is released on the error path.
 */
static struct trace_probe *alloc_trace_probe(const char *group,
					     const char *event,
					     void *addr,
					     const char *symbol,
					     unsigned long offs,
					     int nargs, int is_return)
{
	struct trace_probe *tp;
	int ret = -ENOMEM;

	tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(ret);

	if (symbol) {
		/* Probe by symbol name (+offset); keep our own copy. */
		tp->symbol = kstrdup(symbol, GFP_KERNEL);
		if (!tp->symbol)
			goto error;
		tp->rp.kp.symbol_name = tp->symbol;
		tp->rp.kp.offset = offs;
	} else
		tp->rp.kp.addr = addr;

	if (is_return)
		tp->rp.handler = kretprobe_dispatcher;
	else
		tp->rp.kp.pre_handler = kprobe_dispatcher;

	if (!event || !is_good_name(event)) {
		ret = -EINVAL;
		goto error;
	}

	tp->call.class = &tp->class;
	tp->call.name = kstrdup(event, GFP_KERNEL);
	if (!tp->call.name)
		goto error;

	if (!group || !is_good_name(group)) {
		ret = -EINVAL;
		goto error;
	}

	tp->class.system = kstrdup(group, GFP_KERNEL);
	if (!tp->class.system)
		goto error;

	INIT_LIST_HEAD(&tp->list);
	return tp;
error:
	/* kfree(NULL) is a no-op, so partially-built probes are safe here. */
	kfree(tp->call.name);
	kfree(tp->symbol);
	kfree(tp);
	return ERR_PTR(ret);
}
586
587static void free_probe_arg(struct probe_arg *arg)
588{
589 if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
590 free_deref_fetch_param(arg->fetch.data);
591 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
592 free_symbol_cache(arg->fetch.data);
593 kfree(arg->name);
594 kfree(arg->comm);
595}
596
597static void free_trace_probe(struct trace_probe *tp)
598{
599 int i;
600
601 for (i = 0; i < tp->nr_args; i++)
602 free_probe_arg(&tp->args[i]);
603
604 kfree(tp->call.class->system);
605 kfree(tp->call.name);
606 kfree(tp->symbol);
607 kfree(tp);
608}
609
610static struct trace_probe *find_probe_event(const char *event,
611 const char *group)
612{
613 struct trace_probe *tp;
614
615 list_for_each_entry(tp, &probe_list, list)
616 if (strcmp(tp->call.name, event) == 0 &&
617 strcmp(tp->call.class->system, group) == 0)
618 return tp;
619 return NULL;
620}
621
/* Unregister a trace_probe and probe_event: call with locking probe_lock */
static void unregister_trace_probe(struct trace_probe *tp)
{
	/* Disarm the kprobe/kretprobe before tearing down the event. */
	if (probe_is_return(tp))
		unregister_kretprobe(&tp->rp);
	else
		unregister_kprobe(&tp->rp.kp);
	list_del(&tp->list);
	unregister_probe_event(tp);
}
632
/*
 * Register a trace_probe and probe_event.
 *
 * An existing probe with the same group/event name is replaced.  The
 * kprobe starts out KPROBE_FLAG_DISABLED until the event is enabled.
 * Returns 0 on success or a negative error code; probe_lock serializes
 * against concurrent registration/removal.
 */
static int register_trace_probe(struct trace_probe *tp)
{
	struct trace_probe *old_tp;
	int ret;

	mutex_lock(&probe_lock);

	/* register as an event */
	old_tp = find_probe_event(tp->call.name, tp->call.class->system);
	if (old_tp) {
		/* delete old event */
		unregister_trace_probe(old_tp);
		free_trace_probe(old_tp);
	}
	ret = register_probe_event(tp);
	if (ret) {
		pr_warning("Failed to register probe event(%d)\n", ret);
		goto end;
	}

	tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
	if (probe_is_return(tp))
		ret = register_kretprobe(&tp->rp);
	else
		ret = register_kprobe(&tp->rp.kp);

	if (ret) {
		pr_warning("Could not insert probe(%d)\n", ret);
		if (ret == -EILSEQ) {
			/* The arch decoder says the address splits an insn. */
			pr_warning("Probing address(0x%p) is not an "
				   "instruction boundary.\n",
				   tp->rp.kp.addr);
			ret = -EINVAL;
		}
		/* Roll back the event registration on kprobe failure. */
		unregister_probe_event(tp);
	} else
		list_add_tail(&tp->list, &probe_list);
end:
	mutex_unlock(&probe_lock);
	return ret;
}
675
676/* Split symbol and offset. */
677static int split_symbol_offset(char *symbol, unsigned long *offset)
678{
679 char *tmp;
680 int ret;
681
682 if (!offset)
683 return -EINVAL;
684
685 tmp = strchr(symbol, '+');
686 if (tmp) {
687 /* skip sign because strict_strtol doesn't accept '+' */
688 ret = strict_strtoul(tmp + 1, 0, offset);
689 if (ret)
690 return ret;
691 *tmp = '\0';
692 } else
693 *offset = 0;
694 return 0;
695}
696
697#define PARAM_MAX_ARGS 16
698#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
699
700static int parse_probe_vars(char *arg, const struct fetch_type *t,
701 struct fetch_param *f, int is_return)
702{
703 int ret = 0;
704 unsigned long param;
705
706 if (strcmp(arg, "retval") == 0) {
707 if (is_return)
708 f->fn = t->fetch[FETCH_MTD_retval];
709 else
710 ret = -EINVAL;
711 } else if (strncmp(arg, "stack", 5) == 0) {
712 if (arg[5] == '\0') {
713 if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
714 f->fn = fetch_stack_address;
715 else
716 ret = -EINVAL;
717 } else if (isdigit(arg[5])) {
718 ret = strict_strtoul(arg + 5, 10, &param);
719 if (ret || param > PARAM_MAX_STACK)
720 ret = -EINVAL;
721 else {
722 f->fn = t->fetch[FETCH_MTD_stack];
723 f->data = (void *)param;
724 }
725 } else
726 ret = -EINVAL;
727 } else
728 ret = -EINVAL;
729 return ret;
730}
731
732/* Recursive argument parser */
733static int __parse_probe_arg(char *arg, const struct fetch_type *t,
734 struct fetch_param *f, int is_return)
735{
736 int ret = 0;
737 unsigned long param;
738 long offset;
739 char *tmp;
740
741 switch (arg[0]) {
742 case '$':
743 ret = parse_probe_vars(arg + 1, t, f, is_return);
744 break;
745 case '%': /* named register */
746 ret = regs_query_register_offset(arg + 1);
747 if (ret >= 0) {
748 f->fn = t->fetch[FETCH_MTD_reg];
749 f->data = (void *)(unsigned long)ret;
750 ret = 0;
751 }
752 break;
753 case '@': /* memory or symbol */
754 if (isdigit(arg[1])) {
755 ret = strict_strtoul(arg + 1, 0, &param);
756 if (ret)
757 break;
758 f->fn = t->fetch[FETCH_MTD_memory];
759 f->data = (void *)param;
760 } else {
761 ret = split_symbol_offset(arg + 1, &offset);
762 if (ret)
763 break;
764 f->data = alloc_symbol_cache(arg + 1, offset);
765 if (f->data)
766 f->fn = t->fetch[FETCH_MTD_symbol];
767 }
768 break;
769 case '+': /* deref memory */
770 case '-':
771 tmp = strchr(arg, '(');
772 if (!tmp)
773 break;
774 *tmp = '\0';
775 ret = strict_strtol(arg + 1, 0, &offset);
776 if (ret)
777 break;
778 if (arg[0] == '-')
779 offset = -offset;
780 arg = tmp + 1;
781 tmp = strrchr(arg, ')');
782 if (tmp) {
783 struct deref_fetch_param *dprm;
784 const struct fetch_type *t2 = find_fetch_type(NULL);
785 *tmp = '\0';
786 dprm = kzalloc(sizeof(struct deref_fetch_param),
787 GFP_KERNEL);
788 if (!dprm)
789 return -ENOMEM;
790 dprm->offset = offset;
791 ret = __parse_probe_arg(arg, t2, &dprm->orig,
792 is_return);
793 if (ret)
794 kfree(dprm);
795 else {
796 f->fn = t->fetch[FETCH_MTD_deref];
797 f->data = (void *)dprm;
798 }
799 }
800 break;
801 }
802 if (!ret && !f->fn) { /* Parsed, but do not find fetch method */
803 pr_info("%s type has no corresponding fetch method.\n",
804 t->name);
805 ret = -EINVAL;
806 }
807 return ret;
808}
809
/* String length checking wrapper */
static int parse_probe_arg(char *arg, struct trace_probe *tp,
			   struct probe_arg *parg, int is_return)
{
	const char *t;
	int ret;

	if (strlen(arg) > MAX_ARGSTR_LEN) {
		pr_info("Argument is too long.: %s\n", arg);
		return -ENOSPC;
	}
	/* Keep an unmodified copy of the spec for listing (probes_seq_show) */
	parg->comm = kstrdup(arg, GFP_KERNEL);
	if (!parg->comm) {
		pr_info("Failed to allocate memory for command '%s'.\n", arg);
		return -ENOMEM;
	}
	/*
	 * Split an optional ":TYPE" suffix off the argument.  parg->comm is a
	 * copy of arg, so the offset of ':' in the copy is also its offset in
	 * arg; arg itself is truncated at that position.
	 */
	t = strchr(parg->comm, ':');
	if (t) {
		arg[t - parg->comm] = '\0';
		t++;
	}
	/* t == NULL selects the default fetch type */
	parg->type = find_fetch_type(t);
	if (!parg->type) {
		pr_info("Unsupported type: %s\n", t);
		return -EINVAL;
	}
	/* Reserve room for this value in the trace-entry payload */
	parg->offset = tp->size;
	tp->size += parg->type->size;
	ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
	if (ret >= 0) {
		/* Dynamic data (e.g. strings) also needs a size-fetch method */
		parg->fetch_size.fn = get_fetch_size_function(parg->type,
							      parg->fetch.fn);
		parg->fetch_size.data = parg->fetch.data;
	}
	return ret;
}
846
847/* Return 1 if name is reserved or already used by another argument */
848static int conflict_field_name(const char *name,
849 struct probe_arg *args, int narg)
850{
851 int i;
852 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
853 if (strcmp(reserved_field_names[i], name) == 0)
854 return 1;
855 for (i = 0; i < narg; i++)
856 if (strcmp(args[i].name, name) == 0)
857 return 1;
858 return 0;
859}
860
static int create_trace_probe(int argc, char **argv)
{
	/*
	 * Argument syntax:
	 *  - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
	 *  - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
	 * Fetch args:
	 *  $retval	: fetch return value
	 *  $stack	: fetch stack address
	 *  $stackN	: fetch Nth of stack (N:0-)
	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
	 *  %REG	: fetch register REG
	 * Dereferencing memory fetch:
	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
	 * Alias name of args:
	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
	 * Type of args:
	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
	 */
	struct trace_probe *tp;
	int i, ret = 0;
	int is_return = 0, is_delete = 0;
	char *symbol = NULL, *event = NULL, *group = NULL;
	char *arg;
	unsigned long offset = 0;
	void *addr = NULL;
	char buf[MAX_EVENT_NAME_LEN];

	/* argc must be >= 1 */
	if (argv[0][0] == 'p')
		is_return = 0;
	else if (argv[0][0] == 'r')
		is_return = 1;
	else if (argv[0][0] == '-')
		is_delete = 1;
	else {
		pr_info("Probe definition must be started with 'p', 'r' or"
			" '-'.\n");
		return -EINVAL;
	}

	/* Parse the optional [GRP/]EVENT name following ':' */
	if (argv[0][1] == ':') {
		event = &argv[0][2];
		if (strchr(event, '/')) {
			group = event;
			event = strchr(group, '/') + 1;
			event[-1] = '\0';	/* split group from event */
			if (strlen(group) == 0) {
				pr_info("Group name is not specified\n");
				return -EINVAL;
			}
		}
		if (strlen(event) == 0) {
			pr_info("Event name is not specified\n");
			return -EINVAL;
		}
	}
	if (!group)
		group = KPROBE_EVENT_SYSTEM;

	if (is_delete) {
		if (!event) {
			pr_info("Delete command needs an event name.\n");
			return -EINVAL;
		}
		mutex_lock(&probe_lock);
		tp = find_probe_event(event, group);
		if (!tp) {
			mutex_unlock(&probe_lock);
			pr_info("Event %s/%s doesn't exist.\n", group, event);
			return -ENOENT;
		}
		/* delete an event */
		unregister_trace_probe(tp);
		free_trace_probe(tp);
		mutex_unlock(&probe_lock);
		return 0;
	}

	if (argc < 2) {
		pr_info("Probe point is not specified.\n");
		return -EINVAL;
	}
	if (isdigit(argv[1][0])) {
		if (is_return) {
			pr_info("Return probe point must be a symbol.\n");
			return -EINVAL;
		}
		/* an address specified */
		ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
		if (ret) {
			pr_info("Failed to parse address.\n");
			return ret;
		}
	} else {
		/* a symbol specified */
		symbol = argv[1];
		/* TODO: support .init module functions */
		ret = split_symbol_offset(symbol, &offset);
		if (ret) {
			pr_info("Failed to parse symbol.\n");
			return ret;
		}
		if (offset && is_return) {
			pr_info("Return probe must be used without offset.\n");
			return -EINVAL;
		}
	}
	/* Remaining argv[] entries are the fetch-argument specs */
	argc -= 2; argv += 2;

	/* setup a probe */
	if (!event) {
		/* Make a new event name */
		if (symbol)
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
				 is_return ? 'r' : 'p', symbol, offset);
		else
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
				 is_return ? 'r' : 'p', addr);
		event = buf;
	}
	tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
			       is_return);
	if (IS_ERR(tp)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tp));
		return PTR_ERR(tp);
	}

	/* parse arguments */
	ret = 0;
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		/* Increment count for freeing args in error case */
		tp->nr_args++;

		/* Parse argument name */
		arg = strchr(argv[i], '=');
		if (arg) {
			*arg++ = '\0';	/* NAME=SPEC: split name from spec */
			tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
		} else {
			arg = argv[i];
			/*
			 * If argument name is omitted, set "argN".  buf can be
			 * reused here: 'event' was already consumed by
			 * alloc_trace_probe() above.
			 */
			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
			tp->args[i].name = kstrdup(buf, GFP_KERNEL);
		}

		if (!tp->args[i].name) {
			pr_info("Failed to allocate argument[%d] name.\n", i);
			ret = -ENOMEM;
			goto error;
		}

		if (!is_good_name(tp->args[i].name)) {
			pr_info("Invalid argument[%d] name: %s\n",
				i, tp->args[i].name);
			ret = -EINVAL;
			goto error;
		}

		if (conflict_field_name(tp->args[i].name, tp->args, i)) {
			pr_info("Argument[%d] name '%s' conflicts with "
				"another field.\n", i, argv[i]);
			ret = -EINVAL;
			goto error;
		}

		/* Parse fetch argument */
		ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
		if (ret) {
			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
			goto error;
		}
	}

	ret = register_trace_probe(tp);
	if (ret)
		goto error;
	return 0;

error:
	/* free_trace_probe releases the nr_args names/specs parsed so far */
	free_trace_probe(tp);
	return ret;
}
1046
1047static void cleanup_all_probes(void)
1048{
1049 struct trace_probe *tp;
1050
1051 mutex_lock(&probe_lock);
1052 /* TODO: Use batch unregistration */
1053 while (!list_empty(&probe_list)) {
1054 tp = list_entry(probe_list.next, struct trace_probe, list);
1055 unregister_trace_probe(tp);
1056 free_trace_probe(tp);
1057 }
1058 mutex_unlock(&probe_lock);
1059}
1060
1061
1062/* Probes listing interfaces */
/* seq_file start: take probe_lock for the whole traversal (released in stop) */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&probe_lock);
	return seq_list_start(&probe_list, *pos);
}
1068
/* seq_file next: advance to the following probe on the global list */
static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &probe_list, pos);
}
1073
/* seq_file stop: drop the lock taken in probes_seq_start() */
static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&probe_lock);
}
1078
1079static int probes_seq_show(struct seq_file *m, void *v)
1080{
1081 struct trace_probe *tp = v;
1082 int i;
1083
1084 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
1085 seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
1086
1087 if (!tp->symbol)
1088 seq_printf(m, " 0x%p", tp->rp.kp.addr);
1089 else if (tp->rp.kp.offset)
1090 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
1091 else
1092 seq_printf(m, " %s", probe_symbol(tp));
1093
1094 for (i = 0; i < tp->nr_args; i++)
1095 seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
1096 seq_printf(m, "\n");
1097
1098 return 0;
1099}
1100
/* seq_file operations backing the "kprobe_events" listing */
static const struct seq_operations probes_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_seq_show
};
1107
/*
 * Open "kprobe_events".  Opening for write with O_TRUNC clears every
 * existing probe first (mirrors truncating the file).
 */
static int probes_open(struct inode *inode, struct file *file)
{
	if ((file->f_mode & FMODE_WRITE) &&
	    (file->f_flags & O_TRUNC))
		cleanup_all_probes();

	return seq_open(file, &probes_seq_op);
}
1116
1117static int command_trace_probe(const char *buf)
1118{
1119 char **argv;
1120 int argc = 0, ret = 0;
1121
1122 argv = argv_split(GFP_KERNEL, buf, &argc);
1123 if (!argv)
1124 return -ENOMEM;
1125
1126 if (argc)
1127 ret = create_trace_probe(argc, argv);
1128
1129 argv_free(argv);
1130 return ret;
1131}
1132
/* Maximum length of a single command line (including trailing '\0') */
#define WRITE_BUFSIZE 128

/*
 * Handle writes to "kprobe_events": split the user buffer into
 * newline-terminated commands, strip '#' comments, and run each command.
 * Returns the number of bytes consumed, or a negative error.
 */
static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	char *kbuf, *tmp;
	int ret;
	size_t done;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	ret = done = 0;
	while (done < count) {
		size = count - done;
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;	/* room for '\0' */
		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		tmp = strchr(kbuf, '\n');
		if (tmp) {
			/* Consume up to and including the newline */
			*tmp = '\0';
			size = tmp - kbuf + 1;
		} else if (done + size < count) {
			/* A full chunk with no newline: the line can't fit */
			pr_warning("Line length is too long: "
				   "Should be less than %d.", WRITE_BUFSIZE);
			ret = -EINVAL;
			goto out;
		}
		done += size;
		/* Remove comments */
		tmp = strchr(kbuf, '#');
		if (tmp)
			*tmp = '\0';

		ret = command_trace_probe(kbuf);
		if (ret)
			goto out;
	}
	ret = done;
out:
	kfree(kbuf);
	return ret;
}
1182
/* File operations for debugfs "kprobe_events": read lists, write adds/deletes */
static const struct file_operations kprobe_events_ops = {
	.owner          = THIS_MODULE,
	.open           = probes_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
	.write		= probes_write,
};
1191
1192/* Probes profiling interfaces */
/* Show one line of "kprobe_profile": event-name, hit count, missed count */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_probe *tp = v;

	/* nmissed counts handler invocations the kprobe core had to skip */
	seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
		   tp->rp.kp.nmissed);

	return 0;
}
1202
/* seq_file operations for "kprobe_profile"; shares the listing iterators */
static const struct seq_operations profile_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_profile_seq_show
};
1209
/* Open "kprobe_profile" as a read-only seq_file */
static int profile_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &profile_seq_op);
}
1214
/* File operations for debugfs "kprobe_profile" (read-only statistics) */
static const struct file_operations kprobe_profile_ops = {
	.owner          = THIS_MODULE,
	.open           = profile_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
};
1222
1223/* Sum up total data length for dynamic arraies (strings) */
1224static __kprobes int __get_data_size(struct trace_probe *tp,
1225 struct pt_regs *regs)
1226{
1227 int i, ret = 0;
1228 u32 len;
1229
1230 for (i = 0; i < tp->nr_args; i++)
1231 if (unlikely(tp->args[i].fetch_size.fn)) {
1232 call_fetch(&tp->args[i].fetch_size, regs, &len);
1233 ret += len;
1234 }
1235
1236 return ret;
1237}
1238
/*
 * Store the value of each argument into the entry payload at 'data'.
 * Fixed-size values land at their precomputed offsets; dynamic values
 * (strings) are appended after the fixed area and referenced through a
 * u32 "data location" word stored at the argument's offset.
 */
static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
				       struct pt_regs *regs,
				       u8 *data, int maxlen)
{
	int i;
	u32 end = tp->size;	/* next free byte after the fixed area */
	u32 *dl;	/* Data (relative) location */

	for (i = 0; i < tp->nr_args; i++) {
		if (unlikely(tp->args[i].fetch_size.fn)) {
			/*
			 * First, we set the relative location and
			 * maximum data length to *dl
			 */
			dl = (u32 *)(data + tp->args[i].offset);
			*dl = make_data_rloc(maxlen, end - tp->args[i].offset);
			/* Then try to fetch string or dynamic array data */
			call_fetch(&tp->args[i].fetch, regs, dl);
			/* Reduce maximum length */
			end += get_rloc_len(*dl);
			maxlen -= get_rloc_len(*dl);
			/* Trick here, convert data_rloc to data_loc */
			*dl = convert_rloc_to_loc(*dl,
				 ent_size + tp->args[i].offset);
		} else
			/* Just fetching data normally */
			call_fetch(&tp->args[i].fetch, regs,
				   data + tp->args[i].offset);
	}
}
1270
/* Kprobe handler: record one kprobe hit into the ftrace ring buffer */
static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
	struct kprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, dsize, pc;
	unsigned long irq_flags;
	struct ftrace_event_call *call = &tp->call;

	tp->nhit++;	/* shown in kprobe_profile */

	local_save_flags(irq_flags);
	pc = preempt_count();

	/* Size of dynamic data (strings) must be known before reserving */
	dsize = __get_data_size(tp, regs);
	size = sizeof(*entry) + tp->size + dsize;

	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
						  size, irq_flags, pc);
	if (!event)
		return;	/* buffer full or tracing off: silently drop */

	entry = ring_buffer_event_data(event);
	entry->ip = (unsigned long)kp->addr;
	/* Argument values are stored right after the fixed entry header */
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	if (!filter_current_check_discard(buffer, call, entry, event))
		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
}
1302
/* Kretprobe handler: record one function-return hit into the ring buffer */
static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
					  struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
	struct kretprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, pc, dsize;
	unsigned long irq_flags;
	struct ftrace_event_call *call = &tp->call;

	/*
	 * NOTE(review): unlike kprobe_trace_func(), tp->nhit is not
	 * incremented here, so kprobe_profile never counts kretprobe hits —
	 * verify whether that is intended.
	 */
	local_save_flags(irq_flags);
	pc = preempt_count();

	/* Size of dynamic data (strings) must be known before reserving */
	dsize = __get_data_size(tp, regs);
	size = sizeof(*entry) + tp->size + dsize;

	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
						  size, irq_flags, pc);
	if (!event)
		return;	/* buffer full or tracing off: silently drop */

	entry = ring_buffer_event_data(event);
	entry->func = (unsigned long)tp->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	/* Argument values are stored right after the fixed entry header */
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	if (!filter_current_check_discard(buffer, call, entry, event))
		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
}
1334
1335/* Event entry printers */
/*
 * Format one kprobe entry for human-readable trace output:
 * "EVENT: (IP+offs) name=val ...".  Returns PARTIAL_LINE if the seq
 * buffer fills mid-record so the caller can retry with a larger buffer.
 */
enum print_line_t
print_kprobe_event(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct kprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	if (!trace_seq_printf(s, "%s: (", tp->call.name))
		goto partial;

	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, ")"))
		goto partial;

	/* Argument payload follows the fixed entry header */
	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto partial;

	if (!trace_seq_puts(s, "\n"))
		goto partial;

	return TRACE_TYPE_HANDLED;
partial:
	return TRACE_TYPE_PARTIAL_LINE;
}
1371
/*
 * Format one kretprobe entry: "EVENT: (RET_IP <- FUNC) name=val ...".
 * Returns PARTIAL_LINE if the seq buffer fills mid-record.
 */
enum print_line_t
print_kretprobe_event(struct trace_iterator *iter, int flags,
		      struct trace_event *event)
{
	struct kretprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kretprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	if (!trace_seq_printf(s, "%s: (", tp->call.name))
		goto partial;

	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, " <- "))
		goto partial;

	/* The probed function itself is printed without an offset */
	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, ")"))
		goto partial;

	/* Argument payload follows the fixed entry header */
	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto partial;

	if (!trace_seq_puts(s, "\n"))
		goto partial;

	return TRACE_TYPE_HANDLED;
partial:
	return TRACE_TYPE_PARTIAL_LINE;
}
1413
1414static int probe_event_enable(struct ftrace_event_call *call)
1415{
1416 struct trace_probe *tp = (struct trace_probe *)call->data;
1417
1418 tp->flags |= TP_FLAG_TRACE;
1419 if (probe_is_return(tp))
1420 return enable_kretprobe(&tp->rp);
1421 else
1422 return enable_kprobe(&tp->rp.kp);
1423}
1424
1425static void probe_event_disable(struct ftrace_event_call *call)
1426{
1427 struct trace_probe *tp = (struct trace_probe *)call->data;
1428
1429 tp->flags &= ~TP_FLAG_TRACE;
1430 if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1431 if (probe_is_return(tp))
1432 disable_kretprobe(&tp->rp);
1433 else
1434 disable_kprobe(&tp->rp.kp);
1435 }
1436}
1437
#undef DEFINE_FIELD
/*
 * Define one fixed field of a probe event; returns from the enclosing
 * function on error (expects 'ret', 'event_call' and 'field' in scope).
 */
#define DEFINE_FIELD(type, item, name, is_signed)			\
	do {								\
		ret = trace_define_field(event_call, #type, name,	\
					 offsetof(typeof(field), item),	\
					 sizeof(field.item), is_signed, \
					 FILTER_OTHER);			\
		if (ret)						\
			return ret;					\
	} while (0)
1448
/* Define the trace-event fields of a kprobe entry: the probed IP + user args */
static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
{
	int ret, i;
	struct kprobe_trace_entry_head field;
	struct trace_probe *tp = (struct trace_probe *)event_call->data;

	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tp->nr_args; i++) {
		/* Argument payload is placed right after the fixed header */
		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
					 tp->args[i].name,
					 sizeof(field) + tp->args[i].offset,
					 tp->args[i].type->size,
					 tp->args[i].type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}
1469
/* Define the fields of a kretprobe entry: func + return IP + user args */
static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
{
	int ret, i;
	struct kretprobe_trace_entry_head field;
	struct trace_probe *tp = (struct trace_probe *)event_call->data;

	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tp->nr_args; i++) {
		/* Argument payload is placed right after the fixed header */
		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
					 tp->args[i].name,
					 sizeof(field) + tp->args[i].offset,
					 tp->args[i].type->size,
					 tp->args[i].type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}
1491
/*
 * Build the event's print_fmt string into buf (up to len bytes) and
 * return the length it needs.  Called once with len=0 to size the buffer
 * and once to actually write it (see set_print_fmt()).
 */
static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
{
	int i;
	int pos = 0;

	const char *fmt, *arg;

	if (!probe_is_return(tp)) {
		fmt = "(%lx)";
		arg = "REC->" FIELD_STRING_IP;
	} else {
		fmt = "(%lx <- %lx)";
		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
	}

	/* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

	/* Format-string half: "(...) name=%fmt ..." */
	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);

	for (i = 0; i < tp->nr_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
				tp->args[i].name, tp->args[i].type->fmt);
	}

	/* Argument half: the REC-> accessors matching the format above */
	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);

	for (i = 0; i < tp->nr_args; i++) {
		if (strcmp(tp->args[i].type->name, "string") == 0)
			pos += snprintf(buf + pos, LEN_OR_ZERO,
					", __get_str(%s)",
					tp->args[i].name);
		else
			pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
					tp->args[i].name);
	}

#undef LEN_OR_ZERO

	/* return the length of print_fmt */
	return pos;
}
1534
1535static int set_print_fmt(struct trace_probe *tp)
1536{
1537 int len;
1538 char *print_fmt;
1539
1540 /* First: called with 0 length to calculate the needed length */
1541 len = __set_print_fmt(tp, NULL, 0);
1542 print_fmt = kmalloc(len + 1, GFP_KERNEL);
1543 if (!print_fmt)
1544 return -ENOMEM;
1545
1546 /* Second: actually write the @print_fmt */
1547 __set_print_fmt(tp, print_fmt, len + 1);
1548 tp->call.print_fmt = print_fmt;
1549
1550 return 0;
1551}
1552
1553#ifdef CONFIG_PERF_EVENTS
1554
/* Kprobe profile handler: submit one hit to the perf buffer */
static __kprobes void kprobe_perf_func(struct kprobe *kp,
					 struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
	struct ftrace_event_call *call = &tp->call;
	struct kprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	/* Total record: fixed header + static args + dynamic (string) data */
	dsize = __get_data_size(tp, regs);
	__size = sizeof(*entry) + tp->size + dsize;
	/* Pad so the record plus its u32 size word is u64-aligned */
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		     "profile buffer not large enough"))
		return;

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	entry->ip = (unsigned long)kp->addr;
	/* Zero the dynamic area so stale buffer bytes never reach userspace */
	memset(&entry[1], 0, dsize);
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	head = this_cpu_ptr(call->perf_events);
	perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
}
1585
1586/* Kretprobe profile handler */
1587static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1588 struct pt_regs *regs)
1589{
1590 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1591 struct ftrace_event_call *call = &tp->call;
1592 struct kretprobe_trace_entry_head *entry;
1593 struct hlist_head *head;
1594 int size, __size, dsize;
1595 int rctx;
1596
1597 dsize = __get_data_size(tp, regs);
1598 __size = sizeof(*entry) + tp->size + dsize;
1599 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1600 size -= sizeof(u32);
1601 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1602 "profile buffer not large enough"))
1603 return;
1604
1605 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1606 if (!entry)
1607 return;
1608
1609 entry->func = (unsigned long)tp->rp.kp.addr;
1610 entry->ret_ip = (unsigned long)ri->ret_addr;
1611 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1612
1613 head = this_cpu_ptr(call->perf_events);
1614 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
1615}
1616
1617static int probe_perf_enable(struct ftrace_event_call *call)
1618{
1619 struct trace_probe *tp = (struct trace_probe *)call->data;
1620
1621 tp->flags |= TP_FLAG_PROFILE;
1622
1623 if (probe_is_return(tp))
1624 return enable_kretprobe(&tp->rp);
1625 else
1626 return enable_kprobe(&tp->rp.kp);
1627}
1628
1629static void probe_perf_disable(struct ftrace_event_call *call)
1630{
1631 struct trace_probe *tp = (struct trace_probe *)call->data;
1632
1633 tp->flags &= ~TP_FLAG_PROFILE;
1634
1635 if (!(tp->flags & TP_FLAG_TRACE)) {
1636 if (probe_is_return(tp))
1637 disable_kretprobe(&tp->rp);
1638 else
1639 disable_kprobe(&tp->rp.kp);
1640 }
1641}
1642#endif /* CONFIG_PERF_EVENTS */
1643
/*
 * Common event-class 'reg' callback: dispatch (un)registration requests
 * from the trace and perf subsystems to the matching enable/disable
 * helper.  Returns 0, or the enable helper's error code.
 */
static __kprobes
int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
{
	switch (type) {
	case TRACE_REG_REGISTER:
		return probe_event_enable(event);
	case TRACE_REG_UNREGISTER:
		probe_event_disable(event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return probe_perf_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		probe_perf_disable(event);
		return 0;
#endif
	}
	return 0;
}
1664
/* Single kprobe pre-handler: fan out to ftrace and/or perf consumers */
static __kprobes
int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);

	if (tp->flags & TP_FLAG_TRACE)
		kprobe_trace_func(kp, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kprobe_perf_func(kp, regs);
#endif
	return 0; /* We don't tweek kernel, so just return 0 */
}
1678
/* Single kretprobe handler: fan out to ftrace and/or perf consumers */
static __kprobes
int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);

	if (tp->flags & TP_FLAG_TRACE)
		kretprobe_trace_func(ri, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kretprobe_perf_func(ri, regs);
#endif
	return 0; /* We don't tweek kernel, so just return 0 */
}
1692
/* Output callbacks installed on the event depending on probe flavour */
static struct trace_event_functions kretprobe_funcs = {
	.trace		= print_kretprobe_event
};

static struct trace_event_functions kprobe_funcs = {
	.trace		= print_kprobe_event
};
1700
/*
 * Wire a trace_probe into the ftrace event subsystem: set up its output
 * callbacks, field definitions and print format, then register the event.
 * Returns 0 on success or a negative error (all allocations rolled back).
 */
static int register_probe_event(struct trace_probe *tp)
{
	struct ftrace_event_call *call = &tp->call;
	int ret;

	/* Initialize ftrace_event_call */
	INIT_LIST_HEAD(&call->class->fields);
	if (probe_is_return(tp)) {
		call->event.funcs = &kretprobe_funcs;
		call->class->define_fields = kretprobe_event_define_fields;
	} else {
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}
	if (set_print_fmt(tp) < 0)
		return -ENOMEM;
	/* register_ftrace_event() returns the assigned type; 0 means failure */
	ret = register_ftrace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}
	call->flags = 0;
	call->class->reg = kprobe_register;
	call->data = tp;
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n", call->name);
		/* Roll back the print_fmt allocation and event registration */
		kfree(call->print_fmt);
		unregister_ftrace_event(&call->event);
	}
	return ret;
}
1733
/* Detach a probe's event from ftrace and free its print format */
static void unregister_probe_event(struct trace_probe *tp)
{
	/* tp->event is unregistered in trace_remove_event_call() */
	trace_remove_event_call(&tp->call);
	kfree(tp->call.print_fmt);
}
1740
1741/* Make a debugfs interface for controling probe points */
static __init int init_kprobe_trace(void)
{
	struct dentry *d_tracer;
	struct dentry *entry;

	d_tracer = tracing_init_dentry();
	if (!d_tracer)
		return 0;	/* tracing debugfs root unavailable */

	entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
				    NULL, &kprobe_events_ops);

	/* Event list interface */
	if (!entry)
		pr_warning("Could not create debugfs "
			   "'kprobe_events' entry\n");

	/* Profile interface */
	entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
				    NULL, &kprobe_profile_ops);

	if (!entry)
		pr_warning("Could not create debugfs "
			   "'kprobe_profile' entry\n");
	/* Missing debugfs entries are non-fatal: the tracer still works */
	return 0;
}
fs_initcall(init_kprobe_trace);
1769
1770
1771#ifdef CONFIG_FTRACE_STARTUP_TEST
1772
/* Dummy function the startup self-test attaches its probes to */
static int kprobe_trace_selftest_target(int a1, int a2, int a3,
					int a4, int a5, int a6)
{
	int sum = a1;

	sum += a2;
	sum += a3;
	sum += a4;
	sum += a5;
	sum += a6;
	return sum;
}
1778
/*
 * Startup self-test: create a kprobe and a kretprobe on the dummy target,
 * enable both, hit them once, then delete them.  Failures only warn;
 * the function always returns 0 so boot continues.
 */
static __init int kprobe_trace_self_tests_init(void)
{
	int ret, warn = 0;
	int (*target)(int, int, int, int, int, int);
	struct trace_probe *tp;

	/* Call through a pointer so the call is not inlined/optimized away */
	target = kprobe_trace_selftest_target;

	pr_info("Testing kprobe tracing: ");

	ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
				  "$stack $stack0 +0($stack)");
	if (WARN_ON_ONCE(ret)) {
		pr_warning("error on probing function entry.\n");
		warn++;
	} else {
		/* Enable trace point */
		tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tp == NULL)) {
			pr_warning("error on getting new probe.\n");
			warn++;
		} else
			probe_event_enable(&tp->call);
	}

	ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
				  "$retval");
	if (WARN_ON_ONCE(ret)) {
		pr_warning("error on probing function return.\n");
		warn++;
	} else {
		/* Enable trace point */
		tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tp == NULL)) {
			pr_warning("error on getting new probe.\n");
			warn++;
		} else
			probe_event_enable(&tp->call);
	}

	if (warn)
		goto end;

	/* Fire both probes exactly once */
	ret = target(1, 2, 3, 4, 5, 6);

	ret = command_trace_probe("-:testprobe");
	if (WARN_ON_ONCE(ret)) {
		pr_warning("error on deleting a probe.\n");
		warn++;
	}

	ret = command_trace_probe("-:testprobe2");
	if (WARN_ON_ONCE(ret)) {
		pr_warning("error on deleting a probe.\n");
		warn++;
	}

end:
	cleanup_all_probes();
	if (warn)
		pr_cont("NG: Some tests are failed. Please check them.\n");
	else
		pr_cont("OK\n");
	return 0;
}

late_initcall(kprobe_trace_self_tests_init);
1846
1847#endif
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index d53b45ed0806..017fa376505d 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,7 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/mmiotrace.h> 10#include <linux/mmiotrace.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <linux/slab.h>
12#include <linux/time.h> 13#include <linux/time.h>
13 14
14#include <asm/atomic.h> 15#include <asm/atomic.h>
@@ -307,11 +308,13 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
307 struct trace_array_cpu *data, 308 struct trace_array_cpu *data,
308 struct mmiotrace_rw *rw) 309 struct mmiotrace_rw *rw)
309{ 310{
311 struct ftrace_event_call *call = &event_mmiotrace_rw;
312 struct ring_buffer *buffer = tr->buffer;
310 struct ring_buffer_event *event; 313 struct ring_buffer_event *event;
311 struct trace_mmiotrace_rw *entry; 314 struct trace_mmiotrace_rw *entry;
312 int pc = preempt_count(); 315 int pc = preempt_count();
313 316
314 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW, 317 event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW,
315 sizeof(*entry), 0, pc); 318 sizeof(*entry), 0, pc);
316 if (!event) { 319 if (!event) {
317 atomic_inc(&dropped_count); 320 atomic_inc(&dropped_count);
@@ -319,7 +322,9 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
319 } 322 }
320 entry = ring_buffer_event_data(event); 323 entry = ring_buffer_event_data(event);
321 entry->rw = *rw; 324 entry->rw = *rw;
322 trace_buffer_unlock_commit(tr, event, 0, pc); 325
326 if (!filter_check_discard(call, entry, buffer, event))
327 trace_buffer_unlock_commit(buffer, event, 0, pc);
323} 328}
324 329
325void mmio_trace_rw(struct mmiotrace_rw *rw) 330void mmio_trace_rw(struct mmiotrace_rw *rw)
@@ -333,11 +338,13 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
333 struct trace_array_cpu *data, 338 struct trace_array_cpu *data,
334 struct mmiotrace_map *map) 339 struct mmiotrace_map *map)
335{ 340{
341 struct ftrace_event_call *call = &event_mmiotrace_map;
342 struct ring_buffer *buffer = tr->buffer;
336 struct ring_buffer_event *event; 343 struct ring_buffer_event *event;
337 struct trace_mmiotrace_map *entry; 344 struct trace_mmiotrace_map *entry;
338 int pc = preempt_count(); 345 int pc = preempt_count();
339 346
340 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP, 347 event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_MAP,
341 sizeof(*entry), 0, pc); 348 sizeof(*entry), 0, pc);
342 if (!event) { 349 if (!event) {
343 atomic_inc(&dropped_count); 350 atomic_inc(&dropped_count);
@@ -345,7 +352,9 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
345 } 352 }
346 entry = ring_buffer_event_data(event); 353 entry = ring_buffer_event_data(event);
347 entry->map = *map; 354 entry->map = *map;
348 trace_buffer_unlock_commit(tr, event, 0, pc); 355
356 if (!filter_check_discard(call, entry, buffer, event))
357 trace_buffer_unlock_commit(buffer, event, 0, pc);
349} 358}
350 359
351void mmio_trace_mapping(struct mmiotrace_map *map) 360void mmio_trace_mapping(struct mmiotrace_map *map)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 7938f3ae93e3..02272baa2206 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -16,21 +16,25 @@
16 16
17DECLARE_RWSEM(trace_event_mutex); 17DECLARE_RWSEM(trace_event_mutex);
18 18
19DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
20EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
21
22static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; 19static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
23 20
24static int next_event_type = __TRACE_LAST_TYPE + 1; 21static int next_event_type = __TRACE_LAST_TYPE + 1;
25 22
26void trace_print_seq(struct seq_file *m, struct trace_seq *s) 23int trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 24{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 25 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
26 int ret;
29 27
30 s->buffer[len] = 0; 28 ret = seq_write(m, s->buffer, len);
31 seq_puts(m, s->buffer);
32 29
33 trace_seq_init(s); 30 /*
31 * Only reset this buffer if we successfully wrote to the
32 * seq_file buffer.
33 */
34 if (!ret)
35 trace_seq_init(s);
36
37 return ret;
34} 38}
35 39
36enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) 40enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
@@ -70,6 +74,9 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
70 * @s: trace sequence descriptor 74 * @s: trace sequence descriptor
71 * @fmt: printf format string 75 * @fmt: printf format string
72 * 76 *
77 * It returns 0 if the trace oversizes the buffer's free
78 * space, 1 otherwise.
79 *
73 * The tracer may use either sequence operations or its own 80 * The tracer may use either sequence operations or its own
74 * copy to user routines. To simplify formating of a trace 81 * copy to user routines. To simplify formating of a trace
75 * trace_seq_printf is used to store strings into a special 82 * trace_seq_printf is used to store strings into a special
@@ -83,7 +90,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
83 va_list ap; 90 va_list ap;
84 int ret; 91 int ret;
85 92
86 if (!len) 93 if (s->full || !len)
87 return 0; 94 return 0;
88 95
89 va_start(ap, fmt); 96 va_start(ap, fmt);
@@ -91,12 +98,14 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
91 va_end(ap); 98 va_end(ap);
92 99
93 /* If we can't write it all, don't bother writing anything */ 100 /* If we can't write it all, don't bother writing anything */
94 if (ret >= len) 101 if (ret >= len) {
102 s->full = 1;
95 return 0; 103 return 0;
104 }
96 105
97 s->len += ret; 106 s->len += ret;
98 107
99 return len; 108 return 1;
100} 109}
101EXPORT_SYMBOL_GPL(trace_seq_printf); 110EXPORT_SYMBOL_GPL(trace_seq_printf);
102 111
@@ -117,14 +126,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
117 int len = (PAGE_SIZE - 1) - s->len; 126 int len = (PAGE_SIZE - 1) - s->len;
118 int ret; 127 int ret;
119 128
120 if (!len) 129 if (s->full || !len)
121 return 0; 130 return 0;
122 131
123 ret = vsnprintf(s->buffer + s->len, len, fmt, args); 132 ret = vsnprintf(s->buffer + s->len, len, fmt, args);
124 133
125 /* If we can't write it all, don't bother writing anything */ 134 /* If we can't write it all, don't bother writing anything */
126 if (ret >= len) 135 if (ret >= len) {
136 s->full = 1;
127 return 0; 137 return 0;
138 }
128 139
129 s->len += ret; 140 s->len += ret;
130 141
@@ -137,14 +148,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
137 int len = (PAGE_SIZE - 1) - s->len; 148 int len = (PAGE_SIZE - 1) - s->len;
138 int ret; 149 int ret;
139 150
140 if (!len) 151 if (s->full || !len)
141 return 0; 152 return 0;
142 153
143 ret = bstr_printf(s->buffer + s->len, len, fmt, binary); 154 ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
144 155
145 /* If we can't write it all, don't bother writing anything */ 156 /* If we can't write it all, don't bother writing anything */
146 if (ret >= len) 157 if (ret >= len) {
158 s->full = 1;
147 return 0; 159 return 0;
160 }
148 161
149 s->len += ret; 162 s->len += ret;
150 163
@@ -165,9 +178,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
165{ 178{
166 int len = strlen(str); 179 int len = strlen(str);
167 180
168 if (len > ((PAGE_SIZE - 1) - s->len)) 181 if (s->full)
169 return 0; 182 return 0;
170 183
184 if (len > ((PAGE_SIZE - 1) - s->len)) {
185 s->full = 1;
186 return 0;
187 }
188
171 memcpy(s->buffer + s->len, str, len); 189 memcpy(s->buffer + s->len, str, len);
172 s->len += len; 190 s->len += len;
173 191
@@ -176,19 +194,30 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
176 194
177int trace_seq_putc(struct trace_seq *s, unsigned char c) 195int trace_seq_putc(struct trace_seq *s, unsigned char c)
178{ 196{
179 if (s->len >= (PAGE_SIZE - 1)) 197 if (s->full)
198 return 0;
199
200 if (s->len >= (PAGE_SIZE - 1)) {
201 s->full = 1;
180 return 0; 202 return 0;
203 }
181 204
182 s->buffer[s->len++] = c; 205 s->buffer[s->len++] = c;
183 206
184 return 1; 207 return 1;
185} 208}
209EXPORT_SYMBOL(trace_seq_putc);
186 210
187int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) 211int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
188{ 212{
189 if (len > ((PAGE_SIZE - 1) - s->len)) 213 if (s->full)
190 return 0; 214 return 0;
191 215
216 if (len > ((PAGE_SIZE - 1) - s->len)) {
217 s->full = 1;
218 return 0;
219 }
220
192 memcpy(s->buffer + s->len, mem, len); 221 memcpy(s->buffer + s->len, mem, len);
193 s->len += len; 222 s->len += len;
194 223
@@ -201,6 +230,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
201 const unsigned char *data = mem; 230 const unsigned char *data = mem;
202 int i, j; 231 int i, j;
203 232
233 if (s->full)
234 return 0;
235
204#ifdef __BIG_ENDIAN 236#ifdef __BIG_ENDIAN
205 for (i = 0, j = 0; i < len; i++) { 237 for (i = 0, j = 0; i < len; i++) {
206#else 238#else
@@ -218,9 +250,14 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
218{ 250{
219 void *ret; 251 void *ret;
220 252
221 if (len > ((PAGE_SIZE - 1) - s->len)) 253 if (s->full)
222 return NULL; 254 return NULL;
223 255
256 if (len > ((PAGE_SIZE - 1) - s->len)) {
257 s->full = 1;
258 return NULL;
259 }
260
224 ret = s->buffer + s->len; 261 ret = s->buffer + s->len;
225 s->len += len; 262 s->len += len;
226 263
@@ -231,8 +268,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
231{ 268{
232 unsigned char *p; 269 unsigned char *p;
233 270
234 if (s->len >= (PAGE_SIZE - 1)) 271 if (s->full)
272 return 0;
273
274 if (s->len >= (PAGE_SIZE - 1)) {
275 s->full = 1;
235 return 0; 276 return 0;
277 }
278
236 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); 279 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
237 if (!IS_ERR(p)) { 280 if (!IS_ERR(p)) {
238 p = mangle_path(s->buffer + s->len, p, "\n"); 281 p = mangle_path(s->buffer + s->len, p, "\n");
@@ -245,6 +288,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
245 return 1; 288 return 1;
246 } 289 }
247 290
291 s->full = 1;
248 return 0; 292 return 0;
249} 293}
250 294
@@ -309,6 +353,21 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
309} 353}
310EXPORT_SYMBOL(ftrace_print_symbols_seq); 354EXPORT_SYMBOL(ftrace_print_symbols_seq);
311 355
356const char *
357ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
358{
359 int i;
360 const char *ret = p->buffer + p->len;
361
362 for (i = 0; i < buf_len; i++)
363 trace_seq_printf(p, "%s%2.2x", i == 0 ? "" : " ", buf[i]);
364
365 trace_seq_putc(p, 0);
366
367 return ret;
368}
369EXPORT_SYMBOL(ftrace_print_hex_seq);
370
312#ifdef CONFIG_KRETPROBES 371#ifdef CONFIG_KRETPROBES
313static inline const char *kretprobed(const char *name) 372static inline const char *kretprobed(const char *name)
314{ 373{
@@ -371,6 +430,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
371 unsigned long vmstart = 0; 430 unsigned long vmstart = 0;
372 int ret = 1; 431 int ret = 1;
373 432
433 if (s->full)
434 return 0;
435
374 if (mm) { 436 if (mm) {
375 const struct vm_area_struct *vma; 437 const struct vm_area_struct *vma;
376 438
@@ -408,7 +470,7 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
408 * since individual threads might have already quit! 470 * since individual threads might have already quit!
409 */ 471 */
410 rcu_read_lock(); 472 rcu_read_lock();
411 task = find_task_by_vpid(entry->ent.tgid); 473 task = find_task_by_vpid(entry->tgid);
412 if (task) 474 if (task)
413 mm = get_task_mm(task); 475 mm = get_task_mm(task);
414 rcu_read_unlock(); 476 rcu_read_unlock();
@@ -461,18 +523,23 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
461 return ret; 523 return ret;
462} 524}
463 525
464static int 526/**
465lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) 527 * trace_print_lat_fmt - print the irq, preempt and lockdep fields
528 * @s: trace seq struct to write to
529 * @entry: The trace entry field from the ring buffer
530 *
531 * Prints the generic fields of irqs off, in hard or softirq, preempt
532 * count and lock depth.
533 */
534int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
466{ 535{
467 int hardirq, softirq; 536 int hardirq, softirq;
468 char comm[TASK_COMM_LEN]; 537 int ret;
469 538
470 trace_find_cmdline(entry->pid, comm);
471 hardirq = entry->flags & TRACE_FLAG_HARDIRQ; 539 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
472 softirq = entry->flags & TRACE_FLAG_SOFTIRQ; 540 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
473 541
474 if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c", 542 if (!trace_seq_printf(s, "%c%c%c",
475 comm, entry->pid, cpu,
476 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : 543 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
477 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 544 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
478 'X' : '.', 545 'X' : '.',
@@ -483,8 +550,31 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
483 return 0; 550 return 0;
484 551
485 if (entry->preempt_count) 552 if (entry->preempt_count)
486 return trace_seq_printf(s, "%x", entry->preempt_count); 553 ret = trace_seq_printf(s, "%x", entry->preempt_count);
487 return trace_seq_puts(s, "."); 554 else
555 ret = trace_seq_putc(s, '.');
556
557 if (!ret)
558 return 0;
559
560 if (entry->lock_depth < 0)
561 return trace_seq_putc(s, '.');
562
563 return trace_seq_printf(s, "%d", entry->lock_depth);
564}
565
566static int
567lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
568{
569 char comm[TASK_COMM_LEN];
570
571 trace_find_cmdline(entry->pid, comm);
572
573 if (!trace_seq_printf(s, "%8.8s-%-5d %3d",
574 comm, entry->pid, cpu))
575 return 0;
576
577 return trace_print_lat_fmt(s, entry);
488} 578}
489 579
490static unsigned long preempt_mark_thresh = 100; 580static unsigned long preempt_mark_thresh = 100;
@@ -649,6 +739,9 @@ int register_ftrace_event(struct trace_event *event)
649 if (WARN_ON(!event)) 739 if (WARN_ON(!event))
650 goto out; 740 goto out;
651 741
742 if (WARN_ON(!event->funcs))
743 goto out;
744
652 INIT_LIST_HEAD(&event->list); 745 INIT_LIST_HEAD(&event->list);
653 746
654 if (!event->type) { 747 if (!event->type) {
@@ -681,14 +774,14 @@ int register_ftrace_event(struct trace_event *event)
681 goto out; 774 goto out;
682 } 775 }
683 776
684 if (event->trace == NULL) 777 if (event->funcs->trace == NULL)
685 event->trace = trace_nop_print; 778 event->funcs->trace = trace_nop_print;
686 if (event->raw == NULL) 779 if (event->funcs->raw == NULL)
687 event->raw = trace_nop_print; 780 event->funcs->raw = trace_nop_print;
688 if (event->hex == NULL) 781 if (event->funcs->hex == NULL)
689 event->hex = trace_nop_print; 782 event->funcs->hex = trace_nop_print;
690 if (event->binary == NULL) 783 if (event->funcs->binary == NULL)
691 event->binary = trace_nop_print; 784 event->funcs->binary = trace_nop_print;
692 785
693 key = event->type & (EVENT_HASHSIZE - 1); 786 key = event->type & (EVENT_HASHSIZE - 1);
694 787
@@ -730,13 +823,15 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_event);
730 * Standard events 823 * Standard events
731 */ 824 */
732 825
733enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags) 826enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags,
827 struct trace_event *event)
734{ 828{
735 return TRACE_TYPE_HANDLED; 829 return TRACE_TYPE_HANDLED;
736} 830}
737 831
738/* TRACE_FN */ 832/* TRACE_FN */
739static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags) 833static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags,
834 struct trace_event *event)
740{ 835{
741 struct ftrace_entry *field; 836 struct ftrace_entry *field;
742 struct trace_seq *s = &iter->seq; 837 struct trace_seq *s = &iter->seq;
@@ -763,7 +858,8 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags)
763 return TRACE_TYPE_PARTIAL_LINE; 858 return TRACE_TYPE_PARTIAL_LINE;
764} 859}
765 860
766static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags) 861static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags,
862 struct trace_event *event)
767{ 863{
768 struct ftrace_entry *field; 864 struct ftrace_entry *field;
769 865
@@ -777,7 +873,8 @@ static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags)
777 return TRACE_TYPE_HANDLED; 873 return TRACE_TYPE_HANDLED;
778} 874}
779 875
780static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags) 876static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags,
877 struct trace_event *event)
781{ 878{
782 struct ftrace_entry *field; 879 struct ftrace_entry *field;
783 struct trace_seq *s = &iter->seq; 880 struct trace_seq *s = &iter->seq;
@@ -790,7 +887,8 @@ static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags)
790 return TRACE_TYPE_HANDLED; 887 return TRACE_TYPE_HANDLED;
791} 888}
792 889
793static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags) 890static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags,
891 struct trace_event *event)
794{ 892{
795 struct ftrace_entry *field; 893 struct ftrace_entry *field;
796 struct trace_seq *s = &iter->seq; 894 struct trace_seq *s = &iter->seq;
@@ -803,14 +901,18 @@ static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags)
803 return TRACE_TYPE_HANDLED; 901 return TRACE_TYPE_HANDLED;
804} 902}
805 903
806static struct trace_event trace_fn_event = { 904static struct trace_event_functions trace_fn_funcs = {
807 .type = TRACE_FN,
808 .trace = trace_fn_trace, 905 .trace = trace_fn_trace,
809 .raw = trace_fn_raw, 906 .raw = trace_fn_raw,
810 .hex = trace_fn_hex, 907 .hex = trace_fn_hex,
811 .binary = trace_fn_bin, 908 .binary = trace_fn_bin,
812}; 909};
813 910
911static struct trace_event trace_fn_event = {
912 .type = TRACE_FN,
913 .funcs = &trace_fn_funcs,
914};
915
814/* TRACE_CTX an TRACE_WAKE */ 916/* TRACE_CTX an TRACE_WAKE */
815static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, 917static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
816 char *delim) 918 char *delim)
@@ -839,13 +941,14 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
839 return TRACE_TYPE_HANDLED; 941 return TRACE_TYPE_HANDLED;
840} 942}
841 943
842static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags) 944static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags,
945 struct trace_event *event)
843{ 946{
844 return trace_ctxwake_print(iter, "==>"); 947 return trace_ctxwake_print(iter, "==>");
845} 948}
846 949
847static enum print_line_t trace_wake_print(struct trace_iterator *iter, 950static enum print_line_t trace_wake_print(struct trace_iterator *iter,
848 int flags) 951 int flags, struct trace_event *event)
849{ 952{
850 return trace_ctxwake_print(iter, " +"); 953 return trace_ctxwake_print(iter, " +");
851} 954}
@@ -858,7 +961,7 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
858 trace_assign_type(field, iter->ent); 961 trace_assign_type(field, iter->ent);
859 962
860 if (!S) 963 if (!S)
861 task_state_char(field->prev_state); 964 S = task_state_char(field->prev_state);
862 T = task_state_char(field->next_state); 965 T = task_state_char(field->next_state);
863 if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", 966 if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
864 field->prev_pid, 967 field->prev_pid,
@@ -873,12 +976,14 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
873 return TRACE_TYPE_HANDLED; 976 return TRACE_TYPE_HANDLED;
874} 977}
875 978
876static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags) 979static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags,
980 struct trace_event *event)
877{ 981{
878 return trace_ctxwake_raw(iter, 0); 982 return trace_ctxwake_raw(iter, 0);
879} 983}
880 984
881static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags) 985static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags,
986 struct trace_event *event)
882{ 987{
883 return trace_ctxwake_raw(iter, '+'); 988 return trace_ctxwake_raw(iter, '+');
884} 989}
@@ -893,7 +998,7 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
893 trace_assign_type(field, iter->ent); 998 trace_assign_type(field, iter->ent);
894 999
895 if (!S) 1000 if (!S)
896 task_state_char(field->prev_state); 1001 S = task_state_char(field->prev_state);
897 T = task_state_char(field->next_state); 1002 T = task_state_char(field->next_state);
898 1003
899 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); 1004 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
@@ -907,18 +1012,20 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
907 return TRACE_TYPE_HANDLED; 1012 return TRACE_TYPE_HANDLED;
908} 1013}
909 1014
910static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags) 1015static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags,
1016 struct trace_event *event)
911{ 1017{
912 return trace_ctxwake_hex(iter, 0); 1018 return trace_ctxwake_hex(iter, 0);
913} 1019}
914 1020
915static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags) 1021static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags,
1022 struct trace_event *event)
916{ 1023{
917 return trace_ctxwake_hex(iter, '+'); 1024 return trace_ctxwake_hex(iter, '+');
918} 1025}
919 1026
920static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, 1027static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
921 int flags) 1028 int flags, struct trace_event *event)
922{ 1029{
923 struct ctx_switch_entry *field; 1030 struct ctx_switch_entry *field;
924 struct trace_seq *s = &iter->seq; 1031 struct trace_seq *s = &iter->seq;
@@ -935,81 +1042,34 @@ static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
935 return TRACE_TYPE_HANDLED; 1042 return TRACE_TYPE_HANDLED;
936} 1043}
937 1044
938static struct trace_event trace_ctx_event = { 1045static struct trace_event_functions trace_ctx_funcs = {
939 .type = TRACE_CTX,
940 .trace = trace_ctx_print, 1046 .trace = trace_ctx_print,
941 .raw = trace_ctx_raw, 1047 .raw = trace_ctx_raw,
942 .hex = trace_ctx_hex, 1048 .hex = trace_ctx_hex,
943 .binary = trace_ctxwake_bin, 1049 .binary = trace_ctxwake_bin,
944}; 1050};
945 1051
946static struct trace_event trace_wake_event = { 1052static struct trace_event trace_ctx_event = {
947 .type = TRACE_WAKE, 1053 .type = TRACE_CTX,
1054 .funcs = &trace_ctx_funcs,
1055};
1056
1057static struct trace_event_functions trace_wake_funcs = {
948 .trace = trace_wake_print, 1058 .trace = trace_wake_print,
949 .raw = trace_wake_raw, 1059 .raw = trace_wake_raw,
950 .hex = trace_wake_hex, 1060 .hex = trace_wake_hex,
951 .binary = trace_ctxwake_bin, 1061 .binary = trace_ctxwake_bin,
952}; 1062};
953 1063
954/* TRACE_SPECIAL */ 1064static struct trace_event trace_wake_event = {
955static enum print_line_t trace_special_print(struct trace_iterator *iter, 1065 .type = TRACE_WAKE,
956 int flags) 1066 .funcs = &trace_wake_funcs,
957{
958 struct special_entry *field;
959
960 trace_assign_type(field, iter->ent);
961
962 if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n",
963 field->arg1,
964 field->arg2,
965 field->arg3))
966 return TRACE_TYPE_PARTIAL_LINE;
967
968 return TRACE_TYPE_HANDLED;
969}
970
971static enum print_line_t trace_special_hex(struct trace_iterator *iter,
972 int flags)
973{
974 struct special_entry *field;
975 struct trace_seq *s = &iter->seq;
976
977 trace_assign_type(field, iter->ent);
978
979 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
980 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
981 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
982
983 return TRACE_TYPE_HANDLED;
984}
985
986static enum print_line_t trace_special_bin(struct trace_iterator *iter,
987 int flags)
988{
989 struct special_entry *field;
990 struct trace_seq *s = &iter->seq;
991
992 trace_assign_type(field, iter->ent);
993
994 SEQ_PUT_FIELD_RET(s, field->arg1);
995 SEQ_PUT_FIELD_RET(s, field->arg2);
996 SEQ_PUT_FIELD_RET(s, field->arg3);
997
998 return TRACE_TYPE_HANDLED;
999}
1000
1001static struct trace_event trace_special_event = {
1002 .type = TRACE_SPECIAL,
1003 .trace = trace_special_print,
1004 .raw = trace_special_print,
1005 .hex = trace_special_hex,
1006 .binary = trace_special_bin,
1007}; 1067};
1008 1068
1009/* TRACE_STACK */ 1069/* TRACE_STACK */
1010 1070
1011static enum print_line_t trace_stack_print(struct trace_iterator *iter, 1071static enum print_line_t trace_stack_print(struct trace_iterator *iter,
1012 int flags) 1072 int flags, struct trace_event *event)
1013{ 1073{
1014 struct stack_entry *field; 1074 struct stack_entry *field;
1015 struct trace_seq *s = &iter->seq; 1075 struct trace_seq *s = &iter->seq;
@@ -1037,17 +1097,18 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
1037 return TRACE_TYPE_PARTIAL_LINE; 1097 return TRACE_TYPE_PARTIAL_LINE;
1038} 1098}
1039 1099
1100static struct trace_event_functions trace_stack_funcs = {
1101 .trace = trace_stack_print,
1102};
1103
1040static struct trace_event trace_stack_event = { 1104static struct trace_event trace_stack_event = {
1041 .type = TRACE_STACK, 1105 .type = TRACE_STACK,
1042 .trace = trace_stack_print, 1106 .funcs = &trace_stack_funcs,
1043 .raw = trace_special_print,
1044 .hex = trace_special_hex,
1045 .binary = trace_special_bin,
1046}; 1107};
1047 1108
1048/* TRACE_USER_STACK */ 1109/* TRACE_USER_STACK */
1049static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, 1110static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
1050 int flags) 1111 int flags, struct trace_event *event)
1051{ 1112{
1052 struct userstack_entry *field; 1113 struct userstack_entry *field;
1053 struct trace_seq *s = &iter->seq; 1114 struct trace_seq *s = &iter->seq;
@@ -1066,17 +1127,19 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
1066 return TRACE_TYPE_PARTIAL_LINE; 1127 return TRACE_TYPE_PARTIAL_LINE;
1067} 1128}
1068 1129
1130static struct trace_event_functions trace_user_stack_funcs = {
1131 .trace = trace_user_stack_print,
1132};
1133
1069static struct trace_event trace_user_stack_event = { 1134static struct trace_event trace_user_stack_event = {
1070 .type = TRACE_USER_STACK, 1135 .type = TRACE_USER_STACK,
1071 .trace = trace_user_stack_print, 1136 .funcs = &trace_user_stack_funcs,
1072 .raw = trace_special_print,
1073 .hex = trace_special_hex,
1074 .binary = trace_special_bin,
1075}; 1137};
1076 1138
1077/* TRACE_BPRINT */ 1139/* TRACE_BPRINT */
1078static enum print_line_t 1140static enum print_line_t
1079trace_bprint_print(struct trace_iterator *iter, int flags) 1141trace_bprint_print(struct trace_iterator *iter, int flags,
1142 struct trace_event *event)
1080{ 1143{
1081 struct trace_entry *entry = iter->ent; 1144 struct trace_entry *entry = iter->ent;
1082 struct trace_seq *s = &iter->seq; 1145 struct trace_seq *s = &iter->seq;
@@ -1101,7 +1164,8 @@ trace_bprint_print(struct trace_iterator *iter, int flags)
1101 1164
1102 1165
1103static enum print_line_t 1166static enum print_line_t
1104trace_bprint_raw(struct trace_iterator *iter, int flags) 1167trace_bprint_raw(struct trace_iterator *iter, int flags,
1168 struct trace_event *event)
1105{ 1169{
1106 struct bprint_entry *field; 1170 struct bprint_entry *field;
1107 struct trace_seq *s = &iter->seq; 1171 struct trace_seq *s = &iter->seq;
@@ -1120,16 +1184,19 @@ trace_bprint_raw(struct trace_iterator *iter, int flags)
1120 return TRACE_TYPE_PARTIAL_LINE; 1184 return TRACE_TYPE_PARTIAL_LINE;
1121} 1185}
1122 1186
1187static struct trace_event_functions trace_bprint_funcs = {
1188 .trace = trace_bprint_print,
1189 .raw = trace_bprint_raw,
1190};
1123 1191
1124static struct trace_event trace_bprint_event = { 1192static struct trace_event trace_bprint_event = {
1125 .type = TRACE_BPRINT, 1193 .type = TRACE_BPRINT,
1126 .trace = trace_bprint_print, 1194 .funcs = &trace_bprint_funcs,
1127 .raw = trace_bprint_raw,
1128}; 1195};
1129 1196
1130/* TRACE_PRINT */ 1197/* TRACE_PRINT */
1131static enum print_line_t trace_print_print(struct trace_iterator *iter, 1198static enum print_line_t trace_print_print(struct trace_iterator *iter,
1132 int flags) 1199 int flags, struct trace_event *event)
1133{ 1200{
1134 struct print_entry *field; 1201 struct print_entry *field;
1135 struct trace_seq *s = &iter->seq; 1202 struct trace_seq *s = &iter->seq;
@@ -1148,7 +1215,8 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter,
1148 return TRACE_TYPE_PARTIAL_LINE; 1215 return TRACE_TYPE_PARTIAL_LINE;
1149} 1216}
1150 1217
1151static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) 1218static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags,
1219 struct trace_event *event)
1152{ 1220{
1153 struct print_entry *field; 1221 struct print_entry *field;
1154 1222
@@ -1163,18 +1231,21 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
1163 return TRACE_TYPE_PARTIAL_LINE; 1231 return TRACE_TYPE_PARTIAL_LINE;
1164} 1232}
1165 1233
1166static struct trace_event trace_print_event = { 1234static struct trace_event_functions trace_print_funcs = {
1167 .type = TRACE_PRINT,
1168 .trace = trace_print_print, 1235 .trace = trace_print_print,
1169 .raw = trace_print_raw, 1236 .raw = trace_print_raw,
1170}; 1237};
1171 1238
1239static struct trace_event trace_print_event = {
1240 .type = TRACE_PRINT,
1241 .funcs = &trace_print_funcs,
1242};
1243
1172 1244
1173static struct trace_event *events[] __initdata = { 1245static struct trace_event *events[] __initdata = {
1174 &trace_fn_event, 1246 &trace_fn_event,
1175 &trace_ctx_event, 1247 &trace_ctx_event,
1176 &trace_wake_event, 1248 &trace_wake_event,
1177 &trace_special_event,
1178 &trace_stack_event, 1249 &trace_stack_event,
1179 &trace_user_stack_event, 1250 &trace_user_stack_event,
1180 &trace_bprint_event, 1251 &trace_bprint_event,
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index d38bec4a9c30..c038eba0492b 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -25,7 +25,9 @@ extern void trace_event_read_unlock(void);
25extern struct trace_event *ftrace_find_event(int type); 25extern struct trace_event *ftrace_find_event(int type);
26 26
27extern enum print_line_t trace_nop_print(struct trace_iterator *iter, 27extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
28 int flags); 28 int flags, struct trace_event *event);
29extern int
30trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry);
29 31
30/* used by module unregistering */ 32/* used by module unregistering */
31extern int __unregister_ftrace_event(struct trace_event *event); 33extern int __unregister_ftrace_event(struct trace_event *event);
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
deleted file mode 100644
index 8a30d9874cd4..000000000000
--- a/kernel/trace/trace_power.c
+++ /dev/null
@@ -1,214 +0,0 @@
1/*
2 * ring buffer based C-state tracer
3 *
4 * Arjan van de Ven <arjan@linux.intel.com>
5 * Copyright (C) 2008 Intel Corporation
6 *
7 * Much is borrowed from trace_boot.c which is
8 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
9 *
10 */
11
12#include <linux/init.h>
13#include <linux/debugfs.h>
14#include <trace/power.h>
15#include <linux/kallsyms.h>
16#include <linux/module.h>
17
18#include "trace.h"
19#include "trace_output.h"
20
21static struct trace_array *power_trace;
22static int __read_mostly trace_power_enabled;
23
24static void probe_power_start(struct power_trace *it, unsigned int type,
25 unsigned int level)
26{
27 if (!trace_power_enabled)
28 return;
29
30 memset(it, 0, sizeof(struct power_trace));
31 it->state = level;
32 it->type = type;
33 it->stamp = ktime_get();
34}
35
36
37static void probe_power_end(struct power_trace *it)
38{
39 struct ftrace_event_call *call = &event_power;
40 struct ring_buffer_event *event;
41 struct trace_power *entry;
42 struct trace_array_cpu *data;
43 struct trace_array *tr = power_trace;
44
45 if (!trace_power_enabled)
46 return;
47
48 preempt_disable();
49 it->end = ktime_get();
50 data = tr->data[smp_processor_id()];
51
52 event = trace_buffer_lock_reserve(tr, TRACE_POWER,
53 sizeof(*entry), 0, 0);
54 if (!event)
55 goto out;
56 entry = ring_buffer_event_data(event);
57 entry->state_data = *it;
58 if (!filter_check_discard(call, entry, tr->buffer, event))
59 trace_buffer_unlock_commit(tr, event, 0, 0);
60 out:
61 preempt_enable();
62}
63
64static void probe_power_mark(struct power_trace *it, unsigned int type,
65 unsigned int level)
66{
67 struct ftrace_event_call *call = &event_power;
68 struct ring_buffer_event *event;
69 struct trace_power *entry;
70 struct trace_array_cpu *data;
71 struct trace_array *tr = power_trace;
72
73 if (!trace_power_enabled)
74 return;
75
76 memset(it, 0, sizeof(struct power_trace));
77 it->state = level;
78 it->type = type;
79 it->stamp = ktime_get();
80 preempt_disable();
81 it->end = it->stamp;
82 data = tr->data[smp_processor_id()];
83
84 event = trace_buffer_lock_reserve(tr, TRACE_POWER,
85 sizeof(*entry), 0, 0);
86 if (!event)
87 goto out;
88 entry = ring_buffer_event_data(event);
89 entry->state_data = *it;
90 if (!filter_check_discard(call, entry, tr->buffer, event))
91 trace_buffer_unlock_commit(tr, event, 0, 0);
92 out:
93 preempt_enable();
94}
95
96static int tracing_power_register(void)
97{
98 int ret;
99
100 ret = register_trace_power_start(probe_power_start);
101 if (ret) {
102 pr_info("power trace: Couldn't activate tracepoint"
103 " probe to trace_power_start\n");
104 return ret;
105 }
106 ret = register_trace_power_end(probe_power_end);
107 if (ret) {
108 pr_info("power trace: Couldn't activate tracepoint"
109 " probe to trace_power_end\n");
110 goto fail_start;
111 }
112 ret = register_trace_power_mark(probe_power_mark);
113 if (ret) {
114 pr_info("power trace: Couldn't activate tracepoint"
115 " probe to trace_power_mark\n");
116 goto fail_end;
117 }
118 return ret;
119fail_end:
120 unregister_trace_power_end(probe_power_end);
121fail_start:
122 unregister_trace_power_start(probe_power_start);
123 return ret;
124}
125
126static void start_power_trace(struct trace_array *tr)
127{
128 trace_power_enabled = 1;
129}
130
131static void stop_power_trace(struct trace_array *tr)
132{
133 trace_power_enabled = 0;
134}
135
136static void power_trace_reset(struct trace_array *tr)
137{
138 trace_power_enabled = 0;
139 unregister_trace_power_start(probe_power_start);
140 unregister_trace_power_end(probe_power_end);
141 unregister_trace_power_mark(probe_power_mark);
142}
143
144
145static int power_trace_init(struct trace_array *tr)
146{
147 int cpu;
148 power_trace = tr;
149
150 trace_power_enabled = 1;
151 tracing_power_register();
152
153 for_each_cpu(cpu, cpu_possible_mask)
154 tracing_reset(tr, cpu);
155 return 0;
156}
157
158static enum print_line_t power_print_line(struct trace_iterator *iter)
159{
160 int ret = 0;
161 struct trace_entry *entry = iter->ent;
162 struct trace_power *field ;
163 struct power_trace *it;
164 struct trace_seq *s = &iter->seq;
165 struct timespec stamp;
166 struct timespec duration;
167
168 trace_assign_type(field, entry);
169 it = &field->state_data;
170 stamp = ktime_to_timespec(it->stamp);
171 duration = ktime_to_timespec(ktime_sub(it->end, it->stamp));
172
173 if (entry->type == TRACE_POWER) {
174 if (it->type == POWER_CSTATE)
175 ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n",
176 stamp.tv_sec,
177 stamp.tv_nsec,
178 it->state, iter->cpu,
179 duration.tv_sec,
180 duration.tv_nsec);
181 if (it->type == POWER_PSTATE)
182 ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n",
183 stamp.tv_sec,
184 stamp.tv_nsec,
185 it->state, iter->cpu);
186 if (!ret)
187 return TRACE_TYPE_PARTIAL_LINE;
188 return TRACE_TYPE_HANDLED;
189 }
190 return TRACE_TYPE_UNHANDLED;
191}
192
193static void power_print_header(struct seq_file *s)
194{
195 seq_puts(s, "# TIMESTAMP STATE EVENT\n");
196 seq_puts(s, "# | | |\n");
197}
198
199static struct tracer power_tracer __read_mostly =
200{
201 .name = "power",
202 .init = power_trace_init,
203 .start = start_power_trace,
204 .stop = stop_power_trace,
205 .reset = power_trace_reset,
206 .print_line = power_print_line,
207 .print_header = power_print_header,
208};
209
210static int init_power_trace(void)
211{
212 return register_tracer(&power_tracer);
213}
214device_initcall(init_power_trace);
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 9bece9687b62..2547d8813cf0 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -11,7 +11,6 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <linux/string.h> 12#include <linux/string.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/marker.h>
15#include <linux/mutex.h> 14#include <linux/mutex.h>
16#include <linux/ctype.h> 15#include <linux/ctype.h>
17#include <linux/list.h> 16#include <linux/list.h>
@@ -155,25 +154,19 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
155EXPORT_SYMBOL_GPL(__ftrace_vprintk); 154EXPORT_SYMBOL_GPL(__ftrace_vprintk);
156 155
157static void * 156static void *
158t_next(struct seq_file *m, void *v, loff_t *pos) 157t_start(struct seq_file *m, loff_t *pos)
159{ 158{
160 const char **fmt = m->private; 159 const char **fmt = __start___trace_bprintk_fmt + *pos;
161 const char **next = fmt;
162
163 (*pos)++;
164 160
165 if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt) 161 if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt)
166 return NULL; 162 return NULL;
167
168 next = fmt;
169 m->private = ++next;
170
171 return fmt; 163 return fmt;
172} 164}
173 165
174static void *t_start(struct seq_file *m, loff_t *pos) 166static void *t_next(struct seq_file *m, void * v, loff_t *pos)
175{ 167{
176 return t_next(m, NULL, pos); 168 (*pos)++;
169 return t_start(m, pos);
177} 170}
178 171
179static int t_show(struct seq_file *m, void *v) 172static int t_show(struct seq_file *m, void *v)
@@ -182,7 +175,7 @@ static int t_show(struct seq_file *m, void *v)
182 const char *str = *fmt; 175 const char *str = *fmt;
183 int i; 176 int i;
184 177
185 seq_printf(m, "0x%lx : \"", (unsigned long)fmt); 178 seq_printf(m, "0x%lx : \"", *(unsigned long *)fmt);
186 179
187 /* 180 /*
188 * Tabs and new lines need to be converted. 181 * Tabs and new lines need to be converted.
@@ -224,15 +217,7 @@ static const struct seq_operations show_format_seq_ops = {
224static int 217static int
225ftrace_formats_open(struct inode *inode, struct file *file) 218ftrace_formats_open(struct inode *inode, struct file *file)
226{ 219{
227 int ret; 220 return seq_open(file, &show_format_seq_ops);
228
229 ret = seq_open(file, &show_format_seq_ops);
230 if (!ret) {
231 struct seq_file *m = file->private_data;
232
233 m->private = __start___trace_bprintk_fmt;
234 }
235 return ret;
236} 221}
237 222
238static const struct file_operations ftrace_formats_fops = { 223static const struct file_operations ftrace_formats_fops = {
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index a98106dd979c..8f758d070c43 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -20,9 +20,37 @@ static int sched_ref;
20static DEFINE_MUTEX(sched_register_mutex); 20static DEFINE_MUTEX(sched_register_mutex);
21static int sched_stopped; 21static int sched_stopped;
22 22
23
24void
25tracing_sched_switch_trace(struct trace_array *tr,
26 struct task_struct *prev,
27 struct task_struct *next,
28 unsigned long flags, int pc)
29{
30 struct ftrace_event_call *call = &event_context_switch;
31 struct ring_buffer *buffer = tr->buffer;
32 struct ring_buffer_event *event;
33 struct ctx_switch_entry *entry;
34
35 event = trace_buffer_lock_reserve(buffer, TRACE_CTX,
36 sizeof(*entry), flags, pc);
37 if (!event)
38 return;
39 entry = ring_buffer_event_data(event);
40 entry->prev_pid = prev->pid;
41 entry->prev_prio = prev->prio;
42 entry->prev_state = prev->state;
43 entry->next_pid = next->pid;
44 entry->next_prio = next->prio;
45 entry->next_state = next->state;
46 entry->next_cpu = task_cpu(next);
47
48 if (!filter_check_discard(call, entry, buffer, event))
49 trace_buffer_unlock_commit(buffer, event, flags, pc);
50}
51
23static void 52static void
24probe_sched_switch(struct rq *__rq, struct task_struct *prev, 53probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next)
25 struct task_struct *next)
26{ 54{
27 struct trace_array_cpu *data; 55 struct trace_array_cpu *data;
28 unsigned long flags; 56 unsigned long flags;
@@ -49,8 +77,38 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
49 local_irq_restore(flags); 77 local_irq_restore(flags);
50} 78}
51 79
80void
81tracing_sched_wakeup_trace(struct trace_array *tr,
82 struct task_struct *wakee,
83 struct task_struct *curr,
84 unsigned long flags, int pc)
85{
86 struct ftrace_event_call *call = &event_wakeup;
87 struct ring_buffer_event *event;
88 struct ctx_switch_entry *entry;
89 struct ring_buffer *buffer = tr->buffer;
90
91 event = trace_buffer_lock_reserve(buffer, TRACE_WAKE,
92 sizeof(*entry), flags, pc);
93 if (!event)
94 return;
95 entry = ring_buffer_event_data(event);
96 entry->prev_pid = curr->pid;
97 entry->prev_prio = curr->prio;
98 entry->prev_state = curr->state;
99 entry->next_pid = wakee->pid;
100 entry->next_prio = wakee->prio;
101 entry->next_state = wakee->state;
102 entry->next_cpu = task_cpu(wakee);
103
104 if (!filter_check_discard(call, entry, buffer, event))
105 ring_buffer_unlock_commit(buffer, event);
106 ftrace_trace_stack(tr->buffer, flags, 6, pc);
107 ftrace_trace_userstack(tr->buffer, flags, pc);
108}
109
52static void 110static void
53probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) 111probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success)
54{ 112{
55 struct trace_array_cpu *data; 113 struct trace_array_cpu *data;
56 unsigned long flags; 114 unsigned long flags;
@@ -80,21 +138,21 @@ static int tracing_sched_register(void)
80{ 138{
81 int ret; 139 int ret;
82 140
83 ret = register_trace_sched_wakeup(probe_sched_wakeup); 141 ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL);
84 if (ret) { 142 if (ret) {
85 pr_info("wakeup trace: Couldn't activate tracepoint" 143 pr_info("wakeup trace: Couldn't activate tracepoint"
86 " probe to kernel_sched_wakeup\n"); 144 " probe to kernel_sched_wakeup\n");
87 return ret; 145 return ret;
88 } 146 }
89 147
90 ret = register_trace_sched_wakeup_new(probe_sched_wakeup); 148 ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
91 if (ret) { 149 if (ret) {
92 pr_info("wakeup trace: Couldn't activate tracepoint" 150 pr_info("wakeup trace: Couldn't activate tracepoint"
93 " probe to kernel_sched_wakeup_new\n"); 151 " probe to kernel_sched_wakeup_new\n");
94 goto fail_deprobe; 152 goto fail_deprobe;
95 } 153 }
96 154
97 ret = register_trace_sched_switch(probe_sched_switch); 155 ret = register_trace_sched_switch(probe_sched_switch, NULL);
98 if (ret) { 156 if (ret) {
99 pr_info("sched trace: Couldn't activate tracepoint" 157 pr_info("sched trace: Couldn't activate tracepoint"
100 " probe to kernel_sched_switch\n"); 158 " probe to kernel_sched_switch\n");
@@ -103,17 +161,17 @@ static int tracing_sched_register(void)
103 161
104 return ret; 162 return ret;
105fail_deprobe_wake_new: 163fail_deprobe_wake_new:
106 unregister_trace_sched_wakeup_new(probe_sched_wakeup); 164 unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
107fail_deprobe: 165fail_deprobe:
108 unregister_trace_sched_wakeup(probe_sched_wakeup); 166 unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
109 return ret; 167 return ret;
110} 168}
111 169
112static void tracing_sched_unregister(void) 170static void tracing_sched_unregister(void)
113{ 171{
114 unregister_trace_sched_switch(probe_sched_switch); 172 unregister_trace_sched_switch(probe_sched_switch, NULL);
115 unregister_trace_sched_wakeup_new(probe_sched_wakeup); 173 unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
116 unregister_trace_sched_wakeup(probe_sched_wakeup); 174 unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
117} 175}
118 176
119static void tracing_start_sched_switch(void) 177static void tracing_start_sched_switch(void)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index eacb27225173..7319559ed59f 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -24,66 +24,106 @@ static int __read_mostly tracer_enabled;
24 24
25static struct task_struct *wakeup_task; 25static struct task_struct *wakeup_task;
26static int wakeup_cpu; 26static int wakeup_cpu;
27static int wakeup_current_cpu;
27static unsigned wakeup_prio = -1; 28static unsigned wakeup_prio = -1;
28static int wakeup_rt; 29static int wakeup_rt;
29 30
30static raw_spinlock_t wakeup_lock = 31static arch_spinlock_t wakeup_lock =
31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
32 33
34static void wakeup_reset(struct trace_array *tr);
33static void __wakeup_reset(struct trace_array *tr); 35static void __wakeup_reset(struct trace_array *tr);
36static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
37static void wakeup_graph_return(struct ftrace_graph_ret *trace);
34 38
35static int save_lat_flag; 39static int save_lat_flag;
36 40
41#define TRACE_DISPLAY_GRAPH 1
42
43static struct tracer_opt trace_opts[] = {
44#ifdef CONFIG_FUNCTION_GRAPH_TRACER
45 /* display latency trace as call graph */
46 { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
47#endif
48 { } /* Empty entry */
49};
50
51static struct tracer_flags tracer_flags = {
52 .val = 0,
53 .opts = trace_opts,
54};
55
56#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
57
37#ifdef CONFIG_FUNCTION_TRACER 58#ifdef CONFIG_FUNCTION_TRACER
59
38/* 60/*
39 * irqsoff uses its own tracer function to keep the overhead down: 61 * Prologue for the wakeup function tracers.
62 *
63 * Returns 1 if it is OK to continue, and preemption
64 * is disabled and data->disabled is incremented.
65 * 0 if the trace is to be ignored, and preemption
66 * is not disabled and data->disabled is
67 * kept the same.
68 *
69 * Note, this function is also used outside this ifdef but
70 * inside the #ifdef of the function graph tracer below.
71 * This is OK, since the function graph tracer is
72 * dependent on the function tracer.
40 */ 73 */
41static void 74static int
42wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) 75func_prolog_preempt_disable(struct trace_array *tr,
76 struct trace_array_cpu **data,
77 int *pc)
43{ 78{
44 struct trace_array *tr = wakeup_trace;
45 struct trace_array_cpu *data;
46 unsigned long flags;
47 long disabled; 79 long disabled;
48 int resched;
49 int cpu; 80 int cpu;
50 int pc;
51 81
52 if (likely(!wakeup_task)) 82 if (likely(!wakeup_task))
53 return; 83 return 0;
54 84
55 pc = preempt_count(); 85 *pc = preempt_count();
56 resched = ftrace_preempt_disable(); 86 preempt_disable_notrace();
57 87
58 cpu = raw_smp_processor_id(); 88 cpu = raw_smp_processor_id();
59 data = tr->data[cpu]; 89 if (cpu != wakeup_current_cpu)
60 disabled = atomic_inc_return(&data->disabled); 90 goto out_enable;
91
92 *data = tr->data[cpu];
93 disabled = atomic_inc_return(&(*data)->disabled);
61 if (unlikely(disabled != 1)) 94 if (unlikely(disabled != 1))
62 goto out; 95 goto out;
63 96
64 local_irq_save(flags); 97 return 1;
65 __raw_spin_lock(&wakeup_lock);
66 98
67 if (unlikely(!wakeup_task)) 99out:
68 goto unlock; 100 atomic_dec(&(*data)->disabled);
69 101
70 /* 102out_enable:
71 * The task can't disappear because it needs to 103 preempt_enable_notrace();
72 * wake up first, and we have the wakeup_lock. 104 return 0;
73 */ 105}
74 if (task_cpu(wakeup_task) != cpu)
75 goto unlock;
76 106
77 trace_function(tr, ip, parent_ip, flags, pc); 107/*
108 * wakeup uses its own tracer function to keep the overhead down:
109 */
110static void
111wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
112{
113 struct trace_array *tr = wakeup_trace;
114 struct trace_array_cpu *data;
115 unsigned long flags;
116 int pc;
117
118 if (!func_prolog_preempt_disable(tr, &data, &pc))
119 return;
78 120
79 unlock: 121 local_irq_save(flags);
80 __raw_spin_unlock(&wakeup_lock); 122 trace_function(tr, ip, parent_ip, flags, pc);
81 local_irq_restore(flags); 123 local_irq_restore(flags);
82 124
83 out:
84 atomic_dec(&data->disabled); 125 atomic_dec(&data->disabled);
85 126 preempt_enable_notrace();
86 ftrace_preempt_enable(resched);
87} 127}
88 128
89static struct ftrace_ops trace_ops __read_mostly = 129static struct ftrace_ops trace_ops __read_mostly =
@@ -92,6 +132,156 @@ static struct ftrace_ops trace_ops __read_mostly =
92}; 132};
93#endif /* CONFIG_FUNCTION_TRACER */ 133#endif /* CONFIG_FUNCTION_TRACER */
94 134
135static int start_func_tracer(int graph)
136{
137 int ret;
138
139 if (!graph)
140 ret = register_ftrace_function(&trace_ops);
141 else
142 ret = register_ftrace_graph(&wakeup_graph_return,
143 &wakeup_graph_entry);
144
145 if (!ret && tracing_is_enabled())
146 tracer_enabled = 1;
147 else
148 tracer_enabled = 0;
149
150 return ret;
151}
152
153static void stop_func_tracer(int graph)
154{
155 tracer_enabled = 0;
156
157 if (!graph)
158 unregister_ftrace_function(&trace_ops);
159 else
160 unregister_ftrace_graph();
161}
162
163#ifdef CONFIG_FUNCTION_GRAPH_TRACER
164static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
165{
166
167 if (!(bit & TRACE_DISPLAY_GRAPH))
168 return -EINVAL;
169
170 if (!(is_graph() ^ set))
171 return 0;
172
173 stop_func_tracer(!set);
174
175 wakeup_reset(wakeup_trace);
176 tracing_max_latency = 0;
177
178 return start_func_tracer(set);
179}
180
181static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
182{
183 struct trace_array *tr = wakeup_trace;
184 struct trace_array_cpu *data;
185 unsigned long flags;
186 int pc, ret = 0;
187
188 if (!func_prolog_preempt_disable(tr, &data, &pc))
189 return 0;
190
191 local_save_flags(flags);
192 ret = __trace_graph_entry(tr, trace, flags, pc);
193 atomic_dec(&data->disabled);
194 preempt_enable_notrace();
195
196 return ret;
197}
198
199static void wakeup_graph_return(struct ftrace_graph_ret *trace)
200{
201 struct trace_array *tr = wakeup_trace;
202 struct trace_array_cpu *data;
203 unsigned long flags;
204 int pc;
205
206 if (!func_prolog_preempt_disable(tr, &data, &pc))
207 return;
208
209 local_save_flags(flags);
210 __trace_graph_return(tr, trace, flags, pc);
211 atomic_dec(&data->disabled);
212
213 preempt_enable_notrace();
214 return;
215}
216
217static void wakeup_trace_open(struct trace_iterator *iter)
218{
219 if (is_graph())
220 graph_trace_open(iter);
221}
222
223static void wakeup_trace_close(struct trace_iterator *iter)
224{
225 if (iter->private)
226 graph_trace_close(iter);
227}
228
229#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC)
230
231static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
232{
233 /*
234 * In graph mode call the graph tracer output function,
235 * otherwise go with the TRACE_FN event handler
236 */
237 if (is_graph())
238 return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
239
240 return TRACE_TYPE_UNHANDLED;
241}
242
243static void wakeup_print_header(struct seq_file *s)
244{
245 if (is_graph())
246 print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
247 else
248 trace_default_header(s);
249}
250
251static void
252__trace_function(struct trace_array *tr,
253 unsigned long ip, unsigned long parent_ip,
254 unsigned long flags, int pc)
255{
256 if (is_graph())
257 trace_graph_function(tr, ip, parent_ip, flags, pc);
258 else
259 trace_function(tr, ip, parent_ip, flags, pc);
260}
261#else
262#define __trace_function trace_function
263
264static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
265{
266 return -EINVAL;
267}
268
269static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
270{
271 return -1;
272}
273
274static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
275{
276 return TRACE_TYPE_UNHANDLED;
277}
278
279static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
280static void wakeup_print_header(struct seq_file *s) { }
281static void wakeup_trace_open(struct trace_iterator *iter) { }
282static void wakeup_trace_close(struct trace_iterator *iter) { }
283#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
284
95/* 285/*
96 * Should this new latency be reported/recorded? 286 * Should this new latency be reported/recorded?
97 */ 287 */
@@ -107,11 +297,19 @@ static int report_latency(cycle_t delta)
107 return 1; 297 return 1;
108} 298}
109 299
300static void
301probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu)
302{
303 if (task != wakeup_task)
304 return;
305
306 wakeup_current_cpu = cpu;
307}
308
110static void notrace 309static void notrace
111probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, 310probe_wakeup_sched_switch(void *ignore,
112 struct task_struct *next) 311 struct task_struct *prev, struct task_struct *next)
113{ 312{
114 unsigned long latency = 0, t0 = 0, t1 = 0;
115 struct trace_array_cpu *data; 313 struct trace_array_cpu *data;
116 cycle_t T0, T1, delta; 314 cycle_t T0, T1, delta;
117 unsigned long flags; 315 unsigned long flags;
@@ -145,7 +343,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
145 goto out; 343 goto out;
146 344
147 local_irq_save(flags); 345 local_irq_save(flags);
148 __raw_spin_lock(&wakeup_lock); 346 arch_spin_lock(&wakeup_lock);
149 347
150 /* We could race with grabbing wakeup_lock */ 348 /* We could race with grabbing wakeup_lock */
151 if (unlikely(!tracer_enabled || next != wakeup_task)) 349 if (unlikely(!tracer_enabled || next != wakeup_task))
@@ -154,13 +352,9 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
154 /* The task we are waiting for is waking up */ 352 /* The task we are waiting for is waking up */
155 data = wakeup_trace->data[wakeup_cpu]; 353 data = wakeup_trace->data[wakeup_cpu];
156 354
157 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); 355 __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
158 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); 356 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
159 357
160 /*
161 * usecs conversion is slow so we try to delay the conversion
162 * as long as possible:
163 */
164 T0 = data->preempt_timestamp; 358 T0 = data->preempt_timestamp;
165 T1 = ftrace_now(cpu); 359 T1 = ftrace_now(cpu);
166 delta = T1-T0; 360 delta = T1-T0;
@@ -168,17 +362,14 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
168 if (!report_latency(delta)) 362 if (!report_latency(delta))
169 goto out_unlock; 363 goto out_unlock;
170 364
171 latency = nsecs_to_usecs(delta); 365 if (likely(!is_tracing_stopped())) {
172 366 tracing_max_latency = delta;
173 tracing_max_latency = delta; 367 update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
174 t0 = nsecs_to_usecs(T0); 368 }
175 t1 = nsecs_to_usecs(T1);
176
177 update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
178 369
179out_unlock: 370out_unlock:
180 __wakeup_reset(wakeup_trace); 371 __wakeup_reset(wakeup_trace);
181 __raw_spin_unlock(&wakeup_lock); 372 arch_spin_unlock(&wakeup_lock);
182 local_irq_restore(flags); 373 local_irq_restore(flags);
183out: 374out:
184 atomic_dec(&wakeup_trace->data[cpu]->disabled); 375 atomic_dec(&wakeup_trace->data[cpu]->disabled);
@@ -186,11 +377,6 @@ out:
186 377
187static void __wakeup_reset(struct trace_array *tr) 378static void __wakeup_reset(struct trace_array *tr)
188{ 379{
189 int cpu;
190
191 for_each_possible_cpu(cpu)
192 tracing_reset(tr, cpu);
193
194 wakeup_cpu = -1; 380 wakeup_cpu = -1;
195 wakeup_prio = -1; 381 wakeup_prio = -1;
196 382
@@ -204,15 +390,17 @@ static void wakeup_reset(struct trace_array *tr)
204{ 390{
205 unsigned long flags; 391 unsigned long flags;
206 392
393 tracing_reset_online_cpus(tr);
394
207 local_irq_save(flags); 395 local_irq_save(flags);
208 __raw_spin_lock(&wakeup_lock); 396 arch_spin_lock(&wakeup_lock);
209 __wakeup_reset(tr); 397 __wakeup_reset(tr);
210 __raw_spin_unlock(&wakeup_lock); 398 arch_spin_unlock(&wakeup_lock);
211 local_irq_restore(flags); 399 local_irq_restore(flags);
212} 400}
213 401
214static void 402static void
215probe_wakeup(struct rq *rq, struct task_struct *p, int success) 403probe_wakeup(void *ignore, struct task_struct *p, int success)
216{ 404{
217 struct trace_array_cpu *data; 405 struct trace_array_cpu *data;
218 int cpu = smp_processor_id(); 406 int cpu = smp_processor_id();
@@ -237,7 +425,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
237 goto out; 425 goto out;
238 426
239 /* interrupts should be off from try_to_wake_up */ 427 /* interrupts should be off from try_to_wake_up */
240 __raw_spin_lock(&wakeup_lock); 428 arch_spin_lock(&wakeup_lock);
241 429
242 /* check for races. */ 430 /* check for races. */
243 if (!tracer_enabled || p->prio >= wakeup_prio) 431 if (!tracer_enabled || p->prio >= wakeup_prio)
@@ -247,6 +435,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
247 __wakeup_reset(wakeup_trace); 435 __wakeup_reset(wakeup_trace);
248 436
249 wakeup_cpu = task_cpu(p); 437 wakeup_cpu = task_cpu(p);
438 wakeup_current_cpu = wakeup_cpu;
250 wakeup_prio = p->prio; 439 wakeup_prio = p->prio;
251 440
252 wakeup_task = p; 441 wakeup_task = p;
@@ -263,10 +452,10 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
263 * is not called by an assembly function (where as schedule is) 452 * is not called by an assembly function (where as schedule is)
264 * it should be safe to use it here. 453 * it should be safe to use it here.
265 */ 454 */
266 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 455 __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
267 456
268out_locked: 457out_locked:
269 __raw_spin_unlock(&wakeup_lock); 458 arch_spin_unlock(&wakeup_lock);
270out: 459out:
271 atomic_dec(&wakeup_trace->data[cpu]->disabled); 460 atomic_dec(&wakeup_trace->data[cpu]->disabled);
272} 461}
@@ -275,27 +464,34 @@ static void start_wakeup_tracer(struct trace_array *tr)
275{ 464{
276 int ret; 465 int ret;
277 466
278 ret = register_trace_sched_wakeup(probe_wakeup); 467 ret = register_trace_sched_wakeup(probe_wakeup, NULL);
279 if (ret) { 468 if (ret) {
280 pr_info("wakeup trace: Couldn't activate tracepoint" 469 pr_info("wakeup trace: Couldn't activate tracepoint"
281 " probe to kernel_sched_wakeup\n"); 470 " probe to kernel_sched_wakeup\n");
282 return; 471 return;
283 } 472 }
284 473
285 ret = register_trace_sched_wakeup_new(probe_wakeup); 474 ret = register_trace_sched_wakeup_new(probe_wakeup, NULL);
286 if (ret) { 475 if (ret) {
287 pr_info("wakeup trace: Couldn't activate tracepoint" 476 pr_info("wakeup trace: Couldn't activate tracepoint"
288 " probe to kernel_sched_wakeup_new\n"); 477 " probe to kernel_sched_wakeup_new\n");
289 goto fail_deprobe; 478 goto fail_deprobe;
290 } 479 }
291 480
292 ret = register_trace_sched_switch(probe_wakeup_sched_switch); 481 ret = register_trace_sched_switch(probe_wakeup_sched_switch, NULL);
293 if (ret) { 482 if (ret) {
294 pr_info("sched trace: Couldn't activate tracepoint" 483 pr_info("sched trace: Couldn't activate tracepoint"
295 " probe to kernel_sched_switch\n"); 484 " probe to kernel_sched_switch\n");
296 goto fail_deprobe_wake_new; 485 goto fail_deprobe_wake_new;
297 } 486 }
298 487
488 ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
489 if (ret) {
490 pr_info("wakeup trace: Couldn't activate tracepoint"
491 " probe to kernel_sched_migrate_task\n");
492 return;
493 }
494
299 wakeup_reset(tr); 495 wakeup_reset(tr);
300 496
301 /* 497 /*
@@ -307,27 +503,24 @@ static void start_wakeup_tracer(struct trace_array *tr)
307 */ 503 */
308 smp_wmb(); 504 smp_wmb();
309 505
310 register_ftrace_function(&trace_ops); 506 if (start_func_tracer(is_graph()))
311 507 printk(KERN_ERR "failed to start wakeup tracer\n");
312 if (tracing_is_enabled())
313 tracer_enabled = 1;
314 else
315 tracer_enabled = 0;
316 508
317 return; 509 return;
318fail_deprobe_wake_new: 510fail_deprobe_wake_new:
319 unregister_trace_sched_wakeup_new(probe_wakeup); 511 unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
320fail_deprobe: 512fail_deprobe:
321 unregister_trace_sched_wakeup(probe_wakeup); 513 unregister_trace_sched_wakeup(probe_wakeup, NULL);
322} 514}
323 515
324static void stop_wakeup_tracer(struct trace_array *tr) 516static void stop_wakeup_tracer(struct trace_array *tr)
325{ 517{
326 tracer_enabled = 0; 518 tracer_enabled = 0;
327 unregister_ftrace_function(&trace_ops); 519 stop_func_tracer(is_graph());
328 unregister_trace_sched_switch(probe_wakeup_sched_switch); 520 unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
329 unregister_trace_sched_wakeup_new(probe_wakeup); 521 unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
330 unregister_trace_sched_wakeup(probe_wakeup); 522 unregister_trace_sched_wakeup(probe_wakeup, NULL);
523 unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
331} 524}
332 525
333static int __wakeup_tracer_init(struct trace_array *tr) 526static int __wakeup_tracer_init(struct trace_array *tr)
@@ -382,9 +575,16 @@ static struct tracer wakeup_tracer __read_mostly =
382 .start = wakeup_tracer_start, 575 .start = wakeup_tracer_start,
383 .stop = wakeup_tracer_stop, 576 .stop = wakeup_tracer_stop,
384 .print_max = 1, 577 .print_max = 1,
578 .print_header = wakeup_print_header,
579 .print_line = wakeup_print_line,
580 .flags = &tracer_flags,
581 .set_flag = wakeup_set_flag,
385#ifdef CONFIG_FTRACE_SELFTEST 582#ifdef CONFIG_FTRACE_SELFTEST
386 .selftest = trace_selftest_startup_wakeup, 583 .selftest = trace_selftest_startup_wakeup,
387#endif 584#endif
585 .open = wakeup_trace_open,
586 .close = wakeup_trace_close,
587 .use_max_tr = 1,
388}; 588};
389 589
390static struct tracer wakeup_rt_tracer __read_mostly = 590static struct tracer wakeup_rt_tracer __read_mostly =
@@ -396,9 +596,16 @@ static struct tracer wakeup_rt_tracer __read_mostly =
396 .stop = wakeup_tracer_stop, 596 .stop = wakeup_tracer_stop,
397 .wait_pipe = poll_wait_pipe, 597 .wait_pipe = poll_wait_pipe,
398 .print_max = 1, 598 .print_max = 1,
599 .print_header = wakeup_print_header,
600 .print_line = wakeup_print_line,
601 .flags = &tracer_flags,
602 .set_flag = wakeup_set_flag,
399#ifdef CONFIG_FTRACE_SELFTEST 603#ifdef CONFIG_FTRACE_SELFTEST
400 .selftest = trace_selftest_startup_wakeup, 604 .selftest = trace_selftest_startup_wakeup,
401#endif 605#endif
606 .open = wakeup_trace_open,
607 .close = wakeup_trace_close,
608 .use_max_tr = 1,
402}; 609};
403 610
404__init static int init_wakeup_tracer(void) 611__init static int init_wakeup_tracer(void)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 00dd6485bdd7..659732eba07c 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -3,6 +3,7 @@
3#include <linux/stringify.h> 3#include <linux/stringify.h>
4#include <linux/kthread.h> 4#include <linux/kthread.h>
5#include <linux/delay.h> 5#include <linux/delay.h>
6#include <linux/slab.h>
6 7
7static inline int trace_valid_entry(struct trace_entry *entry) 8static inline int trace_valid_entry(struct trace_entry *entry)
8{ 9{
@@ -12,11 +13,9 @@ static inline int trace_valid_entry(struct trace_entry *entry)
12 case TRACE_WAKE: 13 case TRACE_WAKE:
13 case TRACE_STACK: 14 case TRACE_STACK:
14 case TRACE_PRINT: 15 case TRACE_PRINT:
15 case TRACE_SPECIAL:
16 case TRACE_BRANCH: 16 case TRACE_BRANCH:
17 case TRACE_GRAPH_ENT: 17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 18 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES:
20 return 1; 19 return 1;
21 } 20 }
22 return 0; 21 return 0;
@@ -28,7 +27,7 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
28 struct trace_entry *entry; 27 struct trace_entry *entry;
29 unsigned int loops = 0; 28 unsigned int loops = 0;
30 29
31 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) { 30 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL, NULL))) {
32 entry = ring_buffer_event_data(event); 31 entry = ring_buffer_event_data(event);
33 32
34 /* 33 /*
@@ -66,7 +65,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
66 65
67 /* Don't allow flipping of max traces now */ 66 /* Don't allow flipping of max traces now */
68 local_irq_save(flags); 67 local_irq_save(flags);
69 __raw_spin_lock(&ftrace_max_lock); 68 arch_spin_lock(&ftrace_max_lock);
70 69
71 cnt = ring_buffer_entries(tr->buffer); 70 cnt = ring_buffer_entries(tr->buffer);
72 71
@@ -84,7 +83,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
84 break; 83 break;
85 } 84 }
86 tracing_on(); 85 tracing_on();
87 __raw_spin_unlock(&ftrace_max_lock); 86 arch_spin_unlock(&ftrace_max_lock);
88 local_irq_restore(flags); 87 local_irq_restore(flags);
89 88
90 if (count) 89 if (count)
@@ -254,7 +253,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
254/* Maximum number of functions to trace before diagnosing a hang */ 253/* Maximum number of functions to trace before diagnosing a hang */
255#define GRAPH_MAX_FUNC_TEST 100000000 254#define GRAPH_MAX_FUNC_TEST 100000000
256 255
257static void __ftrace_dump(bool disable_tracing); 256static void
257__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode);
258static unsigned int graph_hang_thresh; 258static unsigned int graph_hang_thresh;
259 259
260/* Wrap the real function entry probe to avoid possible hanging */ 260/* Wrap the real function entry probe to avoid possible hanging */
@@ -265,7 +265,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
265 ftrace_graph_stop(); 265 ftrace_graph_stop();
266 printk(KERN_WARNING "BUG: Function graph tracer hang!\n"); 266 printk(KERN_WARNING "BUG: Function graph tracer hang!\n");
267 if (ftrace_dump_on_oops) 267 if (ftrace_dump_on_oops)
268 __ftrace_dump(false); 268 __ftrace_dump(false, DUMP_ALL);
269 return 0; 269 return 0;
270 } 270 }
271 271
@@ -288,6 +288,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
288 * to detect and recover from possible hangs 288 * to detect and recover from possible hangs
289 */ 289 */
290 tracing_reset_online_cpus(tr); 290 tracing_reset_online_cpus(tr);
291 set_graph_array(tr);
291 ret = register_ftrace_graph(&trace_graph_return, 292 ret = register_ftrace_graph(&trace_graph_return,
292 &trace_graph_entry_watchdog); 293 &trace_graph_entry_watchdog);
293 if (ret) { 294 if (ret) {
@@ -557,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
557static int trace_wakeup_test_thread(void *data) 558static int trace_wakeup_test_thread(void *data)
558{ 559{
559 /* Make this a RT thread, doesn't need to be too high */ 560 /* Make this a RT thread, doesn't need to be too high */
560 struct sched_param param = { .sched_priority = 5 }; 561 static const struct sched_param param = { .sched_priority = 5 };
561 struct completion *x = data; 562 struct completion *x = data;
562 563
563 sched_setscheduler(current, SCHED_FIFO, &param); 564 sched_setscheduler(current, SCHED_FIFO, &param);
@@ -688,38 +689,6 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
688} 689}
689#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 690#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
690 691
691#ifdef CONFIG_SYSPROF_TRACER
692int
693trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
694{
695 unsigned long count;
696 int ret;
697
698 /* start the tracing */
699 ret = tracer_init(trace, tr);
700 if (ret) {
701 warn_failed_init_tracer(trace, ret);
702 return ret;
703 }
704
705 /* Sleep for a 1/10 of a second */
706 msleep(100);
707 /* stop the tracing. */
708 tracing_stop();
709 /* check the trace buffer */
710 ret = trace_test_buffer(tr, &count);
711 trace->reset(tr);
712 tracing_start();
713
714 if (!ret && !count) {
715 printk(KERN_CONT ".. no entries found ..");
716 ret = -1;
717 }
718
719 return ret;
720}
721#endif /* CONFIG_SYSPROF_TRACER */
722
723#ifdef CONFIG_BRANCH_TRACER 692#ifdef CONFIG_BRANCH_TRACER
724int 693int
725trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) 694trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
@@ -752,58 +721,3 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
752} 721}
753#endif /* CONFIG_BRANCH_TRACER */ 722#endif /* CONFIG_BRANCH_TRACER */
754 723
755#ifdef CONFIG_HW_BRANCH_TRACER
756int
757trace_selftest_startup_hw_branches(struct tracer *trace,
758 struct trace_array *tr)
759{
760 struct trace_iterator *iter;
761 struct tracer tracer;
762 unsigned long count;
763 int ret;
764
765 if (!trace->open) {
766 printk(KERN_CONT "missing open function...");
767 return -1;
768 }
769
770 ret = tracer_init(trace, tr);
771 if (ret) {
772 warn_failed_init_tracer(trace, ret);
773 return ret;
774 }
775
776 /*
777 * The hw-branch tracer needs to collect the trace from the various
778 * cpu trace buffers - before tracing is stopped.
779 */
780 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
781 if (!iter)
782 return -ENOMEM;
783
784 memcpy(&tracer, trace, sizeof(tracer));
785
786 iter->trace = &tracer;
787 iter->tr = tr;
788 iter->pos = -1;
789 mutex_init(&iter->mutex);
790
791 trace->open(iter);
792
793 mutex_destroy(&iter->mutex);
794 kfree(iter);
795
796 tracing_stop();
797
798 ret = trace_test_buffer(tr, &count);
799 trace->reset(tr);
800 tracing_start();
801
802 if (!ret && !count) {
803 printk(KERN_CONT "no entries found..");
804 ret = -1;
805 }
806
807 return ret;
808}
809#endif /* CONFIG_HW_BRANCH_TRACER */
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 2d7aebd71dbd..4c5dead0c239 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -27,8 +27,8 @@ static struct stack_trace max_stack_trace = {
27}; 27};
28 28
29static unsigned long max_stack_size; 29static unsigned long max_stack_size;
30static raw_spinlock_t max_stack_lock = 30static arch_spinlock_t max_stack_lock =
31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 31 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
32 32
33static int stack_trace_disabled __read_mostly; 33static int stack_trace_disabled __read_mostly;
34static DEFINE_PER_CPU(int, trace_active); 34static DEFINE_PER_CPU(int, trace_active);
@@ -54,7 +54,7 @@ static inline void check_stack(void)
54 return; 54 return;
55 55
56 local_irq_save(flags); 56 local_irq_save(flags);
57 __raw_spin_lock(&max_stack_lock); 57 arch_spin_lock(&max_stack_lock);
58 58
59 /* a race could have already updated it */ 59 /* a race could have already updated it */
60 if (this_size <= max_stack_size) 60 if (this_size <= max_stack_size)
@@ -103,19 +103,19 @@ static inline void check_stack(void)
103 } 103 }
104 104
105 out: 105 out:
106 __raw_spin_unlock(&max_stack_lock); 106 arch_spin_unlock(&max_stack_lock);
107 local_irq_restore(flags); 107 local_irq_restore(flags);
108} 108}
109 109
110static void 110static void
111stack_trace_call(unsigned long ip, unsigned long parent_ip) 111stack_trace_call(unsigned long ip, unsigned long parent_ip)
112{ 112{
113 int cpu, resched; 113 int cpu;
114 114
115 if (unlikely(!ftrace_enabled || stack_trace_disabled)) 115 if (unlikely(!ftrace_enabled || stack_trace_disabled))
116 return; 116 return;
117 117
118 resched = ftrace_preempt_disable(); 118 preempt_disable_notrace();
119 119
120 cpu = raw_smp_processor_id(); 120 cpu = raw_smp_processor_id();
121 /* no atomic needed, we only modify this variable by this cpu */ 121 /* no atomic needed, we only modify this variable by this cpu */
@@ -127,7 +127,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
127 out: 127 out:
128 per_cpu(trace_active, cpu)--; 128 per_cpu(trace_active, cpu)--;
129 /* prevent recursion in schedule */ 129 /* prevent recursion in schedule */
130 ftrace_preempt_enable(resched); 130 preempt_enable_notrace();
131} 131}
132 132
133static struct ftrace_ops trace_ops __read_mostly = 133static struct ftrace_ops trace_ops __read_mostly =
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
157 unsigned long val, flags; 157 unsigned long val, flags;
158 char buf[64]; 158 char buf[64];
159 int ret; 159 int ret;
160 int cpu;
160 161
161 if (count >= sizeof(buf)) 162 if (count >= sizeof(buf))
162 return -EINVAL; 163 return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 172 return ret;
172 173
173 local_irq_save(flags); 174 local_irq_save(flags);
174 __raw_spin_lock(&max_stack_lock); 175
176 /*
177 * In case we trace inside arch_spin_lock() or after (NMI),
178 * we will cause circular lock, so we also need to increase
179 * the percpu trace_active here.
180 */
181 cpu = smp_processor_id();
182 per_cpu(trace_active, cpu)++;
183
184 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 185 *ptr = val;
176 __raw_spin_unlock(&max_stack_lock); 186 arch_spin_unlock(&max_stack_lock);
187
188 per_cpu(trace_active, cpu)--;
177 local_irq_restore(flags); 189 local_irq_restore(flags);
178 190
179 return count; 191 return count;
@@ -183,66 +195,62 @@ static const struct file_operations stack_max_size_fops = {
183 .open = tracing_open_generic, 195 .open = tracing_open_generic,
184 .read = stack_max_size_read, 196 .read = stack_max_size_read,
185 .write = stack_max_size_write, 197 .write = stack_max_size_write,
198 .llseek = default_llseek,
186}; 199};
187 200
188static void * 201static void *
189t_next(struct seq_file *m, void *v, loff_t *pos) 202__next(struct seq_file *m, loff_t *pos)
190{ 203{
191 long i; 204 long n = *pos - 1;
192 205
193 (*pos)++; 206 if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
194
195 if (v == SEQ_START_TOKEN)
196 i = 0;
197 else {
198 i = *(long *)v;
199 i++;
200 }
201
202 if (i >= max_stack_trace.nr_entries ||
203 stack_dump_trace[i] == ULONG_MAX)
204 return NULL; 207 return NULL;
205 208
206 m->private = (void *)i; 209 m->private = (void *)n;
207
208 return &m->private; 210 return &m->private;
209} 211}
210 212
213static void *
214t_next(struct seq_file *m, void *v, loff_t *pos)
215{
216 (*pos)++;
217 return __next(m, pos);
218}
219
211static void *t_start(struct seq_file *m, loff_t *pos) 220static void *t_start(struct seq_file *m, loff_t *pos)
212{ 221{
213 void *t = SEQ_START_TOKEN; 222 int cpu;
214 loff_t l = 0;
215 223
216 local_irq_disable(); 224 local_irq_disable();
217 __raw_spin_lock(&max_stack_lock); 225
226 cpu = smp_processor_id();
227 per_cpu(trace_active, cpu)++;
228
229 arch_spin_lock(&max_stack_lock);
218 230
219 if (*pos == 0) 231 if (*pos == 0)
220 return SEQ_START_TOKEN; 232 return SEQ_START_TOKEN;
221 233
222 for (; t && l < *pos; t = t_next(m, t, &l)) 234 return __next(m, pos);
223 ;
224
225 return t;
226} 235}
227 236
228static void t_stop(struct seq_file *m, void *p) 237static void t_stop(struct seq_file *m, void *p)
229{ 238{
230 __raw_spin_unlock(&max_stack_lock); 239 int cpu;
240
241 arch_spin_unlock(&max_stack_lock);
242
243 cpu = smp_processor_id();
244 per_cpu(trace_active, cpu)--;
245
231 local_irq_enable(); 246 local_irq_enable();
232} 247}
233 248
234static int trace_lookup_stack(struct seq_file *m, long i) 249static int trace_lookup_stack(struct seq_file *m, long i)
235{ 250{
236 unsigned long addr = stack_dump_trace[i]; 251 unsigned long addr = stack_dump_trace[i];
237#ifdef CONFIG_KALLSYMS
238 char str[KSYM_SYMBOL_LEN];
239
240 sprint_symbol(str, addr);
241 252
242 return seq_printf(m, "%s\n", str); 253 return seq_printf(m, "%pS\n", (void *)addr);
243#else
244 return seq_printf(m, "%p\n", (void*)addr);
245#endif
246} 254}
247 255
248static void print_disabled(struct seq_file *m) 256static void print_disabled(struct seq_file *m)
@@ -301,35 +309,32 @@ static const struct seq_operations stack_trace_seq_ops = {
301 309
302static int stack_trace_open(struct inode *inode, struct file *file) 310static int stack_trace_open(struct inode *inode, struct file *file)
303{ 311{
304 int ret; 312 return seq_open(file, &stack_trace_seq_ops);
305
306 ret = seq_open(file, &stack_trace_seq_ops);
307
308 return ret;
309} 313}
310 314
311static const struct file_operations stack_trace_fops = { 315static const struct file_operations stack_trace_fops = {
312 .open = stack_trace_open, 316 .open = stack_trace_open,
313 .read = seq_read, 317 .read = seq_read,
314 .llseek = seq_lseek, 318 .llseek = seq_lseek,
319 .release = seq_release,
315}; 320};
316 321
317int 322int
318stack_trace_sysctl(struct ctl_table *table, int write, 323stack_trace_sysctl(struct ctl_table *table, int write,
319 struct file *file, void __user *buffer, size_t *lenp, 324 void __user *buffer, size_t *lenp,
320 loff_t *ppos) 325 loff_t *ppos)
321{ 326{
322 int ret; 327 int ret;
323 328
324 mutex_lock(&stack_sysctl_mutex); 329 mutex_lock(&stack_sysctl_mutex);
325 330
326 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 331 ret = proc_dointvec(table, write, buffer, lenp, ppos);
327 332
328 if (ret || !write || 333 if (ret || !write ||
329 (last_stack_tracer_enabled == stack_tracer_enabled)) 334 (last_stack_tracer_enabled == !!stack_tracer_enabled))
330 goto out; 335 goto out;
331 336
332 last_stack_tracer_enabled = stack_tracer_enabled; 337 last_stack_tracer_enabled = !!stack_tracer_enabled;
333 338
334 if (stack_tracer_enabled) 339 if (stack_tracer_enabled)
335 register_ftrace_function(&trace_ops); 340 register_ftrace_function(&trace_ops);
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index c00643733f4c..96cffb269e73 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -10,6 +10,7 @@
10 10
11 11
12#include <linux/list.h> 12#include <linux/list.h>
13#include <linux/slab.h>
13#include <linux/rbtree.h> 14#include <linux/rbtree.h>
14#include <linux/debugfs.h> 15#include <linux/debugfs.h>
15#include "trace_stat.h" 16#include "trace_stat.h"
@@ -49,7 +50,8 @@ static struct dentry *stat_dir;
49 * but it will at least advance closer to the next one 50 * but it will at least advance closer to the next one
50 * to be released. 51 * to be released.
51 */ 52 */
52static struct rb_node *release_next(struct rb_node *node) 53static struct rb_node *release_next(struct tracer_stat *ts,
54 struct rb_node *node)
53{ 55{
54 struct stat_node *snode; 56 struct stat_node *snode;
55 struct rb_node *parent = rb_parent(node); 57 struct rb_node *parent = rb_parent(node);
@@ -67,26 +69,35 @@ static struct rb_node *release_next(struct rb_node *node)
67 parent->rb_right = NULL; 69 parent->rb_right = NULL;
68 70
69 snode = container_of(node, struct stat_node, node); 71 snode = container_of(node, struct stat_node, node);
72 if (ts->stat_release)
73 ts->stat_release(snode->stat);
70 kfree(snode); 74 kfree(snode);
71 75
72 return parent; 76 return parent;
73 } 77 }
74} 78}
75 79
76static void reset_stat_session(struct stat_session *session) 80static void __reset_stat_session(struct stat_session *session)
77{ 81{
78 struct rb_node *node = session->stat_root.rb_node; 82 struct rb_node *node = session->stat_root.rb_node;
79 83
80 while (node) 84 while (node)
81 node = release_next(node); 85 node = release_next(session->ts, node);
82 86
83 session->stat_root = RB_ROOT; 87 session->stat_root = RB_ROOT;
84} 88}
85 89
90static void reset_stat_session(struct stat_session *session)
91{
92 mutex_lock(&session->stat_mutex);
93 __reset_stat_session(session);
94 mutex_unlock(&session->stat_mutex);
95}
96
86static void destroy_session(struct stat_session *session) 97static void destroy_session(struct stat_session *session)
87{ 98{
88 debugfs_remove(session->file); 99 debugfs_remove(session->file);
89 reset_stat_session(session); 100 __reset_stat_session(session);
90 mutex_destroy(&session->stat_mutex); 101 mutex_destroy(&session->stat_mutex);
91 kfree(session); 102 kfree(session);
92} 103}
@@ -150,7 +161,7 @@ static int stat_seq_init(struct stat_session *session)
150 int i; 161 int i;
151 162
152 mutex_lock(&session->stat_mutex); 163 mutex_lock(&session->stat_mutex);
153 reset_stat_session(session); 164 __reset_stat_session(session);
154 165
155 if (!ts->stat_cmp) 166 if (!ts->stat_cmp)
156 ts->stat_cmp = dummy_cmp; 167 ts->stat_cmp = dummy_cmp;
@@ -183,7 +194,7 @@ exit:
183 return ret; 194 return ret;
184 195
185exit_free_rbtree: 196exit_free_rbtree:
186 reset_stat_session(session); 197 __reset_stat_session(session);
187 mutex_unlock(&session->stat_mutex); 198 mutex_unlock(&session->stat_mutex);
188 return ret; 199 return ret;
189} 200}
@@ -193,23 +204,23 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos)
193{ 204{
194 struct stat_session *session = s->private; 205 struct stat_session *session = s->private;
195 struct rb_node *node; 206 struct rb_node *node;
207 int n = *pos;
196 int i; 208 int i;
197 209
198 /* Prevent from tracer switch or rbtree modification */ 210 /* Prevent from tracer switch or rbtree modification */
199 mutex_lock(&session->stat_mutex); 211 mutex_lock(&session->stat_mutex);
200 212
201 /* If we are in the beginning of the file, print the headers */ 213 /* If we are in the beginning of the file, print the headers */
202 if (!*pos && session->ts->stat_headers) { 214 if (session->ts->stat_headers) {
203 (*pos)++; 215 if (n == 0)
204 return SEQ_START_TOKEN; 216 return SEQ_START_TOKEN;
217 n--;
205 } 218 }
206 219
207 node = rb_first(&session->stat_root); 220 node = rb_first(&session->stat_root);
208 for (i = 0; node && i < *pos; i++) 221 for (i = 0; node && i < n; i++)
209 node = rb_next(node); 222 node = rb_next(node);
210 223
211 (*pos)++;
212
213 return node; 224 return node;
214} 225}
215 226
@@ -254,16 +265,21 @@ static const struct seq_operations trace_stat_seq_ops = {
254static int tracing_stat_open(struct inode *inode, struct file *file) 265static int tracing_stat_open(struct inode *inode, struct file *file)
255{ 266{
256 int ret; 267 int ret;
257 268 struct seq_file *m;
258 struct stat_session *session = inode->i_private; 269 struct stat_session *session = inode->i_private;
259 270
271 ret = stat_seq_init(session);
272 if (ret)
273 return ret;
274
260 ret = seq_open(file, &trace_stat_seq_ops); 275 ret = seq_open(file, &trace_stat_seq_ops);
261 if (!ret) { 276 if (ret) {
262 struct seq_file *m = file->private_data; 277 reset_stat_session(session);
263 m->private = session; 278 return ret;
264 ret = stat_seq_init(session);
265 } 279 }
266 280
281 m = file->private_data;
282 m->private = session;
267 return ret; 283 return ret;
268} 284}
269 285
@@ -274,11 +290,9 @@ static int tracing_stat_release(struct inode *i, struct file *f)
274{ 290{
275 struct stat_session *session = i->i_private; 291 struct stat_session *session = i->i_private;
276 292
277 mutex_lock(&session->stat_mutex);
278 reset_stat_session(session); 293 reset_stat_session(session);
279 mutex_unlock(&session->stat_mutex);
280 294
281 return 0; 295 return seq_release(i, f);
282} 296}
283 297
284static const struct file_operations tracing_stat_fops = { 298static const struct file_operations tracing_stat_fops = {
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
index f3546a2cd826..8f03914b9a6a 100644
--- a/kernel/trace/trace_stat.h
+++ b/kernel/trace/trace_stat.h
@@ -18,6 +18,8 @@ struct tracer_stat {
18 int (*stat_cmp)(void *p1, void *p2); 18 int (*stat_cmp)(void *p1, void *p2);
19 /* Print a stat entry */ 19 /* Print a stat entry */
20 int (*stat_show)(struct seq_file *s, void *p); 20 int (*stat_show)(struct seq_file *s, void *p);
21 /* Release an entry */
22 void (*stat_release)(void *stat);
21 /* Print the headers of your stat entries */ 23 /* Print the headers of your stat entries */
22 int (*stat_headers)(struct seq_file *s); 24 int (*stat_headers)(struct seq_file *s);
23}; 25};
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 5e579645ac86..bac752f0cfb5 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,33 +1,109 @@
1#include <trace/syscall.h> 1#include <trace/syscall.h>
2#include <trace/events/syscalls.h>
3#include <linux/slab.h>
2#include <linux/kernel.h> 4#include <linux/kernel.h>
5#include <linux/ftrace.h>
6#include <linux/perf_event.h>
3#include <asm/syscall.h> 7#include <asm/syscall.h>
4 8
5#include "trace_output.h" 9#include "trace_output.h"
6#include "trace.h" 10#include "trace.h"
7 11
8/* Keep a counter of the syscall tracing users */
9static int refcount;
10
11/* Prevent from races on thread flags toggling */
12static DEFINE_MUTEX(syscall_trace_lock); 12static DEFINE_MUTEX(syscall_trace_lock);
13static int sys_refcount_enter;
14static int sys_refcount_exit;
15static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
16static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
17
18static int syscall_enter_register(struct ftrace_event_call *event,
19 enum trace_reg type);
20static int syscall_exit_register(struct ftrace_event_call *event,
21 enum trace_reg type);
22
23static int syscall_enter_define_fields(struct ftrace_event_call *call);
24static int syscall_exit_define_fields(struct ftrace_event_call *call);
25
26/* All syscall exit events have the same fields */
27static LIST_HEAD(syscall_exit_fields);
28
29static struct list_head *
30syscall_get_enter_fields(struct ftrace_event_call *call)
31{
32 struct syscall_metadata *entry = call->data;
33
34 return &entry->enter_fields;
35}
36
37static struct list_head *
38syscall_get_exit_fields(struct ftrace_event_call *call)
39{
40 return &syscall_exit_fields;
41}
42
43struct trace_event_functions enter_syscall_print_funcs = {
44 .trace = print_syscall_enter,
45};
13 46
14/* Option to display the parameters types */ 47struct trace_event_functions exit_syscall_print_funcs = {
15enum { 48 .trace = print_syscall_exit,
16 TRACE_SYSCALLS_OPT_TYPES = 0x1,
17}; 49};
18 50
19static struct tracer_opt syscalls_opts[] = { 51struct ftrace_event_class event_class_syscall_enter = {
20 { TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) }, 52 .system = "syscalls",
21 { } 53 .reg = syscall_enter_register,
54 .define_fields = syscall_enter_define_fields,
55 .get_fields = syscall_get_enter_fields,
56 .raw_init = init_syscall_trace,
22}; 57};
23 58
24static struct tracer_flags syscalls_flags = { 59struct ftrace_event_class event_class_syscall_exit = {
25 .val = 0, /* By default: no parameters types */ 60 .system = "syscalls",
26 .opts = syscalls_opts 61 .reg = syscall_exit_register,
62 .define_fields = syscall_exit_define_fields,
63 .get_fields = syscall_get_exit_fields,
64 .raw_init = init_syscall_trace,
27}; 65};
28 66
67extern unsigned long __start_syscalls_metadata[];
68extern unsigned long __stop_syscalls_metadata[];
69
70static struct syscall_metadata **syscalls_metadata;
71
72static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
73{
74 struct syscall_metadata *start;
75 struct syscall_metadata *stop;
76 char str[KSYM_SYMBOL_LEN];
77
78
79 start = (struct syscall_metadata *)__start_syscalls_metadata;
80 stop = (struct syscall_metadata *)__stop_syscalls_metadata;
81 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
82
83 for ( ; start < stop; start++) {
84 /*
85 * Only compare after the "sys" prefix. Archs that use
86 * syscall wrappers may have syscalls symbols aliases prefixed
87 * with "SyS" instead of "sys", leading to an unwanted
88 * mismatch.
89 */
90 if (start->name && !strcmp(start->name + 3, str + 3))
91 return start;
92 }
93 return NULL;
94}
95
96static struct syscall_metadata *syscall_nr_to_meta(int nr)
97{
98 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
99 return NULL;
100
101 return syscalls_metadata[nr];
102}
103
29enum print_line_t 104enum print_line_t
30print_syscall_enter(struct trace_iterator *iter, int flags) 105print_syscall_enter(struct trace_iterator *iter, int flags,
106 struct trace_event *event)
31{ 107{
32 struct trace_seq *s = &iter->seq; 108 struct trace_seq *s = &iter->seq;
33 struct trace_entry *ent = iter->ent; 109 struct trace_entry *ent = iter->ent;
@@ -35,40 +111,52 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
35 struct syscall_metadata *entry; 111 struct syscall_metadata *entry;
36 int i, ret, syscall; 112 int i, ret, syscall;
37 113
38 trace_assign_type(trace, ent); 114 trace = (typeof(trace))ent;
39
40 syscall = trace->nr; 115 syscall = trace->nr;
41
42 entry = syscall_nr_to_meta(syscall); 116 entry = syscall_nr_to_meta(syscall);
117
43 if (!entry) 118 if (!entry)
44 goto end; 119 goto end;
45 120
121 if (entry->enter_event->event.type != ent->type) {
122 WARN_ON_ONCE(1);
123 goto end;
124 }
125
46 ret = trace_seq_printf(s, "%s(", entry->name); 126 ret = trace_seq_printf(s, "%s(", entry->name);
47 if (!ret) 127 if (!ret)
48 return TRACE_TYPE_PARTIAL_LINE; 128 return TRACE_TYPE_PARTIAL_LINE;
49 129
50 for (i = 0; i < entry->nb_args; i++) { 130 for (i = 0; i < entry->nb_args; i++) {
51 /* parameter types */ 131 /* parameter types */
52 if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) { 132 if (trace_flags & TRACE_ITER_VERBOSE) {
53 ret = trace_seq_printf(s, "%s ", entry->types[i]); 133 ret = trace_seq_printf(s, "%s ", entry->types[i]);
54 if (!ret) 134 if (!ret)
55 return TRACE_TYPE_PARTIAL_LINE; 135 return TRACE_TYPE_PARTIAL_LINE;
56 } 136 }
57 /* parameter values */ 137 /* parameter values */
58 ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i], 138 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
59 trace->args[i], 139 trace->args[i],
60 i == entry->nb_args - 1 ? ")" : ","); 140 i == entry->nb_args - 1 ? "" : ", ");
61 if (!ret) 141 if (!ret)
62 return TRACE_TYPE_PARTIAL_LINE; 142 return TRACE_TYPE_PARTIAL_LINE;
63 } 143 }
64 144
145 ret = trace_seq_putc(s, ')');
146 if (!ret)
147 return TRACE_TYPE_PARTIAL_LINE;
148
65end: 149end:
66 trace_seq_printf(s, "\n"); 150 ret = trace_seq_putc(s, '\n');
151 if (!ret)
152 return TRACE_TYPE_PARTIAL_LINE;
153
67 return TRACE_TYPE_HANDLED; 154 return TRACE_TYPE_HANDLED;
68} 155}
69 156
70enum print_line_t 157enum print_line_t
71print_syscall_exit(struct trace_iterator *iter, int flags) 158print_syscall_exit(struct trace_iterator *iter, int flags,
159 struct trace_event *event)
72{ 160{
73 struct trace_seq *s = &iter->seq; 161 struct trace_seq *s = &iter->seq;
74 struct trace_entry *ent = iter->ent; 162 struct trace_entry *ent = iter->ent;
@@ -77,16 +165,20 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
77 struct syscall_metadata *entry; 165 struct syscall_metadata *entry;
78 int ret; 166 int ret;
79 167
80 trace_assign_type(trace, ent); 168 trace = (typeof(trace))ent;
81
82 syscall = trace->nr; 169 syscall = trace->nr;
83
84 entry = syscall_nr_to_meta(syscall); 170 entry = syscall_nr_to_meta(syscall);
171
85 if (!entry) { 172 if (!entry) {
86 trace_seq_printf(s, "\n"); 173 trace_seq_printf(s, "\n");
87 return TRACE_TYPE_HANDLED; 174 return TRACE_TYPE_HANDLED;
88 } 175 }
89 176
177 if (entry->exit_event->event.type != ent->type) {
178 WARN_ON_ONCE(1);
179 return TRACE_TYPE_UNHANDLED;
180 }
181
90 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, 182 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
91 trace->ret); 183 trace->ret);
92 if (!ret) 184 if (!ret)
@@ -95,62 +187,127 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
95 return TRACE_TYPE_HANDLED; 187 return TRACE_TYPE_HANDLED;
96} 188}
97 189
98void start_ftrace_syscalls(void) 190extern char *__bad_type_size(void);
191
192#define SYSCALL_FIELD(type, name) \
193 sizeof(type) != sizeof(trace.name) ? \
194 __bad_type_size() : \
195 #type, #name, offsetof(typeof(trace), name), \
196 sizeof(trace.name), is_signed_type(type)
197
198static
199int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
99{ 200{
100 unsigned long flags; 201 int i;
101 struct task_struct *g, *t; 202 int pos = 0;
102 203
103 mutex_lock(&syscall_trace_lock); 204 /* When len=0, we just calculate the needed length */
205#define LEN_OR_ZERO (len ? len - pos : 0)
206
207 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
208 for (i = 0; i < entry->nb_args; i++) {
209 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
210 entry->args[i], sizeof(unsigned long),
211 i == entry->nb_args - 1 ? "" : ", ");
212 }
213 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
214
215 for (i = 0; i < entry->nb_args; i++) {
216 pos += snprintf(buf + pos, LEN_OR_ZERO,
217 ", ((unsigned long)(REC->%s))", entry->args[i]);
218 }
104 219
105 /* Don't enable the flag on the tasks twice */ 220#undef LEN_OR_ZERO
106 if (++refcount != 1)
107 goto unlock;
108 221
109 arch_init_ftrace_syscalls(); 222 /* return the length of print_fmt */
110 read_lock_irqsave(&tasklist_lock, flags); 223 return pos;
224}
111 225
112 do_each_thread(g, t) { 226static int set_syscall_print_fmt(struct ftrace_event_call *call)
113 set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); 227{
114 } while_each_thread(g, t); 228 char *print_fmt;
229 int len;
230 struct syscall_metadata *entry = call->data;
115 231
116 read_unlock_irqrestore(&tasklist_lock, flags); 232 if (entry->enter_event != call) {
233 call->print_fmt = "\"0x%lx\", REC->ret";
234 return 0;
235 }
117 236
118unlock: 237 /* First: called with 0 length to calculate the needed length */
119 mutex_unlock(&syscall_trace_lock); 238 len = __set_enter_print_fmt(entry, NULL, 0);
239
240 print_fmt = kmalloc(len + 1, GFP_KERNEL);
241 if (!print_fmt)
242 return -ENOMEM;
243
244 /* Second: actually write the @print_fmt */
245 __set_enter_print_fmt(entry, print_fmt, len + 1);
246 call->print_fmt = print_fmt;
247
248 return 0;
120} 249}
121 250
122void stop_ftrace_syscalls(void) 251static void free_syscall_print_fmt(struct ftrace_event_call *call)
123{ 252{
124 unsigned long flags; 253 struct syscall_metadata *entry = call->data;
125 struct task_struct *g, *t;
126 254
127 mutex_lock(&syscall_trace_lock); 255 if (entry->enter_event == call)
256 kfree(call->print_fmt);
257}
128 258
129 /* There are perhaps still some users */ 259static int syscall_enter_define_fields(struct ftrace_event_call *call)
130 if (--refcount) 260{
131 goto unlock; 261 struct syscall_trace_enter trace;
262 struct syscall_metadata *meta = call->data;
263 int ret;
264 int i;
265 int offset = offsetof(typeof(trace), args);
266
267 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
268 if (ret)
269 return ret;
270
271 for (i = 0; i < meta->nb_args; i++) {
272 ret = trace_define_field(call, meta->types[i],
273 meta->args[i], offset,
274 sizeof(unsigned long), 0,
275 FILTER_OTHER);
276 offset += sizeof(unsigned long);
277 }
132 278
133 read_lock_irqsave(&tasklist_lock, flags); 279 return ret;
280}
134 281
135 do_each_thread(g, t) { 282static int syscall_exit_define_fields(struct ftrace_event_call *call)
136 clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); 283{
137 } while_each_thread(g, t); 284 struct syscall_trace_exit trace;
285 int ret;
138 286
139 read_unlock_irqrestore(&tasklist_lock, flags); 287 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
288 if (ret)
289 return ret;
140 290
141unlock: 291 ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
142 mutex_unlock(&syscall_trace_lock); 292 FILTER_OTHER);
293
294 return ret;
143} 295}
144 296
145void ftrace_syscall_enter(struct pt_regs *regs) 297void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
146{ 298{
147 struct syscall_trace_enter *entry; 299 struct syscall_trace_enter *entry;
148 struct syscall_metadata *sys_data; 300 struct syscall_metadata *sys_data;
149 struct ring_buffer_event *event; 301 struct ring_buffer_event *event;
302 struct ring_buffer *buffer;
150 int size; 303 int size;
151 int syscall_nr; 304 int syscall_nr;
152 305
153 syscall_nr = syscall_get_nr(current, regs); 306 syscall_nr = syscall_get_nr(current, regs);
307 if (syscall_nr < 0)
308 return;
309 if (!test_bit(syscall_nr, enabled_enter_syscalls))
310 return;
154 311
155 sys_data = syscall_nr_to_meta(syscall_nr); 312 sys_data = syscall_nr_to_meta(syscall_nr);
156 if (!sys_data) 313 if (!sys_data)
@@ -158,8 +315,8 @@ void ftrace_syscall_enter(struct pt_regs *regs)
158 315
159 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 316 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
160 317
161 event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size, 318 event = trace_current_buffer_lock_reserve(&buffer,
162 0, 0); 319 sys_data->enter_event->event.type, size, 0, 0);
163 if (!event) 320 if (!event)
164 return; 321 return;
165 322
@@ -167,25 +324,31 @@ void ftrace_syscall_enter(struct pt_regs *regs)
167 entry->nr = syscall_nr; 324 entry->nr = syscall_nr;
168 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); 325 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
169 326
170 trace_current_buffer_unlock_commit(event, 0, 0); 327 if (!filter_current_check_discard(buffer, sys_data->enter_event,
171 trace_wake_up(); 328 entry, event))
329 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
172} 330}
173 331
174void ftrace_syscall_exit(struct pt_regs *regs) 332void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
175{ 333{
176 struct syscall_trace_exit *entry; 334 struct syscall_trace_exit *entry;
177 struct syscall_metadata *sys_data; 335 struct syscall_metadata *sys_data;
178 struct ring_buffer_event *event; 336 struct ring_buffer_event *event;
337 struct ring_buffer *buffer;
179 int syscall_nr; 338 int syscall_nr;
180 339
181 syscall_nr = syscall_get_nr(current, regs); 340 syscall_nr = syscall_get_nr(current, regs);
341 if (syscall_nr < 0)
342 return;
343 if (!test_bit(syscall_nr, enabled_exit_syscalls))
344 return;
182 345
183 sys_data = syscall_nr_to_meta(syscall_nr); 346 sys_data = syscall_nr_to_meta(syscall_nr);
184 if (!sys_data) 347 if (!sys_data)
185 return; 348 return;
186 349
187 event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT, 350 event = trace_current_buffer_lock_reserve(&buffer,
188 sizeof(*entry), 0, 0); 351 sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
189 if (!event) 352 if (!event)
190 return; 353 return;
191 354
@@ -193,58 +356,325 @@ void ftrace_syscall_exit(struct pt_regs *regs)
193 entry->nr = syscall_nr; 356 entry->nr = syscall_nr;
194 entry->ret = syscall_get_return_value(current, regs); 357 entry->ret = syscall_get_return_value(current, regs);
195 358
196 trace_current_buffer_unlock_commit(event, 0, 0); 359 if (!filter_current_check_discard(buffer, sys_data->exit_event,
197 trace_wake_up(); 360 entry, event))
361 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
198} 362}
199 363
200static int init_syscall_tracer(struct trace_array *tr) 364int reg_event_syscall_enter(struct ftrace_event_call *call)
201{ 365{
202 start_ftrace_syscalls(); 366 int ret = 0;
367 int num;
368
369 num = ((struct syscall_metadata *)call->data)->syscall_nr;
370 if (num < 0 || num >= NR_syscalls)
371 return -ENOSYS;
372 mutex_lock(&syscall_trace_lock);
373 if (!sys_refcount_enter)
374 ret = register_trace_sys_enter(ftrace_syscall_enter, NULL);
375 if (!ret) {
376 set_bit(num, enabled_enter_syscalls);
377 sys_refcount_enter++;
378 }
379 mutex_unlock(&syscall_trace_lock);
380 return ret;
381}
382
383void unreg_event_syscall_enter(struct ftrace_event_call *call)
384{
385 int num;
386
387 num = ((struct syscall_metadata *)call->data)->syscall_nr;
388 if (num < 0 || num >= NR_syscalls)
389 return;
390 mutex_lock(&syscall_trace_lock);
391 sys_refcount_enter--;
392 clear_bit(num, enabled_enter_syscalls);
393 if (!sys_refcount_enter)
394 unregister_trace_sys_enter(ftrace_syscall_enter, NULL);
395 mutex_unlock(&syscall_trace_lock);
396}
397
398int reg_event_syscall_exit(struct ftrace_event_call *call)
399{
400 int ret = 0;
401 int num;
402
403 num = ((struct syscall_metadata *)call->data)->syscall_nr;
404 if (num < 0 || num >= NR_syscalls)
405 return -ENOSYS;
406 mutex_lock(&syscall_trace_lock);
407 if (!sys_refcount_exit)
408 ret = register_trace_sys_exit(ftrace_syscall_exit, NULL);
409 if (!ret) {
410 set_bit(num, enabled_exit_syscalls);
411 sys_refcount_exit++;
412 }
413 mutex_unlock(&syscall_trace_lock);
414 return ret;
415}
416
417void unreg_event_syscall_exit(struct ftrace_event_call *call)
418{
419 int num;
420
421 num = ((struct syscall_metadata *)call->data)->syscall_nr;
422 if (num < 0 || num >= NR_syscalls)
423 return;
424 mutex_lock(&syscall_trace_lock);
425 sys_refcount_exit--;
426 clear_bit(num, enabled_exit_syscalls);
427 if (!sys_refcount_exit)
428 unregister_trace_sys_exit(ftrace_syscall_exit, NULL);
429 mutex_unlock(&syscall_trace_lock);
430}
431
432int init_syscall_trace(struct ftrace_event_call *call)
433{
434 int id;
435
436 if (set_syscall_print_fmt(call) < 0)
437 return -ENOMEM;
438
439 id = trace_event_raw_init(call);
440
441 if (id < 0) {
442 free_syscall_print_fmt(call);
443 return id;
444 }
445
446 return id;
447}
448
449unsigned long __init arch_syscall_addr(int nr)
450{
451 return (unsigned long)sys_call_table[nr];
452}
453
454int __init init_ftrace_syscalls(void)
455{
456 struct syscall_metadata *meta;
457 unsigned long addr;
458 int i;
459
460 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
461 NR_syscalls, GFP_KERNEL);
462 if (!syscalls_metadata) {
463 WARN_ON(1);
464 return -ENOMEM;
465 }
466
467 for (i = 0; i < NR_syscalls; i++) {
468 addr = arch_syscall_addr(i);
469 meta = find_syscall_meta(addr);
470 if (!meta)
471 continue;
472
473 meta->syscall_nr = i;
474 syscalls_metadata[i] = meta;
475 }
203 476
204 return 0; 477 return 0;
205} 478}
479core_initcall(init_ftrace_syscalls);
480
481#ifdef CONFIG_PERF_EVENTS
482
483static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
484static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
485static int sys_perf_refcount_enter;
486static int sys_perf_refcount_exit;
206 487
207static void reset_syscall_tracer(struct trace_array *tr) 488static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
208{ 489{
209 stop_ftrace_syscalls(); 490 struct syscall_metadata *sys_data;
210 tracing_reset_online_cpus(tr); 491 struct syscall_trace_enter *rec;
492 struct hlist_head *head;
493 int syscall_nr;
494 int rctx;
495 int size;
496
497 syscall_nr = syscall_get_nr(current, regs);
498 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
499 return;
500
501 sys_data = syscall_nr_to_meta(syscall_nr);
502 if (!sys_data)
503 return;
504
505 /* get the size after alignment with the u32 buffer size field */
506 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
507 size = ALIGN(size + sizeof(u32), sizeof(u64));
508 size -= sizeof(u32);
509
510 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
511 "perf buffer not large enough"))
512 return;
513
514 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
515 sys_data->enter_event->event.type, regs, &rctx);
516 if (!rec)
517 return;
518
519 rec->nr = syscall_nr;
520 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
521 (unsigned long *)&rec->args);
522
523 head = this_cpu_ptr(sys_data->enter_event->perf_events);
524 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
211} 525}
212 526
213static struct trace_event syscall_enter_event = { 527int perf_sysenter_enable(struct ftrace_event_call *call)
214 .type = TRACE_SYSCALL_ENTER, 528{
215 .trace = print_syscall_enter, 529 int ret = 0;
216}; 530 int num;
217 531
218static struct trace_event syscall_exit_event = { 532 num = ((struct syscall_metadata *)call->data)->syscall_nr;
219 .type = TRACE_SYSCALL_EXIT,
220 .trace = print_syscall_exit,
221};
222 533
223static struct tracer syscall_tracer __read_mostly = { 534 mutex_lock(&syscall_trace_lock);
224 .name = "syscall", 535 if (!sys_perf_refcount_enter)
225 .init = init_syscall_tracer, 536 ret = register_trace_sys_enter(perf_syscall_enter, NULL);
226 .reset = reset_syscall_tracer, 537 if (ret) {
227 .flags = &syscalls_flags, 538 pr_info("event trace: Could not activate"
228}; 539 "syscall entry trace point");
540 } else {
541 set_bit(num, enabled_perf_enter_syscalls);
542 sys_perf_refcount_enter++;
543 }
544 mutex_unlock(&syscall_trace_lock);
545 return ret;
546}
229 547
230__init int register_ftrace_syscalls(void) 548void perf_sysenter_disable(struct ftrace_event_call *call)
231{ 549{
232 int ret; 550 int num;
233 551
234 ret = register_ftrace_event(&syscall_enter_event); 552 num = ((struct syscall_metadata *)call->data)->syscall_nr;
235 if (!ret) { 553
236 printk(KERN_WARNING "event %d failed to register\n", 554 mutex_lock(&syscall_trace_lock);
237 syscall_enter_event.type); 555 sys_perf_refcount_enter--;
238 WARN_ON_ONCE(1); 556 clear_bit(num, enabled_perf_enter_syscalls);
557 if (!sys_perf_refcount_enter)
558 unregister_trace_sys_enter(perf_syscall_enter, NULL);
559 mutex_unlock(&syscall_trace_lock);
560}
561
562static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
563{
564 struct syscall_metadata *sys_data;
565 struct syscall_trace_exit *rec;
566 struct hlist_head *head;
567 int syscall_nr;
568 int rctx;
569 int size;
570
571 syscall_nr = syscall_get_nr(current, regs);
572 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
573 return;
574
575 sys_data = syscall_nr_to_meta(syscall_nr);
576 if (!sys_data)
577 return;
578
579 /* We can probably do that at build time */
580 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
581 size -= sizeof(u32);
582
583 /*
584 * Impossible, but be paranoid with the future
585 * How to put this check outside runtime?
586 */
587 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
588 "exit event has grown above perf buffer size"))
589 return;
590
591 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
592 sys_data->exit_event->event.type, regs, &rctx);
593 if (!rec)
594 return;
595
596 rec->nr = syscall_nr;
597 rec->ret = syscall_get_return_value(current, regs);
598
599 head = this_cpu_ptr(sys_data->exit_event->perf_events);
600 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
601}
602
603int perf_sysexit_enable(struct ftrace_event_call *call)
604{
605 int ret = 0;
606 int num;
607
608 num = ((struct syscall_metadata *)call->data)->syscall_nr;
609
610 mutex_lock(&syscall_trace_lock);
611 if (!sys_perf_refcount_exit)
612 ret = register_trace_sys_exit(perf_syscall_exit, NULL);
613 if (ret) {
614 pr_info("event trace: Could not activate"
615 "syscall exit trace point");
616 } else {
617 set_bit(num, enabled_perf_exit_syscalls);
618 sys_perf_refcount_exit++;
239 } 619 }
620 mutex_unlock(&syscall_trace_lock);
621 return ret;
622}
240 623
241 ret = register_ftrace_event(&syscall_exit_event); 624void perf_sysexit_disable(struct ftrace_event_call *call)
242 if (!ret) { 625{
243 printk(KERN_WARNING "event %d failed to register\n", 626 int num;
244 syscall_exit_event.type); 627
245 WARN_ON_ONCE(1); 628 num = ((struct syscall_metadata *)call->data)->syscall_nr;
629
630 mutex_lock(&syscall_trace_lock);
631 sys_perf_refcount_exit--;
632 clear_bit(num, enabled_perf_exit_syscalls);
633 if (!sys_perf_refcount_exit)
634 unregister_trace_sys_exit(perf_syscall_exit, NULL);
635 mutex_unlock(&syscall_trace_lock);
636}
637
638#endif /* CONFIG_PERF_EVENTS */
639
640static int syscall_enter_register(struct ftrace_event_call *event,
641 enum trace_reg type)
642{
643 switch (type) {
644 case TRACE_REG_REGISTER:
645 return reg_event_syscall_enter(event);
646 case TRACE_REG_UNREGISTER:
647 unreg_event_syscall_enter(event);
648 return 0;
649
650#ifdef CONFIG_PERF_EVENTS
651 case TRACE_REG_PERF_REGISTER:
652 return perf_sysenter_enable(event);
653 case TRACE_REG_PERF_UNREGISTER:
654 perf_sysenter_disable(event);
655 return 0;
656#endif
246 } 657 }
658 return 0;
659}
247 660
248 return register_tracer(&syscall_tracer); 661static int syscall_exit_register(struct ftrace_event_call *event,
662 enum trace_reg type)
663{
664 switch (type) {
665 case TRACE_REG_REGISTER:
666 return reg_event_syscall_exit(event);
667 case TRACE_REG_UNREGISTER:
668 unreg_event_syscall_exit(event);
669 return 0;
670
671#ifdef CONFIG_PERF_EVENTS
672 case TRACE_REG_PERF_REGISTER:
673 return perf_sysexit_enable(event);
674 case TRACE_REG_PERF_UNREGISTER:
675 perf_sysexit_disable(event);
676 return 0;
677#endif
678 }
679 return 0;
249} 680}
250device_initcall(register_ftrace_syscalls);
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
deleted file mode 100644
index f6693969287d..000000000000
--- a/kernel/trace/trace_sysprof.c
+++ /dev/null
@@ -1,328 +0,0 @@
1/*
2 * trace stack traces
3 *
4 * Copyright (C) 2004-2008, Soeren Sandmann
5 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/kallsyms.h>
9#include <linux/debugfs.h>
10#include <linux/hrtimer.h>
11#include <linux/uaccess.h>
12#include <linux/ftrace.h>
13#include <linux/module.h>
14#include <linux/irq.h>
15#include <linux/fs.h>
16
17#include <asm/stacktrace.h>
18
19#include "trace.h"
20
21static struct trace_array *sysprof_trace;
22static int __read_mostly tracer_enabled;
23
24/*
25 * 1 msec sample interval by default:
26 */
27static unsigned long sample_period = 1000000;
28static const unsigned int sample_max_depth = 512;
29
30static DEFINE_MUTEX(sample_timer_lock);
31/*
32 * Per CPU hrtimers that do the profiling:
33 */
34static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
35
36struct stack_frame {
37 const void __user *next_fp;
38 unsigned long return_address;
39};
40
41static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
42{
43 int ret;
44
45 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
46 return 0;
47
48 ret = 1;
49 pagefault_disable();
50 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
51 ret = 0;
52 pagefault_enable();
53
54 return ret;
55}
56
57struct backtrace_info {
58 struct trace_array_cpu *data;
59 struct trace_array *tr;
60 int pos;
61};
62
63static void
64backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
65{
66 /* Ignore warnings */
67}
68
69static void backtrace_warning(void *data, char *msg)
70{
71 /* Ignore warnings */
72}
73
74static int backtrace_stack(void *data, char *name)
75{
76 /* Don't bother with IRQ stacks for now */
77 return -1;
78}
79
80static void backtrace_address(void *data, unsigned long addr, int reliable)
81{
82 struct backtrace_info *info = data;
83
84 if (info->pos < sample_max_depth && reliable) {
85 __trace_special(info->tr, info->data, 1, addr, 0);
86
87 info->pos++;
88 }
89}
90
91static const struct stacktrace_ops backtrace_ops = {
92 .warning = backtrace_warning,
93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack,
95 .address = backtrace_address,
96};
97
98static int
99trace_kernel(struct pt_regs *regs, struct trace_array *tr,
100 struct trace_array_cpu *data)
101{
102 struct backtrace_info info;
103 unsigned long bp;
104 char *stack;
105
106 info.tr = tr;
107 info.data = data;
108 info.pos = 1;
109
110 __trace_special(info.tr, info.data, 1, regs->ip, 0);
111
112 stack = ((char *)regs + sizeof(struct pt_regs));
113#ifdef CONFIG_FRAME_POINTER
114 bp = regs->bp;
115#else
116 bp = 0;
117#endif
118
119 dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info);
120
121 return info.pos;
122}
123
124static void timer_notify(struct pt_regs *regs, int cpu)
125{
126 struct trace_array_cpu *data;
127 struct stack_frame frame;
128 struct trace_array *tr;
129 const void __user *fp;
130 int is_user;
131 int i;
132
133 if (!regs)
134 return;
135
136 tr = sysprof_trace;
137 data = tr->data[cpu];
138 is_user = user_mode(regs);
139
140 if (!current || current->pid == 0)
141 return;
142
143 if (is_user && current->state != TASK_RUNNING)
144 return;
145
146 __trace_special(tr, data, 0, 0, current->pid);
147
148 if (!is_user)
149 i = trace_kernel(regs, tr, data);
150 else
151 i = 0;
152
153 /*
154 * Trace user stack if we are not a kernel thread
155 */
156 if (current->mm && i < sample_max_depth) {
157 regs = (struct pt_regs *)current->thread.sp0 - 1;
158
159 fp = (void __user *)regs->bp;
160
161 __trace_special(tr, data, 2, regs->ip, 0);
162
163 while (i < sample_max_depth) {
164 frame.next_fp = NULL;
165 frame.return_address = 0;
166 if (!copy_stack_frame(fp, &frame))
167 break;
168 if ((unsigned long)fp < regs->sp)
169 break;
170
171 __trace_special(tr, data, 2, frame.return_address,
172 (unsigned long)fp);
173 fp = frame.next_fp;
174
175 i++;
176 }
177
178 }
179
180 /*
181 * Special trace entry if we overflow the max depth:
182 */
183 if (i == sample_max_depth)
184 __trace_special(tr, data, -1, -1, -1);
185
186 __trace_special(tr, data, 3, current->pid, i);
187}
188
189static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
190{
191 /* trace here */
192 timer_notify(get_irq_regs(), smp_processor_id());
193
194 hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
195
196 return HRTIMER_RESTART;
197}
198
199static void start_stack_timer(void *unused)
200{
201 struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer);
202
203 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
204 hrtimer->function = stack_trace_timer_fn;
205
206 hrtimer_start(hrtimer, ns_to_ktime(sample_period),
207 HRTIMER_MODE_REL_PINNED);
208}
209
210static void start_stack_timers(void)
211{
212 on_each_cpu(start_stack_timer, NULL, 1);
213}
214
215static void stop_stack_timer(int cpu)
216{
217 struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
218
219 hrtimer_cancel(hrtimer);
220}
221
222static void stop_stack_timers(void)
223{
224 int cpu;
225
226 for_each_online_cpu(cpu)
227 stop_stack_timer(cpu);
228}
229
230static void stop_stack_trace(struct trace_array *tr)
231{
232 mutex_lock(&sample_timer_lock);
233 stop_stack_timers();
234 tracer_enabled = 0;
235 mutex_unlock(&sample_timer_lock);
236}
237
238static int stack_trace_init(struct trace_array *tr)
239{
240 sysprof_trace = tr;
241
242 tracing_start_cmdline_record();
243
244 mutex_lock(&sample_timer_lock);
245 start_stack_timers();
246 tracer_enabled = 1;
247 mutex_unlock(&sample_timer_lock);
248 return 0;
249}
250
251static void stack_trace_reset(struct trace_array *tr)
252{
253 tracing_stop_cmdline_record();
254 stop_stack_trace(tr);
255}
256
257static struct tracer stack_trace __read_mostly =
258{
259 .name = "sysprof",
260 .init = stack_trace_init,
261 .reset = stack_trace_reset,
262#ifdef CONFIG_FTRACE_SELFTEST
263 .selftest = trace_selftest_startup_sysprof,
264#endif
265};
266
267__init static int init_stack_trace(void)
268{
269 return register_tracer(&stack_trace);
270}
271device_initcall(init_stack_trace);
272
273#define MAX_LONG_DIGITS 22
274
275static ssize_t
276sysprof_sample_read(struct file *filp, char __user *ubuf,
277 size_t cnt, loff_t *ppos)
278{
279 char buf[MAX_LONG_DIGITS];
280 int r;
281
282 r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period));
283
284 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
285}
286
287static ssize_t
288sysprof_sample_write(struct file *filp, const char __user *ubuf,
289 size_t cnt, loff_t *ppos)
290{
291 char buf[MAX_LONG_DIGITS];
292 unsigned long val;
293
294 if (cnt > MAX_LONG_DIGITS-1)
295 cnt = MAX_LONG_DIGITS-1;
296
297 if (copy_from_user(&buf, ubuf, cnt))
298 return -EFAULT;
299
300 buf[cnt] = 0;
301
302 val = simple_strtoul(buf, NULL, 10);
303 /*
304 * Enforce a minimum sample period of 100 usecs:
305 */
306 if (val < 100)
307 val = 100;
308
309 mutex_lock(&sample_timer_lock);
310 stop_stack_timers();
311 sample_period = val * 1000;
312 start_stack_timers();
313 mutex_unlock(&sample_timer_lock);
314
315 return cnt;
316}
317
318static const struct file_operations sysprof_sample_fops = {
319 .read = sysprof_sample_read,
320 .write = sysprof_sample_write,
321};
322
323void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
324{
325
326 trace_create_file("sysprof_sample_period", 0644,
327 d_tracer, NULL, &sysprof_sample_fops);
328}
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 97fcea4acce1..209b379a4721 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -9,6 +9,8 @@
9#include <trace/events/workqueue.h> 9#include <trace/events/workqueue.h>
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include <linux/slab.h>
13#include <linux/kref.h>
12#include "trace_stat.h" 14#include "trace_stat.h"
13#include "trace.h" 15#include "trace.h"
14 16
@@ -16,6 +18,7 @@
16/* A cpu workqueue thread */ 18/* A cpu workqueue thread */
17struct cpu_workqueue_stats { 19struct cpu_workqueue_stats {
18 struct list_head list; 20 struct list_head list;
21 struct kref kref;
19 int cpu; 22 int cpu;
20 pid_t pid; 23 pid_t pid;
21/* Can be inserted from interrupt or user context, need to be atomic */ 24/* Can be inserted from interrupt or user context, need to be atomic */
@@ -39,9 +42,15 @@ struct workqueue_global_stats {
39static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat); 42static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
40#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu)) 43#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
41 44
45static void cpu_workqueue_stat_free(struct kref *kref)
46{
47 kfree(container_of(kref, struct cpu_workqueue_stats, kref));
48}
49
42/* Insertion of a work */ 50/* Insertion of a work */
43static void 51static void
44probe_workqueue_insertion(struct task_struct *wq_thread, 52probe_workqueue_insertion(void *ignore,
53 struct task_struct *wq_thread,
45 struct work_struct *work) 54 struct work_struct *work)
46{ 55{
47 int cpu = cpumask_first(&wq_thread->cpus_allowed); 56 int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -62,7 +71,8 @@ found:
62 71
63/* Execution of a work */ 72/* Execution of a work */
64static void 73static void
65probe_workqueue_execution(struct task_struct *wq_thread, 74probe_workqueue_execution(void *ignore,
75 struct task_struct *wq_thread,
66 struct work_struct *work) 76 struct work_struct *work)
67{ 77{
68 int cpu = cpumask_first(&wq_thread->cpus_allowed); 78 int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -82,7 +92,8 @@ found:
82} 92}
83 93
84/* Creation of a cpu workqueue thread */ 94/* Creation of a cpu workqueue thread */
85static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) 95static void probe_workqueue_creation(void *ignore,
96 struct task_struct *wq_thread, int cpu)
86{ 97{
87 struct cpu_workqueue_stats *cws; 98 struct cpu_workqueue_stats *cws;
88 unsigned long flags; 99 unsigned long flags;
@@ -96,8 +107,8 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
96 return; 107 return;
97 } 108 }
98 INIT_LIST_HEAD(&cws->list); 109 INIT_LIST_HEAD(&cws->list);
110 kref_init(&cws->kref);
99 cws->cpu = cpu; 111 cws->cpu = cpu;
100
101 cws->pid = wq_thread->pid; 112 cws->pid = wq_thread->pid;
102 113
103 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 114 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
@@ -106,7 +117,8 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
106} 117}
107 118
108/* Destruction of a cpu workqueue thread */ 119/* Destruction of a cpu workqueue thread */
109static void probe_workqueue_destruction(struct task_struct *wq_thread) 120static void
121probe_workqueue_destruction(void *ignore, struct task_struct *wq_thread)
110{ 122{
111 /* Workqueue only execute on one cpu */ 123 /* Workqueue only execute on one cpu */
112 int cpu = cpumask_first(&wq_thread->cpus_allowed); 124 int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -118,7 +130,7 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
118 list) { 130 list) {
119 if (node->pid == wq_thread->pid) { 131 if (node->pid == wq_thread->pid) {
120 list_del(&node->list); 132 list_del(&node->list);
121 kfree(node); 133 kref_put(&node->kref, cpu_workqueue_stat_free);
122 goto found; 134 goto found;
123 } 135 }
124 } 136 }
@@ -137,9 +149,11 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
137 149
138 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 150 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
139 151
140 if (!list_empty(&workqueue_cpu_stat(cpu)->list)) 152 if (!list_empty(&workqueue_cpu_stat(cpu)->list)) {
141 ret = list_entry(workqueue_cpu_stat(cpu)->list.next, 153 ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
142 struct cpu_workqueue_stats, list); 154 struct cpu_workqueue_stats, list);
155 kref_get(&ret->kref);
156 }
143 157
144 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); 158 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
145 159
@@ -162,9 +176,9 @@ static void *workqueue_stat_start(struct tracer_stat *trace)
162static void *workqueue_stat_next(void *prev, int idx) 176static void *workqueue_stat_next(void *prev, int idx)
163{ 177{
164 struct cpu_workqueue_stats *prev_cws = prev; 178 struct cpu_workqueue_stats *prev_cws = prev;
179 struct cpu_workqueue_stats *ret;
165 int cpu = prev_cws->cpu; 180 int cpu = prev_cws->cpu;
166 unsigned long flags; 181 unsigned long flags;
167 void *ret = NULL;
168 182
169 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 183 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
170 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) { 184 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
@@ -175,11 +189,14 @@ static void *workqueue_stat_next(void *prev, int idx)
175 return NULL; 189 return NULL;
176 } while (!(ret = workqueue_stat_start_cpu(cpu))); 190 } while (!(ret = workqueue_stat_start_cpu(cpu)));
177 return ret; 191 return ret;
192 } else {
193 ret = list_entry(prev_cws->list.next,
194 struct cpu_workqueue_stats, list);
195 kref_get(&ret->kref);
178 } 196 }
179 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); 197 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
180 198
181 return list_entry(prev_cws->list.next, struct cpu_workqueue_stats, 199 return ret;
182 list);
183} 200}
184 201
185static int workqueue_stat_show(struct seq_file *s, void *p) 202static int workqueue_stat_show(struct seq_file *s, void *p)
@@ -203,6 +220,13 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
203 return 0; 220 return 0;
204} 221}
205 222
223static void workqueue_stat_release(void *stat)
224{
225 struct cpu_workqueue_stats *node = stat;
226
227 kref_put(&node->kref, cpu_workqueue_stat_free);
228}
229
206static int workqueue_stat_headers(struct seq_file *s) 230static int workqueue_stat_headers(struct seq_file *s)
207{ 231{
208 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); 232 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
@@ -215,6 +239,7 @@ struct tracer_stat workqueue_stats __read_mostly = {
215 .stat_start = workqueue_stat_start, 239 .stat_start = workqueue_stat_start,
216 .stat_next = workqueue_stat_next, 240 .stat_next = workqueue_stat_next,
217 .stat_show = workqueue_stat_show, 241 .stat_show = workqueue_stat_show,
242 .stat_release = workqueue_stat_release,
218 .stat_headers = workqueue_stat_headers 243 .stat_headers = workqueue_stat_headers
219}; 244};
220 245
@@ -238,35 +263,35 @@ int __init trace_workqueue_early_init(void)
238{ 263{
239 int ret, cpu; 264 int ret, cpu;
240 265
241 ret = register_trace_workqueue_insertion(probe_workqueue_insertion); 266 for_each_possible_cpu(cpu) {
267 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
268 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
269 }
270
271 ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
242 if (ret) 272 if (ret)
243 goto out; 273 goto out;
244 274
245 ret = register_trace_workqueue_execution(probe_workqueue_execution); 275 ret = register_trace_workqueue_execution(probe_workqueue_execution, NULL);
246 if (ret) 276 if (ret)
247 goto no_insertion; 277 goto no_insertion;
248 278
249 ret = register_trace_workqueue_creation(probe_workqueue_creation); 279 ret = register_trace_workqueue_creation(probe_workqueue_creation, NULL);
250 if (ret) 280 if (ret)
251 goto no_execution; 281 goto no_execution;
252 282
253 ret = register_trace_workqueue_destruction(probe_workqueue_destruction); 283 ret = register_trace_workqueue_destruction(probe_workqueue_destruction, NULL);
254 if (ret) 284 if (ret)
255 goto no_creation; 285 goto no_creation;
256 286
257 for_each_possible_cpu(cpu) {
258 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
259 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
260 }
261
262 return 0; 287 return 0;
263 288
264no_creation: 289no_creation:
265 unregister_trace_workqueue_creation(probe_workqueue_creation); 290 unregister_trace_workqueue_creation(probe_workqueue_creation, NULL);
266no_execution: 291no_execution:
267 unregister_trace_workqueue_execution(probe_workqueue_execution); 292 unregister_trace_workqueue_execution(probe_workqueue_execution, NULL);
268no_insertion: 293no_insertion:
269 unregister_trace_workqueue_insertion(probe_workqueue_insertion); 294 unregister_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
270out: 295out:
271 pr_warning("trace_workqueue: unable to trace workqueues\n"); 296 pr_warning("trace_workqueue: unable to trace workqueues\n");
272 297