diff options
author | Paul Mundt <lethal@linux-sh.org> | 2011-01-13 01:06:28 -0500 |
---|---|---|
committer | Paul Mundt <lethal@linux-sh.org> | 2011-01-13 01:06:28 -0500 |
commit | f43dc23d5ea91fca257be02138a255f02d98e806 (patch) | |
tree | b29722f6e965316e90ac97abf79923ced250dc21 /kernel/trace | |
parent | f8e53553f452dcbf67cb89c8cba63a1cd6eb4cc0 (diff) | |
parent | 4162cf64973df51fc885825bc9ca4d055891c49f (diff) |
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6 into common/serial-rework
Conflicts:
arch/sh/kernel/cpu/sh2/setup-sh7619.c
arch/sh/kernel/cpu/sh2a/setup-mxg.c
arch/sh/kernel/cpu/sh2a/setup-sh7201.c
arch/sh/kernel/cpu/sh2a/setup-sh7203.c
arch/sh/kernel/cpu/sh2a/setup-sh7206.c
arch/sh/kernel/cpu/sh3/setup-sh7705.c
arch/sh/kernel/cpu/sh3/setup-sh770x.c
arch/sh/kernel/cpu/sh3/setup-sh7710.c
arch/sh/kernel/cpu/sh3/setup-sh7720.c
arch/sh/kernel/cpu/sh4/setup-sh4-202.c
arch/sh/kernel/cpu/sh4/setup-sh7750.c
arch/sh/kernel/cpu/sh4/setup-sh7760.c
arch/sh/kernel/cpu/sh4a/setup-sh7343.c
arch/sh/kernel/cpu/sh4a/setup-sh7366.c
arch/sh/kernel/cpu/sh4a/setup-sh7722.c
arch/sh/kernel/cpu/sh4a/setup-sh7723.c
arch/sh/kernel/cpu/sh4a/setup-sh7724.c
arch/sh/kernel/cpu/sh4a/setup-sh7763.c
arch/sh/kernel/cpu/sh4a/setup-sh7770.c
arch/sh/kernel/cpu/sh4a/setup-sh7780.c
arch/sh/kernel/cpu/sh4a/setup-sh7785.c
arch/sh/kernel/cpu/sh4a/setup-sh7786.c
arch/sh/kernel/cpu/sh4a/setup-shx3.c
arch/sh/kernel/cpu/sh5/setup-sh5.c
drivers/serial/sh-sci.c
drivers/serial/sh-sci.h
include/linux/serial_sci.h
Diffstat (limited to 'kernel/trace')
40 files changed, 9416 insertions, 5378 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 1551f47e7669..14674dce77a6 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -11,38 +11,48 @@ config NOP_TRACER | |||
11 | 11 | ||
12 | config HAVE_FTRACE_NMI_ENTER | 12 | config HAVE_FTRACE_NMI_ENTER |
13 | bool | 13 | bool |
14 | help | ||
15 | See Documentation/trace/ftrace-design.txt | ||
14 | 16 | ||
15 | config HAVE_FUNCTION_TRACER | 17 | config HAVE_FUNCTION_TRACER |
16 | bool | 18 | bool |
19 | help | ||
20 | See Documentation/trace/ftrace-design.txt | ||
17 | 21 | ||
18 | config HAVE_FUNCTION_GRAPH_TRACER | 22 | config HAVE_FUNCTION_GRAPH_TRACER |
19 | bool | 23 | bool |
24 | help | ||
25 | See Documentation/trace/ftrace-design.txt | ||
20 | 26 | ||
21 | config HAVE_FUNCTION_GRAPH_FP_TEST | 27 | config HAVE_FUNCTION_GRAPH_FP_TEST |
22 | bool | 28 | bool |
23 | help | 29 | help |
24 | An arch may pass in a unique value (frame pointer) to both the | 30 | See Documentation/trace/ftrace-design.txt |
25 | entering and exiting of a function. On exit, the value is compared | ||
26 | and if it does not match, then it will panic the kernel. | ||
27 | 31 | ||
28 | config HAVE_FUNCTION_TRACE_MCOUNT_TEST | 32 | config HAVE_FUNCTION_TRACE_MCOUNT_TEST |
29 | bool | 33 | bool |
30 | help | 34 | help |
31 | This gets selected when the arch tests the function_trace_stop | 35 | See Documentation/trace/ftrace-design.txt |
32 | variable at the mcount call site. Otherwise, this variable | ||
33 | is tested by the called function. | ||
34 | 36 | ||
35 | config HAVE_DYNAMIC_FTRACE | 37 | config HAVE_DYNAMIC_FTRACE |
36 | bool | 38 | bool |
39 | help | ||
40 | See Documentation/trace/ftrace-design.txt | ||
37 | 41 | ||
38 | config HAVE_FTRACE_MCOUNT_RECORD | 42 | config HAVE_FTRACE_MCOUNT_RECORD |
39 | bool | 43 | bool |
44 | help | ||
45 | See Documentation/trace/ftrace-design.txt | ||
40 | 46 | ||
41 | config HAVE_HW_BRANCH_TRACER | 47 | config HAVE_SYSCALL_TRACEPOINTS |
42 | bool | 48 | bool |
49 | help | ||
50 | See Documentation/trace/ftrace-design.txt | ||
43 | 51 | ||
44 | config HAVE_FTRACE_SYSCALLS | 52 | config HAVE_C_RECORDMCOUNT |
45 | bool | 53 | bool |
54 | help | ||
55 | C version of recordmcount available? | ||
46 | 56 | ||
47 | config TRACER_MAX_TRACE | 57 | config TRACER_MAX_TRACE |
48 | bool | 58 | bool |
@@ -59,16 +69,36 @@ config EVENT_TRACING | |||
59 | select CONTEXT_SWITCH_TRACER | 69 | select CONTEXT_SWITCH_TRACER |
60 | bool | 70 | bool |
61 | 71 | ||
72 | config EVENT_POWER_TRACING_DEPRECATED | ||
73 | depends on EVENT_TRACING | ||
74 | bool "Deprecated power event trace API, to be removed" | ||
75 | default y | ||
76 | help | ||
77 | Provides old power event types: | ||
78 | C-state/idle accounting events: | ||
79 | power:power_start | ||
80 | power:power_end | ||
81 | and old cpufreq accounting event: | ||
82 | power:power_frequency | ||
83 | This is for userspace compatibility | ||
84 | and will vanish after 5 kernel iterations, | ||
85 | namely 2.6.41. | ||
86 | |||
62 | config CONTEXT_SWITCH_TRACER | 87 | config CONTEXT_SWITCH_TRACER |
63 | select MARKERS | ||
64 | bool | 88 | bool |
65 | 89 | ||
90 | config RING_BUFFER_ALLOW_SWAP | ||
91 | bool | ||
92 | help | ||
93 | Allow the use of ring_buffer_swap_cpu. | ||
94 | Adds a very slight overhead to tracing when enabled. | ||
95 | |||
66 | # All tracer options should select GENERIC_TRACER. For those options that are | 96 | # All tracer options should select GENERIC_TRACER. For those options that are |
67 | # enabled by all tracers (context switch and event tracer) they select TRACING. | 97 | # enabled by all tracers (context switch and event tracer) they select TRACING. |
68 | # This allows those options to appear when no other tracer is selected. But the | 98 | # This allows those options to appear when no other tracer is selected. But the |
69 | # options do not appear when something else selects it. We need the two options | 99 | # options do not appear when something else selects it. We need the two options |
70 | # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the | 100 | # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the |
71 | # hidding of the automatic options options. | 101 | # hiding of the automatic options. |
72 | 102 | ||
73 | config TRACING | 103 | config TRACING |
74 | bool | 104 | bool |
@@ -104,21 +134,21 @@ menuconfig FTRACE | |||
104 | bool "Tracers" | 134 | bool "Tracers" |
105 | default y if DEBUG_KERNEL | 135 | default y if DEBUG_KERNEL |
106 | help | 136 | help |
107 | Enable the kernel tracing infrastructure. | 137 | Enable the kernel tracing infrastructure. |
108 | 138 | ||
109 | if FTRACE | 139 | if FTRACE |
110 | 140 | ||
111 | config FUNCTION_TRACER | 141 | config FUNCTION_TRACER |
112 | bool "Kernel Function Tracer" | 142 | bool "Kernel Function Tracer" |
113 | depends on HAVE_FUNCTION_TRACER | 143 | depends on HAVE_FUNCTION_TRACER |
114 | select FRAME_POINTER | 144 | select FRAME_POINTER if !ARM_UNWIND && !S390 |
115 | select KALLSYMS | 145 | select KALLSYMS |
116 | select GENERIC_TRACER | 146 | select GENERIC_TRACER |
117 | select CONTEXT_SWITCH_TRACER | 147 | select CONTEXT_SWITCH_TRACER |
118 | help | 148 | help |
119 | Enable the kernel to trace every kernel function. This is done | 149 | Enable the kernel to trace every kernel function. This is done |
120 | by using a compiler feature to insert a small, 5-byte No-Operation | 150 | by using a compiler feature to insert a small, 5-byte No-Operation |
121 | instruction to the beginning of every kernel function, which NOP | 151 | instruction at the beginning of every kernel function, which NOP |
122 | sequence is then dynamically patched into a tracer call when | 152 | sequence is then dynamically patched into a tracer call when |
123 | tracing is enabled by the administrator. If it's runtime disabled | 153 | tracing is enabled by the administrator. If it's runtime disabled |
124 | (the bootup default), then the overhead of the instructions is very | 154 | (the bootup default), then the overhead of the instructions is very |
@@ -135,7 +165,7 @@ config FUNCTION_GRAPH_TRACER | |||
135 | and its entry. | 165 | and its entry. |
136 | Its first purpose is to trace the duration of functions and | 166 | Its first purpose is to trace the duration of functions and |
137 | draw a call graph for each thread with some information like | 167 | draw a call graph for each thread with some information like |
138 | the return value. This is done by setting the current return | 168 | the return value. This is done by setting the current return |
139 | address on the current task structure into a stack of calls. | 169 | address on the current task structure into a stack of calls. |
140 | 170 | ||
141 | 171 | ||
@@ -143,10 +173,11 @@ config IRQSOFF_TRACER | |||
143 | bool "Interrupts-off Latency Tracer" | 173 | bool "Interrupts-off Latency Tracer" |
144 | default n | 174 | default n |
145 | depends on TRACE_IRQFLAGS_SUPPORT | 175 | depends on TRACE_IRQFLAGS_SUPPORT |
146 | depends on GENERIC_TIME | 176 | depends on !ARCH_USES_GETTIMEOFFSET |
147 | select TRACE_IRQFLAGS | 177 | select TRACE_IRQFLAGS |
148 | select GENERIC_TRACER | 178 | select GENERIC_TRACER |
149 | select TRACER_MAX_TRACE | 179 | select TRACER_MAX_TRACE |
180 | select RING_BUFFER_ALLOW_SWAP | ||
150 | help | 181 | help |
151 | This option measures the time spent in irqs-off critical | 182 | This option measures the time spent in irqs-off critical |
152 | sections, with microsecond accuracy. | 183 | sections, with microsecond accuracy. |
@@ -157,19 +188,20 @@ config IRQSOFF_TRACER | |||
157 | 188 | ||
158 | echo 0 > /sys/kernel/debug/tracing/tracing_max_latency | 189 | echo 0 > /sys/kernel/debug/tracing/tracing_max_latency |
159 | 190 | ||
160 | (Note that kernel size and overhead increases with this option | 191 | (Note that kernel size and overhead increase with this option |
161 | enabled. This option and the preempt-off timing option can be | 192 | enabled. This option and the preempt-off timing option can be |
162 | used together or separately.) | 193 | used together or separately.) |
163 | 194 | ||
164 | config PREEMPT_TRACER | 195 | config PREEMPT_TRACER |
165 | bool "Preemption-off Latency Tracer" | 196 | bool "Preemption-off Latency Tracer" |
166 | default n | 197 | default n |
167 | depends on GENERIC_TIME | 198 | depends on !ARCH_USES_GETTIMEOFFSET |
168 | depends on PREEMPT | 199 | depends on PREEMPT |
169 | select GENERIC_TRACER | 200 | select GENERIC_TRACER |
170 | select TRACER_MAX_TRACE | 201 | select TRACER_MAX_TRACE |
202 | select RING_BUFFER_ALLOW_SWAP | ||
171 | help | 203 | help |
172 | This option measures the time spent in preemption off critical | 204 | This option measures the time spent in preemption-off critical |
173 | sections, with microsecond accuracy. | 205 | sections, with microsecond accuracy. |
174 | 206 | ||
175 | The default measurement method is a maximum search, which is | 207 | The default measurement method is a maximum search, which is |
@@ -178,19 +210,10 @@ config PREEMPT_TRACER | |||
178 | 210 | ||
179 | echo 0 > /sys/kernel/debug/tracing/tracing_max_latency | 211 | echo 0 > /sys/kernel/debug/tracing/tracing_max_latency |
180 | 212 | ||
181 | (Note that kernel size and overhead increases with this option | 213 | (Note that kernel size and overhead increase with this option |
182 | enabled. This option and the irqs-off timing option can be | 214 | enabled. This option and the irqs-off timing option can be |
183 | used together or separately.) | 215 | used together or separately.) |
184 | 216 | ||
185 | config SYSPROF_TRACER | ||
186 | bool "Sysprof Tracer" | ||
187 | depends on X86 | ||
188 | select GENERIC_TRACER | ||
189 | select CONTEXT_SWITCH_TRACER | ||
190 | help | ||
191 | This tracer provides the trace needed by the 'Sysprof' userspace | ||
192 | tool. | ||
193 | |||
194 | config SCHED_TRACER | 217 | config SCHED_TRACER |
195 | bool "Scheduling Latency Tracer" | 218 | bool "Scheduling Latency Tracer" |
196 | select GENERIC_TRACER | 219 | select GENERIC_TRACER |
@@ -205,35 +228,18 @@ config ENABLE_DEFAULT_TRACERS | |||
205 | depends on !GENERIC_TRACER | 228 | depends on !GENERIC_TRACER |
206 | select TRACING | 229 | select TRACING |
207 | help | 230 | help |
208 | This tracer hooks to various trace points in the kernel | 231 | This tracer hooks to various trace points in the kernel, |
209 | allowing the user to pick and choose which trace point they | 232 | allowing the user to pick and choose which trace point they |
210 | want to trace. It also includes the sched_switch tracer plugin. | 233 | want to trace. It also includes the sched_switch tracer plugin. |
211 | 234 | ||
212 | config FTRACE_SYSCALLS | 235 | config FTRACE_SYSCALLS |
213 | bool "Trace syscalls" | 236 | bool "Trace syscalls" |
214 | depends on HAVE_FTRACE_SYSCALLS | 237 | depends on HAVE_SYSCALL_TRACEPOINTS |
215 | select GENERIC_TRACER | 238 | select GENERIC_TRACER |
216 | select KALLSYMS | 239 | select KALLSYMS |
217 | help | 240 | help |
218 | Basic tracer to catch the syscall entry and exit events. | 241 | Basic tracer to catch the syscall entry and exit events. |
219 | 242 | ||
220 | config BOOT_TRACER | ||
221 | bool "Trace boot initcalls" | ||
222 | select GENERIC_TRACER | ||
223 | select CONTEXT_SWITCH_TRACER | ||
224 | help | ||
225 | This tracer helps developers to optimize boot times: it records | ||
226 | the timings of the initcalls and traces key events and the identity | ||
227 | of tasks that can cause boot delays, such as context-switches. | ||
228 | |||
229 | Its aim is to be parsed by the /scripts/bootgraph.pl tool to | ||
230 | produce pretty graphics about boot inefficiencies, giving a visual | ||
231 | representation of the delays during initcalls - but the raw | ||
232 | /debug/tracing/trace text output is readable too. | ||
233 | |||
234 | You must pass in ftrace=initcall to the kernel command line | ||
235 | to enable this on bootup. | ||
236 | |||
237 | config TRACE_BRANCH_PROFILING | 243 | config TRACE_BRANCH_PROFILING |
238 | bool | 244 | bool |
239 | select GENERIC_TRACER | 245 | select GENERIC_TRACER |
@@ -248,19 +254,19 @@ choice | |||
248 | The likely/unlikely profiler only looks at the conditions that | 254 | The likely/unlikely profiler only looks at the conditions that |
249 | are annotated with a likely or unlikely macro. | 255 | are annotated with a likely or unlikely macro. |
250 | 256 | ||
251 | The "all branch" profiler will profile every if statement in the | 257 | The "all branch" profiler will profile every if-statement in the |
252 | kernel. This profiler will also enable the likely/unlikely | 258 | kernel. This profiler will also enable the likely/unlikely |
253 | profiler as well. | 259 | profiler. |
254 | 260 | ||
255 | Either of the above profilers add a bit of overhead to the system. | 261 | Either of the above profilers adds a bit of overhead to the system. |
256 | If unsure choose "No branch profiling". | 262 | If unsure, choose "No branch profiling". |
257 | 263 | ||
258 | config BRANCH_PROFILE_NONE | 264 | config BRANCH_PROFILE_NONE |
259 | bool "No branch profiling" | 265 | bool "No branch profiling" |
260 | help | 266 | help |
261 | No branch profiling. Branch profiling adds a bit of overhead. | 267 | No branch profiling. Branch profiling adds a bit of overhead. |
262 | Only enable it if you want to analyse the branching behavior. | 268 | Only enable it if you want to analyse the branching behavior. |
263 | Otherwise keep it disabled. | 269 | Otherwise keep it disabled. |
264 | 270 | ||
265 | config PROFILE_ANNOTATED_BRANCHES | 271 | config PROFILE_ANNOTATED_BRANCHES |
266 | bool "Trace likely/unlikely profiler" | 272 | bool "Trace likely/unlikely profiler" |
@@ -271,7 +277,7 @@ config PROFILE_ANNOTATED_BRANCHES | |||
271 | 277 | ||
272 | /sys/kernel/debug/tracing/profile_annotated_branch | 278 | /sys/kernel/debug/tracing/profile_annotated_branch |
273 | 279 | ||
274 | Note: this will add a significant overhead, only turn this | 280 | Note: this will add a significant overhead; only turn this |
275 | on if you need to profile the system's use of these macros. | 281 | on if you need to profile the system's use of these macros. |
276 | 282 | ||
277 | config PROFILE_ALL_BRANCHES | 283 | config PROFILE_ALL_BRANCHES |
@@ -288,7 +294,7 @@ config PROFILE_ALL_BRANCHES | |||
288 | 294 | ||
289 | This configuration, when enabled, will impose a great overhead | 295 | This configuration, when enabled, will impose a great overhead |
290 | on the system. This should only be enabled when the system | 296 | on the system. This should only be enabled when the system |
291 | is to be analyzed | 297 | is to be analyzed in much detail. |
292 | endchoice | 298 | endchoice |
293 | 299 | ||
294 | config TRACING_BRANCHES | 300 | config TRACING_BRANCHES |
@@ -313,16 +319,6 @@ config BRANCH_TRACER | |||
313 | 319 | ||
314 | Say N if unsure. | 320 | Say N if unsure. |
315 | 321 | ||
316 | config POWER_TRACER | ||
317 | bool "Trace power consumption behavior" | ||
318 | depends on X86 | ||
319 | select GENERIC_TRACER | ||
320 | help | ||
321 | This tracer helps developers to analyze and optimize the kernels | ||
322 | power management decisions, specifically the C-state and P-state | ||
323 | behavior. | ||
324 | |||
325 | |||
326 | config STACK_TRACER | 322 | config STACK_TRACER |
327 | bool "Trace max stack" | 323 | bool "Trace max stack" |
328 | depends on HAVE_FUNCTION_TRACER | 324 | depends on HAVE_FUNCTION_TRACER |
@@ -347,47 +343,8 @@ config STACK_TRACER | |||
347 | 343 | ||
348 | Say N if unsure. | 344 | Say N if unsure. |
349 | 345 | ||
350 | config HW_BRANCH_TRACER | ||
351 | depends on HAVE_HW_BRANCH_TRACER | ||
352 | bool "Trace hw branches" | ||
353 | select GENERIC_TRACER | ||
354 | help | ||
355 | This tracer records all branches on the system in a circular | ||
356 | buffer giving access to the last N branches for each cpu. | ||
357 | |||
358 | config KMEMTRACE | ||
359 | bool "Trace SLAB allocations" | ||
360 | select GENERIC_TRACER | ||
361 | help | ||
362 | kmemtrace provides tracing for slab allocator functions, such as | ||
363 | kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected | ||
364 | data is then fed to the userspace application in order to analyse | ||
365 | allocation hotspots, internal fragmentation and so on, making it | ||
366 | possible to see how well an allocator performs, as well as debug | ||
367 | and profile kernel code. | ||
368 | |||
369 | This requires an userspace application to use. See | ||
370 | Documentation/trace/kmemtrace.txt for more information. | ||
371 | |||
372 | Saying Y will make the kernel somewhat larger and slower. However, | ||
373 | if you disable kmemtrace at run-time or boot-time, the performance | ||
374 | impact is minimal (depending on the arch the kernel is built for). | ||
375 | |||
376 | If unsure, say N. | ||
377 | |||
378 | config WORKQUEUE_TRACER | ||
379 | bool "Trace workqueues" | ||
380 | select GENERIC_TRACER | ||
381 | help | ||
382 | The workqueue tracer provides some statistical informations | ||
383 | about each cpu workqueue thread such as the number of the | ||
384 | works inserted and executed since their creation. It can help | ||
385 | to evaluate the amount of work each of them have to perform. | ||
386 | For example it can help a developer to decide whether he should | ||
387 | choose a per cpu workqueue instead of a singlethreaded one. | ||
388 | |||
389 | config BLK_DEV_IO_TRACE | 346 | config BLK_DEV_IO_TRACE |
390 | bool "Support for tracing block io actions" | 347 | bool "Support for tracing block IO actions" |
391 | depends on SYSFS | 348 | depends on SYSFS |
392 | depends on BLOCK | 349 | depends on BLOCK |
393 | select RELAY | 350 | select RELAY |
@@ -411,38 +368,55 @@ config BLK_DEV_IO_TRACE | |||
411 | 368 | ||
412 | If unsure, say N. | 369 | If unsure, say N. |
413 | 370 | ||
371 | config KPROBE_EVENT | ||
372 | depends on KPROBES | ||
373 | depends on HAVE_REGS_AND_STACK_ACCESS_API | ||
374 | bool "Enable kprobes-based dynamic events" | ||
375 | select TRACING | ||
376 | default y | ||
377 | help | ||
378 | This allows the user to add tracing events (similar to tracepoints) | ||
379 | on the fly via the ftrace interface. See | ||
380 | Documentation/trace/kprobetrace.txt for more details. | ||
381 | |||
382 | Those events can be inserted wherever kprobes can probe, and record | ||
383 | various register and memory values. | ||
384 | |||
385 | This option is also required by perf-probe subcommand of perf tools. | ||
386 | If you want to use perf tools, this option is strongly recommended. | ||
387 | |||
414 | config DYNAMIC_FTRACE | 388 | config DYNAMIC_FTRACE |
415 | bool "enable/disable ftrace tracepoints dynamically" | 389 | bool "enable/disable ftrace tracepoints dynamically" |
416 | depends on FUNCTION_TRACER | 390 | depends on FUNCTION_TRACER |
417 | depends on HAVE_DYNAMIC_FTRACE | 391 | depends on HAVE_DYNAMIC_FTRACE |
418 | default y | 392 | default y |
419 | help | 393 | help |
420 | This option will modify all the calls to ftrace dynamically | 394 | This option will modify all the calls to ftrace dynamically |
421 | (will patch them out of the binary image and replaces them | 395 | (will patch them out of the binary image and replace them |
422 | with a No-Op instruction) as they are called. A table is | 396 | with a No-Op instruction) as they are called. A table is |
423 | created to dynamically enable them again. | 397 | created to dynamically enable them again. |
424 | 398 | ||
425 | This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise | 399 | This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but |
426 | has native performance as long as no tracing is active. | 400 | otherwise has native performance as long as no tracing is active. |
427 | 401 | ||
428 | The changes to the code are done by a kernel thread that | 402 | The changes to the code are done by a kernel thread that |
429 | wakes up once a second and checks to see if any ftrace calls | 403 | wakes up once a second and checks to see if any ftrace calls |
430 | were made. If so, it runs stop_machine (stops all CPUS) | 404 | were made. If so, it runs stop_machine (stops all CPUS) |
431 | and modifies the code to jump over the call to ftrace. | 405 | and modifies the code to jump over the call to ftrace. |
432 | 406 | ||
433 | config FUNCTION_PROFILER | 407 | config FUNCTION_PROFILER |
434 | bool "Kernel function profiler" | 408 | bool "Kernel function profiler" |
435 | depends on FUNCTION_TRACER | 409 | depends on FUNCTION_TRACER |
436 | default n | 410 | default n |
437 | help | 411 | help |
438 | This option enables the kernel function profiler. A file is created | 412 | This option enables the kernel function profiler. A file is created |
439 | in debugfs called function_profile_enabled which defaults to zero. | 413 | in debugfs called function_profile_enabled which defaults to zero. |
440 | When a 1 is echoed into this file profiling begins, and when a | 414 | When a 1 is echoed into this file profiling begins, and when a |
441 | zero is entered, profiling stops. A file in the trace_stats | 415 | zero is entered, profiling stops. A "functions" file is created in |
442 | directory called functions, that show the list of functions that | 416 | the trace_stats directory; this file shows the list of functions that |
443 | have been hit and their counters. | 417 | have been hit and their counters. |
444 | 418 | ||
445 | If in doubt, say N | 419 | If in doubt, say N. |
446 | 420 | ||
447 | config FTRACE_MCOUNT_RECORD | 421 | config FTRACE_MCOUNT_RECORD |
448 | def_bool y | 422 | def_bool y |
@@ -462,6 +436,18 @@ config FTRACE_STARTUP_TEST | |||
462 | functioning properly. It will do tests on all the configured | 436 | functioning properly. It will do tests on all the configured |
463 | tracers of ftrace. | 437 | tracers of ftrace. |
464 | 438 | ||
439 | config EVENT_TRACE_TEST_SYSCALLS | ||
440 | bool "Run selftest on syscall events" | ||
441 | depends on FTRACE_STARTUP_TEST | ||
442 | help | ||
443 | This option will also enable testing every syscall event. | ||
444 | It only enables the event and disables it and runs various loads | ||
445 | with the event enabled. This adds a bit more time for kernel boot | ||
446 | up since it runs this on every system call defined. | ||
447 | |||
448 | TBD - enable a way to actually call the syscalls as we test their | ||
449 | events | ||
450 | |||
465 | config MMIOTRACE | 451 | config MMIOTRACE |
466 | bool "Memory mapped IO tracing" | 452 | bool "Memory mapped IO tracing" |
467 | depends on HAVE_MMIOTRACE_SUPPORT && PCI | 453 | depends on HAVE_MMIOTRACE_SUPPORT && PCI |
@@ -489,8 +475,8 @@ config RING_BUFFER_BENCHMARK | |||
489 | tristate "Ring buffer benchmark stress tester" | 475 | tristate "Ring buffer benchmark stress tester" |
490 | depends on RING_BUFFER | 476 | depends on RING_BUFFER |
491 | help | 477 | help |
492 | This option creates a test to stress the ring buffer and bench mark it. | 478 | This option creates a test to stress the ring buffer and benchmark it. |
493 | It creates its own ring buffer such that it will not interfer with | 479 | It creates its own ring buffer such that it will not interfere with |
494 | any other users of the ring buffer (such as ftrace). It then creates | 480 | any other users of the ring buffer (such as ftrace). It then creates |
495 | a producer and consumer that will run for 10 seconds and sleep for | 481 | a producer and consumer that will run for 10 seconds and sleep for |
496 | 10 seconds. Each interval it will print out the number of events | 482 | 10 seconds. Each interval it will print out the number of events |
@@ -499,7 +485,7 @@ config RING_BUFFER_BENCHMARK | |||
499 | It does not disable interrupts or raise its priority, so it may be | 485 | It does not disable interrupts or raise its priority, so it may be |
500 | affected by processes that are running. | 486 | affected by processes that are running. |
501 | 487 | ||
502 | If unsure, say N | 488 | If unsure, say N. |
503 | 489 | ||
504 | endif # FTRACE | 490 | endif # FTRACE |
505 | 491 | ||
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 844164dca90a..761c510a06c5 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -30,7 +30,6 @@ obj-$(CONFIG_TRACING) += trace_output.o | |||
30 | obj-$(CONFIG_TRACING) += trace_stat.o | 30 | obj-$(CONFIG_TRACING) += trace_stat.o |
31 | obj-$(CONFIG_TRACING) += trace_printk.o | 31 | obj-$(CONFIG_TRACING) += trace_printk.o |
32 | obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o | 32 | obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o |
33 | obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o | ||
34 | obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o | 33 | obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o |
35 | obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o | 34 | obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o |
36 | obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o | 35 | obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o |
@@ -38,12 +37,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o | |||
38 | obj-$(CONFIG_NOP_TRACER) += trace_nop.o | 37 | obj-$(CONFIG_NOP_TRACER) += trace_nop.o |
39 | obj-$(CONFIG_STACK_TRACER) += trace_stack.o | 38 | obj-$(CONFIG_STACK_TRACER) += trace_stack.o |
40 | obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o | 39 | obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o |
41 | obj-$(CONFIG_BOOT_TRACER) += trace_boot.o | ||
42 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o | 40 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o |
43 | obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o | 41 | obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o |
44 | obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o | ||
45 | obj-$(CONFIG_POWER_TRACER) += trace_power.o | ||
46 | obj-$(CONFIG_KMEMTRACE) += kmemtrace.o | ||
47 | obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o | 42 | obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o |
48 | obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o | 43 | obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o |
49 | ifeq ($(CONFIG_BLOCK),y) | 44 | ifeq ($(CONFIG_BLOCK),y) |
@@ -52,7 +47,14 @@ endif | |||
52 | obj-$(CONFIG_EVENT_TRACING) += trace_events.o | 47 | obj-$(CONFIG_EVENT_TRACING) += trace_events.o |
53 | obj-$(CONFIG_EVENT_TRACING) += trace_export.o | 48 | obj-$(CONFIG_EVENT_TRACING) += trace_export.o |
54 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o | 49 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o |
55 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o | 50 | ifeq ($(CONFIG_PERF_EVENTS),y) |
51 | obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o | ||
52 | endif | ||
56 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o | 53 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o |
54 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o | ||
55 | obj-$(CONFIG_TRACEPOINTS) += power-traces.o | ||
56 | ifeq ($(CONFIG_TRACING),y) | ||
57 | obj-$(CONFIG_KGDB_KDB) += trace_kdb.o | ||
58 | endif | ||
57 | 59 | ||
58 | libftrace-y := ftrace.o | 60 | libftrace-y := ftrace.o |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 39af8af6fc30..7b8ec0281548 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/percpu.h> | 21 | #include <linux/percpu.h> |
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/slab.h> | ||
24 | #include <linux/debugfs.h> | 25 | #include <linux/debugfs.h> |
25 | #include <linux/time.h> | 26 | #include <linux/time.h> |
26 | #include <linux/uaccess.h> | 27 | #include <linux/uaccess.h> |
@@ -64,13 +65,15 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, | |||
64 | { | 65 | { |
65 | struct blk_io_trace *t; | 66 | struct blk_io_trace *t; |
66 | struct ring_buffer_event *event = NULL; | 67 | struct ring_buffer_event *event = NULL; |
68 | struct ring_buffer *buffer = NULL; | ||
67 | int pc = 0; | 69 | int pc = 0; |
68 | int cpu = smp_processor_id(); | 70 | int cpu = smp_processor_id(); |
69 | bool blk_tracer = blk_tracer_enabled; | 71 | bool blk_tracer = blk_tracer_enabled; |
70 | 72 | ||
71 | if (blk_tracer) { | 73 | if (blk_tracer) { |
74 | buffer = blk_tr->buffer; | ||
72 | pc = preempt_count(); | 75 | pc = preempt_count(); |
73 | event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, | 76 | event = trace_buffer_lock_reserve(buffer, TRACE_BLK, |
74 | sizeof(*t) + len, | 77 | sizeof(*t) + len, |
75 | 0, pc); | 78 | 0, pc); |
76 | if (!event) | 79 | if (!event) |
@@ -95,7 +98,7 @@ record_it: | |||
95 | memcpy((void *) t + sizeof(*t), data, len); | 98 | memcpy((void *) t + sizeof(*t), data, len); |
96 | 99 | ||
97 | if (blk_tracer) | 100 | if (blk_tracer) |
98 | trace_buffer_unlock_commit(blk_tr, event, 0, pc); | 101 | trace_buffer_unlock_commit(buffer, event, 0, pc); |
99 | } | 102 | } |
100 | } | 103 | } |
101 | 104 | ||
@@ -165,9 +168,11 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, | |||
165 | static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), | 168 | static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), |
166 | BLK_TC_ACT(BLK_TC_WRITE) }; | 169 | BLK_TC_ACT(BLK_TC_WRITE) }; |
167 | 170 | ||
171 | #define BLK_TC_RAHEAD BLK_TC_AHEAD | ||
172 | |||
168 | /* The ilog2() calls fall out because they're constant */ | 173 | /* The ilog2() calls fall out because they're constant */ |
169 | #define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ | 174 | #define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \ |
170 | (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name)) | 175 | (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name)) |
171 | 176 | ||
172 | /* | 177 | /* |
173 | * The worker for the various blk_add_trace*() types. Fills out a | 178 | * The worker for the various blk_add_trace*() types. Fills out a |
@@ -178,6 +183,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
178 | { | 183 | { |
179 | struct task_struct *tsk = current; | 184 | struct task_struct *tsk = current; |
180 | struct ring_buffer_event *event = NULL; | 185 | struct ring_buffer_event *event = NULL; |
186 | struct ring_buffer *buffer = NULL; | ||
181 | struct blk_io_trace *t; | 187 | struct blk_io_trace *t; |
182 | unsigned long flags = 0; | 188 | unsigned long flags = 0; |
183 | unsigned long *sequence; | 189 | unsigned long *sequence; |
@@ -189,9 +195,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
189 | return; | 195 | return; |
190 | 196 | ||
191 | what |= ddir_act[rw & WRITE]; | 197 | what |= ddir_act[rw & WRITE]; |
192 | what |= MASK_TC_BIT(rw, BARRIER); | 198 | what |= MASK_TC_BIT(rw, SYNC); |
193 | what |= MASK_TC_BIT(rw, SYNCIO); | 199 | what |= MASK_TC_BIT(rw, RAHEAD); |
194 | what |= MASK_TC_BIT(rw, AHEAD); | ||
195 | what |= MASK_TC_BIT(rw, META); | 200 | what |= MASK_TC_BIT(rw, META); |
196 | what |= MASK_TC_BIT(rw, DISCARD); | 201 | what |= MASK_TC_BIT(rw, DISCARD); |
197 | 202 | ||
@@ -203,8 +208,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
203 | if (blk_tracer) { | 208 | if (blk_tracer) { |
204 | tracing_record_cmdline(current); | 209 | tracing_record_cmdline(current); |
205 | 210 | ||
211 | buffer = blk_tr->buffer; | ||
206 | pc = preempt_count(); | 212 | pc = preempt_count(); |
207 | event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, | 213 | event = trace_buffer_lock_reserve(buffer, TRACE_BLK, |
208 | sizeof(*t) + pdu_len, | 214 | sizeof(*t) + pdu_len, |
209 | 0, pc); | 215 | 0, pc); |
210 | if (!event) | 216 | if (!event) |
@@ -251,7 +257,7 @@ record_it: | |||
251 | memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); | 257 | memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); |
252 | 258 | ||
253 | if (blk_tracer) { | 259 | if (blk_tracer) { |
254 | trace_buffer_unlock_commit(blk_tr, event, 0, pc); | 260 | trace_buffer_unlock_commit(buffer, event, 0, pc); |
255 | return; | 261 | return; |
256 | } | 262 | } |
257 | } | 263 | } |
@@ -266,8 +272,8 @@ static void blk_trace_free(struct blk_trace *bt) | |||
266 | { | 272 | { |
267 | debugfs_remove(bt->msg_file); | 273 | debugfs_remove(bt->msg_file); |
268 | debugfs_remove(bt->dropped_file); | 274 | debugfs_remove(bt->dropped_file); |
269 | debugfs_remove(bt->dir); | ||
270 | relay_close(bt->rchan); | 275 | relay_close(bt->rchan); |
276 | debugfs_remove(bt->dir); | ||
271 | free_percpu(bt->sequence); | 277 | free_percpu(bt->sequence); |
272 | free_percpu(bt->msg_data); | 278 | free_percpu(bt->msg_data); |
273 | kfree(bt); | 279 | kfree(bt); |
@@ -317,6 +323,7 @@ static const struct file_operations blk_dropped_fops = { | |||
317 | .owner = THIS_MODULE, | 323 | .owner = THIS_MODULE, |
318 | .open = blk_dropped_open, | 324 | .open = blk_dropped_open, |
319 | .read = blk_dropped_read, | 325 | .read = blk_dropped_read, |
326 | .llseek = default_llseek, | ||
320 | }; | 327 | }; |
321 | 328 | ||
322 | static int blk_msg_open(struct inode *inode, struct file *filp) | 329 | static int blk_msg_open(struct inode *inode, struct file *filp) |
@@ -356,6 +363,7 @@ static const struct file_operations blk_msg_fops = { | |||
356 | .owner = THIS_MODULE, | 363 | .owner = THIS_MODULE, |
357 | .open = blk_msg_open, | 364 | .open = blk_msg_open, |
358 | .write = blk_msg_write, | 365 | .write = blk_msg_write, |
366 | .llseek = noop_llseek, | ||
359 | }; | 367 | }; |
360 | 368 | ||
361 | /* | 369 | /* |
@@ -377,18 +385,8 @@ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, | |||
377 | 385 | ||
378 | static int blk_remove_buf_file_callback(struct dentry *dentry) | 386 | static int blk_remove_buf_file_callback(struct dentry *dentry) |
379 | { | 387 | { |
380 | struct dentry *parent = dentry->d_parent; | ||
381 | debugfs_remove(dentry); | 388 | debugfs_remove(dentry); |
382 | 389 | ||
383 | /* | ||
384 | * this will fail for all but the last file, but that is ok. what we | ||
385 | * care about is the top level buts->name directory going away, when | ||
386 | * the last trace file is gone. Then we don't have to rmdir() that | ||
387 | * manually on trace stop, so it nicely solves the issue with | ||
388 | * force killing of running traces. | ||
389 | */ | ||
390 | |||
391 | debugfs_remove(parent); | ||
392 | return 0; | 390 | return 0; |
393 | } | 391 | } |
394 | 392 | ||
@@ -545,13 +543,49 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, | |||
545 | if (ret) | 543 | if (ret) |
546 | return ret; | 544 | return ret; |
547 | 545 | ||
548 | if (copy_to_user(arg, &buts, sizeof(buts))) | 546 | if (copy_to_user(arg, &buts, sizeof(buts))) { |
547 | blk_trace_remove(q); | ||
549 | return -EFAULT; | 548 | return -EFAULT; |
550 | 549 | } | |
551 | return 0; | 550 | return 0; |
552 | } | 551 | } |
553 | EXPORT_SYMBOL_GPL(blk_trace_setup); | 552 | EXPORT_SYMBOL_GPL(blk_trace_setup); |
554 | 553 | ||
554 | #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) | ||
555 | static int compat_blk_trace_setup(struct request_queue *q, char *name, | ||
556 | dev_t dev, struct block_device *bdev, | ||
557 | char __user *arg) | ||
558 | { | ||
559 | struct blk_user_trace_setup buts; | ||
560 | struct compat_blk_user_trace_setup cbuts; | ||
561 | int ret; | ||
562 | |||
563 | if (copy_from_user(&cbuts, arg, sizeof(cbuts))) | ||
564 | return -EFAULT; | ||
565 | |||
566 | buts = (struct blk_user_trace_setup) { | ||
567 | .act_mask = cbuts.act_mask, | ||
568 | .buf_size = cbuts.buf_size, | ||
569 | .buf_nr = cbuts.buf_nr, | ||
570 | .start_lba = cbuts.start_lba, | ||
571 | .end_lba = cbuts.end_lba, | ||
572 | .pid = cbuts.pid, | ||
573 | }; | ||
574 | memcpy(&buts.name, &cbuts.name, 32); | ||
575 | |||
576 | ret = do_blk_trace_setup(q, name, dev, bdev, &buts); | ||
577 | if (ret) | ||
578 | return ret; | ||
579 | |||
580 | if (copy_to_user(arg, &buts.name, 32)) { | ||
581 | blk_trace_remove(q); | ||
582 | return -EFAULT; | ||
583 | } | ||
584 | |||
585 | return 0; | ||
586 | } | ||
587 | #endif | ||
588 | |||
555 | int blk_trace_startstop(struct request_queue *q, int start) | 589 | int blk_trace_startstop(struct request_queue *q, int start) |
556 | { | 590 | { |
557 | int ret; | 591 | int ret; |
@@ -611,6 +645,12 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) | |||
611 | bdevname(bdev, b); | 645 | bdevname(bdev, b); |
612 | ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); | 646 | ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); |
613 | break; | 647 | break; |
648 | #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) | ||
649 | case BLKTRACESETUP32: | ||
650 | bdevname(bdev, b); | ||
651 | ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); | ||
652 | break; | ||
653 | #endif | ||
614 | case BLKTRACESTART: | 654 | case BLKTRACESTART: |
615 | start = 1; | 655 | start = 1; |
616 | case BLKTRACESTOP: | 656 | case BLKTRACESTOP: |
@@ -664,10 +704,13 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, | |||
664 | if (likely(!bt)) | 704 | if (likely(!bt)) |
665 | return; | 705 | return; |
666 | 706 | ||
667 | if (blk_discard_rq(rq)) | 707 | if (rq->cmd_flags & REQ_DISCARD) |
668 | rw |= (1 << BIO_RW_DISCARD); | 708 | rw |= REQ_DISCARD; |
709 | |||
710 | if (rq->cmd_flags & REQ_SECURE) | ||
711 | rw |= REQ_SECURE; | ||
669 | 712 | ||
670 | if (blk_pc_request(rq)) { | 713 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
671 | what |= BLK_TC_ACT(BLK_TC_PC); | 714 | what |= BLK_TC_ACT(BLK_TC_PC); |
672 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, | 715 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, |
673 | what, rq->errors, rq->cmd_len, rq->cmd); | 716 | what, rq->errors, rq->cmd_len, rq->cmd); |
@@ -678,28 +721,33 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, | |||
678 | } | 721 | } |
679 | } | 722 | } |
680 | 723 | ||
681 | static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq) | 724 | static void blk_add_trace_rq_abort(void *ignore, |
725 | struct request_queue *q, struct request *rq) | ||
682 | { | 726 | { |
683 | blk_add_trace_rq(q, rq, BLK_TA_ABORT); | 727 | blk_add_trace_rq(q, rq, BLK_TA_ABORT); |
684 | } | 728 | } |
685 | 729 | ||
686 | static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq) | 730 | static void blk_add_trace_rq_insert(void *ignore, |
731 | struct request_queue *q, struct request *rq) | ||
687 | { | 732 | { |
688 | blk_add_trace_rq(q, rq, BLK_TA_INSERT); | 733 | blk_add_trace_rq(q, rq, BLK_TA_INSERT); |
689 | } | 734 | } |
690 | 735 | ||
691 | static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq) | 736 | static void blk_add_trace_rq_issue(void *ignore, |
737 | struct request_queue *q, struct request *rq) | ||
692 | { | 738 | { |
693 | blk_add_trace_rq(q, rq, BLK_TA_ISSUE); | 739 | blk_add_trace_rq(q, rq, BLK_TA_ISSUE); |
694 | } | 740 | } |
695 | 741 | ||
696 | static void blk_add_trace_rq_requeue(struct request_queue *q, | 742 | static void blk_add_trace_rq_requeue(void *ignore, |
743 | struct request_queue *q, | ||
697 | struct request *rq) | 744 | struct request *rq) |
698 | { | 745 | { |
699 | blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); | 746 | blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); |
700 | } | 747 | } |
701 | 748 | ||
702 | static void blk_add_trace_rq_complete(struct request_queue *q, | 749 | static void blk_add_trace_rq_complete(void *ignore, |
750 | struct request_queue *q, | ||
703 | struct request *rq) | 751 | struct request *rq) |
704 | { | 752 | { |
705 | blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); | 753 | blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); |
@@ -727,34 +775,40 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, | |||
727 | !bio_flagged(bio, BIO_UPTODATE), 0, NULL); | 775 | !bio_flagged(bio, BIO_UPTODATE), 0, NULL); |
728 | } | 776 | } |
729 | 777 | ||
730 | static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio) | 778 | static void blk_add_trace_bio_bounce(void *ignore, |
779 | struct request_queue *q, struct bio *bio) | ||
731 | { | 780 | { |
732 | blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); | 781 | blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); |
733 | } | 782 | } |
734 | 783 | ||
735 | static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio) | 784 | static void blk_add_trace_bio_complete(void *ignore, |
785 | struct request_queue *q, struct bio *bio) | ||
736 | { | 786 | { |
737 | blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); | 787 | blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); |
738 | } | 788 | } |
739 | 789 | ||
740 | static void blk_add_trace_bio_backmerge(struct request_queue *q, | 790 | static void blk_add_trace_bio_backmerge(void *ignore, |
791 | struct request_queue *q, | ||
741 | struct bio *bio) | 792 | struct bio *bio) |
742 | { | 793 | { |
743 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); | 794 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); |
744 | } | 795 | } |
745 | 796 | ||
746 | static void blk_add_trace_bio_frontmerge(struct request_queue *q, | 797 | static void blk_add_trace_bio_frontmerge(void *ignore, |
798 | struct request_queue *q, | ||
747 | struct bio *bio) | 799 | struct bio *bio) |
748 | { | 800 | { |
749 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); | 801 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); |
750 | } | 802 | } |
751 | 803 | ||
752 | static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio) | 804 | static void blk_add_trace_bio_queue(void *ignore, |
805 | struct request_queue *q, struct bio *bio) | ||
753 | { | 806 | { |
754 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE); | 807 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE); |
755 | } | 808 | } |
756 | 809 | ||
757 | static void blk_add_trace_getrq(struct request_queue *q, | 810 | static void blk_add_trace_getrq(void *ignore, |
811 | struct request_queue *q, | ||
758 | struct bio *bio, int rw) | 812 | struct bio *bio, int rw) |
759 | { | 813 | { |
760 | if (bio) | 814 | if (bio) |
@@ -768,7 +822,8 @@ static void blk_add_trace_getrq(struct request_queue *q, | |||
768 | } | 822 | } |
769 | 823 | ||
770 | 824 | ||
771 | static void blk_add_trace_sleeprq(struct request_queue *q, | 825 | static void blk_add_trace_sleeprq(void *ignore, |
826 | struct request_queue *q, | ||
772 | struct bio *bio, int rw) | 827 | struct bio *bio, int rw) |
773 | { | 828 | { |
774 | if (bio) | 829 | if (bio) |
@@ -782,7 +837,7 @@ static void blk_add_trace_sleeprq(struct request_queue *q, | |||
782 | } | 837 | } |
783 | } | 838 | } |
784 | 839 | ||
785 | static void blk_add_trace_plug(struct request_queue *q) | 840 | static void blk_add_trace_plug(void *ignore, struct request_queue *q) |
786 | { | 841 | { |
787 | struct blk_trace *bt = q->blk_trace; | 842 | struct blk_trace *bt = q->blk_trace; |
788 | 843 | ||
@@ -790,7 +845,7 @@ static void blk_add_trace_plug(struct request_queue *q) | |||
790 | __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); | 845 | __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); |
791 | } | 846 | } |
792 | 847 | ||
793 | static void blk_add_trace_unplug_io(struct request_queue *q) | 848 | static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q) |
794 | { | 849 | { |
795 | struct blk_trace *bt = q->blk_trace; | 850 | struct blk_trace *bt = q->blk_trace; |
796 | 851 | ||
@@ -803,7 +858,7 @@ static void blk_add_trace_unplug_io(struct request_queue *q) | |||
803 | } | 858 | } |
804 | } | 859 | } |
805 | 860 | ||
806 | static void blk_add_trace_unplug_timer(struct request_queue *q) | 861 | static void blk_add_trace_unplug_timer(void *ignore, struct request_queue *q) |
807 | { | 862 | { |
808 | struct blk_trace *bt = q->blk_trace; | 863 | struct blk_trace *bt = q->blk_trace; |
809 | 864 | ||
@@ -816,7 +871,8 @@ static void blk_add_trace_unplug_timer(struct request_queue *q) | |||
816 | } | 871 | } |
817 | } | 872 | } |
818 | 873 | ||
819 | static void blk_add_trace_split(struct request_queue *q, struct bio *bio, | 874 | static void blk_add_trace_split(void *ignore, |
875 | struct request_queue *q, struct bio *bio, | ||
820 | unsigned int pdu) | 876 | unsigned int pdu) |
821 | { | 877 | { |
822 | struct blk_trace *bt = q->blk_trace; | 878 | struct blk_trace *bt = q->blk_trace; |
@@ -832,6 +888,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio, | |||
832 | 888 | ||
833 | /** | 889 | /** |
834 | * blk_add_trace_remap - Add a trace for a remap operation | 890 | * blk_add_trace_remap - Add a trace for a remap operation |
891 | * @ignore: trace callback data parameter (not used) | ||
835 | * @q: queue the io is for | 892 | * @q: queue the io is for |
836 | * @bio: the source bio | 893 | * @bio: the source bio |
837 | * @dev: target device | 894 | * @dev: target device |
@@ -842,8 +899,9 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio, | |||
842 | * it spans a stripe (or similar). Add a trace for that action. | 899 | * it spans a stripe (or similar). Add a trace for that action. |
843 | * | 900 | * |
844 | **/ | 901 | **/ |
845 | static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, | 902 | static void blk_add_trace_remap(void *ignore, |
846 | dev_t dev, sector_t from) | 903 | struct request_queue *q, struct bio *bio, |
904 | dev_t dev, sector_t from) | ||
847 | { | 905 | { |
848 | struct blk_trace *bt = q->blk_trace; | 906 | struct blk_trace *bt = q->blk_trace; |
849 | struct blk_io_trace_remap r; | 907 | struct blk_io_trace_remap r; |
@@ -861,6 +919,39 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, | |||
861 | } | 919 | } |
862 | 920 | ||
863 | /** | 921 | /** |
922 | * blk_add_trace_rq_remap - Add a trace for a request-remap operation | ||
923 | * @ignore: trace callback data parameter (not used) | ||
924 | * @q: queue the io is for | ||
925 | * @rq: the source request | ||
926 | * @dev: target device | ||
927 | * @from: source sector | ||
928 | * | ||
929 | * Description: | ||
930 | * Device mapper remaps request to other devices. | ||
931 | * Add a trace for that action. | ||
932 | * | ||
933 | **/ | ||
934 | static void blk_add_trace_rq_remap(void *ignore, | ||
935 | struct request_queue *q, | ||
936 | struct request *rq, dev_t dev, | ||
937 | sector_t from) | ||
938 | { | ||
939 | struct blk_trace *bt = q->blk_trace; | ||
940 | struct blk_io_trace_remap r; | ||
941 | |||
942 | if (likely(!bt)) | ||
943 | return; | ||
944 | |||
945 | r.device_from = cpu_to_be32(dev); | ||
946 | r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); | ||
947 | r.sector_from = cpu_to_be64(from); | ||
948 | |||
949 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), | ||
950 | rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors, | ||
951 | sizeof(r), &r); | ||
952 | } | ||
953 | |||
954 | /** | ||
864 | * blk_add_driver_data - Add binary message with driver-specific data | 955 | * blk_add_driver_data - Add binary message with driver-specific data |
865 | * @q: queue the io is for | 956 | * @q: queue the io is for |
866 | * @rq: io request | 957 | * @rq: io request |
@@ -880,7 +971,7 @@ void blk_add_driver_data(struct request_queue *q, | |||
880 | if (likely(!bt)) | 971 | if (likely(!bt)) |
881 | return; | 972 | return; |
882 | 973 | ||
883 | if (blk_pc_request(rq)) | 974 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) |
884 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, | 975 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, |
885 | BLK_TA_DRV_DATA, rq->errors, len, data); | 976 | BLK_TA_DRV_DATA, rq->errors, len, data); |
886 | else | 977 | else |
@@ -893,61 +984,64 @@ static void blk_register_tracepoints(void) | |||
893 | { | 984 | { |
894 | int ret; | 985 | int ret; |
895 | 986 | ||
896 | ret = register_trace_block_rq_abort(blk_add_trace_rq_abort); | 987 | ret = register_trace_block_rq_abort(blk_add_trace_rq_abort, NULL); |
988 | WARN_ON(ret); | ||
989 | ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); | ||
897 | WARN_ON(ret); | 990 | WARN_ON(ret); |
898 | ret = register_trace_block_rq_insert(blk_add_trace_rq_insert); | 991 | ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); |
899 | WARN_ON(ret); | 992 | WARN_ON(ret); |
900 | ret = register_trace_block_rq_issue(blk_add_trace_rq_issue); | 993 | ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); |
901 | WARN_ON(ret); | 994 | WARN_ON(ret); |
902 | ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue); | 995 | ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); |
903 | WARN_ON(ret); | 996 | WARN_ON(ret); |
904 | ret = register_trace_block_rq_complete(blk_add_trace_rq_complete); | 997 | ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL); |
905 | WARN_ON(ret); | 998 | WARN_ON(ret); |
906 | ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce); | 999 | ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL); |
907 | WARN_ON(ret); | 1000 | WARN_ON(ret); |
908 | ret = register_trace_block_bio_complete(blk_add_trace_bio_complete); | 1001 | ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL); |
909 | WARN_ON(ret); | 1002 | WARN_ON(ret); |
910 | ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); | 1003 | ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); |
911 | WARN_ON(ret); | 1004 | WARN_ON(ret); |
912 | ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); | 1005 | ret = register_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); |
913 | WARN_ON(ret); | 1006 | WARN_ON(ret); |
914 | ret = register_trace_block_bio_queue(blk_add_trace_bio_queue); | 1007 | ret = register_trace_block_getrq(blk_add_trace_getrq, NULL); |
915 | WARN_ON(ret); | 1008 | WARN_ON(ret); |
916 | ret = register_trace_block_getrq(blk_add_trace_getrq); | 1009 | ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); |
917 | WARN_ON(ret); | 1010 | WARN_ON(ret); |
918 | ret = register_trace_block_sleeprq(blk_add_trace_sleeprq); | 1011 | ret = register_trace_block_plug(blk_add_trace_plug, NULL); |
919 | WARN_ON(ret); | 1012 | WARN_ON(ret); |
920 | ret = register_trace_block_plug(blk_add_trace_plug); | 1013 | ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); |
921 | WARN_ON(ret); | 1014 | WARN_ON(ret); |
922 | ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer); | 1015 | ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); |
923 | WARN_ON(ret); | 1016 | WARN_ON(ret); |
924 | ret = register_trace_block_unplug_io(blk_add_trace_unplug_io); | 1017 | ret = register_trace_block_split(blk_add_trace_split, NULL); |
925 | WARN_ON(ret); | 1018 | WARN_ON(ret); |
926 | ret = register_trace_block_split(blk_add_trace_split); | 1019 | ret = register_trace_block_remap(blk_add_trace_remap, NULL); |
927 | WARN_ON(ret); | 1020 | WARN_ON(ret); |
928 | ret = register_trace_block_remap(blk_add_trace_remap); | 1021 | ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); |
929 | WARN_ON(ret); | 1022 | WARN_ON(ret); |
930 | } | 1023 | } |
931 | 1024 | ||
932 | static void blk_unregister_tracepoints(void) | 1025 | static void blk_unregister_tracepoints(void) |
933 | { | 1026 | { |
934 | unregister_trace_block_remap(blk_add_trace_remap); | 1027 | unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); |
935 | unregister_trace_block_split(blk_add_trace_split); | 1028 | unregister_trace_block_remap(blk_add_trace_remap, NULL); |
936 | unregister_trace_block_unplug_io(blk_add_trace_unplug_io); | 1029 | unregister_trace_block_split(blk_add_trace_split, NULL); |
937 | unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer); | 1030 | unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); |
938 | unregister_trace_block_plug(blk_add_trace_plug); | 1031 | unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); |
939 | unregister_trace_block_sleeprq(blk_add_trace_sleeprq); | 1032 | unregister_trace_block_plug(blk_add_trace_plug, NULL); |
940 | unregister_trace_block_getrq(blk_add_trace_getrq); | 1033 | unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); |
941 | unregister_trace_block_bio_queue(blk_add_trace_bio_queue); | 1034 | unregister_trace_block_getrq(blk_add_trace_getrq, NULL); |
942 | unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); | 1035 | unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); |
943 | unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); | 1036 | unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); |
944 | unregister_trace_block_bio_complete(blk_add_trace_bio_complete); | 1037 | unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL); |
945 | unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce); | 1038 | unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL); |
946 | unregister_trace_block_rq_complete(blk_add_trace_rq_complete); | 1039 | unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL); |
947 | unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue); | 1040 | unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); |
948 | unregister_trace_block_rq_issue(blk_add_trace_rq_issue); | 1041 | unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); |
949 | unregister_trace_block_rq_insert(blk_add_trace_rq_insert); | 1042 | unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); |
950 | unregister_trace_block_rq_abort(blk_add_trace_rq_abort); | 1043 | unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); |
1044 | unregister_trace_block_rq_abort(blk_add_trace_rq_abort, NULL); | ||
951 | 1045 | ||
952 | tracepoint_synchronize_unregister(); | 1046 | tracepoint_synchronize_unregister(); |
953 | } | 1047 | } |
@@ -1290,7 +1384,7 @@ out: | |||
1290 | } | 1384 | } |
1291 | 1385 | ||
1292 | static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, | 1386 | static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, |
1293 | int flags) | 1387 | int flags, struct trace_event *event) |
1294 | { | 1388 | { |
1295 | return print_one_line(iter, false); | 1389 | return print_one_line(iter, false); |
1296 | } | 1390 | } |
@@ -1312,7 +1406,8 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) | |||
1312 | } | 1406 | } |
1313 | 1407 | ||
1314 | static enum print_line_t | 1408 | static enum print_line_t |
1315 | blk_trace_event_print_binary(struct trace_iterator *iter, int flags) | 1409 | blk_trace_event_print_binary(struct trace_iterator *iter, int flags, |
1410 | struct trace_event *event) | ||
1316 | { | 1411 | { |
1317 | return blk_trace_synthesize_old_trace(iter) ? | 1412 | return blk_trace_synthesize_old_trace(iter) ? |
1318 | TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 1413 | TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; |
@@ -1350,12 +1445,16 @@ static struct tracer blk_tracer __read_mostly = { | |||
1350 | .set_flag = blk_tracer_set_flag, | 1445 | .set_flag = blk_tracer_set_flag, |
1351 | }; | 1446 | }; |
1352 | 1447 | ||
1353 | static struct trace_event trace_blk_event = { | 1448 | static struct trace_event_functions trace_blk_event_funcs = { |
1354 | .type = TRACE_BLK, | ||
1355 | .trace = blk_trace_event_print, | 1449 | .trace = blk_trace_event_print, |
1356 | .binary = blk_trace_event_print_binary, | 1450 | .binary = blk_trace_event_print_binary, |
1357 | }; | 1451 | }; |
1358 | 1452 | ||
1453 | static struct trace_event trace_blk_event = { | ||
1454 | .type = TRACE_BLK, | ||
1455 | .funcs = &trace_blk_event_funcs, | ||
1456 | }; | ||
1457 | |||
1359 | static int __init init_blk_tracer(void) | 1458 | static int __init init_blk_tracer(void) |
1360 | { | 1459 | { |
1361 | if (!register_ftrace_event(&trace_blk_event)) { | 1460 | if (!register_ftrace_event(&trace_blk_event)) { |
@@ -1550,10 +1649,9 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | |||
1550 | struct block_device *bdev; | 1649 | struct block_device *bdev; |
1551 | ssize_t ret = -ENXIO; | 1650 | ssize_t ret = -ENXIO; |
1552 | 1651 | ||
1553 | lock_kernel(); | ||
1554 | bdev = bdget(part_devt(p)); | 1652 | bdev = bdget(part_devt(p)); |
1555 | if (bdev == NULL) | 1653 | if (bdev == NULL) |
1556 | goto out_unlock_kernel; | 1654 | goto out; |
1557 | 1655 | ||
1558 | q = blk_trace_get_queue(bdev); | 1656 | q = blk_trace_get_queue(bdev); |
1559 | if (q == NULL) | 1657 | if (q == NULL) |
@@ -1581,8 +1679,7 @@ out_unlock_bdev: | |||
1581 | mutex_unlock(&bdev->bd_mutex); | 1679 | mutex_unlock(&bdev->bd_mutex); |
1582 | out_bdput: | 1680 | out_bdput: |
1583 | bdput(bdev); | 1681 | bdput(bdev); |
1584 | out_unlock_kernel: | 1682 | out: |
1585 | unlock_kernel(); | ||
1586 | return ret; | 1683 | return ret; |
1587 | } | 1684 | } |
1588 | 1685 | ||
@@ -1612,11 +1709,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
1612 | 1709 | ||
1613 | ret = -ENXIO; | 1710 | ret = -ENXIO; |
1614 | 1711 | ||
1615 | lock_kernel(); | ||
1616 | p = dev_to_part(dev); | 1712 | p = dev_to_part(dev); |
1617 | bdev = bdget(part_devt(p)); | 1713 | bdev = bdget(part_devt(p)); |
1618 | if (bdev == NULL) | 1714 | if (bdev == NULL) |
1619 | goto out_unlock_kernel; | 1715 | goto out; |
1620 | 1716 | ||
1621 | q = blk_trace_get_queue(bdev); | 1717 | q = blk_trace_get_queue(bdev); |
1622 | if (q == NULL) | 1718 | if (q == NULL) |
@@ -1651,8 +1747,6 @@ out_unlock_bdev: | |||
1651 | mutex_unlock(&bdev->bd_mutex); | 1747 | mutex_unlock(&bdev->bd_mutex); |
1652 | out_bdput: | 1748 | out_bdput: |
1653 | bdput(bdev); | 1749 | bdput(bdev); |
1654 | out_unlock_kernel: | ||
1655 | unlock_kernel(); | ||
1656 | out: | 1750 | out: |
1657 | return ret ? ret : count; | 1751 | return ret ? ret : count; |
1658 | } | 1752 | } |
@@ -1662,6 +1756,11 @@ int blk_trace_init_sysfs(struct device *dev) | |||
1662 | return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); | 1756 | return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); |
1663 | } | 1757 | } |
1664 | 1758 | ||
1759 | void blk_trace_remove_sysfs(struct device *dev) | ||
1760 | { | ||
1761 | sysfs_remove_group(&dev->kobj, &blk_trace_attr_group); | ||
1762 | } | ||
1763 | |||
1665 | #endif /* CONFIG_BLK_DEV_IO_TRACE */ | 1764 | #endif /* CONFIG_BLK_DEV_IO_TRACE */ |
1666 | 1765 | ||
1667 | #ifdef CONFIG_EVENT_TRACING | 1766 | #ifdef CONFIG_EVENT_TRACING |
@@ -1672,7 +1771,7 @@ void blk_dump_cmd(char *buf, struct request *rq) | |||
1672 | int len = rq->cmd_len; | 1771 | int len = rq->cmd_len; |
1673 | unsigned char *cmd = rq->cmd; | 1772 | unsigned char *cmd = rq->cmd; |
1674 | 1773 | ||
1675 | if (!blk_pc_request(rq)) { | 1774 | if (rq->cmd_type != REQ_TYPE_BLOCK_PC) { |
1676 | buf[0] = '\0'; | 1775 | buf[0] = '\0'; |
1677 | return; | 1776 | return; |
1678 | } | 1777 | } |
@@ -1697,21 +1796,21 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) | |||
1697 | 1796 | ||
1698 | if (rw & WRITE) | 1797 | if (rw & WRITE) |
1699 | rwbs[i++] = 'W'; | 1798 | rwbs[i++] = 'W'; |
1700 | else if (rw & 1 << BIO_RW_DISCARD) | 1799 | else if (rw & REQ_DISCARD) |
1701 | rwbs[i++] = 'D'; | 1800 | rwbs[i++] = 'D'; |
1702 | else if (bytes) | 1801 | else if (bytes) |
1703 | rwbs[i++] = 'R'; | 1802 | rwbs[i++] = 'R'; |
1704 | else | 1803 | else |
1705 | rwbs[i++] = 'N'; | 1804 | rwbs[i++] = 'N'; |
1706 | 1805 | ||
1707 | if (rw & 1 << BIO_RW_AHEAD) | 1806 | if (rw & REQ_RAHEAD) |
1708 | rwbs[i++] = 'A'; | 1807 | rwbs[i++] = 'A'; |
1709 | if (rw & 1 << BIO_RW_BARRIER) | 1808 | if (rw & REQ_SYNC) |
1710 | rwbs[i++] = 'B'; | ||
1711 | if (rw & 1 << BIO_RW_SYNCIO) | ||
1712 | rwbs[i++] = 'S'; | 1809 | rwbs[i++] = 'S'; |
1713 | if (rw & 1 << BIO_RW_META) | 1810 | if (rw & REQ_META) |
1714 | rwbs[i++] = 'M'; | 1811 | rwbs[i++] = 'M'; |
1812 | if (rw & REQ_SECURE) | ||
1813 | rwbs[i++] = 'E'; | ||
1715 | 1814 | ||
1716 | rwbs[i] = '\0'; | 1815 | rwbs[i] = '\0'; |
1717 | } | 1816 | } |
@@ -1721,8 +1820,11 @@ void blk_fill_rwbs_rq(char *rwbs, struct request *rq) | |||
1721 | int rw = rq->cmd_flags & 0x03; | 1820 | int rw = rq->cmd_flags & 0x03; |
1722 | int bytes; | 1821 | int bytes; |
1723 | 1822 | ||
1724 | if (blk_discard_rq(rq)) | 1823 | if (rq->cmd_flags & REQ_DISCARD) |
1725 | rw |= (1 << BIO_RW_DISCARD); | 1824 | rw |= REQ_DISCARD; |
1825 | |||
1826 | if (rq->cmd_flags & REQ_SECURE) | ||
1827 | rw |= REQ_SECURE; | ||
1726 | 1828 | ||
1727 | bytes = blk_rq_bytes(rq); | 1829 | bytes = blk_rq_bytes(rq); |
1728 | 1830 | ||
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3718d55fb4c3..f3dadae83883 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -22,12 +22,13 @@ | |||
22 | #include <linux/hardirq.h> | 22 | #include <linux/hardirq.h> |
23 | #include <linux/kthread.h> | 23 | #include <linux/kthread.h> |
24 | #include <linux/uaccess.h> | 24 | #include <linux/uaccess.h> |
25 | #include <linux/kprobes.h> | ||
26 | #include <linux/ftrace.h> | 25 | #include <linux/ftrace.h> |
27 | #include <linux/sysctl.h> | 26 | #include <linux/sysctl.h> |
27 | #include <linux/slab.h> | ||
28 | #include <linux/ctype.h> | 28 | #include <linux/ctype.h> |
29 | #include <linux/list.h> | 29 | #include <linux/list.h> |
30 | #include <linux/hash.h> | 30 | #include <linux/hash.h> |
31 | #include <linux/rcupdate.h> | ||
31 | 32 | ||
32 | #include <trace/events/sched.h> | 33 | #include <trace/events/sched.h> |
33 | 34 | ||
@@ -60,6 +61,13 @@ static int last_ftrace_enabled; | |||
60 | /* Quick disabling of function tracer. */ | 61 | /* Quick disabling of function tracer. */ |
61 | int function_trace_stop; | 62 | int function_trace_stop; |
62 | 63 | ||
64 | /* List for set_ftrace_pid's pids. */ | ||
65 | LIST_HEAD(ftrace_pids); | ||
66 | struct ftrace_pid { | ||
67 | struct list_head list; | ||
68 | struct pid *pid; | ||
69 | }; | ||
70 | |||
63 | /* | 71 | /* |
64 | * ftrace_disabled is set when an anomaly is discovered. | 72 | * ftrace_disabled is set when an anomaly is discovered. |
65 | * ftrace_disabled is much stronger than ftrace_enabled. | 73 | * ftrace_disabled is much stronger than ftrace_enabled. |
@@ -78,18 +86,22 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; | |||
78 | ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; | 86 | ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; |
79 | ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; | 87 | ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; |
80 | 88 | ||
89 | /* | ||
90 | * Traverse the ftrace_list, invoking all entries. The reason that we | ||
91 | * can use rcu_dereference_raw() is that elements removed from this list | ||
92 | * are simply leaked, so there is no need to interact with a grace-period | ||
93 | * mechanism. The rcu_dereference_raw() calls are needed to handle | ||
94 | * concurrent insertions into the ftrace_list. | ||
95 | * | ||
96 | * Silly Alpha and silly pointer-speculation compiler optimizations! | ||
97 | */ | ||
81 | static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) | 98 | static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) |
82 | { | 99 | { |
83 | struct ftrace_ops *op = ftrace_list; | 100 | struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/ |
84 | |||
85 | /* in case someone actually ports this to alpha! */ | ||
86 | read_barrier_depends(); | ||
87 | 101 | ||
88 | while (op != &ftrace_list_end) { | 102 | while (op != &ftrace_list_end) { |
89 | /* silly alpha */ | ||
90 | read_barrier_depends(); | ||
91 | op->func(ip, parent_ip); | 103 | op->func(ip, parent_ip); |
92 | op = op->next; | 104 | op = rcu_dereference_raw(op->next); /*see above*/ |
93 | }; | 105 | }; |
94 | } | 106 | } |
95 | 107 | ||
@@ -144,8 +156,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops) | |||
144 | * the ops->next pointer is valid before another CPU sees | 156 | * the ops->next pointer is valid before another CPU sees |
145 | * the ops pointer included into the ftrace_list. | 157 | * the ops pointer included into the ftrace_list. |
146 | */ | 158 | */ |
147 | smp_wmb(); | 159 | rcu_assign_pointer(ftrace_list, ops); |
148 | ftrace_list = ops; | ||
149 | 160 | ||
150 | if (ftrace_enabled) { | 161 | if (ftrace_enabled) { |
151 | ftrace_func_t func; | 162 | ftrace_func_t func; |
@@ -155,7 +166,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops) | |||
155 | else | 166 | else |
156 | func = ftrace_list_func; | 167 | func = ftrace_list_func; |
157 | 168 | ||
158 | if (ftrace_pid_trace) { | 169 | if (!list_empty(&ftrace_pids)) { |
159 | set_ftrace_pid_function(func); | 170 | set_ftrace_pid_function(func); |
160 | func = ftrace_pid_func; | 171 | func = ftrace_pid_func; |
161 | } | 172 | } |
@@ -203,7 +214,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) | |||
203 | if (ftrace_list->next == &ftrace_list_end) { | 214 | if (ftrace_list->next == &ftrace_list_end) { |
204 | ftrace_func_t func = ftrace_list->func; | 215 | ftrace_func_t func = ftrace_list->func; |
205 | 216 | ||
206 | if (ftrace_pid_trace) { | 217 | if (!list_empty(&ftrace_pids)) { |
207 | set_ftrace_pid_function(func); | 218 | set_ftrace_pid_function(func); |
208 | func = ftrace_pid_func; | 219 | func = ftrace_pid_func; |
209 | } | 220 | } |
@@ -225,9 +236,13 @@ static void ftrace_update_pid_func(void) | |||
225 | if (ftrace_trace_function == ftrace_stub) | 236 | if (ftrace_trace_function == ftrace_stub) |
226 | return; | 237 | return; |
227 | 238 | ||
239 | #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST | ||
228 | func = ftrace_trace_function; | 240 | func = ftrace_trace_function; |
241 | #else | ||
242 | func = __ftrace_trace_function; | ||
243 | #endif | ||
229 | 244 | ||
230 | if (ftrace_pid_trace) { | 245 | if (!list_empty(&ftrace_pids)) { |
231 | set_ftrace_pid_function(func); | 246 | set_ftrace_pid_function(func); |
232 | func = ftrace_pid_func; | 247 | func = ftrace_pid_func; |
233 | } else { | 248 | } else { |
@@ -249,6 +264,7 @@ struct ftrace_profile { | |||
249 | unsigned long counter; | 264 | unsigned long counter; |
250 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 265 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
251 | unsigned long long time; | 266 | unsigned long long time; |
267 | unsigned long long time_squared; | ||
252 | #endif | 268 | #endif |
253 | }; | 269 | }; |
254 | 270 | ||
@@ -291,7 +307,9 @@ function_stat_next(void *v, int idx) | |||
291 | pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK); | 307 | pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK); |
292 | 308 | ||
293 | again: | 309 | again: |
294 | rec++; | 310 | if (idx != 0) |
311 | rec++; | ||
312 | |||
295 | if ((void *)rec >= (void *)&pg->records[pg->index]) { | 313 | if ((void *)rec >= (void *)&pg->records[pg->index]) { |
296 | pg = pg->next; | 314 | pg = pg->next; |
297 | if (!pg) | 315 | if (!pg) |
@@ -349,9 +367,9 @@ static int function_stat_headers(struct seq_file *m) | |||
349 | { | 367 | { |
350 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 368 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
351 | seq_printf(m, " Function " | 369 | seq_printf(m, " Function " |
352 | "Hit Time Avg\n" | 370 | "Hit Time Avg s^2\n" |
353 | " -------- " | 371 | " -------- " |
354 | "--- ---- ---\n"); | 372 | "--- ---- --- ---\n"); |
355 | #else | 373 | #else |
356 | seq_printf(m, " Function Hit\n" | 374 | seq_printf(m, " Function Hit\n" |
357 | " -------- ---\n"); | 375 | " -------- ---\n"); |
@@ -363,11 +381,19 @@ static int function_stat_show(struct seq_file *m, void *v) | |||
363 | { | 381 | { |
364 | struct ftrace_profile *rec = v; | 382 | struct ftrace_profile *rec = v; |
365 | char str[KSYM_SYMBOL_LEN]; | 383 | char str[KSYM_SYMBOL_LEN]; |
384 | int ret = 0; | ||
366 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 385 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
367 | static DEFINE_MUTEX(mutex); | ||
368 | static struct trace_seq s; | 386 | static struct trace_seq s; |
369 | unsigned long long avg; | 387 | unsigned long long avg; |
388 | unsigned long long stddev; | ||
370 | #endif | 389 | #endif |
390 | mutex_lock(&ftrace_profile_lock); | ||
391 | |||
392 | /* we raced with function_profile_reset() */ | ||
393 | if (unlikely(rec->counter == 0)) { | ||
394 | ret = -EBUSY; | ||
395 | goto out; | ||
396 | } | ||
371 | 397 | ||
372 | kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); | 398 | kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); |
373 | seq_printf(m, " %-30.30s %10lu", str, rec->counter); | 399 | seq_printf(m, " %-30.30s %10lu", str, rec->counter); |
@@ -377,17 +403,31 @@ static int function_stat_show(struct seq_file *m, void *v) | |||
377 | avg = rec->time; | 403 | avg = rec->time; |
378 | do_div(avg, rec->counter); | 404 | do_div(avg, rec->counter); |
379 | 405 | ||
380 | mutex_lock(&mutex); | 406 | /* Sample standard deviation (s^2) */ |
407 | if (rec->counter <= 1) | ||
408 | stddev = 0; | ||
409 | else { | ||
410 | stddev = rec->time_squared - rec->counter * avg * avg; | ||
411 | /* | ||
412 | * Divide only 1000 for ns^2 -> us^2 conversion. | ||
413 | * trace_print_graph_duration will divide 1000 again. | ||
414 | */ | ||
415 | do_div(stddev, (rec->counter - 1) * 1000); | ||
416 | } | ||
417 | |||
381 | trace_seq_init(&s); | 418 | trace_seq_init(&s); |
382 | trace_print_graph_duration(rec->time, &s); | 419 | trace_print_graph_duration(rec->time, &s); |
383 | trace_seq_puts(&s, " "); | 420 | trace_seq_puts(&s, " "); |
384 | trace_print_graph_duration(avg, &s); | 421 | trace_print_graph_duration(avg, &s); |
422 | trace_seq_puts(&s, " "); | ||
423 | trace_print_graph_duration(stddev, &s); | ||
385 | trace_print_seq(m, &s); | 424 | trace_print_seq(m, &s); |
386 | mutex_unlock(&mutex); | ||
387 | #endif | 425 | #endif |
388 | seq_putc(m, '\n'); | 426 | seq_putc(m, '\n'); |
427 | out: | ||
428 | mutex_unlock(&ftrace_profile_lock); | ||
389 | 429 | ||
390 | return 0; | 430 | return ret; |
391 | } | 431 | } |
392 | 432 | ||
393 | static void ftrace_profile_reset(struct ftrace_profile_stat *stat) | 433 | static void ftrace_profile_reset(struct ftrace_profile_stat *stat) |
@@ -633,6 +673,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) | |||
633 | if (!stat->hash || !ftrace_profile_enabled) | 673 | if (!stat->hash || !ftrace_profile_enabled) |
634 | goto out; | 674 | goto out; |
635 | 675 | ||
676 | /* If the calltime was zero'd ignore it */ | ||
677 | if (!trace->calltime) | ||
678 | goto out; | ||
679 | |||
636 | calltime = trace->rettime - trace->calltime; | 680 | calltime = trace->rettime - trace->calltime; |
637 | 681 | ||
638 | if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { | 682 | if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { |
@@ -651,8 +695,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) | |||
651 | } | 695 | } |
652 | 696 | ||
653 | rec = ftrace_find_profiled_func(stat, trace->func); | 697 | rec = ftrace_find_profiled_func(stat, trace->func); |
654 | if (rec) | 698 | if (rec) { |
655 | rec->time += calltime; | 699 | rec->time += calltime; |
700 | rec->time_squared += calltime * calltime; | ||
701 | } | ||
656 | 702 | ||
657 | out: | 703 | out: |
658 | local_irq_restore(flags); | 704 | local_irq_restore(flags); |
@@ -734,7 +780,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, | |||
734 | out: | 780 | out: |
735 | mutex_unlock(&ftrace_profile_lock); | 781 | mutex_unlock(&ftrace_profile_lock); |
736 | 782 | ||
737 | filp->f_pos += cnt; | 783 | *ppos += cnt; |
738 | 784 | ||
739 | return cnt; | 785 | return cnt; |
740 | } | 786 | } |
@@ -754,6 +800,7 @@ static const struct file_operations ftrace_profile_fops = { | |||
754 | .open = tracing_open_generic, | 800 | .open = tracing_open_generic, |
755 | .read = ftrace_profile_read, | 801 | .read = ftrace_profile_read, |
756 | .write = ftrace_profile_write, | 802 | .write = ftrace_profile_write, |
803 | .llseek = default_llseek, | ||
757 | }; | 804 | }; |
758 | 805 | ||
759 | /* used to initialize the real stat files */ | 806 | /* used to initialize the real stat files */ |
@@ -766,7 +813,7 @@ static struct tracer_stat function_stats __initdata = { | |||
766 | .stat_show = function_stat_show | 813 | .stat_show = function_stat_show |
767 | }; | 814 | }; |
768 | 815 | ||
769 | static void ftrace_profile_debugfs(struct dentry *d_tracer) | 816 | static __init void ftrace_profile_debugfs(struct dentry *d_tracer) |
770 | { | 817 | { |
771 | struct ftrace_profile_stat *stat; | 818 | struct ftrace_profile_stat *stat; |
772 | struct dentry *entry; | 819 | struct dentry *entry; |
@@ -784,7 +831,6 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer) | |||
784 | * The files created are permanent, if something happens | 831 | * The files created are permanent, if something happens |
785 | * we still do not free memory. | 832 | * we still do not free memory. |
786 | */ | 833 | */ |
787 | kfree(stat); | ||
788 | WARN(1, | 834 | WARN(1, |
789 | "Could not allocate stat file for cpu %d\n", | 835 | "Could not allocate stat file for cpu %d\n", |
790 | cpu); | 836 | cpu); |
@@ -811,13 +857,11 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer) | |||
811 | } | 857 | } |
812 | 858 | ||
813 | #else /* CONFIG_FUNCTION_PROFILER */ | 859 | #else /* CONFIG_FUNCTION_PROFILER */ |
814 | static void ftrace_profile_debugfs(struct dentry *d_tracer) | 860 | static __init void ftrace_profile_debugfs(struct dentry *d_tracer) |
815 | { | 861 | { |
816 | } | 862 | } |
817 | #endif /* CONFIG_FUNCTION_PROFILER */ | 863 | #endif /* CONFIG_FUNCTION_PROFILER */ |
818 | 864 | ||
819 | /* set when tracing only a pid */ | ||
820 | struct pid *ftrace_pid_trace; | ||
821 | static struct pid * const ftrace_swapper_pid = &init_struct_pid; | 865 | static struct pid * const ftrace_swapper_pid = &init_struct_pid; |
822 | 866 | ||
823 | #ifdef CONFIG_DYNAMIC_FTRACE | 867 | #ifdef CONFIG_DYNAMIC_FTRACE |
@@ -841,10 +885,8 @@ enum { | |||
841 | FTRACE_ENABLE_CALLS = (1 << 0), | 885 | FTRACE_ENABLE_CALLS = (1 << 0), |
842 | FTRACE_DISABLE_CALLS = (1 << 1), | 886 | FTRACE_DISABLE_CALLS = (1 << 1), |
843 | FTRACE_UPDATE_TRACE_FUNC = (1 << 2), | 887 | FTRACE_UPDATE_TRACE_FUNC = (1 << 2), |
844 | FTRACE_ENABLE_MCOUNT = (1 << 3), | 888 | FTRACE_START_FUNC_RET = (1 << 3), |
845 | FTRACE_DISABLE_MCOUNT = (1 << 4), | 889 | FTRACE_STOP_FUNC_RET = (1 << 4), |
846 | FTRACE_START_FUNC_RET = (1 << 5), | ||
847 | FTRACE_STOP_FUNC_RET = (1 << 6), | ||
848 | }; | 890 | }; |
849 | 891 | ||
850 | static int ftrace_filtered; | 892 | static int ftrace_filtered; |
@@ -884,36 +926,6 @@ static struct dyn_ftrace *ftrace_free_records; | |||
884 | } \ | 926 | } \ |
885 | } | 927 | } |
886 | 928 | ||
887 | #ifdef CONFIG_KPROBES | ||
888 | |||
889 | static int frozen_record_count; | ||
890 | |||
891 | static inline void freeze_record(struct dyn_ftrace *rec) | ||
892 | { | ||
893 | if (!(rec->flags & FTRACE_FL_FROZEN)) { | ||
894 | rec->flags |= FTRACE_FL_FROZEN; | ||
895 | frozen_record_count++; | ||
896 | } | ||
897 | } | ||
898 | |||
899 | static inline void unfreeze_record(struct dyn_ftrace *rec) | ||
900 | { | ||
901 | if (rec->flags & FTRACE_FL_FROZEN) { | ||
902 | rec->flags &= ~FTRACE_FL_FROZEN; | ||
903 | frozen_record_count--; | ||
904 | } | ||
905 | } | ||
906 | |||
907 | static inline int record_frozen(struct dyn_ftrace *rec) | ||
908 | { | ||
909 | return rec->flags & FTRACE_FL_FROZEN; | ||
910 | } | ||
911 | #else | ||
912 | # define freeze_record(rec) ({ 0; }) | ||
913 | # define unfreeze_record(rec) ({ 0; }) | ||
914 | # define record_frozen(rec) ({ 0; }) | ||
915 | #endif /* CONFIG_KPROBES */ | ||
916 | |||
917 | static void ftrace_free_rec(struct dyn_ftrace *rec) | 929 | static void ftrace_free_rec(struct dyn_ftrace *rec) |
918 | { | 930 | { |
919 | rec->freelist = ftrace_free_records; | 931 | rec->freelist = ftrace_free_records; |
@@ -1011,75 +1023,54 @@ static void ftrace_bug(int failed, unsigned long ip) | |||
1011 | } | 1023 | } |
1012 | 1024 | ||
1013 | 1025 | ||
1026 | /* Return 1 if the address range is reserved for ftrace */ | ||
1027 | int ftrace_text_reserved(void *start, void *end) | ||
1028 | { | ||
1029 | struct dyn_ftrace *rec; | ||
1030 | struct ftrace_page *pg; | ||
1031 | |||
1032 | do_for_each_ftrace_rec(pg, rec) { | ||
1033 | if (rec->ip <= (unsigned long)end && | ||
1034 | rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start) | ||
1035 | return 1; | ||
1036 | } while_for_each_ftrace_rec(); | ||
1037 | return 0; | ||
1038 | } | ||
1039 | |||
1040 | |||
1014 | static int | 1041 | static int |
1015 | __ftrace_replace_code(struct dyn_ftrace *rec, int enable) | 1042 | __ftrace_replace_code(struct dyn_ftrace *rec, int enable) |
1016 | { | 1043 | { |
1017 | unsigned long ftrace_addr; | 1044 | unsigned long ftrace_addr; |
1018 | unsigned long ip, fl; | 1045 | unsigned long flag = 0UL; |
1019 | 1046 | ||
1020 | ftrace_addr = (unsigned long)FTRACE_ADDR; | 1047 | ftrace_addr = (unsigned long)FTRACE_ADDR; |
1021 | 1048 | ||
1022 | ip = rec->ip; | ||
1023 | |||
1024 | /* | 1049 | /* |
1025 | * If this record is not to be traced and | 1050 | * If this record is not to be traced or we want to disable it, |
1026 | * it is not enabled then do nothing. | 1051 | * then disable it. |
1027 | * | 1052 | * |
1028 | * If this record is not to be traced and | 1053 | * If we want to enable it and filtering is off, then enable it. |
1029 | * it is enabled then disable it. | ||
1030 | * | 1054 | * |
1055 | * If we want to enable it and filtering is on, enable it only if | ||
1056 | * it's filtered | ||
1031 | */ | 1057 | */ |
1032 | if (rec->flags & FTRACE_FL_NOTRACE) { | 1058 | if (enable && !(rec->flags & FTRACE_FL_NOTRACE)) { |
1033 | if (rec->flags & FTRACE_FL_ENABLED) | 1059 | if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER)) |
1034 | rec->flags &= ~FTRACE_FL_ENABLED; | 1060 | flag = FTRACE_FL_ENABLED; |
1035 | else | 1061 | } |
1036 | return 0; | ||
1037 | |||
1038 | } else if (ftrace_filtered && enable) { | ||
1039 | /* | ||
1040 | * Filtering is on: | ||
1041 | */ | ||
1042 | |||
1043 | fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED); | ||
1044 | |||
1045 | /* Record is filtered and enabled, do nothing */ | ||
1046 | if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) | ||
1047 | return 0; | ||
1048 | |||
1049 | /* Record is not filtered or enabled, do nothing */ | ||
1050 | if (!fl) | ||
1051 | return 0; | ||
1052 | |||
1053 | /* Record is not filtered but enabled, disable it */ | ||
1054 | if (fl == FTRACE_FL_ENABLED) | ||
1055 | rec->flags &= ~FTRACE_FL_ENABLED; | ||
1056 | else | ||
1057 | /* Otherwise record is filtered but not enabled, enable it */ | ||
1058 | rec->flags |= FTRACE_FL_ENABLED; | ||
1059 | } else { | ||
1060 | /* Disable or not filtered */ | ||
1061 | |||
1062 | if (enable) { | ||
1063 | /* if record is enabled, do nothing */ | ||
1064 | if (rec->flags & FTRACE_FL_ENABLED) | ||
1065 | return 0; | ||
1066 | |||
1067 | rec->flags |= FTRACE_FL_ENABLED; | ||
1068 | |||
1069 | } else { | ||
1070 | 1062 | ||
1071 | /* if record is not enabled, do nothing */ | 1063 | /* If the state of this record hasn't changed, then do nothing */ |
1072 | if (!(rec->flags & FTRACE_FL_ENABLED)) | 1064 | if ((rec->flags & FTRACE_FL_ENABLED) == flag) |
1073 | return 0; | 1065 | return 0; |
1074 | 1066 | ||
1075 | rec->flags &= ~FTRACE_FL_ENABLED; | 1067 | if (flag) { |
1076 | } | 1068 | rec->flags |= FTRACE_FL_ENABLED; |
1069 | return ftrace_make_call(rec, ftrace_addr); | ||
1077 | } | 1070 | } |
1078 | 1071 | ||
1079 | if (rec->flags & FTRACE_FL_ENABLED) | 1072 | rec->flags &= ~FTRACE_FL_ENABLED; |
1080 | return ftrace_make_call(rec, ftrace_addr); | 1073 | return ftrace_make_nop(NULL, rec, ftrace_addr); |
1081 | else | ||
1082 | return ftrace_make_nop(NULL, rec, ftrace_addr); | ||
1083 | } | 1074 | } |
1084 | 1075 | ||
1085 | static void ftrace_replace_code(int enable) | 1076 | static void ftrace_replace_code(int enable) |
@@ -1098,25 +1089,12 @@ static void ftrace_replace_code(int enable) | |||
1098 | !(rec->flags & FTRACE_FL_CONVERTED)) | 1089 | !(rec->flags & FTRACE_FL_CONVERTED)) |
1099 | continue; | 1090 | continue; |
1100 | 1091 | ||
1101 | /* ignore updates to this record's mcount site */ | ||
1102 | if (get_kprobe((void *)rec->ip)) { | ||
1103 | freeze_record(rec); | ||
1104 | continue; | ||
1105 | } else { | ||
1106 | unfreeze_record(rec); | ||
1107 | } | ||
1108 | |||
1109 | failed = __ftrace_replace_code(rec, enable); | 1092 | failed = __ftrace_replace_code(rec, enable); |
1110 | if (failed) { | 1093 | if (failed) { |
1111 | rec->flags |= FTRACE_FL_FAILED; | 1094 | rec->flags |= FTRACE_FL_FAILED; |
1112 | if ((system_state == SYSTEM_BOOTING) || | 1095 | ftrace_bug(failed, rec->ip); |
1113 | !core_kernel_text(rec->ip)) { | 1096 | /* Stop processing */ |
1114 | ftrace_free_rec(rec); | 1097 | return; |
1115 | } else { | ||
1116 | ftrace_bug(failed, rec->ip); | ||
1117 | /* Stop processing */ | ||
1118 | return; | ||
1119 | } | ||
1120 | } | 1098 | } |
1121 | } while_for_each_ftrace_rec(); | 1099 | } while_for_each_ftrace_rec(); |
1122 | } | 1100 | } |
@@ -1247,8 +1225,6 @@ static void ftrace_shutdown(int command) | |||
1247 | 1225 | ||
1248 | static void ftrace_startup_sysctl(void) | 1226 | static void ftrace_startup_sysctl(void) |
1249 | { | 1227 | { |
1250 | int command = FTRACE_ENABLE_MCOUNT; | ||
1251 | |||
1252 | if (unlikely(ftrace_disabled)) | 1228 | if (unlikely(ftrace_disabled)) |
1253 | return; | 1229 | return; |
1254 | 1230 | ||
@@ -1256,23 +1232,17 @@ static void ftrace_startup_sysctl(void) | |||
1256 | saved_ftrace_func = NULL; | 1232 | saved_ftrace_func = NULL; |
1257 | /* ftrace_start_up is true if we want ftrace running */ | 1233 | /* ftrace_start_up is true if we want ftrace running */ |
1258 | if (ftrace_start_up) | 1234 | if (ftrace_start_up) |
1259 | command |= FTRACE_ENABLE_CALLS; | 1235 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); |
1260 | |||
1261 | ftrace_run_update_code(command); | ||
1262 | } | 1236 | } |
1263 | 1237 | ||
1264 | static void ftrace_shutdown_sysctl(void) | 1238 | static void ftrace_shutdown_sysctl(void) |
1265 | { | 1239 | { |
1266 | int command = FTRACE_DISABLE_MCOUNT; | ||
1267 | |||
1268 | if (unlikely(ftrace_disabled)) | 1240 | if (unlikely(ftrace_disabled)) |
1269 | return; | 1241 | return; |
1270 | 1242 | ||
1271 | /* ftrace_start_up is true if ftrace is running */ | 1243 | /* ftrace_start_up is true if ftrace is running */ |
1272 | if (ftrace_start_up) | 1244 | if (ftrace_start_up) |
1273 | command |= FTRACE_DISABLE_CALLS; | 1245 | ftrace_run_update_code(FTRACE_DISABLE_CALLS); |
1274 | |||
1275 | ftrace_run_update_code(command); | ||
1276 | } | 1246 | } |
1277 | 1247 | ||
1278 | static cycle_t ftrace_update_time; | 1248 | static cycle_t ftrace_update_time; |
@@ -1297,12 +1267,34 @@ static int ftrace_update_code(struct module *mod) | |||
1297 | ftrace_new_addrs = p->newlist; | 1267 | ftrace_new_addrs = p->newlist; |
1298 | p->flags = 0L; | 1268 | p->flags = 0L; |
1299 | 1269 | ||
1300 | /* convert record (i.e, patch mcount-call with NOP) */ | 1270 | /* |
1301 | if (ftrace_code_disable(mod, p)) { | 1271 | * Do the initial record convertion from mcount jump |
1302 | p->flags |= FTRACE_FL_CONVERTED; | 1272 | * to the NOP instructions. |
1303 | ftrace_update_cnt++; | 1273 | */ |
1304 | } else | 1274 | if (!ftrace_code_disable(mod, p)) { |
1305 | ftrace_free_rec(p); | 1275 | ftrace_free_rec(p); |
1276 | continue; | ||
1277 | } | ||
1278 | |||
1279 | p->flags |= FTRACE_FL_CONVERTED; | ||
1280 | ftrace_update_cnt++; | ||
1281 | |||
1282 | /* | ||
1283 | * If the tracing is enabled, go ahead and enable the record. | ||
1284 | * | ||
1285 | * The reason not to enable the record immediatelly is the | ||
1286 | * inherent check of ftrace_make_nop/ftrace_make_call for | ||
1287 | * correct previous instructions. Making first the NOP | ||
1288 | * conversion puts the module to the correct state, thus | ||
1289 | * passing the ftrace_make_call check. | ||
1290 | */ | ||
1291 | if (ftrace_start_up) { | ||
1292 | int failed = __ftrace_replace_code(p, 1); | ||
1293 | if (failed) { | ||
1294 | ftrace_bug(failed, p->ip); | ||
1295 | ftrace_free_rec(p); | ||
1296 | } | ||
1297 | } | ||
1306 | } | 1298 | } |
1307 | 1299 | ||
1308 | stop = ftrace_now(raw_smp_processor_id()); | 1300 | stop = ftrace_now(raw_smp_processor_id()); |
@@ -1358,36 +1350,38 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) | |||
1358 | 1350 | ||
1359 | enum { | 1351 | enum { |
1360 | FTRACE_ITER_FILTER = (1 << 0), | 1352 | FTRACE_ITER_FILTER = (1 << 0), |
1361 | FTRACE_ITER_CONT = (1 << 1), | 1353 | FTRACE_ITER_NOTRACE = (1 << 1), |
1362 | FTRACE_ITER_NOTRACE = (1 << 2), | 1354 | FTRACE_ITER_FAILURES = (1 << 2), |
1363 | FTRACE_ITER_FAILURES = (1 << 3), | 1355 | FTRACE_ITER_PRINTALL = (1 << 3), |
1364 | FTRACE_ITER_PRINTALL = (1 << 4), | 1356 | FTRACE_ITER_HASH = (1 << 4), |
1365 | FTRACE_ITER_HASH = (1 << 5), | ||
1366 | }; | 1357 | }; |
1367 | 1358 | ||
1368 | #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ | 1359 | #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ |
1369 | 1360 | ||
1370 | struct ftrace_iterator { | 1361 | struct ftrace_iterator { |
1371 | struct ftrace_page *pg; | 1362 | loff_t pos; |
1372 | int hidx; | 1363 | loff_t func_pos; |
1373 | int idx; | 1364 | struct ftrace_page *pg; |
1374 | unsigned flags; | 1365 | struct dyn_ftrace *func; |
1375 | unsigned char buffer[FTRACE_BUFF_MAX+1]; | 1366 | struct ftrace_func_probe *probe; |
1376 | unsigned buffer_idx; | 1367 | struct trace_parser parser; |
1377 | unsigned filtered; | 1368 | int hidx; |
1369 | int idx; | ||
1370 | unsigned flags; | ||
1378 | }; | 1371 | }; |
1379 | 1372 | ||
1380 | static void * | 1373 | static void * |
1381 | t_hash_next(struct seq_file *m, void *v, loff_t *pos) | 1374 | t_hash_next(struct seq_file *m, loff_t *pos) |
1382 | { | 1375 | { |
1383 | struct ftrace_iterator *iter = m->private; | 1376 | struct ftrace_iterator *iter = m->private; |
1384 | struct hlist_node *hnd = v; | 1377 | struct hlist_node *hnd = NULL; |
1385 | struct hlist_head *hhd; | 1378 | struct hlist_head *hhd; |
1386 | 1379 | ||
1387 | WARN_ON(!(iter->flags & FTRACE_ITER_HASH)); | ||
1388 | |||
1389 | (*pos)++; | 1380 | (*pos)++; |
1381 | iter->pos = *pos; | ||
1390 | 1382 | ||
1383 | if (iter->probe) | ||
1384 | hnd = &iter->probe->node; | ||
1391 | retry: | 1385 | retry: |
1392 | if (iter->hidx >= FTRACE_FUNC_HASHSIZE) | 1386 | if (iter->hidx >= FTRACE_FUNC_HASHSIZE) |
1393 | return NULL; | 1387 | return NULL; |
@@ -1410,35 +1404,51 @@ t_hash_next(struct seq_file *m, void *v, loff_t *pos) | |||
1410 | } | 1404 | } |
1411 | } | 1405 | } |
1412 | 1406 | ||
1413 | return hnd; | 1407 | if (WARN_ON_ONCE(!hnd)) |
1408 | return NULL; | ||
1409 | |||
1410 | iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node); | ||
1411 | |||
1412 | return iter; | ||
1414 | } | 1413 | } |
1415 | 1414 | ||
1416 | static void *t_hash_start(struct seq_file *m, loff_t *pos) | 1415 | static void *t_hash_start(struct seq_file *m, loff_t *pos) |
1417 | { | 1416 | { |
1418 | struct ftrace_iterator *iter = m->private; | 1417 | struct ftrace_iterator *iter = m->private; |
1419 | void *p = NULL; | 1418 | void *p = NULL; |
1419 | loff_t l; | ||
1420 | 1420 | ||
1421 | if (iter->func_pos > *pos) | ||
1422 | return NULL; | ||
1423 | |||
1424 | iter->hidx = 0; | ||
1425 | for (l = 0; l <= (*pos - iter->func_pos); ) { | ||
1426 | p = t_hash_next(m, &l); | ||
1427 | if (!p) | ||
1428 | break; | ||
1429 | } | ||
1430 | if (!p) | ||
1431 | return NULL; | ||
1432 | |||
1433 | /* Only set this if we have an item */ | ||
1421 | iter->flags |= FTRACE_ITER_HASH; | 1434 | iter->flags |= FTRACE_ITER_HASH; |
1422 | 1435 | ||
1423 | return t_hash_next(m, p, pos); | 1436 | return iter; |
1424 | } | 1437 | } |
1425 | 1438 | ||
1426 | static int t_hash_show(struct seq_file *m, void *v) | 1439 | static int |
1440 | t_hash_show(struct seq_file *m, struct ftrace_iterator *iter) | ||
1427 | { | 1441 | { |
1428 | struct ftrace_func_probe *rec; | 1442 | struct ftrace_func_probe *rec; |
1429 | struct hlist_node *hnd = v; | ||
1430 | char str[KSYM_SYMBOL_LEN]; | ||
1431 | 1443 | ||
1432 | rec = hlist_entry(hnd, struct ftrace_func_probe, node); | 1444 | rec = iter->probe; |
1445 | if (WARN_ON_ONCE(!rec)) | ||
1446 | return -EIO; | ||
1433 | 1447 | ||
1434 | if (rec->ops->print) | 1448 | if (rec->ops->print) |
1435 | return rec->ops->print(m, rec->ip, rec->ops, rec->data); | 1449 | return rec->ops->print(m, rec->ip, rec->ops, rec->data); |
1436 | 1450 | ||
1437 | kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); | 1451 | seq_printf(m, "%ps:%ps", (void *)rec->ip, (void *)rec->ops->func); |
1438 | seq_printf(m, "%s:", str); | ||
1439 | |||
1440 | kallsyms_lookup((unsigned long)rec->ops->func, NULL, NULL, NULL, str); | ||
1441 | seq_printf(m, "%s", str); | ||
1442 | 1452 | ||
1443 | if (rec->data) | 1453 | if (rec->data) |
1444 | seq_printf(m, ":%p", rec->data); | 1454 | seq_printf(m, ":%p", rec->data); |
@@ -1454,12 +1464,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
1454 | struct dyn_ftrace *rec = NULL; | 1464 | struct dyn_ftrace *rec = NULL; |
1455 | 1465 | ||
1456 | if (iter->flags & FTRACE_ITER_HASH) | 1466 | if (iter->flags & FTRACE_ITER_HASH) |
1457 | return t_hash_next(m, v, pos); | 1467 | return t_hash_next(m, pos); |
1458 | 1468 | ||
1459 | (*pos)++; | 1469 | (*pos)++; |
1470 | iter->pos = *pos; | ||
1460 | 1471 | ||
1461 | if (iter->flags & FTRACE_ITER_PRINTALL) | 1472 | if (iter->flags & FTRACE_ITER_PRINTALL) |
1462 | return NULL; | 1473 | return t_hash_start(m, pos); |
1463 | 1474 | ||
1464 | retry: | 1475 | retry: |
1465 | if (iter->idx >= iter->pg->index) { | 1476 | if (iter->idx >= iter->pg->index) { |
@@ -1467,8 +1478,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
1467 | iter->pg = iter->pg->next; | 1478 | iter->pg = iter->pg->next; |
1468 | iter->idx = 0; | 1479 | iter->idx = 0; |
1469 | goto retry; | 1480 | goto retry; |
1470 | } else { | ||
1471 | iter->idx = -1; | ||
1472 | } | 1481 | } |
1473 | } else { | 1482 | } else { |
1474 | rec = &iter->pg->records[iter->idx++]; | 1483 | rec = &iter->pg->records[iter->idx++]; |
@@ -1490,16 +1499,36 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
1490 | } | 1499 | } |
1491 | } | 1500 | } |
1492 | 1501 | ||
1493 | return rec; | 1502 | if (!rec) |
1503 | return t_hash_start(m, pos); | ||
1504 | |||
1505 | iter->func_pos = *pos; | ||
1506 | iter->func = rec; | ||
1507 | |||
1508 | return iter; | ||
1509 | } | ||
1510 | |||
1511 | static void reset_iter_read(struct ftrace_iterator *iter) | ||
1512 | { | ||
1513 | iter->pos = 0; | ||
1514 | iter->func_pos = 0; | ||
1515 | iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH); | ||
1494 | } | 1516 | } |
1495 | 1517 | ||
1496 | static void *t_start(struct seq_file *m, loff_t *pos) | 1518 | static void *t_start(struct seq_file *m, loff_t *pos) |
1497 | { | 1519 | { |
1498 | struct ftrace_iterator *iter = m->private; | 1520 | struct ftrace_iterator *iter = m->private; |
1499 | void *p = NULL; | 1521 | void *p = NULL; |
1522 | loff_t l; | ||
1500 | 1523 | ||
1501 | mutex_lock(&ftrace_lock); | 1524 | mutex_lock(&ftrace_lock); |
1502 | /* | 1525 | /* |
1526 | * If an lseek was done, then reset and start from beginning. | ||
1527 | */ | ||
1528 | if (*pos < iter->pos) | ||
1529 | reset_iter_read(iter); | ||
1530 | |||
1531 | /* | ||
1503 | * For set_ftrace_filter reading, if we have the filter | 1532 | * For set_ftrace_filter reading, if we have the filter |
1504 | * off, we can short cut and just print out that all | 1533 | * off, we can short cut and just print out that all |
1505 | * functions are enabled. | 1534 | * functions are enabled. |
@@ -1508,26 +1537,35 @@ static void *t_start(struct seq_file *m, loff_t *pos) | |||
1508 | if (*pos > 0) | 1537 | if (*pos > 0) |
1509 | return t_hash_start(m, pos); | 1538 | return t_hash_start(m, pos); |
1510 | iter->flags |= FTRACE_ITER_PRINTALL; | 1539 | iter->flags |= FTRACE_ITER_PRINTALL; |
1511 | (*pos)++; | 1540 | /* reset in case of seek/pread */ |
1541 | iter->flags &= ~FTRACE_ITER_HASH; | ||
1512 | return iter; | 1542 | return iter; |
1513 | } | 1543 | } |
1514 | 1544 | ||
1515 | if (iter->flags & FTRACE_ITER_HASH) | 1545 | if (iter->flags & FTRACE_ITER_HASH) |
1516 | return t_hash_start(m, pos); | 1546 | return t_hash_start(m, pos); |
1517 | 1547 | ||
1518 | if (*pos > 0) { | 1548 | /* |
1519 | if (iter->idx < 0) | 1549 | * Unfortunately, we need to restart at ftrace_pages_start |
1520 | return p; | 1550 | * every time we let go of the ftrace_mutex. This is because |
1521 | (*pos)--; | 1551 | * those pointers can change without the lock. |
1522 | iter->idx--; | 1552 | */ |
1553 | iter->pg = ftrace_pages_start; | ||
1554 | iter->idx = 0; | ||
1555 | for (l = 0; l <= *pos; ) { | ||
1556 | p = t_next(m, p, &l); | ||
1557 | if (!p) | ||
1558 | break; | ||
1523 | } | 1559 | } |
1524 | 1560 | ||
1525 | p = t_next(m, p, pos); | 1561 | if (!p) { |
1562 | if (iter->flags & FTRACE_ITER_FILTER) | ||
1563 | return t_hash_start(m, pos); | ||
1526 | 1564 | ||
1527 | if (!p) | 1565 | return NULL; |
1528 | return t_hash_start(m, pos); | 1566 | } |
1529 | 1567 | ||
1530 | return p; | 1568 | return iter; |
1531 | } | 1569 | } |
1532 | 1570 | ||
1533 | static void t_stop(struct seq_file *m, void *p) | 1571 | static void t_stop(struct seq_file *m, void *p) |
@@ -1538,28 +1576,27 @@ static void t_stop(struct seq_file *m, void *p) | |||
1538 | static int t_show(struct seq_file *m, void *v) | 1576 | static int t_show(struct seq_file *m, void *v) |
1539 | { | 1577 | { |
1540 | struct ftrace_iterator *iter = m->private; | 1578 | struct ftrace_iterator *iter = m->private; |
1541 | struct dyn_ftrace *rec = v; | 1579 | struct dyn_ftrace *rec; |
1542 | char str[KSYM_SYMBOL_LEN]; | ||
1543 | 1580 | ||
1544 | if (iter->flags & FTRACE_ITER_HASH) | 1581 | if (iter->flags & FTRACE_ITER_HASH) |
1545 | return t_hash_show(m, v); | 1582 | return t_hash_show(m, iter); |
1546 | 1583 | ||
1547 | if (iter->flags & FTRACE_ITER_PRINTALL) { | 1584 | if (iter->flags & FTRACE_ITER_PRINTALL) { |
1548 | seq_printf(m, "#### all functions enabled ####\n"); | 1585 | seq_printf(m, "#### all functions enabled ####\n"); |
1549 | return 0; | 1586 | return 0; |
1550 | } | 1587 | } |
1551 | 1588 | ||
1589 | rec = iter->func; | ||
1590 | |||
1552 | if (!rec) | 1591 | if (!rec) |
1553 | return 0; | 1592 | return 0; |
1554 | 1593 | ||
1555 | kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); | 1594 | seq_printf(m, "%ps\n", (void *)rec->ip); |
1556 | |||
1557 | seq_printf(m, "%s\n", str); | ||
1558 | 1595 | ||
1559 | return 0; | 1596 | return 0; |
1560 | } | 1597 | } |
1561 | 1598 | ||
1562 | static struct seq_operations show_ftrace_seq_ops = { | 1599 | static const struct seq_operations show_ftrace_seq_ops = { |
1563 | .start = t_start, | 1600 | .start = t_start, |
1564 | .next = t_next, | 1601 | .next = t_next, |
1565 | .stop = t_stop, | 1602 | .stop = t_stop, |
@@ -1593,17 +1630,6 @@ ftrace_avail_open(struct inode *inode, struct file *file) | |||
1593 | return ret; | 1630 | return ret; |
1594 | } | 1631 | } |
1595 | 1632 | ||
1596 | int ftrace_avail_release(struct inode *inode, struct file *file) | ||
1597 | { | ||
1598 | struct seq_file *m = (struct seq_file *)file->private_data; | ||
1599 | struct ftrace_iterator *iter = m->private; | ||
1600 | |||
1601 | seq_release(inode, file); | ||
1602 | kfree(iter); | ||
1603 | |||
1604 | return 0; | ||
1605 | } | ||
1606 | |||
1607 | static int | 1633 | static int |
1608 | ftrace_failures_open(struct inode *inode, struct file *file) | 1634 | ftrace_failures_open(struct inode *inode, struct file *file) |
1609 | { | 1635 | { |
@@ -1613,8 +1639,8 @@ ftrace_failures_open(struct inode *inode, struct file *file) | |||
1613 | 1639 | ||
1614 | ret = ftrace_avail_open(inode, file); | 1640 | ret = ftrace_avail_open(inode, file); |
1615 | if (!ret) { | 1641 | if (!ret) { |
1616 | m = (struct seq_file *)file->private_data; | 1642 | m = file->private_data; |
1617 | iter = (struct ftrace_iterator *)m->private; | 1643 | iter = m->private; |
1618 | iter->flags = FTRACE_ITER_FAILURES; | 1644 | iter->flags = FTRACE_ITER_FAILURES; |
1619 | } | 1645 | } |
1620 | 1646 | ||
@@ -1652,9 +1678,14 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) | |||
1652 | if (!iter) | 1678 | if (!iter) |
1653 | return -ENOMEM; | 1679 | return -ENOMEM; |
1654 | 1680 | ||
1681 | if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) { | ||
1682 | kfree(iter); | ||
1683 | return -ENOMEM; | ||
1684 | } | ||
1685 | |||
1655 | mutex_lock(&ftrace_regex_lock); | 1686 | mutex_lock(&ftrace_regex_lock); |
1656 | if ((file->f_mode & FMODE_WRITE) && | 1687 | if ((file->f_mode & FMODE_WRITE) && |
1657 | !(file->f_flags & O_APPEND)) | 1688 | (file->f_flags & O_TRUNC)) |
1658 | ftrace_filter_reset(enable); | 1689 | ftrace_filter_reset(enable); |
1659 | 1690 | ||
1660 | if (file->f_mode & FMODE_READ) { | 1691 | if (file->f_mode & FMODE_READ) { |
@@ -1666,8 +1697,10 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) | |||
1666 | if (!ret) { | 1697 | if (!ret) { |
1667 | struct seq_file *m = file->private_data; | 1698 | struct seq_file *m = file->private_data; |
1668 | m->private = iter; | 1699 | m->private = iter; |
1669 | } else | 1700 | } else { |
1701 | trace_parser_put(&iter->parser); | ||
1670 | kfree(iter); | 1702 | kfree(iter); |
1703 | } | ||
1671 | } else | 1704 | } else |
1672 | file->private_data = iter; | 1705 | file->private_data = iter; |
1673 | mutex_unlock(&ftrace_regex_lock); | 1706 | mutex_unlock(&ftrace_regex_lock); |
@@ -1700,64 +1733,10 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin) | |||
1700 | return ret; | 1733 | return ret; |
1701 | } | 1734 | } |
1702 | 1735 | ||
1703 | enum { | ||
1704 | MATCH_FULL, | ||
1705 | MATCH_FRONT_ONLY, | ||
1706 | MATCH_MIDDLE_ONLY, | ||
1707 | MATCH_END_ONLY, | ||
1708 | }; | ||
1709 | |||
1710 | /* | ||
1711 | * (static function - no need for kernel doc) | ||
1712 | * | ||
1713 | * Pass in a buffer containing a glob and this function will | ||
1714 | * set search to point to the search part of the buffer and | ||
1715 | * return the type of search it is (see enum above). | ||
1716 | * This does modify buff. | ||
1717 | * | ||
1718 | * Returns enum type. | ||
1719 | * search returns the pointer to use for comparison. | ||
1720 | * not returns 1 if buff started with a '!' | ||
1721 | * 0 otherwise. | ||
1722 | */ | ||
1723 | static int | ||
1724 | ftrace_setup_glob(char *buff, int len, char **search, int *not) | ||
1725 | { | ||
1726 | int type = MATCH_FULL; | ||
1727 | int i; | ||
1728 | |||
1729 | if (buff[0] == '!') { | ||
1730 | *not = 1; | ||
1731 | buff++; | ||
1732 | len--; | ||
1733 | } else | ||
1734 | *not = 0; | ||
1735 | |||
1736 | *search = buff; | ||
1737 | |||
1738 | for (i = 0; i < len; i++) { | ||
1739 | if (buff[i] == '*') { | ||
1740 | if (!i) { | ||
1741 | *search = buff + 1; | ||
1742 | type = MATCH_END_ONLY; | ||
1743 | } else { | ||
1744 | if (type == MATCH_END_ONLY) | ||
1745 | type = MATCH_MIDDLE_ONLY; | ||
1746 | else | ||
1747 | type = MATCH_FRONT_ONLY; | ||
1748 | buff[i] = 0; | ||
1749 | break; | ||
1750 | } | ||
1751 | } | ||
1752 | } | ||
1753 | |||
1754 | return type; | ||
1755 | } | ||
1756 | |||
1757 | static int ftrace_match(char *str, char *regex, int len, int type) | 1736 | static int ftrace_match(char *str, char *regex, int len, int type) |
1758 | { | 1737 | { |
1759 | int matched = 0; | 1738 | int matched = 0; |
1760 | char *ptr; | 1739 | int slen; |
1761 | 1740 | ||
1762 | switch (type) { | 1741 | switch (type) { |
1763 | case MATCH_FULL: | 1742 | case MATCH_FULL: |
@@ -1773,8 +1752,8 @@ static int ftrace_match(char *str, char *regex, int len, int type) | |||
1773 | matched = 1; | 1752 | matched = 1; |
1774 | break; | 1753 | break; |
1775 | case MATCH_END_ONLY: | 1754 | case MATCH_END_ONLY: |
1776 | ptr = strstr(str, regex); | 1755 | slen = strlen(str); |
1777 | if (ptr && (ptr[len] == 0)) | 1756 | if (slen >= len && memcmp(str + slen - len, regex, len) == 0) |
1778 | matched = 1; | 1757 | matched = 1; |
1779 | break; | 1758 | break; |
1780 | } | 1759 | } |
@@ -1791,7 +1770,7 @@ ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type) | |||
1791 | return ftrace_match(str, regex, len, type); | 1770 | return ftrace_match(str, regex, len, type); |
1792 | } | 1771 | } |
1793 | 1772 | ||
1794 | static void ftrace_match_records(char *buff, int len, int enable) | 1773 | static int ftrace_match_records(char *buff, int len, int enable) |
1795 | { | 1774 | { |
1796 | unsigned int search_len; | 1775 | unsigned int search_len; |
1797 | struct ftrace_page *pg; | 1776 | struct ftrace_page *pg; |
@@ -1800,9 +1779,10 @@ static void ftrace_match_records(char *buff, int len, int enable) | |||
1800 | char *search; | 1779 | char *search; |
1801 | int type; | 1780 | int type; |
1802 | int not; | 1781 | int not; |
1782 | int found = 0; | ||
1803 | 1783 | ||
1804 | flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; | 1784 | flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; |
1805 | type = ftrace_setup_glob(buff, len, &search, ¬); | 1785 | type = filter_parse_regex(buff, len, &search, ¬); |
1806 | 1786 | ||
1807 | search_len = strlen(search); | 1787 | search_len = strlen(search); |
1808 | 1788 | ||
@@ -1817,6 +1797,7 @@ static void ftrace_match_records(char *buff, int len, int enable) | |||
1817 | rec->flags &= ~flag; | 1797 | rec->flags &= ~flag; |
1818 | else | 1798 | else |
1819 | rec->flags |= flag; | 1799 | rec->flags |= flag; |
1800 | found = 1; | ||
1820 | } | 1801 | } |
1821 | /* | 1802 | /* |
1822 | * Only enable filtering if we have a function that | 1803 | * Only enable filtering if we have a function that |
@@ -1826,6 +1807,8 @@ static void ftrace_match_records(char *buff, int len, int enable) | |||
1826 | ftrace_filtered = 1; | 1807 | ftrace_filtered = 1; |
1827 | } while_for_each_ftrace_rec(); | 1808 | } while_for_each_ftrace_rec(); |
1828 | mutex_unlock(&ftrace_lock); | 1809 | mutex_unlock(&ftrace_lock); |
1810 | |||
1811 | return found; | ||
1829 | } | 1812 | } |
1830 | 1813 | ||
1831 | static int | 1814 | static int |
@@ -1847,7 +1830,7 @@ ftrace_match_module_record(struct dyn_ftrace *rec, char *mod, | |||
1847 | return 1; | 1830 | return 1; |
1848 | } | 1831 | } |
1849 | 1832 | ||
1850 | static void ftrace_match_module_records(char *buff, char *mod, int enable) | 1833 | static int ftrace_match_module_records(char *buff, char *mod, int enable) |
1851 | { | 1834 | { |
1852 | unsigned search_len = 0; | 1835 | unsigned search_len = 0; |
1853 | struct ftrace_page *pg; | 1836 | struct ftrace_page *pg; |
@@ -1856,6 +1839,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable) | |||
1856 | char *search = buff; | 1839 | char *search = buff; |
1857 | unsigned long flag; | 1840 | unsigned long flag; |
1858 | int not = 0; | 1841 | int not = 0; |
1842 | int found = 0; | ||
1859 | 1843 | ||
1860 | flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; | 1844 | flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; |
1861 | 1845 | ||
@@ -1870,7 +1854,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable) | |||
1870 | } | 1854 | } |
1871 | 1855 | ||
1872 | if (strlen(buff)) { | 1856 | if (strlen(buff)) { |
1873 | type = ftrace_setup_glob(buff, strlen(buff), &search, ¬); | 1857 | type = filter_parse_regex(buff, strlen(buff), &search, ¬); |
1874 | search_len = strlen(search); | 1858 | search_len = strlen(search); |
1875 | } | 1859 | } |
1876 | 1860 | ||
@@ -1886,12 +1870,15 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable) | |||
1886 | rec->flags &= ~flag; | 1870 | rec->flags &= ~flag; |
1887 | else | 1871 | else |
1888 | rec->flags |= flag; | 1872 | rec->flags |= flag; |
1873 | found = 1; | ||
1889 | } | 1874 | } |
1890 | if (enable && (rec->flags & FTRACE_FL_FILTER)) | 1875 | if (enable && (rec->flags & FTRACE_FL_FILTER)) |
1891 | ftrace_filtered = 1; | 1876 | ftrace_filtered = 1; |
1892 | 1877 | ||
1893 | } while_for_each_ftrace_rec(); | 1878 | } while_for_each_ftrace_rec(); |
1894 | mutex_unlock(&ftrace_lock); | 1879 | mutex_unlock(&ftrace_lock); |
1880 | |||
1881 | return found; | ||
1895 | } | 1882 | } |
1896 | 1883 | ||
1897 | /* | 1884 | /* |
@@ -1920,8 +1907,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable) | |||
1920 | if (!strlen(mod)) | 1907 | if (!strlen(mod)) |
1921 | return -EINVAL; | 1908 | return -EINVAL; |
1922 | 1909 | ||
1923 | ftrace_match_module_records(func, mod, enable); | 1910 | if (ftrace_match_module_records(func, mod, enable)) |
1924 | return 0; | 1911 | return 0; |
1912 | return -EINVAL; | ||
1925 | } | 1913 | } |
1926 | 1914 | ||
1927 | static struct ftrace_func_command ftrace_mod_cmd = { | 1915 | static struct ftrace_func_command ftrace_mod_cmd = { |
@@ -1942,7 +1930,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip) | |||
1942 | struct hlist_head *hhd; | 1930 | struct hlist_head *hhd; |
1943 | struct hlist_node *n; | 1931 | struct hlist_node *n; |
1944 | unsigned long key; | 1932 | unsigned long key; |
1945 | int resched; | ||
1946 | 1933 | ||
1947 | key = hash_long(ip, FTRACE_HASH_BITS); | 1934 | key = hash_long(ip, FTRACE_HASH_BITS); |
1948 | 1935 | ||
@@ -1956,12 +1943,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip) | |||
1956 | * period. This syncs the hash iteration and freeing of items | 1943 | * period. This syncs the hash iteration and freeing of items |
1957 | * on the hash. rcu_read_lock is too dangerous here. | 1944 | * on the hash. rcu_read_lock is too dangerous here. |
1958 | */ | 1945 | */ |
1959 | resched = ftrace_preempt_disable(); | 1946 | preempt_disable_notrace(); |
1960 | hlist_for_each_entry_rcu(entry, n, hhd, node) { | 1947 | hlist_for_each_entry_rcu(entry, n, hhd, node) { |
1961 | if (entry->ip == ip) | 1948 | if (entry->ip == ip) |
1962 | entry->ops->func(ip, parent_ip, &entry->data); | 1949 | entry->ops->func(ip, parent_ip, &entry->data); |
1963 | } | 1950 | } |
1964 | ftrace_preempt_enable(resched); | 1951 | preempt_enable_notrace(); |
1965 | } | 1952 | } |
1966 | 1953 | ||
1967 | static struct ftrace_ops trace_probe_ops __read_mostly = | 1954 | static struct ftrace_ops trace_probe_ops __read_mostly = |
@@ -2035,7 +2022,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, | |||
2035 | int count = 0; | 2022 | int count = 0; |
2036 | char *search; | 2023 | char *search; |
2037 | 2024 | ||
2038 | type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); | 2025 | type = filter_parse_regex(glob, strlen(glob), &search, ¬); |
2039 | len = strlen(search); | 2026 | len = strlen(search); |
2040 | 2027 | ||
2041 | /* we do not support '!' for function probes */ | 2028 | /* we do not support '!' for function probes */ |
@@ -2107,12 +2094,12 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, | |||
2107 | int i, len = 0; | 2094 | int i, len = 0; |
2108 | char *search; | 2095 | char *search; |
2109 | 2096 | ||
2110 | if (glob && (strcmp(glob, "*") || !strlen(glob))) | 2097 | if (glob && (strcmp(glob, "*") == 0 || !strlen(glob))) |
2111 | glob = NULL; | 2098 | glob = NULL; |
2112 | else { | 2099 | else if (glob) { |
2113 | int not; | 2100 | int not; |
2114 | 2101 | ||
2115 | type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); | 2102 | type = filter_parse_regex(glob, strlen(glob), &search, ¬); |
2116 | len = strlen(search); | 2103 | len = strlen(search); |
2117 | 2104 | ||
2118 | /* we do not support '!' for function probes */ | 2105 | /* we do not support '!' for function probes */ |
@@ -2218,8 +2205,9 @@ static int ftrace_process_regex(char *buff, int len, int enable) | |||
2218 | func = strsep(&next, ":"); | 2205 | func = strsep(&next, ":"); |
2219 | 2206 | ||
2220 | if (!next) { | 2207 | if (!next) { |
2221 | ftrace_match_records(func, len, enable); | 2208 | if (ftrace_match_records(func, len, enable)) |
2222 | return 0; | 2209 | return 0; |
2210 | return ret; | ||
2223 | } | 2211 | } |
2224 | 2212 | ||
2225 | /* command found */ | 2213 | /* command found */ |
@@ -2244,11 +2232,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, | |||
2244 | size_t cnt, loff_t *ppos, int enable) | 2232 | size_t cnt, loff_t *ppos, int enable) |
2245 | { | 2233 | { |
2246 | struct ftrace_iterator *iter; | 2234 | struct ftrace_iterator *iter; |
2247 | char ch; | 2235 | struct trace_parser *parser; |
2248 | size_t read = 0; | 2236 | ssize_t ret, read; |
2249 | ssize_t ret; | ||
2250 | 2237 | ||
2251 | if (!cnt || cnt < 0) | 2238 | if (!cnt) |
2252 | return 0; | 2239 | return 0; |
2253 | 2240 | ||
2254 | mutex_lock(&ftrace_regex_lock); | 2241 | mutex_lock(&ftrace_regex_lock); |
@@ -2259,66 +2246,20 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, | |||
2259 | } else | 2246 | } else |
2260 | iter = file->private_data; | 2247 | iter = file->private_data; |
2261 | 2248 | ||
2262 | if (!*ppos) { | 2249 | parser = &iter->parser; |
2263 | iter->flags &= ~FTRACE_ITER_CONT; | 2250 | read = trace_get_user(parser, ubuf, cnt, ppos); |
2264 | iter->buffer_idx = 0; | ||
2265 | } | ||
2266 | |||
2267 | ret = get_user(ch, ubuf++); | ||
2268 | if (ret) | ||
2269 | goto out; | ||
2270 | read++; | ||
2271 | cnt--; | ||
2272 | |||
2273 | if (!(iter->flags & ~FTRACE_ITER_CONT)) { | ||
2274 | /* skip white space */ | ||
2275 | while (cnt && isspace(ch)) { | ||
2276 | ret = get_user(ch, ubuf++); | ||
2277 | if (ret) | ||
2278 | goto out; | ||
2279 | read++; | ||
2280 | cnt--; | ||
2281 | } | ||
2282 | 2251 | ||
2283 | if (isspace(ch)) { | 2252 | if (read >= 0 && trace_parser_loaded(parser) && |
2284 | file->f_pos += read; | 2253 | !trace_parser_cont(parser)) { |
2285 | ret = read; | 2254 | ret = ftrace_process_regex(parser->buffer, |
2286 | goto out; | 2255 | parser->idx, enable); |
2287 | } | 2256 | trace_parser_clear(parser); |
2288 | |||
2289 | iter->buffer_idx = 0; | ||
2290 | } | ||
2291 | |||
2292 | while (cnt && !isspace(ch)) { | ||
2293 | if (iter->buffer_idx < FTRACE_BUFF_MAX) | ||
2294 | iter->buffer[iter->buffer_idx++] = ch; | ||
2295 | else { | ||
2296 | ret = -EINVAL; | ||
2297 | goto out; | ||
2298 | } | ||
2299 | ret = get_user(ch, ubuf++); | ||
2300 | if (ret) | 2257 | if (ret) |
2301 | goto out; | 2258 | goto out_unlock; |
2302 | read++; | ||
2303 | cnt--; | ||
2304 | } | 2259 | } |
2305 | 2260 | ||
2306 | if (isspace(ch)) { | ||
2307 | iter->filtered++; | ||
2308 | iter->buffer[iter->buffer_idx] = 0; | ||
2309 | ret = ftrace_process_regex(iter->buffer, | ||
2310 | iter->buffer_idx, enable); | ||
2311 | if (ret) | ||
2312 | goto out; | ||
2313 | iter->buffer_idx = 0; | ||
2314 | } else | ||
2315 | iter->flags |= FTRACE_ITER_CONT; | ||
2316 | |||
2317 | |||
2318 | file->f_pos += read; | ||
2319 | |||
2320 | ret = read; | 2261 | ret = read; |
2321 | out: | 2262 | out_unlock: |
2322 | mutex_unlock(&ftrace_regex_lock); | 2263 | mutex_unlock(&ftrace_regex_lock); |
2323 | 2264 | ||
2324 | return ret; | 2265 | return ret; |
@@ -2402,6 +2343,34 @@ static int __init set_ftrace_filter(char *str) | |||
2402 | } | 2343 | } |
2403 | __setup("ftrace_filter=", set_ftrace_filter); | 2344 | __setup("ftrace_filter=", set_ftrace_filter); |
2404 | 2345 | ||
2346 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
2347 | static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; | ||
2348 | static int ftrace_set_func(unsigned long *array, int *idx, char *buffer); | ||
2349 | |||
2350 | static int __init set_graph_function(char *str) | ||
2351 | { | ||
2352 | strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); | ||
2353 | return 1; | ||
2354 | } | ||
2355 | __setup("ftrace_graph_filter=", set_graph_function); | ||
2356 | |||
2357 | static void __init set_ftrace_early_graph(char *buf) | ||
2358 | { | ||
2359 | int ret; | ||
2360 | char *func; | ||
2361 | |||
2362 | while (buf) { | ||
2363 | func = strsep(&buf, ","); | ||
2364 | /* we allow only one expression at a time */ | ||
2365 | ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, | ||
2366 | func); | ||
2367 | if (ret) | ||
2368 | printk(KERN_DEBUG "ftrace: function %s not " | ||
2369 | "traceable\n", func); | ||
2370 | } | ||
2371 | } | ||
2372 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | ||
2373 | |||
2405 | static void __init set_ftrace_early_filter(char *buf, int enable) | 2374 | static void __init set_ftrace_early_filter(char *buf, int enable) |
2406 | { | 2375 | { |
2407 | char *func; | 2376 | char *func; |
@@ -2418,6 +2387,10 @@ static void __init set_ftrace_early_filters(void) | |||
2418 | set_ftrace_early_filter(ftrace_filter_buf, 1); | 2387 | set_ftrace_early_filter(ftrace_filter_buf, 1); |
2419 | if (ftrace_notrace_buf[0]) | 2388 | if (ftrace_notrace_buf[0]) |
2420 | set_ftrace_early_filter(ftrace_notrace_buf, 0); | 2389 | set_ftrace_early_filter(ftrace_notrace_buf, 0); |
2390 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
2391 | if (ftrace_graph_buf[0]) | ||
2392 | set_ftrace_early_graph(ftrace_graph_buf); | ||
2393 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | ||
2421 | } | 2394 | } |
2422 | 2395 | ||
2423 | static int | 2396 | static int |
@@ -2425,6 +2398,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) | |||
2425 | { | 2398 | { |
2426 | struct seq_file *m = (struct seq_file *)file->private_data; | 2399 | struct seq_file *m = (struct seq_file *)file->private_data; |
2427 | struct ftrace_iterator *iter; | 2400 | struct ftrace_iterator *iter; |
2401 | struct trace_parser *parser; | ||
2428 | 2402 | ||
2429 | mutex_lock(&ftrace_regex_lock); | 2403 | mutex_lock(&ftrace_regex_lock); |
2430 | if (file->f_mode & FMODE_READ) { | 2404 | if (file->f_mode & FMODE_READ) { |
@@ -2434,10 +2408,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) | |||
2434 | } else | 2408 | } else |
2435 | iter = file->private_data; | 2409 | iter = file->private_data; |
2436 | 2410 | ||
2437 | if (iter->buffer_idx) { | 2411 | parser = &iter->parser; |
2438 | iter->filtered++; | 2412 | if (trace_parser_loaded(parser)) { |
2439 | iter->buffer[iter->buffer_idx] = 0; | 2413 | parser->buffer[parser->idx] = 0; |
2440 | ftrace_match_records(iter->buffer, iter->buffer_idx, enable); | 2414 | ftrace_match_records(parser->buffer, parser->idx, enable); |
2441 | } | 2415 | } |
2442 | 2416 | ||
2443 | mutex_lock(&ftrace_lock); | 2417 | mutex_lock(&ftrace_lock); |
@@ -2445,7 +2419,9 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) | |||
2445 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); | 2419 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); |
2446 | mutex_unlock(&ftrace_lock); | 2420 | mutex_unlock(&ftrace_lock); |
2447 | 2421 | ||
2422 | trace_parser_put(parser); | ||
2448 | kfree(iter); | 2423 | kfree(iter); |
2424 | |||
2449 | mutex_unlock(&ftrace_regex_lock); | 2425 | mutex_unlock(&ftrace_regex_lock); |
2450 | return 0; | 2426 | return 0; |
2451 | } | 2427 | } |
@@ -2466,14 +2442,14 @@ static const struct file_operations ftrace_avail_fops = { | |||
2466 | .open = ftrace_avail_open, | 2442 | .open = ftrace_avail_open, |
2467 | .read = seq_read, | 2443 | .read = seq_read, |
2468 | .llseek = seq_lseek, | 2444 | .llseek = seq_lseek, |
2469 | .release = ftrace_avail_release, | 2445 | .release = seq_release_private, |
2470 | }; | 2446 | }; |
2471 | 2447 | ||
2472 | static const struct file_operations ftrace_failures_fops = { | 2448 | static const struct file_operations ftrace_failures_fops = { |
2473 | .open = ftrace_failures_open, | 2449 | .open = ftrace_failures_open, |
2474 | .read = seq_read, | 2450 | .read = seq_read, |
2475 | .llseek = seq_lseek, | 2451 | .llseek = seq_lseek, |
2476 | .release = ftrace_avail_release, | 2452 | .release = seq_release_private, |
2477 | }; | 2453 | }; |
2478 | 2454 | ||
2479 | static const struct file_operations ftrace_filter_fops = { | 2455 | static const struct file_operations ftrace_filter_fops = { |
@@ -2497,35 +2473,33 @@ static const struct file_operations ftrace_notrace_fops = { | |||
2497 | static DEFINE_MUTEX(graph_lock); | 2473 | static DEFINE_MUTEX(graph_lock); |
2498 | 2474 | ||
2499 | int ftrace_graph_count; | 2475 | int ftrace_graph_count; |
2476 | int ftrace_graph_filter_enabled; | ||
2500 | unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; | 2477 | unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; |
2501 | 2478 | ||
2502 | static void * | 2479 | static void * |
2503 | g_next(struct seq_file *m, void *v, loff_t *pos) | 2480 | __g_next(struct seq_file *m, loff_t *pos) |
2504 | { | 2481 | { |
2505 | unsigned long *array = m->private; | 2482 | if (*pos >= ftrace_graph_count) |
2506 | int index = *pos; | ||
2507 | |||
2508 | (*pos)++; | ||
2509 | |||
2510 | if (index >= ftrace_graph_count) | ||
2511 | return NULL; | 2483 | return NULL; |
2484 | return &ftrace_graph_funcs[*pos]; | ||
2485 | } | ||
2512 | 2486 | ||
2513 | return &array[index]; | 2487 | static void * |
2488 | g_next(struct seq_file *m, void *v, loff_t *pos) | ||
2489 | { | ||
2490 | (*pos)++; | ||
2491 | return __g_next(m, pos); | ||
2514 | } | 2492 | } |
2515 | 2493 | ||
2516 | static void *g_start(struct seq_file *m, loff_t *pos) | 2494 | static void *g_start(struct seq_file *m, loff_t *pos) |
2517 | { | 2495 | { |
2518 | void *p = NULL; | ||
2519 | |||
2520 | mutex_lock(&graph_lock); | 2496 | mutex_lock(&graph_lock); |
2521 | 2497 | ||
2522 | /* Nothing, tell g_show to print all functions are enabled */ | 2498 | /* Nothing, tell g_show to print all functions are enabled */ |
2523 | if (!ftrace_graph_count && !*pos) | 2499 | if (!ftrace_graph_filter_enabled && !*pos) |
2524 | return (void *)1; | 2500 | return (void *)1; |
2525 | 2501 | ||
2526 | p = g_next(m, p, pos); | 2502 | return __g_next(m, pos); |
2527 | |||
2528 | return p; | ||
2529 | } | 2503 | } |
2530 | 2504 | ||
2531 | static void g_stop(struct seq_file *m, void *p) | 2505 | static void g_stop(struct seq_file *m, void *p) |
@@ -2536,7 +2510,6 @@ static void g_stop(struct seq_file *m, void *p) | |||
2536 | static int g_show(struct seq_file *m, void *v) | 2510 | static int g_show(struct seq_file *m, void *v) |
2537 | { | 2511 | { |
2538 | unsigned long *ptr = v; | 2512 | unsigned long *ptr = v; |
2539 | char str[KSYM_SYMBOL_LEN]; | ||
2540 | 2513 | ||
2541 | if (!ptr) | 2514 | if (!ptr) |
2542 | return 0; | 2515 | return 0; |
@@ -2546,14 +2519,12 @@ static int g_show(struct seq_file *m, void *v) | |||
2546 | return 0; | 2519 | return 0; |
2547 | } | 2520 | } |
2548 | 2521 | ||
2549 | kallsyms_lookup(*ptr, NULL, NULL, NULL, str); | 2522 | seq_printf(m, "%ps\n", (void *)*ptr); |
2550 | |||
2551 | seq_printf(m, "%s\n", str); | ||
2552 | 2523 | ||
2553 | return 0; | 2524 | return 0; |
2554 | } | 2525 | } |
2555 | 2526 | ||
2556 | static struct seq_operations ftrace_graph_seq_ops = { | 2527 | static const struct seq_operations ftrace_graph_seq_ops = { |
2557 | .start = g_start, | 2528 | .start = g_start, |
2558 | .next = g_next, | 2529 | .next = g_next, |
2559 | .stop = g_stop, | 2530 | .stop = g_stop, |
@@ -2570,31 +2541,34 @@ ftrace_graph_open(struct inode *inode, struct file *file) | |||
2570 | 2541 | ||
2571 | mutex_lock(&graph_lock); | 2542 | mutex_lock(&graph_lock); |
2572 | if ((file->f_mode & FMODE_WRITE) && | 2543 | if ((file->f_mode & FMODE_WRITE) && |
2573 | !(file->f_flags & O_APPEND)) { | 2544 | (file->f_flags & O_TRUNC)) { |
2545 | ftrace_graph_filter_enabled = 0; | ||
2574 | ftrace_graph_count = 0; | 2546 | ftrace_graph_count = 0; |
2575 | memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); | 2547 | memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); |
2576 | } | 2548 | } |
2549 | mutex_unlock(&graph_lock); | ||
2577 | 2550 | ||
2578 | if (file->f_mode & FMODE_READ) { | 2551 | if (file->f_mode & FMODE_READ) |
2579 | ret = seq_open(file, &ftrace_graph_seq_ops); | 2552 | ret = seq_open(file, &ftrace_graph_seq_ops); |
2580 | if (!ret) { | ||
2581 | struct seq_file *m = file->private_data; | ||
2582 | m->private = ftrace_graph_funcs; | ||
2583 | } | ||
2584 | } else | ||
2585 | file->private_data = ftrace_graph_funcs; | ||
2586 | mutex_unlock(&graph_lock); | ||
2587 | 2553 | ||
2588 | return ret; | 2554 | return ret; |
2589 | } | 2555 | } |
2590 | 2556 | ||
2591 | static int | 2557 | static int |
2558 | ftrace_graph_release(struct inode *inode, struct file *file) | ||
2559 | { | ||
2560 | if (file->f_mode & FMODE_READ) | ||
2561 | seq_release(inode, file); | ||
2562 | return 0; | ||
2563 | } | ||
2564 | |||
2565 | static int | ||
2592 | ftrace_set_func(unsigned long *array, int *idx, char *buffer) | 2566 | ftrace_set_func(unsigned long *array, int *idx, char *buffer) |
2593 | { | 2567 | { |
2594 | struct dyn_ftrace *rec; | 2568 | struct dyn_ftrace *rec; |
2595 | struct ftrace_page *pg; | 2569 | struct ftrace_page *pg; |
2596 | int search_len; | 2570 | int search_len; |
2597 | int found = 0; | 2571 | int fail = 1; |
2598 | int type, not; | 2572 | int type, not; |
2599 | char *search; | 2573 | char *search; |
2600 | bool exists; | 2574 | bool exists; |
@@ -2604,122 +2578,99 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) | |||
2604 | return -ENODEV; | 2578 | return -ENODEV; |
2605 | 2579 | ||
2606 | /* decode regex */ | 2580 | /* decode regex */ |
2607 | type = ftrace_setup_glob(buffer, strlen(buffer), &search, ¬); | 2581 | type = filter_parse_regex(buffer, strlen(buffer), &search, ¬); |
2608 | if (not) | 2582 | if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS) |
2609 | return -EINVAL; | 2583 | return -EBUSY; |
2610 | 2584 | ||
2611 | search_len = strlen(search); | 2585 | search_len = strlen(search); |
2612 | 2586 | ||
2613 | mutex_lock(&ftrace_lock); | 2587 | mutex_lock(&ftrace_lock); |
2614 | do_for_each_ftrace_rec(pg, rec) { | 2588 | do_for_each_ftrace_rec(pg, rec) { |
2615 | 2589 | ||
2616 | if (*idx >= FTRACE_GRAPH_MAX_FUNCS) | ||
2617 | break; | ||
2618 | |||
2619 | if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) | 2590 | if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) |
2620 | continue; | 2591 | continue; |
2621 | 2592 | ||
2622 | if (ftrace_match_record(rec, search, search_len, type)) { | 2593 | if (ftrace_match_record(rec, search, search_len, type)) { |
2623 | /* ensure it is not already in the array */ | 2594 | /* if it is in the array */ |
2624 | exists = false; | 2595 | exists = false; |
2625 | for (i = 0; i < *idx; i++) | 2596 | for (i = 0; i < *idx; i++) { |
2626 | if (array[i] == rec->ip) { | 2597 | if (array[i] == rec->ip) { |
2627 | exists = true; | 2598 | exists = true; |
2628 | break; | 2599 | break; |
2629 | } | 2600 | } |
2630 | if (!exists) { | 2601 | } |
2631 | array[(*idx)++] = rec->ip; | 2602 | |
2632 | found = 1; | 2603 | if (!not) { |
2604 | fail = 0; | ||
2605 | if (!exists) { | ||
2606 | array[(*idx)++] = rec->ip; | ||
2607 | if (*idx >= FTRACE_GRAPH_MAX_FUNCS) | ||
2608 | goto out; | ||
2609 | } | ||
2610 | } else { | ||
2611 | if (exists) { | ||
2612 | array[i] = array[--(*idx)]; | ||
2613 | array[*idx] = 0; | ||
2614 | fail = 0; | ||
2615 | } | ||
2633 | } | 2616 | } |
2634 | } | 2617 | } |
2635 | } while_for_each_ftrace_rec(); | 2618 | } while_for_each_ftrace_rec(); |
2636 | 2619 | out: | |
2637 | mutex_unlock(&ftrace_lock); | 2620 | mutex_unlock(&ftrace_lock); |
2638 | 2621 | ||
2639 | return found ? 0 : -EINVAL; | 2622 | if (fail) |
2623 | return -EINVAL; | ||
2624 | |||
2625 | ftrace_graph_filter_enabled = 1; | ||
2626 | return 0; | ||
2640 | } | 2627 | } |
2641 | 2628 | ||
2642 | static ssize_t | 2629 | static ssize_t |
2643 | ftrace_graph_write(struct file *file, const char __user *ubuf, | 2630 | ftrace_graph_write(struct file *file, const char __user *ubuf, |
2644 | size_t cnt, loff_t *ppos) | 2631 | size_t cnt, loff_t *ppos) |
2645 | { | 2632 | { |
2646 | unsigned char buffer[FTRACE_BUFF_MAX+1]; | 2633 | struct trace_parser parser; |
2647 | unsigned long *array; | 2634 | ssize_t read, ret; |
2648 | size_t read = 0; | ||
2649 | ssize_t ret; | ||
2650 | int index = 0; | ||
2651 | char ch; | ||
2652 | 2635 | ||
2653 | if (!cnt || cnt < 0) | 2636 | if (!cnt) |
2654 | return 0; | 2637 | return 0; |
2655 | 2638 | ||
2656 | mutex_lock(&graph_lock); | 2639 | mutex_lock(&graph_lock); |
2657 | 2640 | ||
2658 | if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) { | 2641 | if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { |
2659 | ret = -EBUSY; | 2642 | ret = -ENOMEM; |
2660 | goto out; | 2643 | goto out_unlock; |
2661 | } | 2644 | } |
2662 | 2645 | ||
2663 | if (file->f_mode & FMODE_READ) { | 2646 | read = trace_get_user(&parser, ubuf, cnt, ppos); |
2664 | struct seq_file *m = file->private_data; | ||
2665 | array = m->private; | ||
2666 | } else | ||
2667 | array = file->private_data; | ||
2668 | 2647 | ||
2669 | ret = get_user(ch, ubuf++); | 2648 | if (read >= 0 && trace_parser_loaded((&parser))) { |
2670 | if (ret) | 2649 | parser.buffer[parser.idx] = 0; |
2671 | goto out; | ||
2672 | read++; | ||
2673 | cnt--; | ||
2674 | 2650 | ||
2675 | /* skip white space */ | 2651 | /* we allow only one expression at a time */ |
2676 | while (cnt && isspace(ch)) { | 2652 | ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, |
2677 | ret = get_user(ch, ubuf++); | 2653 | parser.buffer); |
2678 | if (ret) | 2654 | if (ret) |
2679 | goto out; | 2655 | goto out_free; |
2680 | read++; | ||
2681 | cnt--; | ||
2682 | } | ||
2683 | |||
2684 | if (isspace(ch)) { | ||
2685 | *ppos += read; | ||
2686 | ret = read; | ||
2687 | goto out; | ||
2688 | } | ||
2689 | |||
2690 | while (cnt && !isspace(ch)) { | ||
2691 | if (index < FTRACE_BUFF_MAX) | ||
2692 | buffer[index++] = ch; | ||
2693 | else { | ||
2694 | ret = -EINVAL; | ||
2695 | goto out; | ||
2696 | } | ||
2697 | ret = get_user(ch, ubuf++); | ||
2698 | if (ret) | ||
2699 | goto out; | ||
2700 | read++; | ||
2701 | cnt--; | ||
2702 | } | 2656 | } |
2703 | buffer[index] = 0; | ||
2704 | |||
2705 | /* we allow only one expression at a time */ | ||
2706 | ret = ftrace_set_func(array, &ftrace_graph_count, buffer); | ||
2707 | if (ret) | ||
2708 | goto out; | ||
2709 | |||
2710 | file->f_pos += read; | ||
2711 | 2657 | ||
2712 | ret = read; | 2658 | ret = read; |
2713 | out: | 2659 | |
2660 | out_free: | ||
2661 | trace_parser_put(&parser); | ||
2662 | out_unlock: | ||
2714 | mutex_unlock(&graph_lock); | 2663 | mutex_unlock(&graph_lock); |
2715 | 2664 | ||
2716 | return ret; | 2665 | return ret; |
2717 | } | 2666 | } |
2718 | 2667 | ||
2719 | static const struct file_operations ftrace_graph_fops = { | 2668 | static const struct file_operations ftrace_graph_fops = { |
2720 | .open = ftrace_graph_open, | 2669 | .open = ftrace_graph_open, |
2721 | .read = seq_read, | 2670 | .read = seq_read, |
2722 | .write = ftrace_graph_write, | 2671 | .write = ftrace_graph_write, |
2672 | .release = ftrace_graph_release, | ||
2673 | .llseek = seq_lseek, | ||
2723 | }; | 2674 | }; |
2724 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | 2675 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ |
2725 | 2676 | ||
@@ -2747,7 +2698,7 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) | |||
2747 | return 0; | 2698 | return 0; |
2748 | } | 2699 | } |
2749 | 2700 | ||
2750 | static int ftrace_convert_nops(struct module *mod, | 2701 | static int ftrace_process_locs(struct module *mod, |
2751 | unsigned long *start, | 2702 | unsigned long *start, |
2752 | unsigned long *end) | 2703 | unsigned long *end) |
2753 | { | 2704 | { |
@@ -2780,19 +2731,17 @@ static int ftrace_convert_nops(struct module *mod, | |||
2780 | } | 2731 | } |
2781 | 2732 | ||
2782 | #ifdef CONFIG_MODULES | 2733 | #ifdef CONFIG_MODULES |
2783 | void ftrace_release(void *start, void *end) | 2734 | void ftrace_release_mod(struct module *mod) |
2784 | { | 2735 | { |
2785 | struct dyn_ftrace *rec; | 2736 | struct dyn_ftrace *rec; |
2786 | struct ftrace_page *pg; | 2737 | struct ftrace_page *pg; |
2787 | unsigned long s = (unsigned long)start; | ||
2788 | unsigned long e = (unsigned long)end; | ||
2789 | 2738 | ||
2790 | if (ftrace_disabled || !start || start == end) | 2739 | if (ftrace_disabled) |
2791 | return; | 2740 | return; |
2792 | 2741 | ||
2793 | mutex_lock(&ftrace_lock); | 2742 | mutex_lock(&ftrace_lock); |
2794 | do_for_each_ftrace_rec(pg, rec) { | 2743 | do_for_each_ftrace_rec(pg, rec) { |
2795 | if ((rec->ip >= s) && (rec->ip < e)) { | 2744 | if (within_module_core(rec->ip, mod)) { |
2796 | /* | 2745 | /* |
2797 | * rec->ip is changed in ftrace_free_rec() | 2746 | * rec->ip is changed in ftrace_free_rec() |
2798 | * It should not between s and e if record was freed. | 2747 | * It should not between s and e if record was freed. |
@@ -2809,7 +2758,7 @@ static void ftrace_init_module(struct module *mod, | |||
2809 | { | 2758 | { |
2810 | if (ftrace_disabled || start == end) | 2759 | if (ftrace_disabled || start == end) |
2811 | return; | 2760 | return; |
2812 | ftrace_convert_nops(mod, start, end); | 2761 | ftrace_process_locs(mod, start, end); |
2813 | } | 2762 | } |
2814 | 2763 | ||
2815 | static int ftrace_module_notify(struct notifier_block *self, | 2764 | static int ftrace_module_notify(struct notifier_block *self, |
@@ -2824,9 +2773,7 @@ static int ftrace_module_notify(struct notifier_block *self, | |||
2824 | mod->num_ftrace_callsites); | 2773 | mod->num_ftrace_callsites); |
2825 | break; | 2774 | break; |
2826 | case MODULE_STATE_GOING: | 2775 | case MODULE_STATE_GOING: |
2827 | ftrace_release(mod->ftrace_callsites, | 2776 | ftrace_release_mod(mod); |
2828 | mod->ftrace_callsites + | ||
2829 | mod->num_ftrace_callsites); | ||
2830 | break; | 2777 | break; |
2831 | } | 2778 | } |
2832 | 2779 | ||
@@ -2872,7 +2819,7 @@ void __init ftrace_init(void) | |||
2872 | 2819 | ||
2873 | last_ftrace_enabled = ftrace_enabled = 1; | 2820 | last_ftrace_enabled = ftrace_enabled = 1; |
2874 | 2821 | ||
2875 | ret = ftrace_convert_nops(NULL, | 2822 | ret = ftrace_process_locs(NULL, |
2876 | __start_mcount_loc, | 2823 | __start_mcount_loc, |
2877 | __stop_mcount_loc); | 2824 | __stop_mcount_loc); |
2878 | 2825 | ||
@@ -2905,23 +2852,6 @@ static inline void ftrace_startup_enable(int command) { } | |||
2905 | # define ftrace_shutdown_sysctl() do { } while (0) | 2852 | # define ftrace_shutdown_sysctl() do { } while (0) |
2906 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 2853 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
2907 | 2854 | ||
2908 | static ssize_t | ||
2909 | ftrace_pid_read(struct file *file, char __user *ubuf, | ||
2910 | size_t cnt, loff_t *ppos) | ||
2911 | { | ||
2912 | char buf[64]; | ||
2913 | int r; | ||
2914 | |||
2915 | if (ftrace_pid_trace == ftrace_swapper_pid) | ||
2916 | r = sprintf(buf, "swapper tasks\n"); | ||
2917 | else if (ftrace_pid_trace) | ||
2918 | r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace)); | ||
2919 | else | ||
2920 | r = sprintf(buf, "no pid\n"); | ||
2921 | |||
2922 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | ||
2923 | } | ||
2924 | |||
2925 | static void clear_ftrace_swapper(void) | 2855 | static void clear_ftrace_swapper(void) |
2926 | { | 2856 | { |
2927 | struct task_struct *p; | 2857 | struct task_struct *p; |
@@ -2972,14 +2902,12 @@ static void set_ftrace_pid(struct pid *pid) | |||
2972 | rcu_read_unlock(); | 2902 | rcu_read_unlock(); |
2973 | } | 2903 | } |
2974 | 2904 | ||
2975 | static void clear_ftrace_pid_task(struct pid **pid) | 2905 | static void clear_ftrace_pid_task(struct pid *pid) |
2976 | { | 2906 | { |
2977 | if (*pid == ftrace_swapper_pid) | 2907 | if (pid == ftrace_swapper_pid) |
2978 | clear_ftrace_swapper(); | 2908 | clear_ftrace_swapper(); |
2979 | else | 2909 | else |
2980 | clear_ftrace_pid(*pid); | 2910 | clear_ftrace_pid(pid); |
2981 | |||
2982 | *pid = NULL; | ||
2983 | } | 2911 | } |
2984 | 2912 | ||
2985 | static void set_ftrace_pid_task(struct pid *pid) | 2913 | static void set_ftrace_pid_task(struct pid *pid) |
@@ -2990,74 +2918,184 @@ static void set_ftrace_pid_task(struct pid *pid) | |||
2990 | set_ftrace_pid(pid); | 2918 | set_ftrace_pid(pid); |
2991 | } | 2919 | } |
2992 | 2920 | ||
2993 | static ssize_t | 2921 | static int ftrace_pid_add(int p) |
2994 | ftrace_pid_write(struct file *filp, const char __user *ubuf, | ||
2995 | size_t cnt, loff_t *ppos) | ||
2996 | { | 2922 | { |
2997 | struct pid *pid; | 2923 | struct pid *pid; |
2998 | char buf[64]; | 2924 | struct ftrace_pid *fpid; |
2999 | long val; | 2925 | int ret = -EINVAL; |
3000 | int ret; | ||
3001 | 2926 | ||
3002 | if (cnt >= sizeof(buf)) | 2927 | mutex_lock(&ftrace_lock); |
3003 | return -EINVAL; | ||
3004 | 2928 | ||
3005 | if (copy_from_user(&buf, ubuf, cnt)) | 2929 | if (!p) |
3006 | return -EFAULT; | 2930 | pid = ftrace_swapper_pid; |
2931 | else | ||
2932 | pid = find_get_pid(p); | ||
3007 | 2933 | ||
3008 | buf[cnt] = 0; | 2934 | if (!pid) |
2935 | goto out; | ||
3009 | 2936 | ||
3010 | ret = strict_strtol(buf, 10, &val); | 2937 | ret = 0; |
3011 | if (ret < 0) | ||
3012 | return ret; | ||
3013 | 2938 | ||
3014 | mutex_lock(&ftrace_lock); | 2939 | list_for_each_entry(fpid, &ftrace_pids, list) |
3015 | if (val < 0) { | 2940 | if (fpid->pid == pid) |
3016 | /* disable pid tracing */ | 2941 | goto out_put; |
3017 | if (!ftrace_pid_trace) | ||
3018 | goto out; | ||
3019 | 2942 | ||
3020 | clear_ftrace_pid_task(&ftrace_pid_trace); | 2943 | ret = -ENOMEM; |
3021 | 2944 | ||
3022 | } else { | 2945 | fpid = kmalloc(sizeof(*fpid), GFP_KERNEL); |
3023 | /* swapper task is special */ | 2946 | if (!fpid) |
3024 | if (!val) { | 2947 | goto out_put; |
3025 | pid = ftrace_swapper_pid; | ||
3026 | if (pid == ftrace_pid_trace) | ||
3027 | goto out; | ||
3028 | } else { | ||
3029 | pid = find_get_pid(val); | ||
3030 | 2948 | ||
3031 | if (pid == ftrace_pid_trace) { | 2949 | list_add(&fpid->list, &ftrace_pids); |
3032 | put_pid(pid); | 2950 | fpid->pid = pid; |
3033 | goto out; | ||
3034 | } | ||
3035 | } | ||
3036 | 2951 | ||
3037 | if (ftrace_pid_trace) | 2952 | set_ftrace_pid_task(pid); |
3038 | clear_ftrace_pid_task(&ftrace_pid_trace); | ||
3039 | 2953 | ||
3040 | if (!pid) | 2954 | ftrace_update_pid_func(); |
3041 | goto out; | 2955 | ftrace_startup_enable(0); |
2956 | |||
2957 | mutex_unlock(&ftrace_lock); | ||
2958 | return 0; | ||
2959 | |||
2960 | out_put: | ||
2961 | if (pid != ftrace_swapper_pid) | ||
2962 | put_pid(pid); | ||
2963 | |||
2964 | out: | ||
2965 | mutex_unlock(&ftrace_lock); | ||
2966 | return ret; | ||
2967 | } | ||
2968 | |||
2969 | static void ftrace_pid_reset(void) | ||
2970 | { | ||
2971 | struct ftrace_pid *fpid, *safe; | ||
2972 | |||
2973 | mutex_lock(&ftrace_lock); | ||
2974 | list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) { | ||
2975 | struct pid *pid = fpid->pid; | ||
3042 | 2976 | ||
3043 | ftrace_pid_trace = pid; | 2977 | clear_ftrace_pid_task(pid); |
3044 | 2978 | ||
3045 | set_ftrace_pid_task(ftrace_pid_trace); | 2979 | list_del(&fpid->list); |
2980 | kfree(fpid); | ||
3046 | } | 2981 | } |
3047 | 2982 | ||
3048 | /* update the function call */ | ||
3049 | ftrace_update_pid_func(); | 2983 | ftrace_update_pid_func(); |
3050 | ftrace_startup_enable(0); | 2984 | ftrace_startup_enable(0); |
3051 | 2985 | ||
3052 | out: | ||
3053 | mutex_unlock(&ftrace_lock); | 2986 | mutex_unlock(&ftrace_lock); |
2987 | } | ||
3054 | 2988 | ||
3055 | return cnt; | 2989 | static void *fpid_start(struct seq_file *m, loff_t *pos) |
2990 | { | ||
2991 | mutex_lock(&ftrace_lock); | ||
2992 | |||
2993 | if (list_empty(&ftrace_pids) && (!*pos)) | ||
2994 | return (void *) 1; | ||
2995 | |||
2996 | return seq_list_start(&ftrace_pids, *pos); | ||
2997 | } | ||
2998 | |||
2999 | static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) | ||
3000 | { | ||
3001 | if (v == (void *)1) | ||
3002 | return NULL; | ||
3003 | |||
3004 | return seq_list_next(v, &ftrace_pids, pos); | ||
3005 | } | ||
3006 | |||
3007 | static void fpid_stop(struct seq_file *m, void *p) | ||
3008 | { | ||
3009 | mutex_unlock(&ftrace_lock); | ||
3010 | } | ||
3011 | |||
3012 | static int fpid_show(struct seq_file *m, void *v) | ||
3013 | { | ||
3014 | const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list); | ||
3015 | |||
3016 | if (v == (void *)1) { | ||
3017 | seq_printf(m, "no pid\n"); | ||
3018 | return 0; | ||
3019 | } | ||
3020 | |||
3021 | if (fpid->pid == ftrace_swapper_pid) | ||
3022 | seq_printf(m, "swapper tasks\n"); | ||
3023 | else | ||
3024 | seq_printf(m, "%u\n", pid_vnr(fpid->pid)); | ||
3025 | |||
3026 | return 0; | ||
3027 | } | ||
3028 | |||
3029 | static const struct seq_operations ftrace_pid_sops = { | ||
3030 | .start = fpid_start, | ||
3031 | .next = fpid_next, | ||
3032 | .stop = fpid_stop, | ||
3033 | .show = fpid_show, | ||
3034 | }; | ||
3035 | |||
3036 | static int | ||
3037 | ftrace_pid_open(struct inode *inode, struct file *file) | ||
3038 | { | ||
3039 | int ret = 0; | ||
3040 | |||
3041 | if ((file->f_mode & FMODE_WRITE) && | ||
3042 | (file->f_flags & O_TRUNC)) | ||
3043 | ftrace_pid_reset(); | ||
3044 | |||
3045 | if (file->f_mode & FMODE_READ) | ||
3046 | ret = seq_open(file, &ftrace_pid_sops); | ||
3047 | |||
3048 | return ret; | ||
3049 | } | ||
3050 | |||
3051 | static ssize_t | ||
3052 | ftrace_pid_write(struct file *filp, const char __user *ubuf, | ||
3053 | size_t cnt, loff_t *ppos) | ||
3054 | { | ||
3055 | char buf[64], *tmp; | ||
3056 | long val; | ||
3057 | int ret; | ||
3058 | |||
3059 | if (cnt >= sizeof(buf)) | ||
3060 | return -EINVAL; | ||
3061 | |||
3062 | if (copy_from_user(&buf, ubuf, cnt)) | ||
3063 | return -EFAULT; | ||
3064 | |||
3065 | buf[cnt] = 0; | ||
3066 | |||
3067 | /* | ||
3068 | * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid" | ||
3069 | * to clean the filter quietly. | ||
3070 | */ | ||
3071 | tmp = strstrip(buf); | ||
3072 | if (strlen(tmp) == 0) | ||
3073 | return 1; | ||
3074 | |||
3075 | ret = strict_strtol(tmp, 10, &val); | ||
3076 | if (ret < 0) | ||
3077 | return ret; | ||
3078 | |||
3079 | ret = ftrace_pid_add(val); | ||
3080 | |||
3081 | return ret ? ret : cnt; | ||
3082 | } | ||
3083 | |||
3084 | static int | ||
3085 | ftrace_pid_release(struct inode *inode, struct file *file) | ||
3086 | { | ||
3087 | if (file->f_mode & FMODE_READ) | ||
3088 | seq_release(inode, file); | ||
3089 | |||
3090 | return 0; | ||
3056 | } | 3091 | } |
3057 | 3092 | ||
3058 | static const struct file_operations ftrace_pid_fops = { | 3093 | static const struct file_operations ftrace_pid_fops = { |
3059 | .read = ftrace_pid_read, | 3094 | .open = ftrace_pid_open, |
3060 | .write = ftrace_pid_write, | 3095 | .write = ftrace_pid_write, |
3096 | .read = seq_read, | ||
3097 | .llseek = seq_lseek, | ||
3098 | .release = ftrace_pid_release, | ||
3061 | }; | 3099 | }; |
3062 | 3100 | ||
3063 | static __init int ftrace_init_debugfs(void) | 3101 | static __init int ftrace_init_debugfs(void) |
@@ -3140,7 +3178,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) | |||
3140 | 3178 | ||
3141 | int | 3179 | int |
3142 | ftrace_enable_sysctl(struct ctl_table *table, int write, | 3180 | ftrace_enable_sysctl(struct ctl_table *table, int write, |
3143 | struct file *file, void __user *buffer, size_t *lenp, | 3181 | void __user *buffer, size_t *lenp, |
3144 | loff_t *ppos) | 3182 | loff_t *ppos) |
3145 | { | 3183 | { |
3146 | int ret; | 3184 | int ret; |
@@ -3150,12 +3188,12 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, | |||
3150 | 3188 | ||
3151 | mutex_lock(&ftrace_lock); | 3189 | mutex_lock(&ftrace_lock); |
3152 | 3190 | ||
3153 | ret = proc_dointvec(table, write, file, buffer, lenp, ppos); | 3191 | ret = proc_dointvec(table, write, buffer, lenp, ppos); |
3154 | 3192 | ||
3155 | if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) | 3193 | if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) |
3156 | goto out; | 3194 | goto out; |
3157 | 3195 | ||
3158 | last_ftrace_enabled = ftrace_enabled; | 3196 | last_ftrace_enabled = !!ftrace_enabled; |
3159 | 3197 | ||
3160 | if (ftrace_enabled) { | 3198 | if (ftrace_enabled) { |
3161 | 3199 | ||
@@ -3243,8 +3281,8 @@ free: | |||
3243 | } | 3281 | } |
3244 | 3282 | ||
3245 | static void | 3283 | static void |
3246 | ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev, | 3284 | ftrace_graph_probe_sched_switch(void *ignore, |
3247 | struct task_struct *next) | 3285 | struct task_struct *prev, struct task_struct *next) |
3248 | { | 3286 | { |
3249 | unsigned long long timestamp; | 3287 | unsigned long long timestamp; |
3250 | int index; | 3288 | int index; |
@@ -3298,7 +3336,7 @@ static int start_graph_tracing(void) | |||
3298 | } while (ret == -EAGAIN); | 3336 | } while (ret == -EAGAIN); |
3299 | 3337 | ||
3300 | if (!ret) { | 3338 | if (!ret) { |
3301 | ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch); | 3339 | ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); |
3302 | if (ret) | 3340 | if (ret) |
3303 | pr_info("ftrace_graph: Couldn't activate tracepoint" | 3341 | pr_info("ftrace_graph: Couldn't activate tracepoint" |
3304 | " probe to kernel_sched_switch\n"); | 3342 | " probe to kernel_sched_switch\n"); |
@@ -3370,11 +3408,11 @@ void unregister_ftrace_graph(void) | |||
3370 | goto out; | 3408 | goto out; |
3371 | 3409 | ||
3372 | ftrace_graph_active--; | 3410 | ftrace_graph_active--; |
3373 | unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); | ||
3374 | ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; | 3411 | ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; |
3375 | ftrace_graph_entry = ftrace_graph_entry_stub; | 3412 | ftrace_graph_entry = ftrace_graph_entry_stub; |
3376 | ftrace_shutdown(FTRACE_STOP_FUNC_RET); | 3413 | ftrace_shutdown(FTRACE_STOP_FUNC_RET); |
3377 | unregister_pm_notifier(&ftrace_suspend_notifier); | 3414 | unregister_pm_notifier(&ftrace_suspend_notifier); |
3415 | unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); | ||
3378 | 3416 | ||
3379 | out: | 3417 | out: |
3380 | mutex_unlock(&ftrace_lock); | 3418 | mutex_unlock(&ftrace_lock); |
@@ -3385,6 +3423,7 @@ void ftrace_graph_init_task(struct task_struct *t) | |||
3385 | { | 3423 | { |
3386 | /* Make sure we do not use the parent ret_stack */ | 3424 | /* Make sure we do not use the parent ret_stack */ |
3387 | t->ret_stack = NULL; | 3425 | t->ret_stack = NULL; |
3426 | t->curr_ret_stack = -1; | ||
3388 | 3427 | ||
3389 | if (ftrace_graph_active) { | 3428 | if (ftrace_graph_active) { |
3390 | struct ftrace_ret_stack *ret_stack; | 3429 | struct ftrace_ret_stack *ret_stack; |
@@ -3394,7 +3433,6 @@ void ftrace_graph_init_task(struct task_struct *t) | |||
3394 | GFP_KERNEL); | 3433 | GFP_KERNEL); |
3395 | if (!ret_stack) | 3434 | if (!ret_stack) |
3396 | return; | 3435 | return; |
3397 | t->curr_ret_stack = -1; | ||
3398 | atomic_set(&t->tracing_graph_pause, 0); | 3436 | atomic_set(&t->tracing_graph_pause, 0); |
3399 | atomic_set(&t->trace_overrun, 0); | 3437 | atomic_set(&t->trace_overrun, 0); |
3400 | t->ftrace_timestamp = 0; | 3438 | t->ftrace_timestamp = 0; |
@@ -3420,4 +3458,3 @@ void ftrace_graph_stop(void) | |||
3420 | ftrace_stop(); | 3458 | ftrace_stop(); |
3421 | } | 3459 | } |
3422 | #endif | 3460 | #endif |
3423 | |||
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c deleted file mode 100644 index 1edaa9516e81..000000000000 --- a/kernel/trace/kmemtrace.c +++ /dev/null | |||
@@ -1,468 +0,0 @@ | |||
1 | /* | ||
2 | * Memory allocator tracing | ||
3 | * | ||
4 | * Copyright (C) 2008 Eduard - Gabriel Munteanu | ||
5 | * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi> | ||
6 | * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> | ||
7 | */ | ||
8 | |||
9 | #include <linux/tracepoint.h> | ||
10 | #include <linux/seq_file.h> | ||
11 | #include <linux/debugfs.h> | ||
12 | #include <linux/dcache.h> | ||
13 | #include <linux/fs.h> | ||
14 | |||
15 | #include <linux/kmemtrace.h> | ||
16 | |||
17 | #include "trace_output.h" | ||
18 | #include "trace.h" | ||
19 | |||
20 | /* Select an alternative, minimalistic output than the original one */ | ||
21 | #define TRACE_KMEM_OPT_MINIMAL 0x1 | ||
22 | |||
23 | static struct tracer_opt kmem_opts[] = { | ||
24 | /* Default disable the minimalistic output */ | ||
25 | { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) }, | ||
26 | { } | ||
27 | }; | ||
28 | |||
29 | static struct tracer_flags kmem_tracer_flags = { | ||
30 | .val = 0, | ||
31 | .opts = kmem_opts | ||
32 | }; | ||
33 | |||
34 | static struct trace_array *kmemtrace_array; | ||
35 | |||
36 | /* Trace allocations */ | ||
37 | static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id, | ||
38 | unsigned long call_site, | ||
39 | const void *ptr, | ||
40 | size_t bytes_req, | ||
41 | size_t bytes_alloc, | ||
42 | gfp_t gfp_flags, | ||
43 | int node) | ||
44 | { | ||
45 | struct ftrace_event_call *call = &event_kmem_alloc; | ||
46 | struct trace_array *tr = kmemtrace_array; | ||
47 | struct kmemtrace_alloc_entry *entry; | ||
48 | struct ring_buffer_event *event; | ||
49 | |||
50 | event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); | ||
51 | if (!event) | ||
52 | return; | ||
53 | |||
54 | entry = ring_buffer_event_data(event); | ||
55 | tracing_generic_entry_update(&entry->ent, 0, 0); | ||
56 | |||
57 | entry->ent.type = TRACE_KMEM_ALLOC; | ||
58 | entry->type_id = type_id; | ||
59 | entry->call_site = call_site; | ||
60 | entry->ptr = ptr; | ||
61 | entry->bytes_req = bytes_req; | ||
62 | entry->bytes_alloc = bytes_alloc; | ||
63 | entry->gfp_flags = gfp_flags; | ||
64 | entry->node = node; | ||
65 | |||
66 | if (!filter_check_discard(call, entry, tr->buffer, event)) | ||
67 | ring_buffer_unlock_commit(tr->buffer, event); | ||
68 | |||
69 | trace_wake_up(); | ||
70 | } | ||
71 | |||
72 | static inline void kmemtrace_free(enum kmemtrace_type_id type_id, | ||
73 | unsigned long call_site, | ||
74 | const void *ptr) | ||
75 | { | ||
76 | struct ftrace_event_call *call = &event_kmem_free; | ||
77 | struct trace_array *tr = kmemtrace_array; | ||
78 | struct kmemtrace_free_entry *entry; | ||
79 | struct ring_buffer_event *event; | ||
80 | |||
81 | event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); | ||
82 | if (!event) | ||
83 | return; | ||
84 | entry = ring_buffer_event_data(event); | ||
85 | tracing_generic_entry_update(&entry->ent, 0, 0); | ||
86 | |||
87 | entry->ent.type = TRACE_KMEM_FREE; | ||
88 | entry->type_id = type_id; | ||
89 | entry->call_site = call_site; | ||
90 | entry->ptr = ptr; | ||
91 | |||
92 | if (!filter_check_discard(call, entry, tr->buffer, event)) | ||
93 | ring_buffer_unlock_commit(tr->buffer, event); | ||
94 | |||
95 | trace_wake_up(); | ||
96 | } | ||
97 | |||
98 | static void kmemtrace_kmalloc(unsigned long call_site, | ||
99 | const void *ptr, | ||
100 | size_t bytes_req, | ||
101 | size_t bytes_alloc, | ||
102 | gfp_t gfp_flags) | ||
103 | { | ||
104 | kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr, | ||
105 | bytes_req, bytes_alloc, gfp_flags, -1); | ||
106 | } | ||
107 | |||
108 | static void kmemtrace_kmem_cache_alloc(unsigned long call_site, | ||
109 | const void *ptr, | ||
110 | size_t bytes_req, | ||
111 | size_t bytes_alloc, | ||
112 | gfp_t gfp_flags) | ||
113 | { | ||
114 | kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr, | ||
115 | bytes_req, bytes_alloc, gfp_flags, -1); | ||
116 | } | ||
117 | |||
118 | static void kmemtrace_kmalloc_node(unsigned long call_site, | ||
119 | const void *ptr, | ||
120 | size_t bytes_req, | ||
121 | size_t bytes_alloc, | ||
122 | gfp_t gfp_flags, | ||
123 | int node) | ||
124 | { | ||
125 | kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr, | ||
126 | bytes_req, bytes_alloc, gfp_flags, node); | ||
127 | } | ||
128 | |||
129 | static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site, | ||
130 | const void *ptr, | ||
131 | size_t bytes_req, | ||
132 | size_t bytes_alloc, | ||
133 | gfp_t gfp_flags, | ||
134 | int node) | ||
135 | { | ||
136 | kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr, | ||
137 | bytes_req, bytes_alloc, gfp_flags, node); | ||
138 | } | ||
139 | |||
140 | static void kmemtrace_kfree(unsigned long call_site, const void *ptr) | ||
141 | { | ||
142 | kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr); | ||
143 | } | ||
144 | |||
145 | static void kmemtrace_kmem_cache_free(unsigned long call_site, const void *ptr) | ||
146 | { | ||
147 | kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr); | ||
148 | } | ||
149 | |||
150 | static int kmemtrace_start_probes(void) | ||
151 | { | ||
152 | int err; | ||
153 | |||
154 | err = register_trace_kmalloc(kmemtrace_kmalloc); | ||
155 | if (err) | ||
156 | return err; | ||
157 | err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc); | ||
158 | if (err) | ||
159 | return err; | ||
160 | err = register_trace_kmalloc_node(kmemtrace_kmalloc_node); | ||
161 | if (err) | ||
162 | return err; | ||
163 | err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node); | ||
164 | if (err) | ||
165 | return err; | ||
166 | err = register_trace_kfree(kmemtrace_kfree); | ||
167 | if (err) | ||
168 | return err; | ||
169 | err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free); | ||
170 | |||
171 | return err; | ||
172 | } | ||
173 | |||
174 | static void kmemtrace_stop_probes(void) | ||
175 | { | ||
176 | unregister_trace_kmalloc(kmemtrace_kmalloc); | ||
177 | unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc); | ||
178 | unregister_trace_kmalloc_node(kmemtrace_kmalloc_node); | ||
179 | unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node); | ||
180 | unregister_trace_kfree(kmemtrace_kfree); | ||
181 | unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free); | ||
182 | } | ||
183 | |||
184 | static int kmem_trace_init(struct trace_array *tr) | ||
185 | { | ||
186 | int cpu; | ||
187 | kmemtrace_array = tr; | ||
188 | |||
189 | for_each_cpu(cpu, cpu_possible_mask) | ||
190 | tracing_reset(tr, cpu); | ||
191 | |||
192 | kmemtrace_start_probes(); | ||
193 | |||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | static void kmem_trace_reset(struct trace_array *tr) | ||
198 | { | ||
199 | kmemtrace_stop_probes(); | ||
200 | } | ||
201 | |||
202 | static void kmemtrace_headers(struct seq_file *s) | ||
203 | { | ||
204 | /* Don't need headers for the original kmemtrace output */ | ||
205 | if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)) | ||
206 | return; | ||
207 | |||
208 | seq_printf(s, "#\n"); | ||
209 | seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS " | ||
210 | " POINTER NODE CALLER\n"); | ||
211 | seq_printf(s, "# FREE | | | | " | ||
212 | " | | | |\n"); | ||
213 | seq_printf(s, "# |\n\n"); | ||
214 | } | ||
215 | |||
216 | /* | ||
217 | * The following functions give the original output from kmemtrace, | ||
218 | * plus the origin CPU, since reordering occurs in-kernel now. | ||
219 | */ | ||
220 | |||
221 | #define KMEMTRACE_USER_ALLOC 0 | ||
222 | #define KMEMTRACE_USER_FREE 1 | ||
223 | |||
224 | struct kmemtrace_user_event { | ||
225 | u8 event_id; | ||
226 | u8 type_id; | ||
227 | u16 event_size; | ||
228 | u32 cpu; | ||
229 | u64 timestamp; | ||
230 | unsigned long call_site; | ||
231 | unsigned long ptr; | ||
232 | }; | ||
233 | |||
234 | struct kmemtrace_user_event_alloc { | ||
235 | size_t bytes_req; | ||
236 | size_t bytes_alloc; | ||
237 | unsigned gfp_flags; | ||
238 | int node; | ||
239 | }; | ||
240 | |||
241 | static enum print_line_t | ||
242 | kmemtrace_print_alloc_user(struct trace_iterator *iter, | ||
243 | struct kmemtrace_alloc_entry *entry) | ||
244 | { | ||
245 | struct kmemtrace_user_event_alloc *ev_alloc; | ||
246 | struct trace_seq *s = &iter->seq; | ||
247 | struct kmemtrace_user_event *ev; | ||
248 | |||
249 | ev = trace_seq_reserve(s, sizeof(*ev)); | ||
250 | if (!ev) | ||
251 | return TRACE_TYPE_PARTIAL_LINE; | ||
252 | |||
253 | ev->event_id = KMEMTRACE_USER_ALLOC; | ||
254 | ev->type_id = entry->type_id; | ||
255 | ev->event_size = sizeof(*ev) + sizeof(*ev_alloc); | ||
256 | ev->cpu = iter->cpu; | ||
257 | ev->timestamp = iter->ts; | ||
258 | ev->call_site = entry->call_site; | ||
259 | ev->ptr = (unsigned long)entry->ptr; | ||
260 | |||
261 | ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc)); | ||
262 | if (!ev_alloc) | ||
263 | return TRACE_TYPE_PARTIAL_LINE; | ||
264 | |||
265 | ev_alloc->bytes_req = entry->bytes_req; | ||
266 | ev_alloc->bytes_alloc = entry->bytes_alloc; | ||
267 | ev_alloc->gfp_flags = entry->gfp_flags; | ||
268 | ev_alloc->node = entry->node; | ||
269 | |||
270 | return TRACE_TYPE_HANDLED; | ||
271 | } | ||
272 | |||
273 | static enum print_line_t | ||
274 | kmemtrace_print_free_user(struct trace_iterator *iter, | ||
275 | struct kmemtrace_free_entry *entry) | ||
276 | { | ||
277 | struct trace_seq *s = &iter->seq; | ||
278 | struct kmemtrace_user_event *ev; | ||
279 | |||
280 | ev = trace_seq_reserve(s, sizeof(*ev)); | ||
281 | if (!ev) | ||
282 | return TRACE_TYPE_PARTIAL_LINE; | ||
283 | |||
284 | ev->event_id = KMEMTRACE_USER_FREE; | ||
285 | ev->type_id = entry->type_id; | ||
286 | ev->event_size = sizeof(*ev); | ||
287 | ev->cpu = iter->cpu; | ||
288 | ev->timestamp = iter->ts; | ||
289 | ev->call_site = entry->call_site; | ||
290 | ev->ptr = (unsigned long)entry->ptr; | ||
291 | |||
292 | return TRACE_TYPE_HANDLED; | ||
293 | } | ||
294 | |||
295 | /* The two other following provide a more minimalistic output */ | ||
296 | static enum print_line_t | ||
297 | kmemtrace_print_alloc_compress(struct trace_iterator *iter, | ||
298 | struct kmemtrace_alloc_entry *entry) | ||
299 | { | ||
300 | struct trace_seq *s = &iter->seq; | ||
301 | int ret; | ||
302 | |||
303 | /* Alloc entry */ | ||
304 | ret = trace_seq_printf(s, " + "); | ||
305 | if (!ret) | ||
306 | return TRACE_TYPE_PARTIAL_LINE; | ||
307 | |||
308 | /* Type */ | ||
309 | switch (entry->type_id) { | ||
310 | case KMEMTRACE_TYPE_KMALLOC: | ||
311 | ret = trace_seq_printf(s, "K "); | ||
312 | break; | ||
313 | case KMEMTRACE_TYPE_CACHE: | ||
314 | ret = trace_seq_printf(s, "C "); | ||
315 | break; | ||
316 | case KMEMTRACE_TYPE_PAGES: | ||
317 | ret = trace_seq_printf(s, "P "); | ||
318 | break; | ||
319 | default: | ||
320 | ret = trace_seq_printf(s, "? "); | ||
321 | } | ||
322 | |||
323 | if (!ret) | ||
324 | return TRACE_TYPE_PARTIAL_LINE; | ||
325 | |||
326 | /* Requested */ | ||
327 | ret = trace_seq_printf(s, "%4zu ", entry->bytes_req); | ||
328 | if (!ret) | ||
329 | return TRACE_TYPE_PARTIAL_LINE; | ||
330 | |||
331 | /* Allocated */ | ||
332 | ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc); | ||
333 | if (!ret) | ||
334 | return TRACE_TYPE_PARTIAL_LINE; | ||
335 | |||
336 | /* Flags | ||
337 | * TODO: would be better to see the name of the GFP flag names | ||
338 | */ | ||
339 | ret = trace_seq_printf(s, "%08x ", entry->gfp_flags); | ||
340 | if (!ret) | ||
341 | return TRACE_TYPE_PARTIAL_LINE; | ||
342 | |||
343 | /* Pointer to allocated */ | ||
344 | ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr); | ||
345 | if (!ret) | ||
346 | return TRACE_TYPE_PARTIAL_LINE; | ||
347 | |||
348 | /* Node */ | ||
349 | ret = trace_seq_printf(s, "%4d ", entry->node); | ||
350 | if (!ret) | ||
351 | return TRACE_TYPE_PARTIAL_LINE; | ||
352 | |||
353 | /* Call site */ | ||
354 | ret = seq_print_ip_sym(s, entry->call_site, 0); | ||
355 | if (!ret) | ||
356 | return TRACE_TYPE_PARTIAL_LINE; | ||
357 | |||
358 | if (!trace_seq_printf(s, "\n")) | ||
359 | return TRACE_TYPE_PARTIAL_LINE; | ||
360 | |||
361 | return TRACE_TYPE_HANDLED; | ||
362 | } | ||
363 | |||
364 | static enum print_line_t | ||
365 | kmemtrace_print_free_compress(struct trace_iterator *iter, | ||
366 | struct kmemtrace_free_entry *entry) | ||
367 | { | ||
368 | struct trace_seq *s = &iter->seq; | ||
369 | int ret; | ||
370 | |||
371 | /* Free entry */ | ||
372 | ret = trace_seq_printf(s, " - "); | ||
373 | if (!ret) | ||
374 | return TRACE_TYPE_PARTIAL_LINE; | ||
375 | |||
376 | /* Type */ | ||
377 | switch (entry->type_id) { | ||
378 | case KMEMTRACE_TYPE_KMALLOC: | ||
379 | ret = trace_seq_printf(s, "K "); | ||
380 | break; | ||
381 | case KMEMTRACE_TYPE_CACHE: | ||
382 | ret = trace_seq_printf(s, "C "); | ||
383 | break; | ||
384 | case KMEMTRACE_TYPE_PAGES: | ||
385 | ret = trace_seq_printf(s, "P "); | ||
386 | break; | ||
387 | default: | ||
388 | ret = trace_seq_printf(s, "? "); | ||
389 | } | ||
390 | |||
391 | if (!ret) | ||
392 | return TRACE_TYPE_PARTIAL_LINE; | ||
393 | |||
394 | /* Skip requested/allocated/flags */ | ||
395 | ret = trace_seq_printf(s, " "); | ||
396 | if (!ret) | ||
397 | return TRACE_TYPE_PARTIAL_LINE; | ||
398 | |||
399 | /* Pointer to allocated */ | ||
400 | ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr); | ||
401 | if (!ret) | ||
402 | return TRACE_TYPE_PARTIAL_LINE; | ||
403 | |||
404 | /* Skip node */ | ||
405 | ret = trace_seq_printf(s, " "); | ||
406 | if (!ret) | ||
407 | return TRACE_TYPE_PARTIAL_LINE; | ||
408 | |||
409 | /* Call site */ | ||
410 | ret = seq_print_ip_sym(s, entry->call_site, 0); | ||
411 | if (!ret) | ||
412 | return TRACE_TYPE_PARTIAL_LINE; | ||
413 | |||
414 | if (!trace_seq_printf(s, "\n")) | ||
415 | return TRACE_TYPE_PARTIAL_LINE; | ||
416 | |||
417 | return TRACE_TYPE_HANDLED; | ||
418 | } | ||
419 | |||
420 | static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter) | ||
421 | { | ||
422 | struct trace_entry *entry = iter->ent; | ||
423 | |||
424 | switch (entry->type) { | ||
425 | case TRACE_KMEM_ALLOC: { | ||
426 | struct kmemtrace_alloc_entry *field; | ||
427 | |||
428 | trace_assign_type(field, entry); | ||
429 | if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) | ||
430 | return kmemtrace_print_alloc_compress(iter, field); | ||
431 | else | ||
432 | return kmemtrace_print_alloc_user(iter, field); | ||
433 | } | ||
434 | |||
435 | case TRACE_KMEM_FREE: { | ||
436 | struct kmemtrace_free_entry *field; | ||
437 | |||
438 | trace_assign_type(field, entry); | ||
439 | if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) | ||
440 | return kmemtrace_print_free_compress(iter, field); | ||
441 | else | ||
442 | return kmemtrace_print_free_user(iter, field); | ||
443 | } | ||
444 | |||
445 | default: | ||
446 | return TRACE_TYPE_UNHANDLED; | ||
447 | } | ||
448 | } | ||
449 | |||
450 | static struct tracer kmem_tracer __read_mostly = { | ||
451 | .name = "kmemtrace", | ||
452 | .init = kmem_trace_init, | ||
453 | .reset = kmem_trace_reset, | ||
454 | .print_line = kmemtrace_print_line, | ||
455 | .print_header = kmemtrace_headers, | ||
456 | .flags = &kmem_tracer_flags | ||
457 | }; | ||
458 | |||
459 | void kmemtrace_init(void) | ||
460 | { | ||
461 | /* earliest opportunity to start kmem tracing */ | ||
462 | } | ||
463 | |||
464 | static int __init init_kmem_tracer(void) | ||
465 | { | ||
466 | return register_tracer(&kmem_tracer); | ||
467 | } | ||
468 | device_initcall(init_kmem_tracer); | ||
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c new file mode 100644 index 000000000000..f55fcf61b223 --- /dev/null +++ b/kernel/trace/power-traces.c | |||
@@ -0,0 +1,20 @@ | |||
1 | /* | ||
2 | * Power trace points | ||
3 | * | ||
4 | * Copyright (C) 2009 Arjan van de Ven <arjan@linux.intel.com> | ||
5 | */ | ||
6 | |||
7 | #include <linux/string.h> | ||
8 | #include <linux/types.h> | ||
9 | #include <linux/workqueue.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/module.h> | ||
12 | |||
13 | #define CREATE_TRACE_POINTS | ||
14 | #include <trace/events/power.h> | ||
15 | |||
16 | #ifdef EVENT_POWER_TRACING_DEPRECATED | ||
17 | EXPORT_TRACEPOINT_SYMBOL_GPL(power_start); | ||
18 | #endif | ||
19 | EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); | ||
20 | |||
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 04dac2638258..bd1c35a4fbcc 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -14,12 +14,14 @@ | |||
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/percpu.h> | 15 | #include <linux/percpu.h> |
16 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
17 | #include <linux/slab.h> | ||
17 | #include <linux/init.h> | 18 | #include <linux/init.h> |
18 | #include <linux/hash.h> | 19 | #include <linux/hash.h> |
19 | #include <linux/list.h> | 20 | #include <linux/list.h> |
20 | #include <linux/cpu.h> | 21 | #include <linux/cpu.h> |
21 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
22 | 23 | ||
24 | #include <asm/local.h> | ||
23 | #include "trace.h" | 25 | #include "trace.h" |
24 | 26 | ||
25 | /* | 27 | /* |
@@ -201,13 +203,19 @@ int tracing_is_on(void) | |||
201 | } | 203 | } |
202 | EXPORT_SYMBOL_GPL(tracing_is_on); | 204 | EXPORT_SYMBOL_GPL(tracing_is_on); |
203 | 205 | ||
204 | #include "trace.h" | ||
205 | |||
206 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) | 206 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) |
207 | #define RB_ALIGNMENT 4U | 207 | #define RB_ALIGNMENT 4U |
208 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 208 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
209 | #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ | 209 | #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ |
210 | 210 | ||
211 | #if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) | ||
212 | # define RB_FORCE_8BYTE_ALIGNMENT 0 | ||
213 | # define RB_ARCH_ALIGNMENT RB_ALIGNMENT | ||
214 | #else | ||
215 | # define RB_FORCE_8BYTE_ALIGNMENT 1 | ||
216 | # define RB_ARCH_ALIGNMENT 8U | ||
217 | #endif | ||
218 | |||
211 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ | 219 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ |
212 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX | 220 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX |
213 | 221 | ||
@@ -216,19 +224,17 @@ enum { | |||
216 | RB_LEN_TIME_STAMP = 16, | 224 | RB_LEN_TIME_STAMP = 16, |
217 | }; | 225 | }; |
218 | 226 | ||
219 | static inline int rb_null_event(struct ring_buffer_event *event) | 227 | #define skip_time_extend(event) \ |
220 | { | 228 | ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) |
221 | return event->type_len == RINGBUF_TYPE_PADDING | ||
222 | && event->time_delta == 0; | ||
223 | } | ||
224 | 229 | ||
225 | static inline int rb_discarded_event(struct ring_buffer_event *event) | 230 | static inline int rb_null_event(struct ring_buffer_event *event) |
226 | { | 231 | { |
227 | return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta; | 232 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; |
228 | } | 233 | } |
229 | 234 | ||
230 | static void rb_event_set_padding(struct ring_buffer_event *event) | 235 | static void rb_event_set_padding(struct ring_buffer_event *event) |
231 | { | 236 | { |
237 | /* padding has a NULL time_delta */ | ||
232 | event->type_len = RINGBUF_TYPE_PADDING; | 238 | event->type_len = RINGBUF_TYPE_PADDING; |
233 | event->time_delta = 0; | 239 | event->time_delta = 0; |
234 | } | 240 | } |
@@ -245,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event) | |||
245 | return length + RB_EVNT_HDR_SIZE; | 251 | return length + RB_EVNT_HDR_SIZE; |
246 | } | 252 | } |
247 | 253 | ||
248 | /* inline for ring buffer fast paths */ | 254 | /* |
249 | static unsigned | 255 | * Return the length of the given event. Will return |
256 | * the length of the time extend if the event is a | ||
257 | * time extend. | ||
258 | */ | ||
259 | static inline unsigned | ||
250 | rb_event_length(struct ring_buffer_event *event) | 260 | rb_event_length(struct ring_buffer_event *event) |
251 | { | 261 | { |
252 | switch (event->type_len) { | 262 | switch (event->type_len) { |
@@ -271,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event) | |||
271 | return 0; | 281 | return 0; |
272 | } | 282 | } |
273 | 283 | ||
284 | /* | ||
285 | * Return total length of time extend and data, | ||
286 | * or just the event length for all other events. | ||
287 | */ | ||
288 | static inline unsigned | ||
289 | rb_event_ts_length(struct ring_buffer_event *event) | ||
290 | { | ||
291 | unsigned len = 0; | ||
292 | |||
293 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | ||
294 | /* time extends include the data event after it */ | ||
295 | len = RB_LEN_TIME_EXTEND; | ||
296 | event = skip_time_extend(event); | ||
297 | } | ||
298 | return len + rb_event_length(event); | ||
299 | } | ||
300 | |||
274 | /** | 301 | /** |
275 | * ring_buffer_event_length - return the length of the event | 302 | * ring_buffer_event_length - return the length of the event |
276 | * @event: the event to get the length of | 303 | * @event: the event to get the length of |
304 | * | ||
305 | * Returns the size of the data load of a data event. | ||
306 | * If the event is something other than a data event, it | ||
307 | * returns the size of the event itself. With the exception | ||
308 | * of a TIME EXTEND, where it still returns the size of the | ||
309 | * data load of the data event after it. | ||
277 | */ | 310 | */ |
278 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) | 311 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) |
279 | { | 312 | { |
280 | unsigned length = rb_event_length(event); | 313 | unsigned length; |
314 | |||
315 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
316 | event = skip_time_extend(event); | ||
317 | |||
318 | length = rb_event_length(event); | ||
281 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 319 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
282 | return length; | 320 | return length; |
283 | length -= RB_EVNT_HDR_SIZE; | 321 | length -= RB_EVNT_HDR_SIZE; |
@@ -291,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length); | |||
291 | static void * | 329 | static void * |
292 | rb_event_data(struct ring_buffer_event *event) | 330 | rb_event_data(struct ring_buffer_event *event) |
293 | { | 331 | { |
332 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
333 | event = skip_time_extend(event); | ||
294 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); | 334 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); |
295 | /* If length is in len field, then array[0] has the data */ | 335 | /* If length is in len field, then array[0] has the data */ |
296 | if (event->type_len) | 336 | if (event->type_len) |
@@ -316,20 +356,49 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); | |||
316 | #define TS_MASK ((1ULL << TS_SHIFT) - 1) | 356 | #define TS_MASK ((1ULL << TS_SHIFT) - 1) |
317 | #define TS_DELTA_TEST (~TS_MASK) | 357 | #define TS_DELTA_TEST (~TS_MASK) |
318 | 358 | ||
359 | /* Flag when events were overwritten */ | ||
360 | #define RB_MISSED_EVENTS (1 << 31) | ||
361 | /* Missed count stored at end */ | ||
362 | #define RB_MISSED_STORED (1 << 30) | ||
363 | |||
319 | struct buffer_data_page { | 364 | struct buffer_data_page { |
320 | u64 time_stamp; /* page time stamp */ | 365 | u64 time_stamp; /* page time stamp */ |
321 | local_t commit; /* write committed index */ | 366 | local_t commit; /* write committed index */ |
322 | unsigned char data[]; /* data of buffer page */ | 367 | unsigned char data[]; /* data of buffer page */ |
323 | }; | 368 | }; |
324 | 369 | ||
370 | /* | ||
371 | * Note, the buffer_page list must be first. The buffer pages | ||
372 | * are allocated in cache lines, which means that each buffer | ||
373 | * page will be at the beginning of a cache line, and thus | ||
374 | * the least significant bits will be zero. We use this to | ||
375 | * add flags in the list struct pointers, to make the ring buffer | ||
376 | * lockless. | ||
377 | */ | ||
325 | struct buffer_page { | 378 | struct buffer_page { |
326 | struct list_head list; /* list of buffer pages */ | 379 | struct list_head list; /* list of buffer pages */ |
327 | local_t write; /* index for next write */ | 380 | local_t write; /* index for next write */ |
328 | unsigned read; /* index for next read */ | 381 | unsigned read; /* index for next read */ |
329 | local_t entries; /* entries on this page */ | 382 | local_t entries; /* entries on this page */ |
383 | unsigned long real_end; /* real end of data */ | ||
330 | struct buffer_data_page *page; /* Actual data page */ | 384 | struct buffer_data_page *page; /* Actual data page */ |
331 | }; | 385 | }; |
332 | 386 | ||
387 | /* | ||
388 | * The buffer page counters, write and entries, must be reset | ||
389 | * atomically when crossing page boundaries. To synchronize this | ||
390 | * update, two counters are inserted into the number. One is | ||
391 | * the actual counter for the write position or count on the page. | ||
392 | * | ||
393 | * The other is a counter of updaters. Before an update happens | ||
394 | * the update partition of the counter is incremented. This will | ||
395 | * allow the updater to update the counter atomically. | ||
396 | * | ||
397 | * The counter is 20 bits, and the state data is 12. | ||
398 | */ | ||
399 | #define RB_WRITE_MASK 0xfffff | ||
400 | #define RB_WRITE_INTCNT (1 << 20) | ||
401 | |||
333 | static void rb_init_page(struct buffer_data_page *bpage) | 402 | static void rb_init_page(struct buffer_data_page *bpage) |
334 | { | 403 | { |
335 | local_set(&bpage->commit, 0); | 404 | local_set(&bpage->commit, 0); |
@@ -372,27 +441,33 @@ static inline int test_time_stamp(u64 delta) | |||
372 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ | 441 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ |
373 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) | 442 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) |
374 | 443 | ||
375 | /* Max number of timestamps that can fit on a page */ | ||
376 | #define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_STAMP) | ||
377 | |||
378 | int ring_buffer_print_page_header(struct trace_seq *s) | 444 | int ring_buffer_print_page_header(struct trace_seq *s) |
379 | { | 445 | { |
380 | struct buffer_data_page field; | 446 | struct buffer_data_page field; |
381 | int ret; | 447 | int ret; |
382 | 448 | ||
383 | ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" | 449 | ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" |
384 | "offset:0;\tsize:%u;\n", | 450 | "offset:0;\tsize:%u;\tsigned:%u;\n", |
385 | (unsigned int)sizeof(field.time_stamp)); | 451 | (unsigned int)sizeof(field.time_stamp), |
452 | (unsigned int)is_signed_type(u64)); | ||
386 | 453 | ||
387 | ret = trace_seq_printf(s, "\tfield: local_t commit;\t" | 454 | ret = trace_seq_printf(s, "\tfield: local_t commit;\t" |
388 | "offset:%u;\tsize:%u;\n", | 455 | "offset:%u;\tsize:%u;\tsigned:%u;\n", |
456 | (unsigned int)offsetof(typeof(field), commit), | ||
457 | (unsigned int)sizeof(field.commit), | ||
458 | (unsigned int)is_signed_type(long)); | ||
459 | |||
460 | ret = trace_seq_printf(s, "\tfield: int overwrite;\t" | ||
461 | "offset:%u;\tsize:%u;\tsigned:%u;\n", | ||
389 | (unsigned int)offsetof(typeof(field), commit), | 462 | (unsigned int)offsetof(typeof(field), commit), |
390 | (unsigned int)sizeof(field.commit)); | 463 | 1, |
464 | (unsigned int)is_signed_type(long)); | ||
391 | 465 | ||
392 | ret = trace_seq_printf(s, "\tfield: char data;\t" | 466 | ret = trace_seq_printf(s, "\tfield: char data;\t" |
393 | "offset:%u;\tsize:%u;\n", | 467 | "offset:%u;\tsize:%u;\tsigned:%u;\n", |
394 | (unsigned int)offsetof(typeof(field), data), | 468 | (unsigned int)offsetof(typeof(field), data), |
395 | (unsigned int)BUF_PAGE_SIZE); | 469 | (unsigned int)BUF_PAGE_SIZE, |
470 | (unsigned int)is_signed_type(char)); | ||
396 | 471 | ||
397 | return ret; | 472 | return ret; |
398 | } | 473 | } |
@@ -402,25 +477,26 @@ int ring_buffer_print_page_header(struct trace_seq *s) | |||
402 | */ | 477 | */ |
403 | struct ring_buffer_per_cpu { | 478 | struct ring_buffer_per_cpu { |
404 | int cpu; | 479 | int cpu; |
480 | atomic_t record_disabled; | ||
405 | struct ring_buffer *buffer; | 481 | struct ring_buffer *buffer; |
406 | spinlock_t reader_lock; /* serialize readers */ | 482 | spinlock_t reader_lock; /* serialize readers */ |
407 | raw_spinlock_t lock; | 483 | arch_spinlock_t lock; |
408 | struct lock_class_key lock_key; | 484 | struct lock_class_key lock_key; |
409 | struct list_head pages; | 485 | struct list_head *pages; |
410 | struct buffer_page *head_page; /* read from head */ | 486 | struct buffer_page *head_page; /* read from head */ |
411 | struct buffer_page *tail_page; /* write to tail */ | 487 | struct buffer_page *tail_page; /* write to tail */ |
412 | struct buffer_page *commit_page; /* committed pages */ | 488 | struct buffer_page *commit_page; /* committed pages */ |
413 | struct buffer_page *reader_page; | 489 | struct buffer_page *reader_page; |
414 | unsigned long nmi_dropped; | 490 | unsigned long lost_events; |
415 | unsigned long commit_overrun; | 491 | unsigned long last_overrun; |
416 | unsigned long overrun; | 492 | local_t commit_overrun; |
417 | unsigned long read; | 493 | local_t overrun; |
418 | local_t entries; | 494 | local_t entries; |
419 | local_t committing; | 495 | local_t committing; |
420 | local_t commits; | 496 | local_t commits; |
497 | unsigned long read; | ||
421 | u64 write_stamp; | 498 | u64 write_stamp; |
422 | u64 read_stamp; | 499 | u64 read_stamp; |
423 | atomic_t record_disabled; | ||
424 | }; | 500 | }; |
425 | 501 | ||
426 | struct ring_buffer { | 502 | struct ring_buffer { |
@@ -446,24 +522,31 @@ struct ring_buffer_iter { | |||
446 | struct ring_buffer_per_cpu *cpu_buffer; | 522 | struct ring_buffer_per_cpu *cpu_buffer; |
447 | unsigned long head; | 523 | unsigned long head; |
448 | struct buffer_page *head_page; | 524 | struct buffer_page *head_page; |
525 | struct buffer_page *cache_reader_page; | ||
526 | unsigned long cache_read; | ||
449 | u64 read_stamp; | 527 | u64 read_stamp; |
450 | }; | 528 | }; |
451 | 529 | ||
452 | /* buffer may be either ring_buffer or ring_buffer_per_cpu */ | 530 | /* buffer may be either ring_buffer or ring_buffer_per_cpu */ |
453 | #define RB_WARN_ON(buffer, cond) \ | 531 | #define RB_WARN_ON(b, cond) \ |
454 | ({ \ | 532 | ({ \ |
455 | int _____ret = unlikely(cond); \ | 533 | int _____ret = unlikely(cond); \ |
456 | if (_____ret) { \ | 534 | if (_____ret) { \ |
457 | atomic_inc(&buffer->record_disabled); \ | 535 | if (__same_type(*(b), struct ring_buffer_per_cpu)) { \ |
458 | WARN_ON(1); \ | 536 | struct ring_buffer_per_cpu *__b = \ |
459 | } \ | 537 | (void *)b; \ |
460 | _____ret; \ | 538 | atomic_inc(&__b->buffer->record_disabled); \ |
539 | } else \ | ||
540 | atomic_inc(&b->record_disabled); \ | ||
541 | WARN_ON(1); \ | ||
542 | } \ | ||
543 | _____ret; \ | ||
461 | }) | 544 | }) |
462 | 545 | ||
463 | /* Up this if you want to test the TIME_EXTENTS and normalization */ | 546 | /* Up this if you want to test the TIME_EXTENTS and normalization */ |
464 | #define DEBUG_SHIFT 0 | 547 | #define DEBUG_SHIFT 0 |
465 | 548 | ||
466 | static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu) | 549 | static inline u64 rb_time_stamp(struct ring_buffer *buffer) |
467 | { | 550 | { |
468 | /* shift to debug/test normalization and TIME_EXTENTS */ | 551 | /* shift to debug/test normalization and TIME_EXTENTS */ |
469 | return buffer->clock() << DEBUG_SHIFT; | 552 | return buffer->clock() << DEBUG_SHIFT; |
@@ -474,7 +557,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) | |||
474 | u64 time; | 557 | u64 time; |
475 | 558 | ||
476 | preempt_disable_notrace(); | 559 | preempt_disable_notrace(); |
477 | time = rb_time_stamp(buffer, cpu); | 560 | time = rb_time_stamp(buffer); |
478 | preempt_enable_no_resched_notrace(); | 561 | preempt_enable_no_resched_notrace(); |
479 | 562 | ||
480 | return time; | 563 | return time; |
@@ -489,6 +572,390 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, | |||
489 | } | 572 | } |
490 | EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); | 573 | EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); |
491 | 574 | ||
575 | /* | ||
576 | * Making the ring buffer lockless makes things tricky. | ||
577 | * Although writes only happen on the CPU that they are on, | ||
578 | * and they only need to worry about interrupts. Reads can | ||
579 | * happen on any CPU. | ||
580 | * | ||
581 | * The reader page is always off the ring buffer, but when the | ||
582 | * reader finishes with a page, it needs to swap its page with | ||
583 | * a new one from the buffer. The reader needs to take from | ||
584 | * the head (writes go to the tail). But if a writer is in overwrite | ||
585 | * mode and wraps, it must push the head page forward. | ||
586 | * | ||
587 | * Here lies the problem. | ||
588 | * | ||
589 | * The reader must be careful to replace only the head page, and | ||
590 | * not another one. As described at the top of the file in the | ||
591 | * ASCII art, the reader sets its old page to point to the next | ||
592 | * page after head. It then sets the page after head to point to | ||
593 | * the old reader page. But if the writer moves the head page | ||
594 | * during this operation, the reader could end up with the tail. | ||
595 | * | ||
596 | * We use cmpxchg to help prevent this race. We also do something | ||
597 | * special with the page before head. We set the LSB to 1. | ||
598 | * | ||
599 | * When the writer must push the page forward, it will clear the | ||
600 | * bit that points to the head page, move the head, and then set | ||
601 | * the bit that points to the new head page. | ||
602 | * | ||
603 | * We also don't want an interrupt coming in and moving the head | ||
604 | * page on another writer. Thus we use the second LSB to catch | ||
605 | * that too. Thus: | ||
606 | * | ||
607 | * head->list->prev->next bit 1 bit 0 | ||
608 | * ------- ------- | ||
609 | * Normal page 0 0 | ||
610 | * Points to head page 0 1 | ||
611 | * New head page 1 0 | ||
612 | * | ||
613 | * Note we can not trust the prev pointer of the head page, because: | ||
614 | * | ||
615 | * +----+ +-----+ +-----+ | ||
616 | * | |------>| T |---X--->| N | | ||
617 | * | |<------| | | | | ||
618 | * +----+ +-----+ +-----+ | ||
619 | * ^ ^ | | ||
620 | * | +-----+ | | | ||
621 | * +----------| R |----------+ | | ||
622 | * | |<-----------+ | ||
623 | * +-----+ | ||
624 | * | ||
625 | * Key: ---X--> HEAD flag set in pointer | ||
626 | * T Tail page | ||
627 | * R Reader page | ||
628 | * N Next page | ||
629 | * | ||
630 | * (see __rb_reserve_next() to see where this happens) | ||
631 | * | ||
632 | * What the above shows is that the reader just swapped out | ||
633 | * the reader page with a page in the buffer, but before it | ||
634 | * could make the new header point back to the new page added | ||
635 | * it was preempted by a writer. The writer moved forward onto | ||
636 | * the new page added by the reader and is about to move forward | ||
637 | * again. | ||
638 | * | ||
639 | * You can see, it is legitimate for the previous pointer of | ||
640 | * the head (or any page) not to point back to itself. But only | ||
641 | * temporarially. | ||
642 | */ | ||
643 | |||
644 | #define RB_PAGE_NORMAL 0UL | ||
645 | #define RB_PAGE_HEAD 1UL | ||
646 | #define RB_PAGE_UPDATE 2UL | ||
647 | |||
648 | |||
649 | #define RB_FLAG_MASK 3UL | ||
650 | |||
651 | /* PAGE_MOVED is not part of the mask */ | ||
652 | #define RB_PAGE_MOVED 4UL | ||
653 | |||
654 | /* | ||
655 | * rb_list_head - remove any bit | ||
656 | */ | ||
657 | static struct list_head *rb_list_head(struct list_head *list) | ||
658 | { | ||
659 | unsigned long val = (unsigned long)list; | ||
660 | |||
661 | return (struct list_head *)(val & ~RB_FLAG_MASK); | ||
662 | } | ||
663 | |||
664 | /* | ||
665 | * rb_is_head_page - test if the given page is the head page | ||
666 | * | ||
667 | * Because the reader may move the head_page pointer, we can | ||
668 | * not trust what the head page is (it may be pointing to | ||
669 | * the reader page). But if the next page is a header page, | ||
670 | * its flags will be non zero. | ||
671 | */ | ||
672 | static int inline | ||
673 | rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, | ||
674 | struct buffer_page *page, struct list_head *list) | ||
675 | { | ||
676 | unsigned long val; | ||
677 | |||
678 | val = (unsigned long)list->next; | ||
679 | |||
680 | if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list) | ||
681 | return RB_PAGE_MOVED; | ||
682 | |||
683 | return val & RB_FLAG_MASK; | ||
684 | } | ||
685 | |||
686 | /* | ||
687 | * rb_is_reader_page | ||
688 | * | ||
689 | * The unique thing about the reader page, is that, if the | ||
690 | * writer is ever on it, the previous pointer never points | ||
691 | * back to the reader page. | ||
692 | */ | ||
693 | static int rb_is_reader_page(struct buffer_page *page) | ||
694 | { | ||
695 | struct list_head *list = page->list.prev; | ||
696 | |||
697 | return rb_list_head(list->next) != &page->list; | ||
698 | } | ||
699 | |||
700 | /* | ||
701 | * rb_set_list_to_head - set a list_head to be pointing to head. | ||
702 | */ | ||
703 | static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer, | ||
704 | struct list_head *list) | ||
705 | { | ||
706 | unsigned long *ptr; | ||
707 | |||
708 | ptr = (unsigned long *)&list->next; | ||
709 | *ptr |= RB_PAGE_HEAD; | ||
710 | *ptr &= ~RB_PAGE_UPDATE; | ||
711 | } | ||
712 | |||
713 | /* | ||
714 | * rb_head_page_activate - sets up head page | ||
715 | */ | ||
716 | static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) | ||
717 | { | ||
718 | struct buffer_page *head; | ||
719 | |||
720 | head = cpu_buffer->head_page; | ||
721 | if (!head) | ||
722 | return; | ||
723 | |||
724 | /* | ||
725 | * Set the previous list pointer to have the HEAD flag. | ||
726 | */ | ||
727 | rb_set_list_to_head(cpu_buffer, head->list.prev); | ||
728 | } | ||
729 | |||
730 | static void rb_list_head_clear(struct list_head *list) | ||
731 | { | ||
732 | unsigned long *ptr = (unsigned long *)&list->next; | ||
733 | |||
734 | *ptr &= ~RB_FLAG_MASK; | ||
735 | } | ||
736 | |||
737 | /* | ||
738 | * rb_head_page_dactivate - clears head page ptr (for free list) | ||
739 | */ | ||
740 | static void | ||
741 | rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer) | ||
742 | { | ||
743 | struct list_head *hd; | ||
744 | |||
745 | /* Go through the whole list and clear any pointers found. */ | ||
746 | rb_list_head_clear(cpu_buffer->pages); | ||
747 | |||
748 | list_for_each(hd, cpu_buffer->pages) | ||
749 | rb_list_head_clear(hd); | ||
750 | } | ||
751 | |||
752 | static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer, | ||
753 | struct buffer_page *head, | ||
754 | struct buffer_page *prev, | ||
755 | int old_flag, int new_flag) | ||
756 | { | ||
757 | struct list_head *list; | ||
758 | unsigned long val = (unsigned long)&head->list; | ||
759 | unsigned long ret; | ||
760 | |||
761 | list = &prev->list; | ||
762 | |||
763 | val &= ~RB_FLAG_MASK; | ||
764 | |||
765 | ret = cmpxchg((unsigned long *)&list->next, | ||
766 | val | old_flag, val | new_flag); | ||
767 | |||
768 | /* check if the reader took the page */ | ||
769 | if ((ret & ~RB_FLAG_MASK) != val) | ||
770 | return RB_PAGE_MOVED; | ||
771 | |||
772 | return ret & RB_FLAG_MASK; | ||
773 | } | ||
774 | |||
775 | static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer, | ||
776 | struct buffer_page *head, | ||
777 | struct buffer_page *prev, | ||
778 | int old_flag) | ||
779 | { | ||
780 | return rb_head_page_set(cpu_buffer, head, prev, | ||
781 | old_flag, RB_PAGE_UPDATE); | ||
782 | } | ||
783 | |||
784 | static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer, | ||
785 | struct buffer_page *head, | ||
786 | struct buffer_page *prev, | ||
787 | int old_flag) | ||
788 | { | ||
789 | return rb_head_page_set(cpu_buffer, head, prev, | ||
790 | old_flag, RB_PAGE_HEAD); | ||
791 | } | ||
792 | |||
793 | static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer, | ||
794 | struct buffer_page *head, | ||
795 | struct buffer_page *prev, | ||
796 | int old_flag) | ||
797 | { | ||
798 | return rb_head_page_set(cpu_buffer, head, prev, | ||
799 | old_flag, RB_PAGE_NORMAL); | ||
800 | } | ||
801 | |||
802 | static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, | ||
803 | struct buffer_page **bpage) | ||
804 | { | ||
805 | struct list_head *p = rb_list_head((*bpage)->list.next); | ||
806 | |||
807 | *bpage = list_entry(p, struct buffer_page, list); | ||
808 | } | ||
809 | |||
810 | static struct buffer_page * | ||
811 | rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) | ||
812 | { | ||
813 | struct buffer_page *head; | ||
814 | struct buffer_page *page; | ||
815 | struct list_head *list; | ||
816 | int i; | ||
817 | |||
818 | if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page)) | ||
819 | return NULL; | ||
820 | |||
821 | /* sanity check */ | ||
822 | list = cpu_buffer->pages; | ||
823 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list)) | ||
824 | return NULL; | ||
825 | |||
826 | page = head = cpu_buffer->head_page; | ||
827 | /* | ||
828 | * It is possible that the writer moves the header behind | ||
829 | * where we started, and we miss in one loop. | ||
830 | * A second loop should grab the header, but we'll do | ||
831 | * three loops just because I'm paranoid. | ||
832 | */ | ||
833 | for (i = 0; i < 3; i++) { | ||
834 | do { | ||
835 | if (rb_is_head_page(cpu_buffer, page, page->list.prev)) { | ||
836 | cpu_buffer->head_page = page; | ||
837 | return page; | ||
838 | } | ||
839 | rb_inc_page(cpu_buffer, &page); | ||
840 | } while (page != head); | ||
841 | } | ||
842 | |||
843 | RB_WARN_ON(cpu_buffer, 1); | ||
844 | |||
845 | return NULL; | ||
846 | } | ||
847 | |||
848 | static int rb_head_page_replace(struct buffer_page *old, | ||
849 | struct buffer_page *new) | ||
850 | { | ||
851 | unsigned long *ptr = (unsigned long *)&old->list.prev->next; | ||
852 | unsigned long val; | ||
853 | unsigned long ret; | ||
854 | |||
855 | val = *ptr & ~RB_FLAG_MASK; | ||
856 | val |= RB_PAGE_HEAD; | ||
857 | |||
858 | ret = cmpxchg(ptr, val, (unsigned long)&new->list); | ||
859 | |||
860 | return ret == val; | ||
861 | } | ||
862 | |||
863 | /* | ||
864 | * rb_tail_page_update - move the tail page forward | ||
865 | * | ||
866 | * Returns 1 if moved tail page, 0 if someone else did. | ||
867 | */ | ||
868 | static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, | ||
869 | struct buffer_page *tail_page, | ||
870 | struct buffer_page *next_page) | ||
871 | { | ||
872 | struct buffer_page *old_tail; | ||
873 | unsigned long old_entries; | ||
874 | unsigned long old_write; | ||
875 | int ret = 0; | ||
876 | |||
877 | /* | ||
878 | * The tail page now needs to be moved forward. | ||
879 | * | ||
880 | * We need to reset the tail page, but without messing | ||
881 | * with possible erasing of data brought in by interrupts | ||
882 | * that have moved the tail page and are currently on it. | ||
883 | * | ||
884 | * We add a counter to the write field to denote this. | ||
885 | */ | ||
886 | old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write); | ||
887 | old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries); | ||
888 | |||
889 | /* | ||
890 | * Just make sure we have seen our old_write and synchronize | ||
891 | * with any interrupts that come in. | ||
892 | */ | ||
893 | barrier(); | ||
894 | |||
895 | /* | ||
896 | * If the tail page is still the same as what we think | ||
897 | * it is, then it is up to us to update the tail | ||
898 | * pointer. | ||
899 | */ | ||
900 | if (tail_page == cpu_buffer->tail_page) { | ||
901 | /* Zero the write counter */ | ||
902 | unsigned long val = old_write & ~RB_WRITE_MASK; | ||
903 | unsigned long eval = old_entries & ~RB_WRITE_MASK; | ||
904 | |||
905 | /* | ||
906 | * This will only succeed if an interrupt did | ||
907 | * not come in and change it. In which case, we | ||
908 | * do not want to modify it. | ||
909 | * | ||
910 | * We add (void) to let the compiler know that we do not care | ||
911 | * about the return value of these functions. We use the | ||
912 | * cmpxchg to only update if an interrupt did not already | ||
913 | * do it for us. If the cmpxchg fails, we don't care. | ||
914 | */ | ||
915 | (void)local_cmpxchg(&next_page->write, old_write, val); | ||
916 | (void)local_cmpxchg(&next_page->entries, old_entries, eval); | ||
917 | |||
918 | /* | ||
919 | * No need to worry about races with clearing out the commit. | ||
920 | * it only can increment when a commit takes place. But that | ||
921 | * only happens in the outer most nested commit. | ||
922 | */ | ||
923 | local_set(&next_page->page->commit, 0); | ||
924 | |||
925 | old_tail = cmpxchg(&cpu_buffer->tail_page, | ||
926 | tail_page, next_page); | ||
927 | |||
928 | if (old_tail == tail_page) | ||
929 | ret = 1; | ||
930 | } | ||
931 | |||
932 | return ret; | ||
933 | } | ||
934 | |||
935 | static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, | ||
936 | struct buffer_page *bpage) | ||
937 | { | ||
938 | unsigned long val = (unsigned long)bpage; | ||
939 | |||
940 | if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK)) | ||
941 | return 1; | ||
942 | |||
943 | return 0; | ||
944 | } | ||
945 | |||
946 | /** | ||
947 | * rb_check_list - make sure a pointer to a list has the last bits zero | ||
948 | */ | ||
949 | static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer, | ||
950 | struct list_head *list) | ||
951 | { | ||
952 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev)) | ||
953 | return 1; | ||
954 | if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next)) | ||
955 | return 1; | ||
956 | return 0; | ||
957 | } | ||
958 | |||
492 | /** | 959 | /** |
493 | * check_pages - integrity check of buffer pages | 960 | * check_pages - integrity check of buffer pages |
494 | * @cpu_buffer: CPU buffer with pages to test | 961 | * @cpu_buffer: CPU buffer with pages to test |
@@ -498,14 +965,19 @@ EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); | |||
498 | */ | 965 | */ |
499 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) | 966 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) |
500 | { | 967 | { |
501 | struct list_head *head = &cpu_buffer->pages; | 968 | struct list_head *head = cpu_buffer->pages; |
502 | struct buffer_page *bpage, *tmp; | 969 | struct buffer_page *bpage, *tmp; |
503 | 970 | ||
971 | rb_head_page_deactivate(cpu_buffer); | ||
972 | |||
504 | if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) | 973 | if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) |
505 | return -1; | 974 | return -1; |
506 | if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) | 975 | if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) |
507 | return -1; | 976 | return -1; |
508 | 977 | ||
978 | if (rb_check_list(cpu_buffer, head)) | ||
979 | return -1; | ||
980 | |||
509 | list_for_each_entry_safe(bpage, tmp, head, list) { | 981 | list_for_each_entry_safe(bpage, tmp, head, list) { |
510 | if (RB_WARN_ON(cpu_buffer, | 982 | if (RB_WARN_ON(cpu_buffer, |
511 | bpage->list.next->prev != &bpage->list)) | 983 | bpage->list.next->prev != &bpage->list)) |
@@ -513,25 +985,33 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) | |||
513 | if (RB_WARN_ON(cpu_buffer, | 985 | if (RB_WARN_ON(cpu_buffer, |
514 | bpage->list.prev->next != &bpage->list)) | 986 | bpage->list.prev->next != &bpage->list)) |
515 | return -1; | 987 | return -1; |
988 | if (rb_check_list(cpu_buffer, &bpage->list)) | ||
989 | return -1; | ||
516 | } | 990 | } |
517 | 991 | ||
992 | rb_head_page_activate(cpu_buffer); | ||
993 | |||
518 | return 0; | 994 | return 0; |
519 | } | 995 | } |
520 | 996 | ||
521 | static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | 997 | static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, |
522 | unsigned nr_pages) | 998 | unsigned nr_pages) |
523 | { | 999 | { |
524 | struct list_head *head = &cpu_buffer->pages; | ||
525 | struct buffer_page *bpage, *tmp; | 1000 | struct buffer_page *bpage, *tmp; |
526 | unsigned long addr; | 1001 | unsigned long addr; |
527 | LIST_HEAD(pages); | 1002 | LIST_HEAD(pages); |
528 | unsigned i; | 1003 | unsigned i; |
529 | 1004 | ||
1005 | WARN_ON(!nr_pages); | ||
1006 | |||
530 | for (i = 0; i < nr_pages; i++) { | 1007 | for (i = 0; i < nr_pages; i++) { |
531 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 1008 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), |
532 | GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); | 1009 | GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); |
533 | if (!bpage) | 1010 | if (!bpage) |
534 | goto free_pages; | 1011 | goto free_pages; |
1012 | |||
1013 | rb_check_bpage(cpu_buffer, bpage); | ||
1014 | |||
535 | list_add(&bpage->list, &pages); | 1015 | list_add(&bpage->list, &pages); |
536 | 1016 | ||
537 | addr = __get_free_page(GFP_KERNEL); | 1017 | addr = __get_free_page(GFP_KERNEL); |
@@ -541,7 +1021,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
541 | rb_init_page(bpage->page); | 1021 | rb_init_page(bpage->page); |
542 | } | 1022 | } |
543 | 1023 | ||
544 | list_splice(&pages, head); | 1024 | /* |
1025 | * The ring buffer page list is a circular list that does not | ||
1026 | * start and end with a list head. All page list items point to | ||
1027 | * other pages. | ||
1028 | */ | ||
1029 | cpu_buffer->pages = pages.next; | ||
1030 | list_del(&pages); | ||
545 | 1031 | ||
546 | rb_check_pages(cpu_buffer); | 1032 | rb_check_pages(cpu_buffer); |
547 | 1033 | ||
@@ -572,14 +1058,15 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
572 | cpu_buffer->buffer = buffer; | 1058 | cpu_buffer->buffer = buffer; |
573 | spin_lock_init(&cpu_buffer->reader_lock); | 1059 | spin_lock_init(&cpu_buffer->reader_lock); |
574 | lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); | 1060 | lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); |
575 | cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 1061 | cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
576 | INIT_LIST_HEAD(&cpu_buffer->pages); | ||
577 | 1062 | ||
578 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 1063 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), |
579 | GFP_KERNEL, cpu_to_node(cpu)); | 1064 | GFP_KERNEL, cpu_to_node(cpu)); |
580 | if (!bpage) | 1065 | if (!bpage) |
581 | goto fail_free_buffer; | 1066 | goto fail_free_buffer; |
582 | 1067 | ||
1068 | rb_check_bpage(cpu_buffer, bpage); | ||
1069 | |||
583 | cpu_buffer->reader_page = bpage; | 1070 | cpu_buffer->reader_page = bpage; |
584 | addr = __get_free_page(GFP_KERNEL); | 1071 | addr = __get_free_page(GFP_KERNEL); |
585 | if (!addr) | 1072 | if (!addr) |
@@ -594,9 +1081,11 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
594 | goto fail_free_reader; | 1081 | goto fail_free_reader; |
595 | 1082 | ||
596 | cpu_buffer->head_page | 1083 | cpu_buffer->head_page |
597 | = list_entry(cpu_buffer->pages.next, struct buffer_page, list); | 1084 | = list_entry(cpu_buffer->pages, struct buffer_page, list); |
598 | cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; | 1085 | cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; |
599 | 1086 | ||
1087 | rb_head_page_activate(cpu_buffer); | ||
1088 | |||
600 | return cpu_buffer; | 1089 | return cpu_buffer; |
601 | 1090 | ||
602 | fail_free_reader: | 1091 | fail_free_reader: |
@@ -609,15 +1098,22 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
609 | 1098 | ||
610 | static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) | 1099 | static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) |
611 | { | 1100 | { |
612 | struct list_head *head = &cpu_buffer->pages; | 1101 | struct list_head *head = cpu_buffer->pages; |
613 | struct buffer_page *bpage, *tmp; | 1102 | struct buffer_page *bpage, *tmp; |
614 | 1103 | ||
615 | free_buffer_page(cpu_buffer->reader_page); | 1104 | free_buffer_page(cpu_buffer->reader_page); |
616 | 1105 | ||
617 | list_for_each_entry_safe(bpage, tmp, head, list) { | 1106 | rb_head_page_deactivate(cpu_buffer); |
618 | list_del_init(&bpage->list); | 1107 | |
1108 | if (head) { | ||
1109 | list_for_each_entry_safe(bpage, tmp, head, list) { | ||
1110 | list_del_init(&bpage->list); | ||
1111 | free_buffer_page(bpage); | ||
1112 | } | ||
1113 | bpage = list_entry(head, struct buffer_page, list); | ||
619 | free_buffer_page(bpage); | 1114 | free_buffer_page(bpage); |
620 | } | 1115 | } |
1116 | |||
621 | kfree(cpu_buffer); | 1117 | kfree(cpu_buffer); |
622 | } | 1118 | } |
623 | 1119 | ||
@@ -735,6 +1231,7 @@ ring_buffer_free(struct ring_buffer *buffer) | |||
735 | 1231 | ||
736 | put_online_cpus(); | 1232 | put_online_cpus(); |
737 | 1233 | ||
1234 | kfree(buffer->buffers); | ||
738 | free_cpumask_var(buffer->cpumask); | 1235 | free_cpumask_var(buffer->cpumask); |
739 | 1236 | ||
740 | kfree(buffer); | 1237 | kfree(buffer); |
@@ -756,26 +1253,25 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) | |||
756 | struct list_head *p; | 1253 | struct list_head *p; |
757 | unsigned i; | 1254 | unsigned i; |
758 | 1255 | ||
759 | atomic_inc(&cpu_buffer->record_disabled); | 1256 | spin_lock_irq(&cpu_buffer->reader_lock); |
760 | synchronize_sched(); | 1257 | rb_head_page_deactivate(cpu_buffer); |
761 | 1258 | ||
762 | for (i = 0; i < nr_pages; i++) { | 1259 | for (i = 0; i < nr_pages; i++) { |
763 | if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) | 1260 | if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) |
764 | return; | 1261 | goto out; |
765 | p = cpu_buffer->pages.next; | 1262 | p = cpu_buffer->pages->next; |
766 | bpage = list_entry(p, struct buffer_page, list); | 1263 | bpage = list_entry(p, struct buffer_page, list); |
767 | list_del_init(&bpage->list); | 1264 | list_del_init(&bpage->list); |
768 | free_buffer_page(bpage); | 1265 | free_buffer_page(bpage); |
769 | } | 1266 | } |
770 | if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) | 1267 | if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) |
771 | return; | 1268 | goto out; |
772 | 1269 | ||
773 | rb_reset_cpu(cpu_buffer); | 1270 | rb_reset_cpu(cpu_buffer); |
774 | |||
775 | rb_check_pages(cpu_buffer); | 1271 | rb_check_pages(cpu_buffer); |
776 | 1272 | ||
777 | atomic_dec(&cpu_buffer->record_disabled); | 1273 | out: |
778 | 1274 | spin_unlock_irq(&cpu_buffer->reader_lock); | |
779 | } | 1275 | } |
780 | 1276 | ||
781 | static void | 1277 | static void |
@@ -786,22 +1282,22 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
786 | struct list_head *p; | 1282 | struct list_head *p; |
787 | unsigned i; | 1283 | unsigned i; |
788 | 1284 | ||
789 | atomic_inc(&cpu_buffer->record_disabled); | 1285 | spin_lock_irq(&cpu_buffer->reader_lock); |
790 | synchronize_sched(); | 1286 | rb_head_page_deactivate(cpu_buffer); |
791 | 1287 | ||
792 | for (i = 0; i < nr_pages; i++) { | 1288 | for (i = 0; i < nr_pages; i++) { |
793 | if (RB_WARN_ON(cpu_buffer, list_empty(pages))) | 1289 | if (RB_WARN_ON(cpu_buffer, list_empty(pages))) |
794 | return; | 1290 | goto out; |
795 | p = pages->next; | 1291 | p = pages->next; |
796 | bpage = list_entry(p, struct buffer_page, list); | 1292 | bpage = list_entry(p, struct buffer_page, list); |
797 | list_del_init(&bpage->list); | 1293 | list_del_init(&bpage->list); |
798 | list_add_tail(&bpage->list, &cpu_buffer->pages); | 1294 | list_add_tail(&bpage->list, cpu_buffer->pages); |
799 | } | 1295 | } |
800 | rb_reset_cpu(cpu_buffer); | 1296 | rb_reset_cpu(cpu_buffer); |
801 | |||
802 | rb_check_pages(cpu_buffer); | 1297 | rb_check_pages(cpu_buffer); |
803 | 1298 | ||
804 | atomic_dec(&cpu_buffer->record_disabled); | 1299 | out: |
1300 | spin_unlock_irq(&cpu_buffer->reader_lock); | ||
805 | } | 1301 | } |
806 | 1302 | ||
807 | /** | 1303 | /** |
@@ -809,11 +1305,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
809 | * @buffer: the buffer to resize. | 1305 | * @buffer: the buffer to resize. |
810 | * @size: the new size. | 1306 | * @size: the new size. |
811 | * | 1307 | * |
812 | * The tracer is responsible for making sure that the buffer is | ||
813 | * not being used while changing the size. | ||
814 | * Note: We may be able to change the above requirement by using | ||
815 | * RCU synchronizations. | ||
816 | * | ||
817 | * Minimum size is 2 * BUF_PAGE_SIZE. | 1308 | * Minimum size is 2 * BUF_PAGE_SIZE. |
818 | * | 1309 | * |
819 | * Returns -1 on failure. | 1310 | * Returns -1 on failure. |
@@ -845,6 +1336,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
845 | if (size == buffer_size) | 1336 | if (size == buffer_size) |
846 | return size; | 1337 | return size; |
847 | 1338 | ||
1339 | atomic_inc(&buffer->record_disabled); | ||
1340 | |||
1341 | /* Make sure all writers are done with this buffer. */ | ||
1342 | synchronize_sched(); | ||
1343 | |||
848 | mutex_lock(&buffer->mutex); | 1344 | mutex_lock(&buffer->mutex); |
849 | get_online_cpus(); | 1345 | get_online_cpus(); |
850 | 1346 | ||
@@ -907,6 +1403,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
907 | put_online_cpus(); | 1403 | put_online_cpus(); |
908 | mutex_unlock(&buffer->mutex); | 1404 | mutex_unlock(&buffer->mutex); |
909 | 1405 | ||
1406 | atomic_dec(&buffer->record_disabled); | ||
1407 | |||
910 | return size; | 1408 | return size; |
911 | 1409 | ||
912 | free_pages: | 1410 | free_pages: |
@@ -916,6 +1414,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
916 | } | 1414 | } |
917 | put_online_cpus(); | 1415 | put_online_cpus(); |
918 | mutex_unlock(&buffer->mutex); | 1416 | mutex_unlock(&buffer->mutex); |
1417 | atomic_dec(&buffer->record_disabled); | ||
919 | return -ENOMEM; | 1418 | return -ENOMEM; |
920 | 1419 | ||
921 | /* | 1420 | /* |
@@ -925,6 +1424,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
925 | out_fail: | 1424 | out_fail: |
926 | put_online_cpus(); | 1425 | put_online_cpus(); |
927 | mutex_unlock(&buffer->mutex); | 1426 | mutex_unlock(&buffer->mutex); |
1427 | atomic_dec(&buffer->record_disabled); | ||
928 | return -1; | 1428 | return -1; |
929 | } | 1429 | } |
930 | EXPORT_SYMBOL_GPL(ring_buffer_resize); | 1430 | EXPORT_SYMBOL_GPL(ring_buffer_resize); |
@@ -948,21 +1448,14 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) | |||
948 | } | 1448 | } |
949 | 1449 | ||
950 | static inline struct ring_buffer_event * | 1450 | static inline struct ring_buffer_event * |
951 | rb_head_event(struct ring_buffer_per_cpu *cpu_buffer) | ||
952 | { | ||
953 | return __rb_page_index(cpu_buffer->head_page, | ||
954 | cpu_buffer->head_page->read); | ||
955 | } | ||
956 | |||
957 | static inline struct ring_buffer_event * | ||
958 | rb_iter_head_event(struct ring_buffer_iter *iter) | 1451 | rb_iter_head_event(struct ring_buffer_iter *iter) |
959 | { | 1452 | { |
960 | return __rb_page_index(iter->head_page, iter->head); | 1453 | return __rb_page_index(iter->head_page, iter->head); |
961 | } | 1454 | } |
962 | 1455 | ||
963 | static inline unsigned rb_page_write(struct buffer_page *bpage) | 1456 | static inline unsigned long rb_page_write(struct buffer_page *bpage) |
964 | { | 1457 | { |
965 | return local_read(&bpage->write); | 1458 | return local_read(&bpage->write) & RB_WRITE_MASK; |
966 | } | 1459 | } |
967 | 1460 | ||
968 | static inline unsigned rb_page_commit(struct buffer_page *bpage) | 1461 | static inline unsigned rb_page_commit(struct buffer_page *bpage) |
@@ -970,6 +1463,11 @@ static inline unsigned rb_page_commit(struct buffer_page *bpage) | |||
970 | return local_read(&bpage->page->commit); | 1463 | return local_read(&bpage->page->commit); |
971 | } | 1464 | } |
972 | 1465 | ||
1466 | static inline unsigned long rb_page_entries(struct buffer_page *bpage) | ||
1467 | { | ||
1468 | return local_read(&bpage->entries) & RB_WRITE_MASK; | ||
1469 | } | ||
1470 | |||
973 | /* Size is determined by what has been commited */ | 1471 | /* Size is determined by what has been commited */ |
974 | static inline unsigned rb_page_size(struct buffer_page *bpage) | 1472 | static inline unsigned rb_page_size(struct buffer_page *bpage) |
975 | { | 1473 | { |
@@ -982,22 +1480,6 @@ rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) | |||
982 | return rb_page_commit(cpu_buffer->commit_page); | 1480 | return rb_page_commit(cpu_buffer->commit_page); |
983 | } | 1481 | } |
984 | 1482 | ||
985 | static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer) | ||
986 | { | ||
987 | return rb_page_commit(cpu_buffer->head_page); | ||
988 | } | ||
989 | |||
990 | static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, | ||
991 | struct buffer_page **bpage) | ||
992 | { | ||
993 | struct list_head *p = (*bpage)->list.next; | ||
994 | |||
995 | if (p == &cpu_buffer->pages) | ||
996 | p = p->next; | ||
997 | |||
998 | *bpage = list_entry(p, struct buffer_page, list); | ||
999 | } | ||
1000 | |||
1001 | static inline unsigned | 1483 | static inline unsigned |
1002 | rb_event_index(struct ring_buffer_event *event) | 1484 | rb_event_index(struct ring_buffer_event *event) |
1003 | { | 1485 | { |
@@ -1023,6 +1505,8 @@ rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
1023 | static void | 1505 | static void |
1024 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | 1506 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) |
1025 | { | 1507 | { |
1508 | unsigned long max_count; | ||
1509 | |||
1026 | /* | 1510 | /* |
1027 | * We only race with interrupts and NMIs on this CPU. | 1511 | * We only race with interrupts and NMIs on this CPU. |
1028 | * If we own the commit event, then we can commit | 1512 | * If we own the commit event, then we can commit |
@@ -1032,9 +1516,16 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | |||
1032 | * assign the commit to the tail. | 1516 | * assign the commit to the tail. |
1033 | */ | 1517 | */ |
1034 | again: | 1518 | again: |
1519 | max_count = cpu_buffer->buffer->pages * 100; | ||
1520 | |||
1035 | while (cpu_buffer->commit_page != cpu_buffer->tail_page) { | 1521 | while (cpu_buffer->commit_page != cpu_buffer->tail_page) { |
1036 | cpu_buffer->commit_page->page->commit = | 1522 | if (RB_WARN_ON(cpu_buffer, !(--max_count))) |
1037 | cpu_buffer->commit_page->write; | 1523 | return; |
1524 | if (RB_WARN_ON(cpu_buffer, | ||
1525 | rb_is_reader_page(cpu_buffer->tail_page))) | ||
1526 | return; | ||
1527 | local_set(&cpu_buffer->commit_page->page->commit, | ||
1528 | rb_page_write(cpu_buffer->commit_page)); | ||
1038 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); | 1529 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); |
1039 | cpu_buffer->write_stamp = | 1530 | cpu_buffer->write_stamp = |
1040 | cpu_buffer->commit_page->page->time_stamp; | 1531 | cpu_buffer->commit_page->page->time_stamp; |
@@ -1043,8 +1534,12 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | |||
1043 | } | 1534 | } |
1044 | while (rb_commit_index(cpu_buffer) != | 1535 | while (rb_commit_index(cpu_buffer) != |
1045 | rb_page_write(cpu_buffer->commit_page)) { | 1536 | rb_page_write(cpu_buffer->commit_page)) { |
1046 | cpu_buffer->commit_page->page->commit = | 1537 | |
1047 | cpu_buffer->commit_page->write; | 1538 | local_set(&cpu_buffer->commit_page->page->commit, |
1539 | rb_page_write(cpu_buffer->commit_page)); | ||
1540 | RB_WARN_ON(cpu_buffer, | ||
1541 | local_read(&cpu_buffer->commit_page->page->commit) & | ||
1542 | ~RB_WRITE_MASK); | ||
1048 | barrier(); | 1543 | barrier(); |
1049 | } | 1544 | } |
1050 | 1545 | ||
@@ -1077,7 +1572,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
1077 | * to the head page instead of next. | 1572 | * to the head page instead of next. |
1078 | */ | 1573 | */ |
1079 | if (iter->head_page == cpu_buffer->reader_page) | 1574 | if (iter->head_page == cpu_buffer->reader_page) |
1080 | iter->head_page = cpu_buffer->head_page; | 1575 | iter->head_page = rb_set_head_page(cpu_buffer); |
1081 | else | 1576 | else |
1082 | rb_inc_page(cpu_buffer, &iter->head_page); | 1577 | rb_inc_page(cpu_buffer, &iter->head_page); |
1083 | 1578 | ||
@@ -1085,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
1085 | iter->head = 0; | 1580 | iter->head = 0; |
1086 | } | 1581 | } |
1087 | 1582 | ||
1583 | /* Slow path, do not inline */ | ||
1584 | static noinline struct ring_buffer_event * | ||
1585 | rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) | ||
1586 | { | ||
1587 | event->type_len = RINGBUF_TYPE_TIME_EXTEND; | ||
1588 | |||
1589 | /* Not the first event on the page? */ | ||
1590 | if (rb_event_index(event)) { | ||
1591 | event->time_delta = delta & TS_MASK; | ||
1592 | event->array[0] = delta >> TS_SHIFT; | ||
1593 | } else { | ||
1594 | /* nope, just zero it */ | ||
1595 | event->time_delta = 0; | ||
1596 | event->array[0] = 0; | ||
1597 | } | ||
1598 | |||
1599 | return skip_time_extend(event); | ||
1600 | } | ||
1601 | |||
1088 | /** | 1602 | /** |
1089 | * ring_buffer_update_event - update event type and data | 1603 | * ring_buffer_update_event - update event type and data |
1090 | * @event: the even to update | 1604 | * @event: the even to update |
@@ -1097,28 +1611,188 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
1097 | * data field. | 1611 | * data field. |
1098 | */ | 1612 | */ |
1099 | static void | 1613 | static void |
1100 | rb_update_event(struct ring_buffer_event *event, | 1614 | rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, |
1101 | unsigned type, unsigned length) | 1615 | struct ring_buffer_event *event, unsigned length, |
1616 | int add_timestamp, u64 delta) | ||
1102 | { | 1617 | { |
1103 | event->type_len = type; | 1618 | /* Only a commit updates the timestamp */ |
1619 | if (unlikely(!rb_event_is_commit(cpu_buffer, event))) | ||
1620 | delta = 0; | ||
1621 | |||
1622 | /* | ||
1623 | * If we need to add a timestamp, then we | ||
1624 | * add it to the start of the resevered space. | ||
1625 | */ | ||
1626 | if (unlikely(add_timestamp)) { | ||
1627 | event = rb_add_time_stamp(event, delta); | ||
1628 | length -= RB_LEN_TIME_EXTEND; | ||
1629 | delta = 0; | ||
1630 | } | ||
1631 | |||
1632 | event->time_delta = delta; | ||
1633 | length -= RB_EVNT_HDR_SIZE; | ||
1634 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { | ||
1635 | event->type_len = 0; | ||
1636 | event->array[0] = length; | ||
1637 | } else | ||
1638 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); | ||
1639 | } | ||
1640 | |||
1641 | /* | ||
1642 | * rb_handle_head_page - writer hit the head page | ||
1643 | * | ||
1644 | * Returns: +1 to retry page | ||
1645 | * 0 to continue | ||
1646 | * -1 on error | ||
1647 | */ | ||
1648 | static int | ||
1649 | rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, | ||
1650 | struct buffer_page *tail_page, | ||
1651 | struct buffer_page *next_page) | ||
1652 | { | ||
1653 | struct buffer_page *new_head; | ||
1654 | int entries; | ||
1655 | int type; | ||
1656 | int ret; | ||
1657 | |||
1658 | entries = rb_page_entries(next_page); | ||
1659 | |||
1660 | /* | ||
1661 | * The hard part is here. We need to move the head | ||
1662 | * forward, and protect against both readers on | ||
1663 | * other CPUs and writers coming in via interrupts. | ||
1664 | */ | ||
1665 | type = rb_head_page_set_update(cpu_buffer, next_page, tail_page, | ||
1666 | RB_PAGE_HEAD); | ||
1667 | |||
1668 | /* | ||
1669 | * type can be one of four: | ||
1670 | * NORMAL - an interrupt already moved it for us | ||
1671 | * HEAD - we are the first to get here. | ||
1672 | * UPDATE - we are the interrupt interrupting | ||
1673 | * a current move. | ||
1674 | * MOVED - a reader on another CPU moved the next | ||
1675 | * pointer to its reader page. Give up | ||
1676 | * and try again. | ||
1677 | */ | ||
1104 | 1678 | ||
1105 | switch (type) { | 1679 | switch (type) { |
1680 | case RB_PAGE_HEAD: | ||
1681 | /* | ||
1682 | * We changed the head to UPDATE, thus | ||
1683 | * it is our responsibility to update | ||
1684 | * the counters. | ||
1685 | */ | ||
1686 | local_add(entries, &cpu_buffer->overrun); | ||
1106 | 1687 | ||
1107 | case RINGBUF_TYPE_PADDING: | 1688 | /* |
1108 | case RINGBUF_TYPE_TIME_EXTEND: | 1689 | * The entries will be zeroed out when we move the |
1109 | case RINGBUF_TYPE_TIME_STAMP: | 1690 | * tail page. |
1691 | */ | ||
1692 | |||
1693 | /* still more to do */ | ||
1110 | break; | 1694 | break; |
1111 | 1695 | ||
1112 | case 0: | 1696 | case RB_PAGE_UPDATE: |
1113 | length -= RB_EVNT_HDR_SIZE; | 1697 | /* |
1114 | if (length > RB_MAX_SMALL_DATA) | 1698 | * This is an interrupt that interrupt the |
1115 | event->array[0] = length; | 1699 | * previous update. Still more to do. |
1116 | else | 1700 | */ |
1117 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); | ||
1118 | break; | 1701 | break; |
1702 | case RB_PAGE_NORMAL: | ||
1703 | /* | ||
1704 | * An interrupt came in before the update | ||
1705 | * and processed this for us. | ||
1706 | * Nothing left to do. | ||
1707 | */ | ||
1708 | return 1; | ||
1709 | case RB_PAGE_MOVED: | ||
1710 | /* | ||
1711 | * The reader is on another CPU and just did | ||
1712 | * a swap with our next_page. | ||
1713 | * Try again. | ||
1714 | */ | ||
1715 | return 1; | ||
1119 | default: | 1716 | default: |
1120 | BUG(); | 1717 | RB_WARN_ON(cpu_buffer, 1); /* WTF??? */ |
1718 | return -1; | ||
1719 | } | ||
1720 | |||
1721 | /* | ||
1722 | * Now that we are here, the old head pointer is | ||
1723 | * set to UPDATE. This will keep the reader from | ||
1724 | * swapping the head page with the reader page. | ||
1725 | * The reader (on another CPU) will spin till | ||
1726 | * we are finished. | ||
1727 | * | ||
1728 | * We just need to protect against interrupts | ||
1729 | * doing the job. We will set the next pointer | ||
1730 | * to HEAD. After that, we set the old pointer | ||
1731 | * to NORMAL, but only if it was HEAD before. | ||
1732 | * otherwise we are an interrupt, and only | ||
1733 | * want the outer most commit to reset it. | ||
1734 | */ | ||
1735 | new_head = next_page; | ||
1736 | rb_inc_page(cpu_buffer, &new_head); | ||
1737 | |||
1738 | ret = rb_head_page_set_head(cpu_buffer, new_head, next_page, | ||
1739 | RB_PAGE_NORMAL); | ||
1740 | |||
1741 | /* | ||
1742 | * Valid returns are: | ||
1743 | * HEAD - an interrupt came in and already set it. | ||
1744 | * NORMAL - One of two things: | ||
1745 | * 1) We really set it. | ||
1746 | * 2) A bunch of interrupts came in and moved | ||
1747 | * the page forward again. | ||
1748 | */ | ||
1749 | switch (ret) { | ||
1750 | case RB_PAGE_HEAD: | ||
1751 | case RB_PAGE_NORMAL: | ||
1752 | /* OK */ | ||
1753 | break; | ||
1754 | default: | ||
1755 | RB_WARN_ON(cpu_buffer, 1); | ||
1756 | return -1; | ||
1121 | } | 1757 | } |
1758 | |||
1759 | /* | ||
1760 | * It is possible that an interrupt came in, | ||
1761 | * set the head up, then more interrupts came in | ||
1762 | * and moved it again. When we get back here, | ||
1763 | * the page would have been set to NORMAL but we | ||
1764 | * just set it back to HEAD. | ||
1765 | * | ||
1766 | * How do you detect this? Well, if that happened | ||
1767 | * the tail page would have moved. | ||
1768 | */ | ||
1769 | if (ret == RB_PAGE_NORMAL) { | ||
1770 | /* | ||
1771 | * If the tail had moved passed next, then we need | ||
1772 | * to reset the pointer. | ||
1773 | */ | ||
1774 | if (cpu_buffer->tail_page != tail_page && | ||
1775 | cpu_buffer->tail_page != next_page) | ||
1776 | rb_head_page_set_normal(cpu_buffer, new_head, | ||
1777 | next_page, | ||
1778 | RB_PAGE_HEAD); | ||
1779 | } | ||
1780 | |||
1781 | /* | ||
1782 | * If this was the outer most commit (the one that | ||
1783 | * changed the original pointer from HEAD to UPDATE), | ||
1784 | * then it is up to us to reset it to NORMAL. | ||
1785 | */ | ||
1786 | if (type == RB_PAGE_HEAD) { | ||
1787 | ret = rb_head_page_set_normal(cpu_buffer, next_page, | ||
1788 | tail_page, | ||
1789 | RB_PAGE_UPDATE); | ||
1790 | if (RB_WARN_ON(cpu_buffer, | ||
1791 | ret != RB_PAGE_UPDATE)) | ||
1792 | return -1; | ||
1793 | } | ||
1794 | |||
1795 | return 0; | ||
1122 | } | 1796 | } |
1123 | 1797 | ||
1124 | static unsigned rb_calculate_event_length(unsigned length) | 1798 | static unsigned rb_calculate_event_length(unsigned length) |
@@ -1129,11 +1803,11 @@ static unsigned rb_calculate_event_length(unsigned length) | |||
1129 | if (!length) | 1803 | if (!length) |
1130 | length = 1; | 1804 | length = 1; |
1131 | 1805 | ||
1132 | if (length > RB_MAX_SMALL_DATA) | 1806 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) |
1133 | length += sizeof(event.array[0]); | 1807 | length += sizeof(event.array[0]); |
1134 | 1808 | ||
1135 | length += RB_EVNT_HDR_SIZE; | 1809 | length += RB_EVNT_HDR_SIZE; |
1136 | length = ALIGN(length, RB_ALIGNMENT); | 1810 | length = ALIGN(length, RB_ARCH_ALIGNMENT); |
1137 | 1811 | ||
1138 | return length; | 1812 | return length; |
1139 | } | 1813 | } |
@@ -1150,6 +1824,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1150 | * must fill the old tail_page with padding. | 1824 | * must fill the old tail_page with padding. |
1151 | */ | 1825 | */ |
1152 | if (tail >= BUF_PAGE_SIZE) { | 1826 | if (tail >= BUF_PAGE_SIZE) { |
1827 | /* | ||
1828 | * If the page was filled, then we still need | ||
1829 | * to update the real_end. Reset it to zero | ||
1830 | * and the reader will ignore it. | ||
1831 | */ | ||
1832 | if (tail == BUF_PAGE_SIZE) | ||
1833 | tail_page->real_end = 0; | ||
1834 | |||
1153 | local_sub(length, &tail_page->write); | 1835 | local_sub(length, &tail_page->write); |
1154 | return; | 1836 | return; |
1155 | } | 1837 | } |
@@ -1158,6 +1840,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1158 | kmemcheck_annotate_bitfield(event, bitfield); | 1840 | kmemcheck_annotate_bitfield(event, bitfield); |
1159 | 1841 | ||
1160 | /* | 1842 | /* |
1843 | * Save the original length to the meta data. | ||
1844 | * This will be used by the reader to add lost event | ||
1845 | * counter. | ||
1846 | */ | ||
1847 | tail_page->real_end = tail; | ||
1848 | |||
1849 | /* | ||
1161 | * If this event is bigger than the minimum size, then | 1850 | * If this event is bigger than the minimum size, then |
1162 | * we need to be careful that we don't subtract the | 1851 | * we need to be careful that we don't subtract the |
1163 | * write counter enough to allow another writer to slip | 1852 | * write counter enough to allow another writer to slip |
@@ -1184,111 +1873,108 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1184 | event->type_len = RINGBUF_TYPE_PADDING; | 1873 | event->type_len = RINGBUF_TYPE_PADDING; |
1185 | /* time delta must be non zero */ | 1874 | /* time delta must be non zero */ |
1186 | event->time_delta = 1; | 1875 | event->time_delta = 1; |
1187 | /* Account for this as an entry */ | ||
1188 | local_inc(&tail_page->entries); | ||
1189 | local_inc(&cpu_buffer->entries); | ||
1190 | 1876 | ||
1191 | /* Set write to end of buffer */ | 1877 | /* Set write to end of buffer */ |
1192 | length = (tail + length) - BUF_PAGE_SIZE; | 1878 | length = (tail + length) - BUF_PAGE_SIZE; |
1193 | local_sub(length, &tail_page->write); | 1879 | local_sub(length, &tail_page->write); |
1194 | } | 1880 | } |
1195 | 1881 | ||
1196 | static struct ring_buffer_event * | 1882 | /* |
1883 | * This is the slow path, force gcc not to inline it. | ||
1884 | */ | ||
1885 | static noinline struct ring_buffer_event * | ||
1197 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | 1886 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, |
1198 | unsigned long length, unsigned long tail, | 1887 | unsigned long length, unsigned long tail, |
1199 | struct buffer_page *commit_page, | 1888 | struct buffer_page *tail_page, u64 ts) |
1200 | struct buffer_page *tail_page, u64 *ts) | ||
1201 | { | 1889 | { |
1202 | struct buffer_page *next_page, *head_page, *reader_page; | 1890 | struct buffer_page *commit_page = cpu_buffer->commit_page; |
1203 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1891 | struct ring_buffer *buffer = cpu_buffer->buffer; |
1204 | bool lock_taken = false; | 1892 | struct buffer_page *next_page; |
1205 | unsigned long flags; | 1893 | int ret; |
1206 | 1894 | ||
1207 | next_page = tail_page; | 1895 | next_page = tail_page; |
1208 | 1896 | ||
1209 | local_irq_save(flags); | ||
1210 | /* | ||
1211 | * Since the write to the buffer is still not | ||
1212 | * fully lockless, we must be careful with NMIs. | ||
1213 | * The locks in the writers are taken when a write | ||
1214 | * crosses to a new page. The locks protect against | ||
1215 | * races with the readers (this will soon be fixed | ||
1216 | * with a lockless solution). | ||
1217 | * | ||
1218 | * Because we can not protect against NMIs, and we | ||
1219 | * want to keep traces reentrant, we need to manage | ||
1220 | * what happens when we are in an NMI. | ||
1221 | * | ||
1222 | * NMIs can happen after we take the lock. | ||
1223 | * If we are in an NMI, only take the lock | ||
1224 | * if it is not already taken. Otherwise | ||
1225 | * simply fail. | ||
1226 | */ | ||
1227 | if (unlikely(in_nmi())) { | ||
1228 | if (!__raw_spin_trylock(&cpu_buffer->lock)) { | ||
1229 | cpu_buffer->nmi_dropped++; | ||
1230 | goto out_reset; | ||
1231 | } | ||
1232 | } else | ||
1233 | __raw_spin_lock(&cpu_buffer->lock); | ||
1234 | |||
1235 | lock_taken = true; | ||
1236 | |||
1237 | rb_inc_page(cpu_buffer, &next_page); | 1897 | rb_inc_page(cpu_buffer, &next_page); |
1238 | 1898 | ||
1239 | head_page = cpu_buffer->head_page; | ||
1240 | reader_page = cpu_buffer->reader_page; | ||
1241 | |||
1242 | /* we grabbed the lock before incrementing */ | ||
1243 | if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) | ||
1244 | goto out_reset; | ||
1245 | |||
1246 | /* | 1899 | /* |
1247 | * If for some reason, we had an interrupt storm that made | 1900 | * If for some reason, we had an interrupt storm that made |
1248 | * it all the way around the buffer, bail, and warn | 1901 | * it all the way around the buffer, bail, and warn |
1249 | * about it. | 1902 | * about it. |
1250 | */ | 1903 | */ |
1251 | if (unlikely(next_page == commit_page)) { | 1904 | if (unlikely(next_page == commit_page)) { |
1252 | cpu_buffer->commit_overrun++; | 1905 | local_inc(&cpu_buffer->commit_overrun); |
1253 | goto out_reset; | 1906 | goto out_reset; |
1254 | } | 1907 | } |
1255 | 1908 | ||
1256 | if (next_page == head_page) { | 1909 | /* |
1257 | if (!(buffer->flags & RB_FL_OVERWRITE)) | 1910 | * This is where the fun begins! |
1258 | goto out_reset; | 1911 | * |
1259 | 1912 | * We are fighting against races between a reader that | |
1260 | /* tail_page has not moved yet? */ | 1913 | * could be on another CPU trying to swap its reader |
1261 | if (tail_page == cpu_buffer->tail_page) { | 1914 | * page with the buffer head. |
1262 | /* count overflows */ | 1915 | * |
1263 | cpu_buffer->overrun += | 1916 | * We are also fighting against interrupts coming in and |
1264 | local_read(&head_page->entries); | 1917 | * moving the head or tail on us as well. |
1918 | * | ||
1919 | * If the next page is the head page then we have filled | ||
1920 | * the buffer, unless the commit page is still on the | ||
1921 | * reader page. | ||
1922 | */ | ||
1923 | if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) { | ||
1265 | 1924 | ||
1266 | rb_inc_page(cpu_buffer, &head_page); | 1925 | /* |
1267 | cpu_buffer->head_page = head_page; | 1926 | * If the commit is not on the reader page, then |
1268 | cpu_buffer->head_page->read = 0; | 1927 | * move the header page. |
1928 | */ | ||
1929 | if (!rb_is_reader_page(cpu_buffer->commit_page)) { | ||
1930 | /* | ||
1931 | * If we are not in overwrite mode, | ||
1932 | * this is easy, just stop here. | ||
1933 | */ | ||
1934 | if (!(buffer->flags & RB_FL_OVERWRITE)) | ||
1935 | goto out_reset; | ||
1936 | |||
1937 | ret = rb_handle_head_page(cpu_buffer, | ||
1938 | tail_page, | ||
1939 | next_page); | ||
1940 | if (ret < 0) | ||
1941 | goto out_reset; | ||
1942 | if (ret) | ||
1943 | goto out_again; | ||
1944 | } else { | ||
1945 | /* | ||
1946 | * We need to be careful here too. The | ||
1947 | * commit page could still be on the reader | ||
1948 | * page. We could have a small buffer, and | ||
1949 | * have filled up the buffer with events | ||
1950 | * from interrupts and such, and wrapped. | ||
1951 | * | ||
1952 | * Note, if the tail page is also the on the | ||
1953 | * reader_page, we let it move out. | ||
1954 | */ | ||
1955 | if (unlikely((cpu_buffer->commit_page != | ||
1956 | cpu_buffer->tail_page) && | ||
1957 | (cpu_buffer->commit_page == | ||
1958 | cpu_buffer->reader_page))) { | ||
1959 | local_inc(&cpu_buffer->commit_overrun); | ||
1960 | goto out_reset; | ||
1961 | } | ||
1269 | } | 1962 | } |
1270 | } | 1963 | } |
1271 | 1964 | ||
1272 | /* | 1965 | ret = rb_tail_page_update(cpu_buffer, tail_page, next_page); |
1273 | * If the tail page is still the same as what we think | 1966 | if (ret) { |
1274 | * it is, then it is up to us to update the tail | 1967 | /* |
1275 | * pointer. | 1968 | * Nested commits always have zero deltas, so |
1276 | */ | 1969 | * just reread the time stamp |
1277 | if (tail_page == cpu_buffer->tail_page) { | 1970 | */ |
1278 | local_set(&next_page->write, 0); | 1971 | ts = rb_time_stamp(buffer); |
1279 | local_set(&next_page->entries, 0); | 1972 | next_page->page->time_stamp = ts; |
1280 | local_set(&next_page->page->commit, 0); | ||
1281 | cpu_buffer->tail_page = next_page; | ||
1282 | |||
1283 | /* reread the time stamp */ | ||
1284 | *ts = rb_time_stamp(buffer, cpu_buffer->cpu); | ||
1285 | cpu_buffer->tail_page->page->time_stamp = *ts; | ||
1286 | } | 1973 | } |
1287 | 1974 | ||
1288 | rb_reset_tail(cpu_buffer, tail_page, tail, length); | 1975 | out_again: |
1289 | 1976 | ||
1290 | __raw_spin_unlock(&cpu_buffer->lock); | 1977 | rb_reset_tail(cpu_buffer, tail_page, tail, length); |
1291 | local_irq_restore(flags); | ||
1292 | 1978 | ||
1293 | /* fail and let the caller try again */ | 1979 | /* fail and let the caller try again */ |
1294 | return ERR_PTR(-EAGAIN); | 1980 | return ERR_PTR(-EAGAIN); |
@@ -1297,48 +1983,52 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1297 | /* reset write */ | 1983 | /* reset write */ |
1298 | rb_reset_tail(cpu_buffer, tail_page, tail, length); | 1984 | rb_reset_tail(cpu_buffer, tail_page, tail, length); |
1299 | 1985 | ||
1300 | if (likely(lock_taken)) | ||
1301 | __raw_spin_unlock(&cpu_buffer->lock); | ||
1302 | local_irq_restore(flags); | ||
1303 | return NULL; | 1986 | return NULL; |
1304 | } | 1987 | } |
1305 | 1988 | ||
1306 | static struct ring_buffer_event * | 1989 | static struct ring_buffer_event * |
1307 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | 1990 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, |
1308 | unsigned type, unsigned long length, u64 *ts) | 1991 | unsigned long length, u64 ts, |
1992 | u64 delta, int add_timestamp) | ||
1309 | { | 1993 | { |
1310 | struct buffer_page *tail_page, *commit_page; | 1994 | struct buffer_page *tail_page; |
1311 | struct ring_buffer_event *event; | 1995 | struct ring_buffer_event *event; |
1312 | unsigned long tail, write; | 1996 | unsigned long tail, write; |
1313 | 1997 | ||
1314 | commit_page = cpu_buffer->commit_page; | 1998 | /* |
1315 | /* we just need to protect against interrupts */ | 1999 | * If the time delta since the last event is too big to |
1316 | barrier(); | 2000 | * hold in the time field of the event, then we append a |
2001 | * TIME EXTEND event ahead of the data event. | ||
2002 | */ | ||
2003 | if (unlikely(add_timestamp)) | ||
2004 | length += RB_LEN_TIME_EXTEND; | ||
2005 | |||
1317 | tail_page = cpu_buffer->tail_page; | 2006 | tail_page = cpu_buffer->tail_page; |
1318 | write = local_add_return(length, &tail_page->write); | 2007 | write = local_add_return(length, &tail_page->write); |
2008 | |||
2009 | /* set write to only the index of the write */ | ||
2010 | write &= RB_WRITE_MASK; | ||
1319 | tail = write - length; | 2011 | tail = write - length; |
1320 | 2012 | ||
1321 | /* See if we shot pass the end of this buffer page */ | 2013 | /* See if we shot pass the end of this buffer page */ |
1322 | if (write > BUF_PAGE_SIZE) | 2014 | if (unlikely(write > BUF_PAGE_SIZE)) |
1323 | return rb_move_tail(cpu_buffer, length, tail, | 2015 | return rb_move_tail(cpu_buffer, length, tail, |
1324 | commit_page, tail_page, ts); | 2016 | tail_page, ts); |
1325 | 2017 | ||
1326 | /* We reserved something on the buffer */ | 2018 | /* We reserved something on the buffer */ |
1327 | 2019 | ||
1328 | event = __rb_page_index(tail_page, tail); | 2020 | event = __rb_page_index(tail_page, tail); |
1329 | kmemcheck_annotate_bitfield(event, bitfield); | 2021 | kmemcheck_annotate_bitfield(event, bitfield); |
1330 | rb_update_event(event, type, length); | 2022 | rb_update_event(cpu_buffer, event, length, add_timestamp, delta); |
1331 | 2023 | ||
1332 | /* The passed in type is zero for DATA */ | 2024 | local_inc(&tail_page->entries); |
1333 | if (likely(!type)) | ||
1334 | local_inc(&tail_page->entries); | ||
1335 | 2025 | ||
1336 | /* | 2026 | /* |
1337 | * If this is the first commit on the page, then update | 2027 | * If this is the first commit on the page, then update |
1338 | * its timestamp. | 2028 | * its timestamp. |
1339 | */ | 2029 | */ |
1340 | if (!tail) | 2030 | if (!tail) |
1341 | tail_page->page->time_stamp = *ts; | 2031 | tail_page->page->time_stamp = ts; |
1342 | 2032 | ||
1343 | return event; | 2033 | return event; |
1344 | } | 2034 | } |
@@ -1353,19 +2043,23 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
1353 | unsigned long addr; | 2043 | unsigned long addr; |
1354 | 2044 | ||
1355 | new_index = rb_event_index(event); | 2045 | new_index = rb_event_index(event); |
1356 | old_index = new_index + rb_event_length(event); | 2046 | old_index = new_index + rb_event_ts_length(event); |
1357 | addr = (unsigned long)event; | 2047 | addr = (unsigned long)event; |
1358 | addr &= PAGE_MASK; | 2048 | addr &= PAGE_MASK; |
1359 | 2049 | ||
1360 | bpage = cpu_buffer->tail_page; | 2050 | bpage = cpu_buffer->tail_page; |
1361 | 2051 | ||
1362 | if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { | 2052 | if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { |
2053 | unsigned long write_mask = | ||
2054 | local_read(&bpage->write) & ~RB_WRITE_MASK; | ||
1363 | /* | 2055 | /* |
1364 | * This is on the tail page. It is possible that | 2056 | * This is on the tail page. It is possible that |
1365 | * a write could come in and move the tail page | 2057 | * a write could come in and move the tail page |
1366 | * and write to the next page. That is fine | 2058 | * and write to the next page. That is fine |
1367 | * because we just shorten what is on this page. | 2059 | * because we just shorten what is on this page. |
1368 | */ | 2060 | */ |
2061 | old_index += write_mask; | ||
2062 | new_index += write_mask; | ||
1369 | index = local_cmpxchg(&bpage->write, old_index, new_index); | 2063 | index = local_cmpxchg(&bpage->write, old_index, new_index); |
1370 | if (index == old_index) | 2064 | if (index == old_index) |
1371 | return 1; | 2065 | return 1; |
@@ -1375,80 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
1375 | return 0; | 2069 | return 0; |
1376 | } | 2070 | } |
1377 | 2071 | ||
1378 | static int | ||
1379 | rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | ||
1380 | u64 *ts, u64 *delta) | ||
1381 | { | ||
1382 | struct ring_buffer_event *event; | ||
1383 | static int once; | ||
1384 | int ret; | ||
1385 | |||
1386 | if (unlikely(*delta > (1ULL << 59) && !once++)) { | ||
1387 | printk(KERN_WARNING "Delta way too big! %llu" | ||
1388 | " ts=%llu write stamp = %llu\n", | ||
1389 | (unsigned long long)*delta, | ||
1390 | (unsigned long long)*ts, | ||
1391 | (unsigned long long)cpu_buffer->write_stamp); | ||
1392 | WARN_ON(1); | ||
1393 | } | ||
1394 | |||
1395 | /* | ||
1396 | * The delta is too big, we to add a | ||
1397 | * new timestamp. | ||
1398 | */ | ||
1399 | event = __rb_reserve_next(cpu_buffer, | ||
1400 | RINGBUF_TYPE_TIME_EXTEND, | ||
1401 | RB_LEN_TIME_EXTEND, | ||
1402 | ts); | ||
1403 | if (!event) | ||
1404 | return -EBUSY; | ||
1405 | |||
1406 | if (PTR_ERR(event) == -EAGAIN) | ||
1407 | return -EAGAIN; | ||
1408 | |||
1409 | /* Only a commited time event can update the write stamp */ | ||
1410 | if (rb_event_is_commit(cpu_buffer, event)) { | ||
1411 | /* | ||
1412 | * If this is the first on the page, then it was | ||
1413 | * updated with the page itself. Try to discard it | ||
1414 | * and if we can't just make it zero. | ||
1415 | */ | ||
1416 | if (rb_event_index(event)) { | ||
1417 | event->time_delta = *delta & TS_MASK; | ||
1418 | event->array[0] = *delta >> TS_SHIFT; | ||
1419 | } else { | ||
1420 | /* try to discard, since we do not need this */ | ||
1421 | if (!rb_try_to_discard(cpu_buffer, event)) { | ||
1422 | /* nope, just zero it */ | ||
1423 | event->time_delta = 0; | ||
1424 | event->array[0] = 0; | ||
1425 | } | ||
1426 | } | ||
1427 | cpu_buffer->write_stamp = *ts; | ||
1428 | /* let the caller know this was the commit */ | ||
1429 | ret = 1; | ||
1430 | } else { | ||
1431 | /* Try to discard the event */ | ||
1432 | if (!rb_try_to_discard(cpu_buffer, event)) { | ||
1433 | /* Darn, this is just wasted space */ | ||
1434 | event->time_delta = 0; | ||
1435 | event->array[0] = 0; | ||
1436 | } | ||
1437 | ret = 0; | ||
1438 | } | ||
1439 | |||
1440 | *delta = 0; | ||
1441 | |||
1442 | return ret; | ||
1443 | } | ||
1444 | |||
1445 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) | 2072 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) |
1446 | { | 2073 | { |
1447 | local_inc(&cpu_buffer->committing); | 2074 | local_inc(&cpu_buffer->committing); |
1448 | local_inc(&cpu_buffer->commits); | 2075 | local_inc(&cpu_buffer->commits); |
1449 | } | 2076 | } |
1450 | 2077 | ||
1451 | static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) | 2078 | static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) |
1452 | { | 2079 | { |
1453 | unsigned long commits; | 2080 | unsigned long commits; |
1454 | 2081 | ||
@@ -1481,18 +2108,38 @@ static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) | |||
1481 | } | 2108 | } |
1482 | 2109 | ||
1483 | static struct ring_buffer_event * | 2110 | static struct ring_buffer_event * |
1484 | rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | 2111 | rb_reserve_next_event(struct ring_buffer *buffer, |
2112 | struct ring_buffer_per_cpu *cpu_buffer, | ||
1485 | unsigned long length) | 2113 | unsigned long length) |
1486 | { | 2114 | { |
1487 | struct ring_buffer_event *event; | 2115 | struct ring_buffer_event *event; |
1488 | u64 ts, delta = 0; | 2116 | u64 ts, delta; |
1489 | int commit = 0; | ||
1490 | int nr_loops = 0; | 2117 | int nr_loops = 0; |
2118 | int add_timestamp; | ||
2119 | u64 diff; | ||
1491 | 2120 | ||
1492 | rb_start_commit(cpu_buffer); | 2121 | rb_start_commit(cpu_buffer); |
1493 | 2122 | ||
2123 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP | ||
2124 | /* | ||
2125 | * Due to the ability to swap a cpu buffer from a buffer | ||
2126 | * it is possible it was swapped before we committed. | ||
2127 | * (committing stops a swap). We check for it here and | ||
2128 | * if it happened, we have to fail the write. | ||
2129 | */ | ||
2130 | barrier(); | ||
2131 | if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) { | ||
2132 | local_dec(&cpu_buffer->committing); | ||
2133 | local_dec(&cpu_buffer->commits); | ||
2134 | return NULL; | ||
2135 | } | ||
2136 | #endif | ||
2137 | |||
1494 | length = rb_calculate_event_length(length); | 2138 | length = rb_calculate_event_length(length); |
1495 | again: | 2139 | again: |
2140 | add_timestamp = 0; | ||
2141 | delta = 0; | ||
2142 | |||
1496 | /* | 2143 | /* |
1497 | * We allow for interrupts to reenter here and do a trace. | 2144 | * We allow for interrupts to reenter here and do a trace. |
1498 | * If one does, it will cause this original code to loop | 2145 | * If one does, it will cause this original code to loop |
@@ -1505,57 +2152,33 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1505 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) | 2152 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) |
1506 | goto out_fail; | 2153 | goto out_fail; |
1507 | 2154 | ||
1508 | ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); | 2155 | ts = rb_time_stamp(cpu_buffer->buffer); |
2156 | diff = ts - cpu_buffer->write_stamp; | ||
1509 | 2157 | ||
1510 | /* | 2158 | /* make sure this diff is calculated here */ |
1511 | * Only the first commit can update the timestamp. | 2159 | barrier(); |
1512 | * Yes there is a race here. If an interrupt comes in | ||
1513 | * just after the conditional and it traces too, then it | ||
1514 | * will also check the deltas. More than one timestamp may | ||
1515 | * also be made. But only the entry that did the actual | ||
1516 | * commit will be something other than zero. | ||
1517 | */ | ||
1518 | if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page && | ||
1519 | rb_page_write(cpu_buffer->tail_page) == | ||
1520 | rb_commit_index(cpu_buffer))) { | ||
1521 | u64 diff; | ||
1522 | |||
1523 | diff = ts - cpu_buffer->write_stamp; | ||
1524 | |||
1525 | /* make sure this diff is calculated here */ | ||
1526 | barrier(); | ||
1527 | |||
1528 | /* Did the write stamp get updated already? */ | ||
1529 | if (unlikely(ts < cpu_buffer->write_stamp)) | ||
1530 | goto get_event; | ||
1531 | 2160 | ||
2161 | /* Did the write stamp get updated already? */ | ||
2162 | if (likely(ts >= cpu_buffer->write_stamp)) { | ||
1532 | delta = diff; | 2163 | delta = diff; |
1533 | if (unlikely(test_time_stamp(delta))) { | 2164 | if (unlikely(test_time_stamp(delta))) { |
1534 | 2165 | WARN_ONCE(delta > (1ULL << 59), | |
1535 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); | 2166 | KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", |
1536 | if (commit == -EBUSY) | 2167 | (unsigned long long)delta, |
1537 | goto out_fail; | 2168 | (unsigned long long)ts, |
1538 | 2169 | (unsigned long long)cpu_buffer->write_stamp); | |
1539 | if (commit == -EAGAIN) | 2170 | add_timestamp = 1; |
1540 | goto again; | ||
1541 | |||
1542 | RB_WARN_ON(cpu_buffer, commit < 0); | ||
1543 | } | 2171 | } |
1544 | } | 2172 | } |
1545 | 2173 | ||
1546 | get_event: | 2174 | event = __rb_reserve_next(cpu_buffer, length, ts, |
1547 | event = __rb_reserve_next(cpu_buffer, 0, length, &ts); | 2175 | delta, add_timestamp); |
1548 | if (unlikely(PTR_ERR(event) == -EAGAIN)) | 2176 | if (unlikely(PTR_ERR(event) == -EAGAIN)) |
1549 | goto again; | 2177 | goto again; |
1550 | 2178 | ||
1551 | if (!event) | 2179 | if (!event) |
1552 | goto out_fail; | 2180 | goto out_fail; |
1553 | 2181 | ||
1554 | if (!rb_event_is_commit(cpu_buffer, event)) | ||
1555 | delta = 0; | ||
1556 | |||
1557 | event->time_delta = delta; | ||
1558 | |||
1559 | return event; | 2182 | return event; |
1560 | 2183 | ||
1561 | out_fail: | 2184 | out_fail: |
@@ -1563,15 +2186,13 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1563 | return NULL; | 2186 | return NULL; |
1564 | } | 2187 | } |
1565 | 2188 | ||
2189 | #ifdef CONFIG_TRACING | ||
2190 | |||
1566 | #define TRACE_RECURSIVE_DEPTH 16 | 2191 | #define TRACE_RECURSIVE_DEPTH 16 |
1567 | 2192 | ||
1568 | static int trace_recursive_lock(void) | 2193 | /* Keep this code out of the fast path cache */ |
2194 | static noinline void trace_recursive_fail(void) | ||
1569 | { | 2195 | { |
1570 | current->trace_recursion++; | ||
1571 | |||
1572 | if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) | ||
1573 | return 0; | ||
1574 | |||
1575 | /* Disable all tracing before we do anything else */ | 2196 | /* Disable all tracing before we do anything else */ |
1576 | tracing_off_permanent(); | 2197 | tracing_off_permanent(); |
1577 | 2198 | ||
@@ -1583,17 +2204,33 @@ static int trace_recursive_lock(void) | |||
1583 | in_nmi()); | 2204 | in_nmi()); |
1584 | 2205 | ||
1585 | WARN_ON_ONCE(1); | 2206 | WARN_ON_ONCE(1); |
2207 | } | ||
2208 | |||
2209 | static inline int trace_recursive_lock(void) | ||
2210 | { | ||
2211 | current->trace_recursion++; | ||
2212 | |||
2213 | if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) | ||
2214 | return 0; | ||
2215 | |||
2216 | trace_recursive_fail(); | ||
2217 | |||
1586 | return -1; | 2218 | return -1; |
1587 | } | 2219 | } |
1588 | 2220 | ||
1589 | static void trace_recursive_unlock(void) | 2221 | static inline void trace_recursive_unlock(void) |
1590 | { | 2222 | { |
1591 | WARN_ON_ONCE(!current->trace_recursion); | 2223 | WARN_ON_ONCE(!current->trace_recursion); |
1592 | 2224 | ||
1593 | current->trace_recursion--; | 2225 | current->trace_recursion--; |
1594 | } | 2226 | } |
1595 | 2227 | ||
1596 | static DEFINE_PER_CPU(int, rb_need_resched); | 2228 | #else |
2229 | |||
2230 | #define trace_recursive_lock() (0) | ||
2231 | #define trace_recursive_unlock() do { } while (0) | ||
2232 | |||
2233 | #endif | ||
1597 | 2234 | ||
1598 | /** | 2235 | /** |
1599 | * ring_buffer_lock_reserve - reserve a part of the buffer | 2236 | * ring_buffer_lock_reserve - reserve a part of the buffer |
@@ -1615,16 +2252,16 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) | |||
1615 | { | 2252 | { |
1616 | struct ring_buffer_per_cpu *cpu_buffer; | 2253 | struct ring_buffer_per_cpu *cpu_buffer; |
1617 | struct ring_buffer_event *event; | 2254 | struct ring_buffer_event *event; |
1618 | int cpu, resched; | 2255 | int cpu; |
1619 | 2256 | ||
1620 | if (ring_buffer_flags != RB_BUFFERS_ON) | 2257 | if (ring_buffer_flags != RB_BUFFERS_ON) |
1621 | return NULL; | 2258 | return NULL; |
1622 | 2259 | ||
1623 | if (atomic_read(&buffer->record_disabled)) | ||
1624 | return NULL; | ||
1625 | |||
1626 | /* If we are tracing schedule, we don't want to recurse */ | 2260 | /* If we are tracing schedule, we don't want to recurse */ |
1627 | resched = ftrace_preempt_disable(); | 2261 | preempt_disable_notrace(); |
2262 | |||
2263 | if (atomic_read(&buffer->record_disabled)) | ||
2264 | goto out_nocheck; | ||
1628 | 2265 | ||
1629 | if (trace_recursive_lock()) | 2266 | if (trace_recursive_lock()) |
1630 | goto out_nocheck; | 2267 | goto out_nocheck; |
@@ -1642,41 +2279,54 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) | |||
1642 | if (length > BUF_MAX_DATA_SIZE) | 2279 | if (length > BUF_MAX_DATA_SIZE) |
1643 | goto out; | 2280 | goto out; |
1644 | 2281 | ||
1645 | event = rb_reserve_next_event(cpu_buffer, length); | 2282 | event = rb_reserve_next_event(buffer, cpu_buffer, length); |
1646 | if (!event) | 2283 | if (!event) |
1647 | goto out; | 2284 | goto out; |
1648 | 2285 | ||
1649 | /* | ||
1650 | * Need to store resched state on this cpu. | ||
1651 | * Only the first needs to. | ||
1652 | */ | ||
1653 | |||
1654 | if (preempt_count() == 1) | ||
1655 | per_cpu(rb_need_resched, cpu) = resched; | ||
1656 | |||
1657 | return event; | 2286 | return event; |
1658 | 2287 | ||
1659 | out: | 2288 | out: |
1660 | trace_recursive_unlock(); | 2289 | trace_recursive_unlock(); |
1661 | 2290 | ||
1662 | out_nocheck: | 2291 | out_nocheck: |
1663 | ftrace_preempt_enable(resched); | 2292 | preempt_enable_notrace(); |
1664 | return NULL; | 2293 | return NULL; |
1665 | } | 2294 | } |
1666 | EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); | 2295 | EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); |
1667 | 2296 | ||
1668 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | 2297 | static void |
2298 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, | ||
1669 | struct ring_buffer_event *event) | 2299 | struct ring_buffer_event *event) |
1670 | { | 2300 | { |
1671 | local_inc(&cpu_buffer->entries); | 2301 | u64 delta; |
1672 | 2302 | ||
1673 | /* | 2303 | /* |
1674 | * The event first in the commit queue updates the | 2304 | * The event first in the commit queue updates the |
1675 | * time stamp. | 2305 | * time stamp. |
1676 | */ | 2306 | */ |
1677 | if (rb_event_is_commit(cpu_buffer, event)) | 2307 | if (rb_event_is_commit(cpu_buffer, event)) { |
1678 | cpu_buffer->write_stamp += event->time_delta; | 2308 | /* |
2309 | * A commit event that is first on a page | ||
2310 | * updates the write timestamp with the page stamp | ||
2311 | */ | ||
2312 | if (!rb_event_index(event)) | ||
2313 | cpu_buffer->write_stamp = | ||
2314 | cpu_buffer->commit_page->page->time_stamp; | ||
2315 | else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | ||
2316 | delta = event->array[0]; | ||
2317 | delta <<= TS_SHIFT; | ||
2318 | delta += event->time_delta; | ||
2319 | cpu_buffer->write_stamp += delta; | ||
2320 | } else | ||
2321 | cpu_buffer->write_stamp += event->time_delta; | ||
2322 | } | ||
2323 | } | ||
1679 | 2324 | ||
2325 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | ||
2326 | struct ring_buffer_event *event) | ||
2327 | { | ||
2328 | local_inc(&cpu_buffer->entries); | ||
2329 | rb_update_write_stamp(cpu_buffer, event); | ||
1680 | rb_end_commit(cpu_buffer); | 2330 | rb_end_commit(cpu_buffer); |
1681 | } | 2331 | } |
1682 | 2332 | ||
@@ -1701,13 +2351,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, | |||
1701 | 2351 | ||
1702 | trace_recursive_unlock(); | 2352 | trace_recursive_unlock(); |
1703 | 2353 | ||
1704 | /* | 2354 | preempt_enable_notrace(); |
1705 | * Only the last preempt count needs to restore preemption. | ||
1706 | */ | ||
1707 | if (preempt_count() == 1) | ||
1708 | ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); | ||
1709 | else | ||
1710 | preempt_enable_no_resched_notrace(); | ||
1711 | 2355 | ||
1712 | return 0; | 2356 | return 0; |
1713 | } | 2357 | } |
@@ -1715,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); | |||
1715 | 2359 | ||
1716 | static inline void rb_event_discard(struct ring_buffer_event *event) | 2360 | static inline void rb_event_discard(struct ring_buffer_event *event) |
1717 | { | 2361 | { |
2362 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
2363 | event = skip_time_extend(event); | ||
2364 | |||
1718 | /* array[0] holds the actual length for the discarded event */ | 2365 | /* array[0] holds the actual length for the discarded event */ |
1719 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; | 2366 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; |
1720 | event->type_len = RINGBUF_TYPE_PADDING; | 2367 | event->type_len = RINGBUF_TYPE_PADDING; |
@@ -1723,32 +2370,57 @@ static inline void rb_event_discard(struct ring_buffer_event *event) | |||
1723 | event->time_delta = 1; | 2370 | event->time_delta = 1; |
1724 | } | 2371 | } |
1725 | 2372 | ||
1726 | /** | 2373 | /* |
1727 | * ring_buffer_event_discard - discard any event in the ring buffer | 2374 | * Decrement the entries to the page that an event is on. |
1728 | * @event: the event to discard | 2375 | * The event does not even need to exist, only the pointer |
1729 | * | 2376 | * to the page it is on. This may only be called before the commit |
1730 | * Sometimes a event that is in the ring buffer needs to be ignored. | 2377 | * takes place. |
1731 | * This function lets the user discard an event in the ring buffer | ||
1732 | * and then that event will not be read later. | ||
1733 | * | ||
1734 | * Note, it is up to the user to be careful with this, and protect | ||
1735 | * against races. If the user discards an event that has been consumed | ||
1736 | * it is possible that it could corrupt the ring buffer. | ||
1737 | */ | 2378 | */ |
1738 | void ring_buffer_event_discard(struct ring_buffer_event *event) | 2379 | static inline void |
2380 | rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, | ||
2381 | struct ring_buffer_event *event) | ||
1739 | { | 2382 | { |
1740 | rb_event_discard(event); | 2383 | unsigned long addr = (unsigned long)event; |
2384 | struct buffer_page *bpage = cpu_buffer->commit_page; | ||
2385 | struct buffer_page *start; | ||
2386 | |||
2387 | addr &= PAGE_MASK; | ||
2388 | |||
2389 | /* Do the likely case first */ | ||
2390 | if (likely(bpage->page == (void *)addr)) { | ||
2391 | local_dec(&bpage->entries); | ||
2392 | return; | ||
2393 | } | ||
2394 | |||
2395 | /* | ||
2396 | * Because the commit page may be on the reader page we | ||
2397 | * start with the next page and check the end loop there. | ||
2398 | */ | ||
2399 | rb_inc_page(cpu_buffer, &bpage); | ||
2400 | start = bpage; | ||
2401 | do { | ||
2402 | if (bpage->page == (void *)addr) { | ||
2403 | local_dec(&bpage->entries); | ||
2404 | return; | ||
2405 | } | ||
2406 | rb_inc_page(cpu_buffer, &bpage); | ||
2407 | } while (bpage != start); | ||
2408 | |||
2409 | /* commit not part of this buffer?? */ | ||
2410 | RB_WARN_ON(cpu_buffer, 1); | ||
1741 | } | 2411 | } |
1742 | EXPORT_SYMBOL_GPL(ring_buffer_event_discard); | ||
1743 | 2412 | ||
1744 | /** | 2413 | /** |
1745 | * ring_buffer_commit_discard - discard an event that has not been committed | 2414 | * ring_buffer_commit_discard - discard an event that has not been committed |
1746 | * @buffer: the ring buffer | 2415 | * @buffer: the ring buffer |
1747 | * @event: non committed event to discard | 2416 | * @event: non committed event to discard |
1748 | * | 2417 | * |
1749 | * This is similar to ring_buffer_event_discard but must only be | 2418 | * Sometimes an event that is in the ring buffer needs to be ignored. |
1750 | * performed on an event that has not been committed yet. The difference | 2419 | * This function lets the user discard an event in the ring buffer |
1751 | * is that this will also try to free the event from the ring buffer | 2420 | * and then that event will not be read later. |
2421 | * | ||
2422 | * This function only works if it is called before the the item has been | ||
2423 | * committed. It will try to free the event from the ring buffer | ||
1752 | * if another event has not been added behind it. | 2424 | * if another event has not been added behind it. |
1753 | * | 2425 | * |
1754 | * If another event has been added behind it, it will set the event | 2426 | * If another event has been added behind it, it will set the event |
@@ -1776,26 +2448,21 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, | |||
1776 | */ | 2448 | */ |
1777 | RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); | 2449 | RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); |
1778 | 2450 | ||
1779 | if (!rb_try_to_discard(cpu_buffer, event)) | 2451 | rb_decrement_entry(cpu_buffer, event); |
2452 | if (rb_try_to_discard(cpu_buffer, event)) | ||
1780 | goto out; | 2453 | goto out; |
1781 | 2454 | ||
1782 | /* | 2455 | /* |
1783 | * The commit is still visible by the reader, so we | 2456 | * The commit is still visible by the reader, so we |
1784 | * must increment entries. | 2457 | * must still update the timestamp. |
1785 | */ | 2458 | */ |
1786 | local_inc(&cpu_buffer->entries); | 2459 | rb_update_write_stamp(cpu_buffer, event); |
1787 | out: | 2460 | out: |
1788 | rb_end_commit(cpu_buffer); | 2461 | rb_end_commit(cpu_buffer); |
1789 | 2462 | ||
1790 | trace_recursive_unlock(); | 2463 | trace_recursive_unlock(); |
1791 | 2464 | ||
1792 | /* | 2465 | preempt_enable_notrace(); |
1793 | * Only the last preempt count needs to restore preemption. | ||
1794 | */ | ||
1795 | if (preempt_count() == 1) | ||
1796 | ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); | ||
1797 | else | ||
1798 | preempt_enable_no_resched_notrace(); | ||
1799 | 2466 | ||
1800 | } | 2467 | } |
1801 | EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); | 2468 | EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); |
@@ -1821,15 +2488,15 @@ int ring_buffer_write(struct ring_buffer *buffer, | |||
1821 | struct ring_buffer_event *event; | 2488 | struct ring_buffer_event *event; |
1822 | void *body; | 2489 | void *body; |
1823 | int ret = -EBUSY; | 2490 | int ret = -EBUSY; |
1824 | int cpu, resched; | 2491 | int cpu; |
1825 | 2492 | ||
1826 | if (ring_buffer_flags != RB_BUFFERS_ON) | 2493 | if (ring_buffer_flags != RB_BUFFERS_ON) |
1827 | return -EBUSY; | 2494 | return -EBUSY; |
1828 | 2495 | ||
1829 | if (atomic_read(&buffer->record_disabled)) | 2496 | preempt_disable_notrace(); |
1830 | return -EBUSY; | ||
1831 | 2497 | ||
1832 | resched = ftrace_preempt_disable(); | 2498 | if (atomic_read(&buffer->record_disabled)) |
2499 | goto out; | ||
1833 | 2500 | ||
1834 | cpu = raw_smp_processor_id(); | 2501 | cpu = raw_smp_processor_id(); |
1835 | 2502 | ||
@@ -1844,7 +2511,7 @@ int ring_buffer_write(struct ring_buffer *buffer, | |||
1844 | if (length > BUF_MAX_DATA_SIZE) | 2511 | if (length > BUF_MAX_DATA_SIZE) |
1845 | goto out; | 2512 | goto out; |
1846 | 2513 | ||
1847 | event = rb_reserve_next_event(cpu_buffer, length); | 2514 | event = rb_reserve_next_event(buffer, cpu_buffer, length); |
1848 | if (!event) | 2515 | if (!event) |
1849 | goto out; | 2516 | goto out; |
1850 | 2517 | ||
@@ -1856,7 +2523,7 @@ int ring_buffer_write(struct ring_buffer *buffer, | |||
1856 | 2523 | ||
1857 | ret = 0; | 2524 | ret = 0; |
1858 | out: | 2525 | out: |
1859 | ftrace_preempt_enable(resched); | 2526 | preempt_enable_notrace(); |
1860 | 2527 | ||
1861 | return ret; | 2528 | return ret; |
1862 | } | 2529 | } |
@@ -1865,9 +2532,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_write); | |||
1865 | static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) | 2532 | static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) |
1866 | { | 2533 | { |
1867 | struct buffer_page *reader = cpu_buffer->reader_page; | 2534 | struct buffer_page *reader = cpu_buffer->reader_page; |
1868 | struct buffer_page *head = cpu_buffer->head_page; | 2535 | struct buffer_page *head = rb_set_head_page(cpu_buffer); |
1869 | struct buffer_page *commit = cpu_buffer->commit_page; | 2536 | struct buffer_page *commit = cpu_buffer->commit_page; |
1870 | 2537 | ||
2538 | /* In case of error, head will be NULL */ | ||
2539 | if (unlikely(!head)) | ||
2540 | return 1; | ||
2541 | |||
1871 | return reader->read == rb_page_commit(reader) && | 2542 | return reader->read == rb_page_commit(reader) && |
1872 | (commit == reader || | 2543 | (commit == reader || |
1873 | (commit == head && | 2544 | (commit == head && |
@@ -1894,7 +2565,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable); | |||
1894 | * @buffer: The ring buffer to enable writes | 2565 | * @buffer: The ring buffer to enable writes |
1895 | * | 2566 | * |
1896 | * Note, multiple disables will need the same number of enables | 2567 | * Note, multiple disables will need the same number of enables |
1897 | * to truely enable the writing (much like preempt_disable). | 2568 | * to truly enable the writing (much like preempt_disable). |
1898 | */ | 2569 | */ |
1899 | void ring_buffer_record_enable(struct ring_buffer *buffer) | 2570 | void ring_buffer_record_enable(struct ring_buffer *buffer) |
1900 | { | 2571 | { |
@@ -1930,7 +2601,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu); | |||
1930 | * @cpu: The CPU to enable. | 2601 | * @cpu: The CPU to enable. |
1931 | * | 2602 | * |
1932 | * Note, multiple disables will need the same number of enables | 2603 | * Note, multiple disables will need the same number of enables |
1933 | * to truely enable the writing (much like preempt_disable). | 2604 | * to truly enable the writing (much like preempt_disable). |
1934 | */ | 2605 | */ |
1935 | void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) | 2606 | void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) |
1936 | { | 2607 | { |
@@ -1944,6 +2615,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) | |||
1944 | } | 2615 | } |
1945 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); | 2616 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); |
1946 | 2617 | ||
2618 | /* | ||
2619 | * The total entries in the ring buffer is the running counter | ||
2620 | * of entries entered into the ring buffer, minus the sum of | ||
2621 | * the entries read from the ring buffer and the number of | ||
2622 | * entries that were overwritten. | ||
2623 | */ | ||
2624 | static inline unsigned long | ||
2625 | rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) | ||
2626 | { | ||
2627 | return local_read(&cpu_buffer->entries) - | ||
2628 | (local_read(&cpu_buffer->overrun) + cpu_buffer->read); | ||
2629 | } | ||
2630 | |||
1947 | /** | 2631 | /** |
1948 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer | 2632 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer |
1949 | * @buffer: The ring buffer | 2633 | * @buffer: The ring buffer |
@@ -1952,16 +2636,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); | |||
1952 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) | 2636 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) |
1953 | { | 2637 | { |
1954 | struct ring_buffer_per_cpu *cpu_buffer; | 2638 | struct ring_buffer_per_cpu *cpu_buffer; |
1955 | unsigned long ret; | ||
1956 | 2639 | ||
1957 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2640 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
1958 | return 0; | 2641 | return 0; |
1959 | 2642 | ||
1960 | cpu_buffer = buffer->buffers[cpu]; | 2643 | cpu_buffer = buffer->buffers[cpu]; |
1961 | ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun) | ||
1962 | - cpu_buffer->read; | ||
1963 | 2644 | ||
1964 | return ret; | 2645 | return rb_num_of_entries(cpu_buffer); |
1965 | } | 2646 | } |
1966 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); | 2647 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); |
1967 | 2648 | ||
@@ -1979,33 +2660,13 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) | |||
1979 | return 0; | 2660 | return 0; |
1980 | 2661 | ||
1981 | cpu_buffer = buffer->buffers[cpu]; | 2662 | cpu_buffer = buffer->buffers[cpu]; |
1982 | ret = cpu_buffer->overrun; | 2663 | ret = local_read(&cpu_buffer->overrun); |
1983 | 2664 | ||
1984 | return ret; | 2665 | return ret; |
1985 | } | 2666 | } |
1986 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); | 2667 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); |
1987 | 2668 | ||
1988 | /** | 2669 | /** |
1989 | * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped | ||
1990 | * @buffer: The ring buffer | ||
1991 | * @cpu: The per CPU buffer to get the number of overruns from | ||
1992 | */ | ||
1993 | unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu) | ||
1994 | { | ||
1995 | struct ring_buffer_per_cpu *cpu_buffer; | ||
1996 | unsigned long ret; | ||
1997 | |||
1998 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
1999 | return 0; | ||
2000 | |||
2001 | cpu_buffer = buffer->buffers[cpu]; | ||
2002 | ret = cpu_buffer->nmi_dropped; | ||
2003 | |||
2004 | return ret; | ||
2005 | } | ||
2006 | EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu); | ||
2007 | |||
2008 | /** | ||
2009 | * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits | 2670 | * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits |
2010 | * @buffer: The ring buffer | 2671 | * @buffer: The ring buffer |
2011 | * @cpu: The per CPU buffer to get the number of overruns from | 2672 | * @cpu: The per CPU buffer to get the number of overruns from |
@@ -2020,7 +2681,7 @@ ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) | |||
2020 | return 0; | 2681 | return 0; |
2021 | 2682 | ||
2022 | cpu_buffer = buffer->buffers[cpu]; | 2683 | cpu_buffer = buffer->buffers[cpu]; |
2023 | ret = cpu_buffer->commit_overrun; | 2684 | ret = local_read(&cpu_buffer->commit_overrun); |
2024 | 2685 | ||
2025 | return ret; | 2686 | return ret; |
2026 | } | 2687 | } |
@@ -2042,8 +2703,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) | |||
2042 | /* if you care about this being correct, lock the buffer */ | 2703 | /* if you care about this being correct, lock the buffer */ |
2043 | for_each_buffer_cpu(buffer, cpu) { | 2704 | for_each_buffer_cpu(buffer, cpu) { |
2044 | cpu_buffer = buffer->buffers[cpu]; | 2705 | cpu_buffer = buffer->buffers[cpu]; |
2045 | entries += (local_read(&cpu_buffer->entries) - | 2706 | entries += rb_num_of_entries(cpu_buffer); |
2046 | cpu_buffer->overrun) - cpu_buffer->read; | ||
2047 | } | 2707 | } |
2048 | 2708 | ||
2049 | return entries; | 2709 | return entries; |
@@ -2051,7 +2711,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) | |||
2051 | EXPORT_SYMBOL_GPL(ring_buffer_entries); | 2711 | EXPORT_SYMBOL_GPL(ring_buffer_entries); |
2052 | 2712 | ||
2053 | /** | 2713 | /** |
2054 | * ring_buffer_overrun_cpu - get the number of overruns in buffer | 2714 | * ring_buffer_overruns - get the number of overruns in buffer |
2055 | * @buffer: The ring buffer | 2715 | * @buffer: The ring buffer |
2056 | * | 2716 | * |
2057 | * Returns the total number of overruns in the ring buffer | 2717 | * Returns the total number of overruns in the ring buffer |
@@ -2066,7 +2726,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer) | |||
2066 | /* if you care about this being correct, lock the buffer */ | 2726 | /* if you care about this being correct, lock the buffer */ |
2067 | for_each_buffer_cpu(buffer, cpu) { | 2727 | for_each_buffer_cpu(buffer, cpu) { |
2068 | cpu_buffer = buffer->buffers[cpu]; | 2728 | cpu_buffer = buffer->buffers[cpu]; |
2069 | overruns += cpu_buffer->overrun; | 2729 | overruns += local_read(&cpu_buffer->overrun); |
2070 | } | 2730 | } |
2071 | 2731 | ||
2072 | return overruns; | 2732 | return overruns; |
@@ -2079,8 +2739,10 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) | |||
2079 | 2739 | ||
2080 | /* Iterator usage is expected to have record disabled */ | 2740 | /* Iterator usage is expected to have record disabled */ |
2081 | if (list_empty(&cpu_buffer->reader_page->list)) { | 2741 | if (list_empty(&cpu_buffer->reader_page->list)) { |
2082 | iter->head_page = cpu_buffer->head_page; | 2742 | iter->head_page = rb_set_head_page(cpu_buffer); |
2083 | iter->head = cpu_buffer->head_page->read; | 2743 | if (unlikely(!iter->head_page)) |
2744 | return; | ||
2745 | iter->head = iter->head_page->read; | ||
2084 | } else { | 2746 | } else { |
2085 | iter->head_page = cpu_buffer->reader_page; | 2747 | iter->head_page = cpu_buffer->reader_page; |
2086 | iter->head = cpu_buffer->reader_page->read; | 2748 | iter->head = cpu_buffer->reader_page->read; |
@@ -2089,6 +2751,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) | |||
2089 | iter->read_stamp = cpu_buffer->read_stamp; | 2751 | iter->read_stamp = cpu_buffer->read_stamp; |
2090 | else | 2752 | else |
2091 | iter->read_stamp = iter->head_page->page->time_stamp; | 2753 | iter->read_stamp = iter->head_page->page->time_stamp; |
2754 | iter->cache_reader_page = cpu_buffer->reader_page; | ||
2755 | iter->cache_read = cpu_buffer->read; | ||
2092 | } | 2756 | } |
2093 | 2757 | ||
2094 | /** | 2758 | /** |
@@ -2195,11 +2859,13 @@ static struct buffer_page * | |||
2195 | rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | 2859 | rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) |
2196 | { | 2860 | { |
2197 | struct buffer_page *reader = NULL; | 2861 | struct buffer_page *reader = NULL; |
2862 | unsigned long overwrite; | ||
2198 | unsigned long flags; | 2863 | unsigned long flags; |
2199 | int nr_loops = 0; | 2864 | int nr_loops = 0; |
2865 | int ret; | ||
2200 | 2866 | ||
2201 | local_irq_save(flags); | 2867 | local_irq_save(flags); |
2202 | __raw_spin_lock(&cpu_buffer->lock); | 2868 | arch_spin_lock(&cpu_buffer->lock); |
2203 | 2869 | ||
2204 | again: | 2870 | again: |
2205 | /* | 2871 | /* |
@@ -2230,39 +2896,83 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
2230 | goto out; | 2896 | goto out; |
2231 | 2897 | ||
2232 | /* | 2898 | /* |
2233 | * Splice the empty reader page into the list around the head. | ||
2234 | * Reset the reader page to size zero. | 2899 | * Reset the reader page to size zero. |
2235 | */ | 2900 | */ |
2236 | |||
2237 | reader = cpu_buffer->head_page; | ||
2238 | cpu_buffer->reader_page->list.next = reader->list.next; | ||
2239 | cpu_buffer->reader_page->list.prev = reader->list.prev; | ||
2240 | |||
2241 | local_set(&cpu_buffer->reader_page->write, 0); | 2901 | local_set(&cpu_buffer->reader_page->write, 0); |
2242 | local_set(&cpu_buffer->reader_page->entries, 0); | 2902 | local_set(&cpu_buffer->reader_page->entries, 0); |
2243 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 2903 | local_set(&cpu_buffer->reader_page->page->commit, 0); |
2904 | cpu_buffer->reader_page->real_end = 0; | ||
2905 | |||
2906 | spin: | ||
2907 | /* | ||
2908 | * Splice the empty reader page into the list around the head. | ||
2909 | */ | ||
2910 | reader = rb_set_head_page(cpu_buffer); | ||
2911 | cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next); | ||
2912 | cpu_buffer->reader_page->list.prev = reader->list.prev; | ||
2913 | |||
2914 | /* | ||
2915 | * cpu_buffer->pages just needs to point to the buffer, it | ||
2916 | * has no specific buffer page to point to. Lets move it out | ||
2917 | * of our way so we don't accidently swap it. | ||
2918 | */ | ||
2919 | cpu_buffer->pages = reader->list.prev; | ||
2244 | 2920 | ||
2245 | /* Make the reader page now replace the head */ | 2921 | /* The reader page will be pointing to the new head */ |
2246 | reader->list.prev->next = &cpu_buffer->reader_page->list; | 2922 | rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); |
2247 | reader->list.next->prev = &cpu_buffer->reader_page->list; | ||
2248 | 2923 | ||
2249 | /* | 2924 | /* |
2250 | * If the tail is on the reader, then we must set the head | 2925 | * We want to make sure we read the overruns after we set up our |
2251 | * to the inserted page, otherwise we set it one before. | 2926 | * pointers to the next object. The writer side does a |
2927 | * cmpxchg to cross pages which acts as the mb on the writer | ||
2928 | * side. Note, the reader will constantly fail the swap | ||
2929 | * while the writer is updating the pointers, so this | ||
2930 | * guarantees that the overwrite recorded here is the one we | ||
2931 | * want to compare with the last_overrun. | ||
2252 | */ | 2932 | */ |
2253 | cpu_buffer->head_page = cpu_buffer->reader_page; | 2933 | smp_mb(); |
2934 | overwrite = local_read(&(cpu_buffer->overrun)); | ||
2254 | 2935 | ||
2255 | if (cpu_buffer->commit_page != reader) | 2936 | /* |
2256 | rb_inc_page(cpu_buffer, &cpu_buffer->head_page); | 2937 | * Here's the tricky part. |
2938 | * | ||
2939 | * We need to move the pointer past the header page. | ||
2940 | * But we can only do that if a writer is not currently | ||
2941 | * moving it. The page before the header page has the | ||
2942 | * flag bit '1' set if it is pointing to the page we want. | ||
2943 | * but if the writer is in the process of moving it | ||
2944 | * than it will be '2' or already moved '0'. | ||
2945 | */ | ||
2946 | |||
2947 | ret = rb_head_page_replace(reader, cpu_buffer->reader_page); | ||
2948 | |||
2949 | /* | ||
2950 | * If we did not convert it, then we must try again. | ||
2951 | */ | ||
2952 | if (!ret) | ||
2953 | goto spin; | ||
2954 | |||
2955 | /* | ||
2956 | * Yeah! We succeeded in replacing the page. | ||
2957 | * | ||
2958 | * Now make the new head point back to the reader page. | ||
2959 | */ | ||
2960 | rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list; | ||
2961 | rb_inc_page(cpu_buffer, &cpu_buffer->head_page); | ||
2257 | 2962 | ||
2258 | /* Finally update the reader page to the new head */ | 2963 | /* Finally update the reader page to the new head */ |
2259 | cpu_buffer->reader_page = reader; | 2964 | cpu_buffer->reader_page = reader; |
2260 | rb_reset_reader_page(cpu_buffer); | 2965 | rb_reset_reader_page(cpu_buffer); |
2261 | 2966 | ||
2967 | if (overwrite != cpu_buffer->last_overrun) { | ||
2968 | cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun; | ||
2969 | cpu_buffer->last_overrun = overwrite; | ||
2970 | } | ||
2971 | |||
2262 | goto again; | 2972 | goto again; |
2263 | 2973 | ||
2264 | out: | 2974 | out: |
2265 | __raw_spin_unlock(&cpu_buffer->lock); | 2975 | arch_spin_unlock(&cpu_buffer->lock); |
2266 | local_irq_restore(flags); | 2976 | local_irq_restore(flags); |
2267 | 2977 | ||
2268 | return reader; | 2978 | return reader; |
@@ -2282,8 +2992,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) | |||
2282 | 2992 | ||
2283 | event = rb_reader_event(cpu_buffer); | 2993 | event = rb_reader_event(cpu_buffer); |
2284 | 2994 | ||
2285 | if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX | 2995 | if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
2286 | || rb_discarded_event(event)) | ||
2287 | cpu_buffer->read++; | 2996 | cpu_buffer->read++; |
2288 | 2997 | ||
2289 | rb_update_read_stamp(cpu_buffer, event); | 2998 | rb_update_read_stamp(cpu_buffer, event); |
@@ -2294,13 +3003,11 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) | |||
2294 | 3003 | ||
2295 | static void rb_advance_iter(struct ring_buffer_iter *iter) | 3004 | static void rb_advance_iter(struct ring_buffer_iter *iter) |
2296 | { | 3005 | { |
2297 | struct ring_buffer *buffer; | ||
2298 | struct ring_buffer_per_cpu *cpu_buffer; | 3006 | struct ring_buffer_per_cpu *cpu_buffer; |
2299 | struct ring_buffer_event *event; | 3007 | struct ring_buffer_event *event; |
2300 | unsigned length; | 3008 | unsigned length; |
2301 | 3009 | ||
2302 | cpu_buffer = iter->cpu_buffer; | 3010 | cpu_buffer = iter->cpu_buffer; |
2303 | buffer = cpu_buffer->buffer; | ||
2304 | 3011 | ||
2305 | /* | 3012 | /* |
2306 | * Check if we are at the end of the buffer. | 3013 | * Check if we are at the end of the buffer. |
@@ -2336,24 +3043,27 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) | |||
2336 | rb_advance_iter(iter); | 3043 | rb_advance_iter(iter); |
2337 | } | 3044 | } |
2338 | 3045 | ||
3046 | static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) | ||
3047 | { | ||
3048 | return cpu_buffer->lost_events; | ||
3049 | } | ||
3050 | |||
2339 | static struct ring_buffer_event * | 3051 | static struct ring_buffer_event * |
2340 | rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | 3052 | rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, |
3053 | unsigned long *lost_events) | ||
2341 | { | 3054 | { |
2342 | struct ring_buffer_per_cpu *cpu_buffer; | ||
2343 | struct ring_buffer_event *event; | 3055 | struct ring_buffer_event *event; |
2344 | struct buffer_page *reader; | 3056 | struct buffer_page *reader; |
2345 | int nr_loops = 0; | 3057 | int nr_loops = 0; |
2346 | 3058 | ||
2347 | cpu_buffer = buffer->buffers[cpu]; | ||
2348 | |||
2349 | again: | 3059 | again: |
2350 | /* | 3060 | /* |
2351 | * We repeat when a timestamp is encountered. It is possible | 3061 | * We repeat when a time extend is encountered. |
2352 | * to get multiple timestamps from an interrupt entering just | 3062 | * Since the time extend is always attached to a data event, |
2353 | * as one timestamp is about to be written, or from discarded | 3063 | * we should never loop more than once. |
2354 | * commits. The most that we can have is the number on a single page. | 3064 | * (We never hit the following condition more than twice). |
2355 | */ | 3065 | */ |
2356 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) | 3066 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) |
2357 | return NULL; | 3067 | return NULL; |
2358 | 3068 | ||
2359 | reader = rb_get_reader_page(cpu_buffer); | 3069 | reader = rb_get_reader_page(cpu_buffer); |
@@ -2374,7 +3084,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2374 | * the box. Return the padding, and we will release | 3084 | * the box. Return the padding, and we will release |
2375 | * the current locks, and try again. | 3085 | * the current locks, and try again. |
2376 | */ | 3086 | */ |
2377 | rb_advance_reader(cpu_buffer); | ||
2378 | return event; | 3087 | return event; |
2379 | 3088 | ||
2380 | case RINGBUF_TYPE_TIME_EXTEND: | 3089 | case RINGBUF_TYPE_TIME_EXTEND: |
@@ -2390,9 +3099,11 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2390 | case RINGBUF_TYPE_DATA: | 3099 | case RINGBUF_TYPE_DATA: |
2391 | if (ts) { | 3100 | if (ts) { |
2392 | *ts = cpu_buffer->read_stamp + event->time_delta; | 3101 | *ts = cpu_buffer->read_stamp + event->time_delta; |
2393 | ring_buffer_normalize_time_stamp(buffer, | 3102 | ring_buffer_normalize_time_stamp(cpu_buffer->buffer, |
2394 | cpu_buffer->cpu, ts); | 3103 | cpu_buffer->cpu, ts); |
2395 | } | 3104 | } |
3105 | if (lost_events) | ||
3106 | *lost_events = rb_lost_events(cpu_buffer); | ||
2396 | return event; | 3107 | return event; |
2397 | 3108 | ||
2398 | default: | 3109 | default: |
@@ -2411,27 +3122,39 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2411 | struct ring_buffer_event *event; | 3122 | struct ring_buffer_event *event; |
2412 | int nr_loops = 0; | 3123 | int nr_loops = 0; |
2413 | 3124 | ||
2414 | if (ring_buffer_iter_empty(iter)) | ||
2415 | return NULL; | ||
2416 | |||
2417 | cpu_buffer = iter->cpu_buffer; | 3125 | cpu_buffer = iter->cpu_buffer; |
2418 | buffer = cpu_buffer->buffer; | 3126 | buffer = cpu_buffer->buffer; |
2419 | 3127 | ||
3128 | /* | ||
3129 | * Check if someone performed a consuming read to | ||
3130 | * the buffer. A consuming read invalidates the iterator | ||
3131 | * and we need to reset the iterator in this case. | ||
3132 | */ | ||
3133 | if (unlikely(iter->cache_read != cpu_buffer->read || | ||
3134 | iter->cache_reader_page != cpu_buffer->reader_page)) | ||
3135 | rb_iter_reset(iter); | ||
3136 | |||
2420 | again: | 3137 | again: |
3138 | if (ring_buffer_iter_empty(iter)) | ||
3139 | return NULL; | ||
3140 | |||
2421 | /* | 3141 | /* |
2422 | * We repeat when a timestamp is encountered. | 3142 | * We repeat when a time extend is encountered. |
2423 | * We can get multiple timestamps by nested interrupts or also | 3143 | * Since the time extend is always attached to a data event, |
2424 | * if filtering is on (discarding commits). Since discarding | 3144 | * we should never loop more than once. |
2425 | * commits can be frequent we can get a lot of timestamps. | 3145 | * (We never hit the following condition more than twice). |
2426 | * But we limit them by not adding timestamps if they begin | ||
2427 | * at the start of a page. | ||
2428 | */ | 3146 | */ |
2429 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) | 3147 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) |
2430 | return NULL; | 3148 | return NULL; |
2431 | 3149 | ||
2432 | if (rb_per_cpu_empty(cpu_buffer)) | 3150 | if (rb_per_cpu_empty(cpu_buffer)) |
2433 | return NULL; | 3151 | return NULL; |
2434 | 3152 | ||
3153 | if (iter->head >= local_read(&iter->head_page->page->commit)) { | ||
3154 | rb_inc_iter(iter); | ||
3155 | goto again; | ||
3156 | } | ||
3157 | |||
2435 | event = rb_iter_head_event(iter); | 3158 | event = rb_iter_head_event(iter); |
2436 | 3159 | ||
2437 | switch (event->type_len) { | 3160 | switch (event->type_len) { |
@@ -2477,7 +3200,7 @@ static inline int rb_ok_to_lock(void) | |||
2477 | * buffer too. A one time deal is all you get from reading | 3200 | * buffer too. A one time deal is all you get from reading |
2478 | * the ring buffer from an NMI. | 3201 | * the ring buffer from an NMI. |
2479 | */ | 3202 | */ |
2480 | if (likely(!in_nmi() && !oops_in_progress)) | 3203 | if (likely(!in_nmi())) |
2481 | return 1; | 3204 | return 1; |
2482 | 3205 | ||
2483 | tracing_off_permanent(); | 3206 | tracing_off_permanent(); |
@@ -2489,12 +3212,14 @@ static inline int rb_ok_to_lock(void) | |||
2489 | * @buffer: The ring buffer to read | 3212 | * @buffer: The ring buffer to read |
2490 | * @cpu: The cpu to peak at | 3213 | * @cpu: The cpu to peak at |
2491 | * @ts: The timestamp counter of this event. | 3214 | * @ts: The timestamp counter of this event. |
3215 | * @lost_events: a variable to store if events were lost (may be NULL) | ||
2492 | * | 3216 | * |
2493 | * This will return the event that will be read next, but does | 3217 | * This will return the event that will be read next, but does |
2494 | * not consume the data. | 3218 | * not consume the data. |
2495 | */ | 3219 | */ |
2496 | struct ring_buffer_event * | 3220 | struct ring_buffer_event * |
2497 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | 3221 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, |
3222 | unsigned long *lost_events) | ||
2498 | { | 3223 | { |
2499 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 3224 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; |
2500 | struct ring_buffer_event *event; | 3225 | struct ring_buffer_event *event; |
@@ -2509,15 +3234,15 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2509 | local_irq_save(flags); | 3234 | local_irq_save(flags); |
2510 | if (dolock) | 3235 | if (dolock) |
2511 | spin_lock(&cpu_buffer->reader_lock); | 3236 | spin_lock(&cpu_buffer->reader_lock); |
2512 | event = rb_buffer_peek(buffer, cpu, ts); | 3237 | event = rb_buffer_peek(cpu_buffer, ts, lost_events); |
3238 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | ||
3239 | rb_advance_reader(cpu_buffer); | ||
2513 | if (dolock) | 3240 | if (dolock) |
2514 | spin_unlock(&cpu_buffer->reader_lock); | 3241 | spin_unlock(&cpu_buffer->reader_lock); |
2515 | local_irq_restore(flags); | 3242 | local_irq_restore(flags); |
2516 | 3243 | ||
2517 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | 3244 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
2518 | cpu_relax(); | ||
2519 | goto again; | 3245 | goto again; |
2520 | } | ||
2521 | 3246 | ||
2522 | return event; | 3247 | return event; |
2523 | } | 3248 | } |
@@ -2542,10 +3267,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2542 | event = rb_iter_peek(iter, ts); | 3267 | event = rb_iter_peek(iter, ts); |
2543 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3268 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2544 | 3269 | ||
2545 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | 3270 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
2546 | cpu_relax(); | ||
2547 | goto again; | 3271 | goto again; |
2548 | } | ||
2549 | 3272 | ||
2550 | return event; | 3273 | return event; |
2551 | } | 3274 | } |
@@ -2553,13 +3276,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2553 | /** | 3276 | /** |
2554 | * ring_buffer_consume - return an event and consume it | 3277 | * ring_buffer_consume - return an event and consume it |
2555 | * @buffer: The ring buffer to get the next event from | 3278 | * @buffer: The ring buffer to get the next event from |
3279 | * @cpu: the cpu to read the buffer from | ||
3280 | * @ts: a variable to store the timestamp (may be NULL) | ||
3281 | * @lost_events: a variable to store if events were lost (may be NULL) | ||
2556 | * | 3282 | * |
2557 | * Returns the next event in the ring buffer, and that event is consumed. | 3283 | * Returns the next event in the ring buffer, and that event is consumed. |
2558 | * Meaning, that sequential reads will keep returning a different event, | 3284 | * Meaning, that sequential reads will keep returning a different event, |
2559 | * and eventually empty the ring buffer if the producer is slower. | 3285 | * and eventually empty the ring buffer if the producer is slower. |
2560 | */ | 3286 | */ |
2561 | struct ring_buffer_event * | 3287 | struct ring_buffer_event * |
2562 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | 3288 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, |
3289 | unsigned long *lost_events) | ||
2563 | { | 3290 | { |
2564 | struct ring_buffer_per_cpu *cpu_buffer; | 3291 | struct ring_buffer_per_cpu *cpu_buffer; |
2565 | struct ring_buffer_event *event = NULL; | 3292 | struct ring_buffer_event *event = NULL; |
@@ -2580,13 +3307,12 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2580 | if (dolock) | 3307 | if (dolock) |
2581 | spin_lock(&cpu_buffer->reader_lock); | 3308 | spin_lock(&cpu_buffer->reader_lock); |
2582 | 3309 | ||
2583 | event = rb_buffer_peek(buffer, cpu, ts); | 3310 | event = rb_buffer_peek(cpu_buffer, ts, lost_events); |
2584 | if (!event) | 3311 | if (event) { |
2585 | goto out_unlock; | 3312 | cpu_buffer->lost_events = 0; |
2586 | 3313 | rb_advance_reader(cpu_buffer); | |
2587 | rb_advance_reader(cpu_buffer); | 3314 | } |
2588 | 3315 | ||
2589 | out_unlock: | ||
2590 | if (dolock) | 3316 | if (dolock) |
2591 | spin_unlock(&cpu_buffer->reader_lock); | 3317 | spin_unlock(&cpu_buffer->reader_lock); |
2592 | local_irq_restore(flags); | 3318 | local_irq_restore(flags); |
@@ -2594,33 +3320,38 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2594 | out: | 3320 | out: |
2595 | preempt_enable(); | 3321 | preempt_enable(); |
2596 | 3322 | ||
2597 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | 3323 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
2598 | cpu_relax(); | ||
2599 | goto again; | 3324 | goto again; |
2600 | } | ||
2601 | 3325 | ||
2602 | return event; | 3326 | return event; |
2603 | } | 3327 | } |
2604 | EXPORT_SYMBOL_GPL(ring_buffer_consume); | 3328 | EXPORT_SYMBOL_GPL(ring_buffer_consume); |
2605 | 3329 | ||
2606 | /** | 3330 | /** |
2607 | * ring_buffer_read_start - start a non consuming read of the buffer | 3331 | * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer |
2608 | * @buffer: The ring buffer to read from | 3332 | * @buffer: The ring buffer to read from |
2609 | * @cpu: The cpu buffer to iterate over | 3333 | * @cpu: The cpu buffer to iterate over |
2610 | * | 3334 | * |
2611 | * This starts up an iteration through the buffer. It also disables | 3335 | * This performs the initial preparations necessary to iterate |
2612 | * the recording to the buffer until the reading is finished. | 3336 | * through the buffer. Memory is allocated, buffer recording |
2613 | * This prevents the reading from being corrupted. This is not | 3337 | * is disabled, and the iterator pointer is returned to the caller. |
2614 | * a consuming read, so a producer is not expected. | ||
2615 | * | 3338 | * |
2616 | * Must be paired with ring_buffer_finish. | 3339 | * Disabling buffer recordng prevents the reading from being |
3340 | * corrupted. This is not a consuming read, so a producer is not | ||
3341 | * expected. | ||
3342 | * | ||
3343 | * After a sequence of ring_buffer_read_prepare calls, the user is | ||
3344 | * expected to make at least one call to ring_buffer_prepare_sync. | ||
3345 | * Afterwards, ring_buffer_read_start is invoked to get things going | ||
3346 | * for real. | ||
3347 | * | ||
3348 | * This overall must be paired with ring_buffer_finish. | ||
2617 | */ | 3349 | */ |
2618 | struct ring_buffer_iter * | 3350 | struct ring_buffer_iter * |
2619 | ring_buffer_read_start(struct ring_buffer *buffer, int cpu) | 3351 | ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) |
2620 | { | 3352 | { |
2621 | struct ring_buffer_per_cpu *cpu_buffer; | 3353 | struct ring_buffer_per_cpu *cpu_buffer; |
2622 | struct ring_buffer_iter *iter; | 3354 | struct ring_buffer_iter *iter; |
2623 | unsigned long flags; | ||
2624 | 3355 | ||
2625 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 3356 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2626 | return NULL; | 3357 | return NULL; |
@@ -2634,15 +3365,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu) | |||
2634 | iter->cpu_buffer = cpu_buffer; | 3365 | iter->cpu_buffer = cpu_buffer; |
2635 | 3366 | ||
2636 | atomic_inc(&cpu_buffer->record_disabled); | 3367 | atomic_inc(&cpu_buffer->record_disabled); |
3368 | |||
3369 | return iter; | ||
3370 | } | ||
3371 | EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); | ||
3372 | |||
3373 | /** | ||
3374 | * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls | ||
3375 | * | ||
3376 | * All previously invoked ring_buffer_read_prepare calls to prepare | ||
3377 | * iterators will be synchronized. Afterwards, read_buffer_read_start | ||
3378 | * calls on those iterators are allowed. | ||
3379 | */ | ||
3380 | void | ||
3381 | ring_buffer_read_prepare_sync(void) | ||
3382 | { | ||
2637 | synchronize_sched(); | 3383 | synchronize_sched(); |
3384 | } | ||
3385 | EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); | ||
3386 | |||
3387 | /** | ||
3388 | * ring_buffer_read_start - start a non consuming read of the buffer | ||
3389 | * @iter: The iterator returned by ring_buffer_read_prepare | ||
3390 | * | ||
3391 | * This finalizes the startup of an iteration through the buffer. | ||
3392 | * The iterator comes from a call to ring_buffer_read_prepare and | ||
3393 | * an intervening ring_buffer_read_prepare_sync must have been | ||
3394 | * performed. | ||
3395 | * | ||
3396 | * Must be paired with ring_buffer_finish. | ||
3397 | */ | ||
3398 | void | ||
3399 | ring_buffer_read_start(struct ring_buffer_iter *iter) | ||
3400 | { | ||
3401 | struct ring_buffer_per_cpu *cpu_buffer; | ||
3402 | unsigned long flags; | ||
3403 | |||
3404 | if (!iter) | ||
3405 | return; | ||
3406 | |||
3407 | cpu_buffer = iter->cpu_buffer; | ||
2638 | 3408 | ||
2639 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3409 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
2640 | __raw_spin_lock(&cpu_buffer->lock); | 3410 | arch_spin_lock(&cpu_buffer->lock); |
2641 | rb_iter_reset(iter); | 3411 | rb_iter_reset(iter); |
2642 | __raw_spin_unlock(&cpu_buffer->lock); | 3412 | arch_spin_unlock(&cpu_buffer->lock); |
2643 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3413 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2644 | |||
2645 | return iter; | ||
2646 | } | 3414 | } |
2647 | EXPORT_SYMBOL_GPL(ring_buffer_read_start); | 3415 | EXPORT_SYMBOL_GPL(ring_buffer_read_start); |
2648 | 3416 | ||
@@ -2677,21 +3445,19 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
2677 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 3445 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
2678 | unsigned long flags; | 3446 | unsigned long flags; |
2679 | 3447 | ||
2680 | again: | ||
2681 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3448 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
3449 | again: | ||
2682 | event = rb_iter_peek(iter, ts); | 3450 | event = rb_iter_peek(iter, ts); |
2683 | if (!event) | 3451 | if (!event) |
2684 | goto out; | 3452 | goto out; |
2685 | 3453 | ||
3454 | if (event->type_len == RINGBUF_TYPE_PADDING) | ||
3455 | goto again; | ||
3456 | |||
2686 | rb_advance_iter(iter); | 3457 | rb_advance_iter(iter); |
2687 | out: | 3458 | out: |
2688 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3459 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2689 | 3460 | ||
2690 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | ||
2691 | cpu_relax(); | ||
2692 | goto again; | ||
2693 | } | ||
2694 | |||
2695 | return event; | 3461 | return event; |
2696 | } | 3462 | } |
2697 | EXPORT_SYMBOL_GPL(ring_buffer_read); | 3463 | EXPORT_SYMBOL_GPL(ring_buffer_read); |
@@ -2709,8 +3475,10 @@ EXPORT_SYMBOL_GPL(ring_buffer_size); | |||
2709 | static void | 3475 | static void |
2710 | rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | 3476 | rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) |
2711 | { | 3477 | { |
3478 | rb_head_page_deactivate(cpu_buffer); | ||
3479 | |||
2712 | cpu_buffer->head_page | 3480 | cpu_buffer->head_page |
2713 | = list_entry(cpu_buffer->pages.next, struct buffer_page, list); | 3481 | = list_entry(cpu_buffer->pages, struct buffer_page, list); |
2714 | local_set(&cpu_buffer->head_page->write, 0); | 3482 | local_set(&cpu_buffer->head_page->write, 0); |
2715 | local_set(&cpu_buffer->head_page->entries, 0); | 3483 | local_set(&cpu_buffer->head_page->entries, 0); |
2716 | local_set(&cpu_buffer->head_page->page->commit, 0); | 3484 | local_set(&cpu_buffer->head_page->page->commit, 0); |
@@ -2726,16 +3494,20 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | |||
2726 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 3494 | local_set(&cpu_buffer->reader_page->page->commit, 0); |
2727 | cpu_buffer->reader_page->read = 0; | 3495 | cpu_buffer->reader_page->read = 0; |
2728 | 3496 | ||
2729 | cpu_buffer->nmi_dropped = 0; | 3497 | local_set(&cpu_buffer->commit_overrun, 0); |
2730 | cpu_buffer->commit_overrun = 0; | 3498 | local_set(&cpu_buffer->overrun, 0); |
2731 | cpu_buffer->overrun = 0; | ||
2732 | cpu_buffer->read = 0; | ||
2733 | local_set(&cpu_buffer->entries, 0); | 3499 | local_set(&cpu_buffer->entries, 0); |
2734 | local_set(&cpu_buffer->committing, 0); | 3500 | local_set(&cpu_buffer->committing, 0); |
2735 | local_set(&cpu_buffer->commits, 0); | 3501 | local_set(&cpu_buffer->commits, 0); |
3502 | cpu_buffer->read = 0; | ||
2736 | 3503 | ||
2737 | cpu_buffer->write_stamp = 0; | 3504 | cpu_buffer->write_stamp = 0; |
2738 | cpu_buffer->read_stamp = 0; | 3505 | cpu_buffer->read_stamp = 0; |
3506 | |||
3507 | cpu_buffer->lost_events = 0; | ||
3508 | cpu_buffer->last_overrun = 0; | ||
3509 | |||
3510 | rb_head_page_activate(cpu_buffer); | ||
2739 | } | 3511 | } |
2740 | 3512 | ||
2741 | /** | 3513 | /** |
@@ -2755,12 +3527,16 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) | |||
2755 | 3527 | ||
2756 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3528 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
2757 | 3529 | ||
2758 | __raw_spin_lock(&cpu_buffer->lock); | 3530 | if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) |
3531 | goto out; | ||
3532 | |||
3533 | arch_spin_lock(&cpu_buffer->lock); | ||
2759 | 3534 | ||
2760 | rb_reset_cpu(cpu_buffer); | 3535 | rb_reset_cpu(cpu_buffer); |
2761 | 3536 | ||
2762 | __raw_spin_unlock(&cpu_buffer->lock); | 3537 | arch_spin_unlock(&cpu_buffer->lock); |
2763 | 3538 | ||
3539 | out: | ||
2764 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3540 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2765 | 3541 | ||
2766 | atomic_dec(&cpu_buffer->record_disabled); | 3542 | atomic_dec(&cpu_buffer->record_disabled); |
@@ -2843,6 +3619,7 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) | |||
2843 | } | 3619 | } |
2844 | EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); | 3620 | EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); |
2845 | 3621 | ||
3622 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP | ||
2846 | /** | 3623 | /** |
2847 | * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers | 3624 | * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers |
2848 | * @buffer_a: One buffer to swap with | 3625 | * @buffer_a: One buffer to swap with |
@@ -2897,20 +3674,28 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | |||
2897 | atomic_inc(&cpu_buffer_a->record_disabled); | 3674 | atomic_inc(&cpu_buffer_a->record_disabled); |
2898 | atomic_inc(&cpu_buffer_b->record_disabled); | 3675 | atomic_inc(&cpu_buffer_b->record_disabled); |
2899 | 3676 | ||
3677 | ret = -EBUSY; | ||
3678 | if (local_read(&cpu_buffer_a->committing)) | ||
3679 | goto out_dec; | ||
3680 | if (local_read(&cpu_buffer_b->committing)) | ||
3681 | goto out_dec; | ||
3682 | |||
2900 | buffer_a->buffers[cpu] = cpu_buffer_b; | 3683 | buffer_a->buffers[cpu] = cpu_buffer_b; |
2901 | buffer_b->buffers[cpu] = cpu_buffer_a; | 3684 | buffer_b->buffers[cpu] = cpu_buffer_a; |
2902 | 3685 | ||
2903 | cpu_buffer_b->buffer = buffer_a; | 3686 | cpu_buffer_b->buffer = buffer_a; |
2904 | cpu_buffer_a->buffer = buffer_b; | 3687 | cpu_buffer_a->buffer = buffer_b; |
2905 | 3688 | ||
3689 | ret = 0; | ||
3690 | |||
3691 | out_dec: | ||
2906 | atomic_dec(&cpu_buffer_a->record_disabled); | 3692 | atomic_dec(&cpu_buffer_a->record_disabled); |
2907 | atomic_dec(&cpu_buffer_b->record_disabled); | 3693 | atomic_dec(&cpu_buffer_b->record_disabled); |
2908 | |||
2909 | ret = 0; | ||
2910 | out: | 3694 | out: |
2911 | return ret; | 3695 | return ret; |
2912 | } | 3696 | } |
2913 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | 3697 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); |
3698 | #endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */ | ||
2914 | 3699 | ||
2915 | /** | 3700 | /** |
2916 | * ring_buffer_alloc_read_page - allocate a page to read from buffer | 3701 | * ring_buffer_alloc_read_page - allocate a page to read from buffer |
@@ -2997,6 +3782,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
2997 | struct ring_buffer_event *event; | 3782 | struct ring_buffer_event *event; |
2998 | struct buffer_data_page *bpage; | 3783 | struct buffer_data_page *bpage; |
2999 | struct buffer_page *reader; | 3784 | struct buffer_page *reader; |
3785 | unsigned long missed_events; | ||
3000 | unsigned long flags; | 3786 | unsigned long flags; |
3001 | unsigned int commit; | 3787 | unsigned int commit; |
3002 | unsigned int read; | 3788 | unsigned int read; |
@@ -3033,6 +3819,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3033 | read = reader->read; | 3819 | read = reader->read; |
3034 | commit = rb_page_commit(reader); | 3820 | commit = rb_page_commit(reader); |
3035 | 3821 | ||
3822 | /* Check if any events were dropped */ | ||
3823 | missed_events = cpu_buffer->lost_events; | ||
3824 | |||
3036 | /* | 3825 | /* |
3037 | * If this page has been partially read or | 3826 | * If this page has been partially read or |
3038 | * if len is not big enough to read the rest of the page or | 3827 | * if len is not big enough to read the rest of the page or |
@@ -3053,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3053 | if (len > (commit - read)) | 3842 | if (len > (commit - read)) |
3054 | len = (commit - read); | 3843 | len = (commit - read); |
3055 | 3844 | ||
3056 | size = rb_event_length(event); | 3845 | /* Always keep the time extend and data together */ |
3846 | size = rb_event_ts_length(event); | ||
3057 | 3847 | ||
3058 | if (len < size) | 3848 | if (len < size) |
3059 | goto out_unlock; | 3849 | goto out_unlock; |
@@ -3063,6 +3853,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3063 | 3853 | ||
3064 | /* Need to copy one event at a time */ | 3854 | /* Need to copy one event at a time */ |
3065 | do { | 3855 | do { |
3856 | /* We need the size of one event, because | ||
3857 | * rb_advance_reader only advances by one event, | ||
3858 | * whereas rb_event_ts_length may include the size of | ||
3859 | * one or two events. | ||
3860 | * We have already ensured there's enough space if this | ||
3861 | * is a time extend. */ | ||
3862 | size = rb_event_length(event); | ||
3066 | memcpy(bpage->data + pos, rpage->data + rpos, size); | 3863 | memcpy(bpage->data + pos, rpage->data + rpos, size); |
3067 | 3864 | ||
3068 | len -= size; | 3865 | len -= size; |
@@ -3071,9 +3868,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3071 | rpos = reader->read; | 3868 | rpos = reader->read; |
3072 | pos += size; | 3869 | pos += size; |
3073 | 3870 | ||
3871 | if (rpos >= commit) | ||
3872 | break; | ||
3873 | |||
3074 | event = rb_reader_event(cpu_buffer); | 3874 | event = rb_reader_event(cpu_buffer); |
3075 | size = rb_event_length(event); | 3875 | /* Always keep the time extend and data together */ |
3076 | } while (len > size); | 3876 | size = rb_event_ts_length(event); |
3877 | } while (len >= size); | ||
3077 | 3878 | ||
3078 | /* update bpage */ | 3879 | /* update bpage */ |
3079 | local_set(&bpage->commit, pos); | 3880 | local_set(&bpage->commit, pos); |
@@ -3083,7 +3884,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3083 | read = 0; | 3884 | read = 0; |
3084 | } else { | 3885 | } else { |
3085 | /* update the entry counter */ | 3886 | /* update the entry counter */ |
3086 | cpu_buffer->read += local_read(&reader->entries); | 3887 | cpu_buffer->read += rb_page_entries(reader); |
3087 | 3888 | ||
3088 | /* swap the pages */ | 3889 | /* swap the pages */ |
3089 | rb_init_page(bpage); | 3890 | rb_init_page(bpage); |
@@ -3093,9 +3894,42 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3093 | local_set(&reader->entries, 0); | 3894 | local_set(&reader->entries, 0); |
3094 | reader->read = 0; | 3895 | reader->read = 0; |
3095 | *data_page = bpage; | 3896 | *data_page = bpage; |
3897 | |||
3898 | /* | ||
3899 | * Use the real_end for the data size, | ||
3900 | * This gives us a chance to store the lost events | ||
3901 | * on the page. | ||
3902 | */ | ||
3903 | if (reader->real_end) | ||
3904 | local_set(&bpage->commit, reader->real_end); | ||
3096 | } | 3905 | } |
3097 | ret = read; | 3906 | ret = read; |
3098 | 3907 | ||
3908 | cpu_buffer->lost_events = 0; | ||
3909 | |||
3910 | commit = local_read(&bpage->commit); | ||
3911 | /* | ||
3912 | * Set a flag in the commit field if we lost events | ||
3913 | */ | ||
3914 | if (missed_events) { | ||
3915 | /* If there is room at the end of the page to save the | ||
3916 | * missed events, then record it there. | ||
3917 | */ | ||
3918 | if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) { | ||
3919 | memcpy(&bpage->data[commit], &missed_events, | ||
3920 | sizeof(missed_events)); | ||
3921 | local_add(RB_MISSED_STORED, &bpage->commit); | ||
3922 | commit += sizeof(missed_events); | ||
3923 | } | ||
3924 | local_add(RB_MISSED_EVENTS, &bpage->commit); | ||
3925 | } | ||
3926 | |||
3927 | /* | ||
3928 | * This page may be off to user land. Zero it out here. | ||
3929 | */ | ||
3930 | if (commit < BUF_PAGE_SIZE) | ||
3931 | memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit); | ||
3932 | |||
3099 | out_unlock: | 3933 | out_unlock: |
3100 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3934 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
3101 | 3935 | ||
@@ -3104,6 +3938,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3104 | } | 3938 | } |
3105 | EXPORT_SYMBOL_GPL(ring_buffer_read_page); | 3939 | EXPORT_SYMBOL_GPL(ring_buffer_read_page); |
3106 | 3940 | ||
3941 | #ifdef CONFIG_TRACING | ||
3107 | static ssize_t | 3942 | static ssize_t |
3108 | rb_simple_read(struct file *filp, char __user *ubuf, | 3943 | rb_simple_read(struct file *filp, char __user *ubuf, |
3109 | size_t cnt, loff_t *ppos) | 3944 | size_t cnt, loff_t *ppos) |
@@ -3155,6 +3990,7 @@ static const struct file_operations rb_simple_fops = { | |||
3155 | .open = tracing_open_generic, | 3990 | .open = tracing_open_generic, |
3156 | .read = rb_simple_read, | 3991 | .read = rb_simple_read, |
3157 | .write = rb_simple_write, | 3992 | .write = rb_simple_write, |
3993 | .llseek = default_llseek, | ||
3158 | }; | 3994 | }; |
3159 | 3995 | ||
3160 | 3996 | ||
@@ -3171,6 +4007,7 @@ static __init int rb_init_debugfs(void) | |||
3171 | } | 4007 | } |
3172 | 4008 | ||
3173 | fs_initcall(rb_init_debugfs); | 4009 | fs_initcall(rb_init_debugfs); |
4010 | #endif | ||
3174 | 4011 | ||
3175 | #ifdef CONFIG_HOTPLUG_CPU | 4012 | #ifdef CONFIG_HOTPLUG_CPU |
3176 | static int rb_cpu_notify(struct notifier_block *self, | 4013 | static int rb_cpu_notify(struct notifier_block *self, |
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 573d3cc762c3..302f8a614635 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/kthread.h> | 8 | #include <linux/kthread.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/time.h> | 10 | #include <linux/time.h> |
11 | #include <asm/local.h> | ||
11 | 12 | ||
12 | struct rb_page { | 13 | struct rb_page { |
13 | u64 ts; | 14 | u64 ts; |
@@ -35,6 +36,28 @@ static int disable_reader; | |||
35 | module_param(disable_reader, uint, 0644); | 36 | module_param(disable_reader, uint, 0644); |
36 | MODULE_PARM_DESC(disable_reader, "only run producer"); | 37 | MODULE_PARM_DESC(disable_reader, "only run producer"); |
37 | 38 | ||
39 | static int write_iteration = 50; | ||
40 | module_param(write_iteration, uint, 0644); | ||
41 | MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings"); | ||
42 | |||
43 | static int producer_nice = 19; | ||
44 | static int consumer_nice = 19; | ||
45 | |||
46 | static int producer_fifo = -1; | ||
47 | static int consumer_fifo = -1; | ||
48 | |||
49 | module_param(producer_nice, uint, 0644); | ||
50 | MODULE_PARM_DESC(producer_nice, "nice prio for producer"); | ||
51 | |||
52 | module_param(consumer_nice, uint, 0644); | ||
53 | MODULE_PARM_DESC(consumer_nice, "nice prio for consumer"); | ||
54 | |||
55 | module_param(producer_fifo, uint, 0644); | ||
56 | MODULE_PARM_DESC(producer_fifo, "fifo prio for producer"); | ||
57 | |||
58 | module_param(consumer_fifo, uint, 0644); | ||
59 | MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer"); | ||
60 | |||
38 | static int read_events; | 61 | static int read_events; |
39 | 62 | ||
40 | static int kill_test; | 63 | static int kill_test; |
@@ -58,7 +81,7 @@ static enum event_status read_event(int cpu) | |||
58 | int *entry; | 81 | int *entry; |
59 | u64 ts; | 82 | u64 ts; |
60 | 83 | ||
61 | event = ring_buffer_consume(buffer, cpu, &ts); | 84 | event = ring_buffer_consume(buffer, cpu, &ts, NULL); |
62 | if (!event) | 85 | if (!event) |
63 | return EVENT_DROPPED; | 86 | return EVENT_DROPPED; |
64 | 87 | ||
@@ -90,7 +113,8 @@ static enum event_status read_page(int cpu) | |||
90 | ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1); | 113 | ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1); |
91 | if (ret >= 0) { | 114 | if (ret >= 0) { |
92 | rpage = bpage; | 115 | rpage = bpage; |
93 | commit = local_read(&rpage->commit); | 116 | /* The commit may have missed event flags set, clear them */ |
117 | commit = local_read(&rpage->commit) & 0xfffff; | ||
94 | for (i = 0; i < commit && !kill_test; i += inc) { | 118 | for (i = 0; i < commit && !kill_test; i += inc) { |
95 | 119 | ||
96 | if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) { | 120 | if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) { |
@@ -208,15 +232,18 @@ static void ring_buffer_producer(void) | |||
208 | do { | 232 | do { |
209 | struct ring_buffer_event *event; | 233 | struct ring_buffer_event *event; |
210 | int *entry; | 234 | int *entry; |
211 | 235 | int i; | |
212 | event = ring_buffer_lock_reserve(buffer, 10); | 236 | |
213 | if (!event) { | 237 | for (i = 0; i < write_iteration; i++) { |
214 | missed++; | 238 | event = ring_buffer_lock_reserve(buffer, 10); |
215 | } else { | 239 | if (!event) { |
216 | hit++; | 240 | missed++; |
217 | entry = ring_buffer_event_data(event); | 241 | } else { |
218 | *entry = smp_processor_id(); | 242 | hit++; |
219 | ring_buffer_unlock_commit(buffer, event); | 243 | entry = ring_buffer_event_data(event); |
244 | *entry = smp_processor_id(); | ||
245 | ring_buffer_unlock_commit(buffer, event); | ||
246 | } | ||
220 | } | 247 | } |
221 | do_gettimeofday(&end_tv); | 248 | do_gettimeofday(&end_tv); |
222 | 249 | ||
@@ -263,6 +290,27 @@ static void ring_buffer_producer(void) | |||
263 | 290 | ||
264 | if (kill_test) | 291 | if (kill_test) |
265 | trace_printk("ERROR!\n"); | 292 | trace_printk("ERROR!\n"); |
293 | |||
294 | if (!disable_reader) { | ||
295 | if (consumer_fifo < 0) | ||
296 | trace_printk("Running Consumer at nice: %d\n", | ||
297 | consumer_nice); | ||
298 | else | ||
299 | trace_printk("Running Consumer at SCHED_FIFO %d\n", | ||
300 | consumer_fifo); | ||
301 | } | ||
302 | if (producer_fifo < 0) | ||
303 | trace_printk("Running Producer at nice: %d\n", | ||
304 | producer_nice); | ||
305 | else | ||
306 | trace_printk("Running Producer at SCHED_FIFO %d\n", | ||
307 | producer_fifo); | ||
308 | |||
309 | /* Let the user know that the test is running at low priority */ | ||
310 | if (producer_fifo < 0 && consumer_fifo < 0 && | ||
311 | producer_nice == 19 && consumer_nice == 19) | ||
312 | trace_printk("WARNING!!! This test is running at lowest priority.\n"); | ||
313 | |||
266 | trace_printk("Time: %lld (usecs)\n", time); | 314 | trace_printk("Time: %lld (usecs)\n", time); |
267 | trace_printk("Overruns: %lld\n", overruns); | 315 | trace_printk("Overruns: %lld\n", overruns); |
268 | if (disable_reader) | 316 | if (disable_reader) |
@@ -392,6 +440,27 @@ static int __init ring_buffer_benchmark_init(void) | |||
392 | if (IS_ERR(producer)) | 440 | if (IS_ERR(producer)) |
393 | goto out_kill; | 441 | goto out_kill; |
394 | 442 | ||
443 | /* | ||
444 | * Run them as low-prio background tasks by default: | ||
445 | */ | ||
446 | if (!disable_reader) { | ||
447 | if (consumer_fifo >= 0) { | ||
448 | struct sched_param param = { | ||
449 | .sched_priority = consumer_fifo | ||
450 | }; | ||
451 | sched_setscheduler(consumer, SCHED_FIFO, ¶m); | ||
452 | } else | ||
453 | set_user_nice(consumer, consumer_nice); | ||
454 | } | ||
455 | |||
456 | if (producer_fifo >= 0) { | ||
457 | struct sched_param param = { | ||
458 | .sched_priority = consumer_fifo | ||
459 | }; | ||
460 | sched_setscheduler(producer, SCHED_FIFO, ¶m); | ||
461 | } else | ||
462 | set_user_nice(producer, producer_nice); | ||
463 | |||
395 | return 0; | 464 | return 0; |
396 | 465 | ||
397 | out_kill: | 466 | out_kill: |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 076fa6f0ee48..dc53ecb80589 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -12,7 +12,7 @@ | |||
12 | * Copyright (C) 2004 William Lee Irwin III | 12 | * Copyright (C) 2004 William Lee Irwin III |
13 | */ | 13 | */ |
14 | #include <linux/ring_buffer.h> | 14 | #include <linux/ring_buffer.h> |
15 | #include <linux/utsrelease.h> | 15 | #include <generated/utsrelease.h> |
16 | #include <linux/stacktrace.h> | 16 | #include <linux/stacktrace.h> |
17 | #include <linux/writeback.h> | 17 | #include <linux/writeback.h> |
18 | #include <linux/kallsyms.h> | 18 | #include <linux/kallsyms.h> |
@@ -31,10 +31,11 @@ | |||
31 | #include <linux/splice.h> | 31 | #include <linux/splice.h> |
32 | #include <linux/kdebug.h> | 32 | #include <linux/kdebug.h> |
33 | #include <linux/string.h> | 33 | #include <linux/string.h> |
34 | #include <linux/rwsem.h> | ||
35 | #include <linux/slab.h> | ||
34 | #include <linux/ctype.h> | 36 | #include <linux/ctype.h> |
35 | #include <linux/init.h> | 37 | #include <linux/init.h> |
36 | #include <linux/poll.h> | 38 | #include <linux/poll.h> |
37 | #include <linux/gfp.h> | ||
38 | #include <linux/fs.h> | 39 | #include <linux/fs.h> |
39 | 40 | ||
40 | #include "trace.h" | 41 | #include "trace.h" |
@@ -42,14 +43,11 @@ | |||
42 | 43 | ||
43 | #define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) | 44 | #define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) |
44 | 45 | ||
45 | unsigned long __read_mostly tracing_max_latency; | ||
46 | unsigned long __read_mostly tracing_thresh; | ||
47 | |||
48 | /* | 46 | /* |
49 | * On boot up, the ring buffer is set to the minimum size, so that | 47 | * On boot up, the ring buffer is set to the minimum size, so that |
50 | * we do not waste memory on systems that are not using tracing. | 48 | * we do not waste memory on systems that are not using tracing. |
51 | */ | 49 | */ |
52 | static int ring_buffer_expanded; | 50 | int ring_buffer_expanded; |
53 | 51 | ||
54 | /* | 52 | /* |
55 | * We need to change this state when a selftest is running. | 53 | * We need to change this state when a selftest is running. |
@@ -63,7 +61,7 @@ static bool __read_mostly tracing_selftest_running; | |||
63 | /* | 61 | /* |
64 | * If a tracer is running, we do not want to run SELFTEST. | 62 | * If a tracer is running, we do not want to run SELFTEST. |
65 | */ | 63 | */ |
66 | static bool __read_mostly tracing_selftest_disabled; | 64 | bool __read_mostly tracing_selftest_disabled; |
67 | 65 | ||
68 | /* For tracers that don't implement custom flags */ | 66 | /* For tracers that don't implement custom flags */ |
69 | static struct tracer_opt dummy_tracer_opt[] = { | 67 | static struct tracer_opt dummy_tracer_opt[] = { |
@@ -88,27 +86,21 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set) | |||
88 | */ | 86 | */ |
89 | static int tracing_disabled = 1; | 87 | static int tracing_disabled = 1; |
90 | 88 | ||
91 | static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); | 89 | DEFINE_PER_CPU(int, ftrace_cpu_disabled); |
92 | 90 | ||
93 | static inline void ftrace_disable_cpu(void) | 91 | static inline void ftrace_disable_cpu(void) |
94 | { | 92 | { |
95 | preempt_disable(); | 93 | preempt_disable(); |
96 | local_inc(&__get_cpu_var(ftrace_cpu_disabled)); | 94 | __this_cpu_inc(ftrace_cpu_disabled); |
97 | } | 95 | } |
98 | 96 | ||
99 | static inline void ftrace_enable_cpu(void) | 97 | static inline void ftrace_enable_cpu(void) |
100 | { | 98 | { |
101 | local_dec(&__get_cpu_var(ftrace_cpu_disabled)); | 99 | __this_cpu_dec(ftrace_cpu_disabled); |
102 | preempt_enable(); | 100 | preempt_enable(); |
103 | } | 101 | } |
104 | 102 | ||
105 | static cpumask_var_t __read_mostly tracing_buffer_mask; | 103 | cpumask_var_t __read_mostly tracing_buffer_mask; |
106 | |||
107 | /* Define which cpu buffers are currently read in trace_pipe */ | ||
108 | static cpumask_var_t tracing_reader_cpumask; | ||
109 | |||
110 | #define for_each_tracing_cpu(cpu) \ | ||
111 | for_each_cpu(cpu, tracing_buffer_mask) | ||
112 | 104 | ||
113 | /* | 105 | /* |
114 | * ftrace_dump_on_oops - variable to dump ftrace buffer on oops | 106 | * ftrace_dump_on_oops - variable to dump ftrace buffer on oops |
@@ -121,30 +113,42 @@ static cpumask_var_t tracing_reader_cpumask; | |||
121 | * | 113 | * |
122 | * It is default off, but you can enable it with either specifying | 114 | * It is default off, but you can enable it with either specifying |
123 | * "ftrace_dump_on_oops" in the kernel command line, or setting | 115 | * "ftrace_dump_on_oops" in the kernel command line, or setting |
124 | * /proc/sys/kernel/ftrace_dump_on_oops to true. | 116 | * /proc/sys/kernel/ftrace_dump_on_oops |
117 | * Set 1 if you want to dump buffers of all CPUs | ||
118 | * Set 2 if you want to dump the buffer of the CPU that triggered oops | ||
125 | */ | 119 | */ |
126 | int ftrace_dump_on_oops; | 120 | |
121 | enum ftrace_dump_mode ftrace_dump_on_oops; | ||
127 | 122 | ||
128 | static int tracing_set_tracer(const char *buf); | 123 | static int tracing_set_tracer(const char *buf); |
129 | 124 | ||
130 | #define BOOTUP_TRACER_SIZE 100 | 125 | #define MAX_TRACER_SIZE 100 |
131 | static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata; | 126 | static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; |
132 | static char *default_bootup_tracer; | 127 | static char *default_bootup_tracer; |
133 | 128 | ||
134 | static int __init set_ftrace(char *str) | 129 | static int __init set_cmdline_ftrace(char *str) |
135 | { | 130 | { |
136 | strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE); | 131 | strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); |
137 | default_bootup_tracer = bootup_tracer_buf; | 132 | default_bootup_tracer = bootup_tracer_buf; |
138 | /* We are using ftrace early, expand it */ | 133 | /* We are using ftrace early, expand it */ |
139 | ring_buffer_expanded = 1; | 134 | ring_buffer_expanded = 1; |
140 | return 1; | 135 | return 1; |
141 | } | 136 | } |
142 | __setup("ftrace=", set_ftrace); | 137 | __setup("ftrace=", set_cmdline_ftrace); |
143 | 138 | ||
144 | static int __init set_ftrace_dump_on_oops(char *str) | 139 | static int __init set_ftrace_dump_on_oops(char *str) |
145 | { | 140 | { |
146 | ftrace_dump_on_oops = 1; | 141 | if (*str++ != '=' || !*str) { |
147 | return 1; | 142 | ftrace_dump_on_oops = DUMP_ALL; |
143 | return 1; | ||
144 | } | ||
145 | |||
146 | if (!strcmp("orig_cpu", str)) { | ||
147 | ftrace_dump_on_oops = DUMP_ORIG; | ||
148 | return 1; | ||
149 | } | ||
150 | |||
151 | return 0; | ||
148 | } | 152 | } |
149 | __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); | 153 | __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); |
150 | 154 | ||
@@ -171,10 +175,11 @@ static struct trace_array global_trace; | |||
171 | 175 | ||
172 | static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); | 176 | static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); |
173 | 177 | ||
174 | int filter_current_check_discard(struct ftrace_event_call *call, void *rec, | 178 | int filter_current_check_discard(struct ring_buffer *buffer, |
179 | struct ftrace_event_call *call, void *rec, | ||
175 | struct ring_buffer_event *event) | 180 | struct ring_buffer_event *event) |
176 | { | 181 | { |
177 | return filter_check_discard(call, rec, global_trace.buffer, event); | 182 | return filter_check_discard(call, rec, buffer, event); |
178 | } | 183 | } |
179 | EXPORT_SYMBOL_GPL(filter_current_check_discard); | 184 | EXPORT_SYMBOL_GPL(filter_current_check_discard); |
180 | 185 | ||
@@ -204,7 +209,7 @@ cycle_t ftrace_now(int cpu) | |||
204 | */ | 209 | */ |
205 | static struct trace_array max_tr; | 210 | static struct trace_array max_tr; |
206 | 211 | ||
207 | static DEFINE_PER_CPU(struct trace_array_cpu, max_data); | 212 | static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data); |
208 | 213 | ||
209 | /* tracer_enabled is used to toggle activation of a tracer */ | 214 | /* tracer_enabled is used to toggle activation of a tracer */ |
210 | static int tracer_enabled = 1; | 215 | static int tracer_enabled = 1; |
@@ -243,19 +248,91 @@ static struct tracer *trace_types __read_mostly; | |||
243 | static struct tracer *current_trace __read_mostly; | 248 | static struct tracer *current_trace __read_mostly; |
244 | 249 | ||
245 | /* | 250 | /* |
246 | * max_tracer_type_len is used to simplify the allocating of | 251 | * trace_types_lock is used to protect the trace_types list. |
247 | * buffers to read userspace tracer names. We keep track of | ||
248 | * the longest tracer name registered. | ||
249 | */ | 252 | */ |
250 | static int max_tracer_type_len; | 253 | static DEFINE_MUTEX(trace_types_lock); |
251 | 254 | ||
252 | /* | 255 | /* |
253 | * trace_types_lock is used to protect the trace_types list. | 256 | * serialize the access of the ring buffer |
254 | * This lock is also used to keep user access serialized. | 257 | * |
255 | * Accesses from userspace will grab this lock while userspace | 258 | * ring buffer serializes readers, but it is low level protection. |
256 | * activities happen inside the kernel. | 259 | * The validity of the events (which returns by ring_buffer_peek() ..etc) |
260 | * are not protected by ring buffer. | ||
261 | * | ||
262 | * The content of events may become garbage if we allow other process consumes | ||
263 | * these events concurrently: | ||
264 | * A) the page of the consumed events may become a normal page | ||
265 | * (not reader page) in ring buffer, and this page will be rewrited | ||
266 | * by events producer. | ||
267 | * B) The page of the consumed events may become a page for splice_read, | ||
268 | * and this page will be returned to system. | ||
269 | * | ||
270 | * These primitives allow multi process access to different cpu ring buffer | ||
271 | * concurrently. | ||
272 | * | ||
273 | * These primitives don't distinguish read-only and read-consume access. | ||
274 | * Multi read-only access are also serialized. | ||
257 | */ | 275 | */ |
258 | static DEFINE_MUTEX(trace_types_lock); | 276 | |
277 | #ifdef CONFIG_SMP | ||
278 | static DECLARE_RWSEM(all_cpu_access_lock); | ||
279 | static DEFINE_PER_CPU(struct mutex, cpu_access_lock); | ||
280 | |||
281 | static inline void trace_access_lock(int cpu) | ||
282 | { | ||
283 | if (cpu == TRACE_PIPE_ALL_CPU) { | ||
284 | /* gain it for accessing the whole ring buffer. */ | ||
285 | down_write(&all_cpu_access_lock); | ||
286 | } else { | ||
287 | /* gain it for accessing a cpu ring buffer. */ | ||
288 | |||
289 | /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */ | ||
290 | down_read(&all_cpu_access_lock); | ||
291 | |||
292 | /* Secondly block other access to this @cpu ring buffer. */ | ||
293 | mutex_lock(&per_cpu(cpu_access_lock, cpu)); | ||
294 | } | ||
295 | } | ||
296 | |||
297 | static inline void trace_access_unlock(int cpu) | ||
298 | { | ||
299 | if (cpu == TRACE_PIPE_ALL_CPU) { | ||
300 | up_write(&all_cpu_access_lock); | ||
301 | } else { | ||
302 | mutex_unlock(&per_cpu(cpu_access_lock, cpu)); | ||
303 | up_read(&all_cpu_access_lock); | ||
304 | } | ||
305 | } | ||
306 | |||
307 | static inline void trace_access_lock_init(void) | ||
308 | { | ||
309 | int cpu; | ||
310 | |||
311 | for_each_possible_cpu(cpu) | ||
312 | mutex_init(&per_cpu(cpu_access_lock, cpu)); | ||
313 | } | ||
314 | |||
315 | #else | ||
316 | |||
317 | static DEFINE_MUTEX(access_lock); | ||
318 | |||
319 | static inline void trace_access_lock(int cpu) | ||
320 | { | ||
321 | (void)cpu; | ||
322 | mutex_lock(&access_lock); | ||
323 | } | ||
324 | |||
325 | static inline void trace_access_unlock(int cpu) | ||
326 | { | ||
327 | (void)cpu; | ||
328 | mutex_unlock(&access_lock); | ||
329 | } | ||
330 | |||
331 | static inline void trace_access_lock_init(void) | ||
332 | { | ||
333 | } | ||
334 | |||
335 | #endif | ||
259 | 336 | ||
260 | /* trace_wait is a waitqueue for tasks blocked on trace_poll */ | 337 | /* trace_wait is a waitqueue for tasks blocked on trace_poll */ |
261 | static DECLARE_WAIT_QUEUE_HEAD(trace_wait); | 338 | static DECLARE_WAIT_QUEUE_HEAD(trace_wait); |
@@ -263,7 +340,10 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); | |||
263 | /* trace_flags holds trace_options default values */ | 340 | /* trace_flags holds trace_options default values */ |
264 | unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | | 341 | unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | |
265 | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | | 342 | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | |
266 | TRACE_ITER_GRAPH_TIME; | 343 | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD; |
344 | |||
345 | static int trace_stop_count; | ||
346 | static DEFINE_SPINLOCK(tracing_start_lock); | ||
267 | 347 | ||
268 | /** | 348 | /** |
269 | * trace_wake_up - wake up tasks waiting for trace input | 349 | * trace_wake_up - wake up tasks waiting for trace input |
@@ -273,30 +353,50 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | | |||
273 | */ | 353 | */ |
274 | void trace_wake_up(void) | 354 | void trace_wake_up(void) |
275 | { | 355 | { |
356 | int cpu; | ||
357 | |||
358 | if (trace_flags & TRACE_ITER_BLOCK) | ||
359 | return; | ||
276 | /* | 360 | /* |
277 | * The runqueue_is_locked() can fail, but this is the best we | 361 | * The runqueue_is_locked() can fail, but this is the best we |
278 | * have for now: | 362 | * have for now: |
279 | */ | 363 | */ |
280 | if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked()) | 364 | cpu = get_cpu(); |
365 | if (!runqueue_is_locked(cpu)) | ||
281 | wake_up(&trace_wait); | 366 | wake_up(&trace_wait); |
367 | put_cpu(); | ||
282 | } | 368 | } |
283 | 369 | ||
284 | static int __init set_buf_size(char *str) | 370 | static int __init set_buf_size(char *str) |
285 | { | 371 | { |
286 | unsigned long buf_size; | 372 | unsigned long buf_size; |
287 | int ret; | ||
288 | 373 | ||
289 | if (!str) | 374 | if (!str) |
290 | return 0; | 375 | return 0; |
291 | ret = strict_strtoul(str, 0, &buf_size); | 376 | buf_size = memparse(str, &str); |
292 | /* nr_entries can not be zero */ | 377 | /* nr_entries can not be zero */ |
293 | if (ret < 0 || buf_size == 0) | 378 | if (buf_size == 0) |
294 | return 0; | 379 | return 0; |
295 | trace_buf_size = buf_size; | 380 | trace_buf_size = buf_size; |
296 | return 1; | 381 | return 1; |
297 | } | 382 | } |
298 | __setup("trace_buf_size=", set_buf_size); | 383 | __setup("trace_buf_size=", set_buf_size); |
299 | 384 | ||
385 | static int __init set_tracing_thresh(char *str) | ||
386 | { | ||
387 | unsigned long threshhold; | ||
388 | int ret; | ||
389 | |||
390 | if (!str) | ||
391 | return 0; | ||
392 | ret = strict_strtoul(str, 0, &threshhold); | ||
393 | if (ret < 0) | ||
394 | return 0; | ||
395 | tracing_thresh = threshhold * 1000; | ||
396 | return 1; | ||
397 | } | ||
398 | __setup("tracing_thresh=", set_tracing_thresh); | ||
399 | |||
300 | unsigned long nsecs_to_usecs(unsigned long nsecs) | 400 | unsigned long nsecs_to_usecs(unsigned long nsecs) |
301 | { | 401 | { |
302 | return nsecs / 1000; | 402 | return nsecs / 1000; |
@@ -313,7 +413,6 @@ static const char *trace_options[] = { | |||
313 | "bin", | 413 | "bin", |
314 | "block", | 414 | "block", |
315 | "stacktrace", | 415 | "stacktrace", |
316 | "sched-tree", | ||
317 | "trace_printk", | 416 | "trace_printk", |
318 | "ftrace_preempt", | 417 | "ftrace_preempt", |
319 | "branch", | 418 | "branch", |
@@ -323,49 +422,126 @@ static const char *trace_options[] = { | |||
323 | "printk-msg-only", | 422 | "printk-msg-only", |
324 | "context-info", | 423 | "context-info", |
325 | "latency-format", | 424 | "latency-format", |
326 | "global-clock", | ||
327 | "sleep-time", | 425 | "sleep-time", |
328 | "graph-time", | 426 | "graph-time", |
427 | "record-cmd", | ||
329 | NULL | 428 | NULL |
330 | }; | 429 | }; |
331 | 430 | ||
431 | static struct { | ||
432 | u64 (*func)(void); | ||
433 | const char *name; | ||
434 | } trace_clocks[] = { | ||
435 | { trace_clock_local, "local" }, | ||
436 | { trace_clock_global, "global" }, | ||
437 | }; | ||
438 | |||
439 | int trace_clock_id; | ||
440 | |||
332 | /* | 441 | /* |
333 | * ftrace_max_lock is used to protect the swapping of buffers | 442 | * trace_parser_get_init - gets the buffer for trace parser |
334 | * when taking a max snapshot. The buffers themselves are | ||
335 | * protected by per_cpu spinlocks. But the action of the swap | ||
336 | * needs its own lock. | ||
337 | * | ||
338 | * This is defined as a raw_spinlock_t in order to help | ||
339 | * with performance when lockdep debugging is enabled. | ||
340 | */ | 443 | */ |
341 | static raw_spinlock_t ftrace_max_lock = | 444 | int trace_parser_get_init(struct trace_parser *parser, int size) |
342 | (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 445 | { |
446 | memset(parser, 0, sizeof(*parser)); | ||
447 | |||
448 | parser->buffer = kmalloc(size, GFP_KERNEL); | ||
449 | if (!parser->buffer) | ||
450 | return 1; | ||
451 | |||
452 | parser->size = size; | ||
453 | return 0; | ||
454 | } | ||
343 | 455 | ||
344 | /* | 456 | /* |
345 | * Copy the new maximum trace into the separate maximum-trace | 457 | * trace_parser_put - frees the buffer for trace parser |
346 | * structure. (this way the maximum trace is permanently saved, | ||
347 | * for later retrieval via /sys/kernel/debug/tracing/latency_trace) | ||
348 | */ | 458 | */ |
349 | static void | 459 | void trace_parser_put(struct trace_parser *parser) |
350 | __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) | ||
351 | { | 460 | { |
352 | struct trace_array_cpu *data = tr->data[cpu]; | 461 | kfree(parser->buffer); |
462 | } | ||
353 | 463 | ||
354 | max_tr.cpu = cpu; | 464 | /* |
355 | max_tr.time_start = data->preempt_timestamp; | 465 | * trace_get_user - reads the user input string separated by space |
466 | * (matched by isspace(ch)) | ||
467 | * | ||
468 | * For each string found the 'struct trace_parser' is updated, | ||
469 | * and the function returns. | ||
470 | * | ||
471 | * Returns number of bytes read. | ||
472 | * | ||
473 | * See kernel/trace/trace.h for 'struct trace_parser' details. | ||
474 | */ | ||
475 | int trace_get_user(struct trace_parser *parser, const char __user *ubuf, | ||
476 | size_t cnt, loff_t *ppos) | ||
477 | { | ||
478 | char ch; | ||
479 | size_t read = 0; | ||
480 | ssize_t ret; | ||
481 | |||
482 | if (!*ppos) | ||
483 | trace_parser_clear(parser); | ||
356 | 484 | ||
357 | data = max_tr.data[cpu]; | 485 | ret = get_user(ch, ubuf++); |
358 | data->saved_latency = tracing_max_latency; | 486 | if (ret) |
487 | goto out; | ||
359 | 488 | ||
360 | memcpy(data->comm, tsk->comm, TASK_COMM_LEN); | 489 | read++; |
361 | data->pid = tsk->pid; | 490 | cnt--; |
362 | data->uid = task_uid(tsk); | ||
363 | data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; | ||
364 | data->policy = tsk->policy; | ||
365 | data->rt_priority = tsk->rt_priority; | ||
366 | 491 | ||
367 | /* record this tasks comm */ | 492 | /* |
368 | tracing_record_cmdline(tsk); | 493 | * The parser is not finished with the last write, |
494 | * continue reading the user input without skipping spaces. | ||
495 | */ | ||
496 | if (!parser->cont) { | ||
497 | /* skip white space */ | ||
498 | while (cnt && isspace(ch)) { | ||
499 | ret = get_user(ch, ubuf++); | ||
500 | if (ret) | ||
501 | goto out; | ||
502 | read++; | ||
503 | cnt--; | ||
504 | } | ||
505 | |||
506 | /* only spaces were written */ | ||
507 | if (isspace(ch)) { | ||
508 | *ppos += read; | ||
509 | ret = read; | ||
510 | goto out; | ||
511 | } | ||
512 | |||
513 | parser->idx = 0; | ||
514 | } | ||
515 | |||
516 | /* read the non-space input */ | ||
517 | while (cnt && !isspace(ch)) { | ||
518 | if (parser->idx < parser->size - 1) | ||
519 | parser->buffer[parser->idx++] = ch; | ||
520 | else { | ||
521 | ret = -EINVAL; | ||
522 | goto out; | ||
523 | } | ||
524 | ret = get_user(ch, ubuf++); | ||
525 | if (ret) | ||
526 | goto out; | ||
527 | read++; | ||
528 | cnt--; | ||
529 | } | ||
530 | |||
531 | /* We either got finished input or we have to wait for another call. */ | ||
532 | if (isspace(ch)) { | ||
533 | parser->buffer[parser->idx] = 0; | ||
534 | parser->cont = false; | ||
535 | } else { | ||
536 | parser->cont = true; | ||
537 | parser->buffer[parser->idx++] = ch; | ||
538 | } | ||
539 | |||
540 | *ppos += read; | ||
541 | ret = read; | ||
542 | |||
543 | out: | ||
544 | return ret; | ||
369 | } | 545 | } |
370 | 546 | ||
371 | ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) | 547 | ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) |
@@ -411,6 +587,57 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) | |||
411 | return cnt; | 587 | return cnt; |
412 | } | 588 | } |
413 | 589 | ||
590 | /* | ||
591 | * ftrace_max_lock is used to protect the swapping of buffers | ||
592 | * when taking a max snapshot. The buffers themselves are | ||
593 | * protected by per_cpu spinlocks. But the action of the swap | ||
594 | * needs its own lock. | ||
595 | * | ||
596 | * This is defined as a arch_spinlock_t in order to help | ||
597 | * with performance when lockdep debugging is enabled. | ||
598 | * | ||
599 | * It is also used in other places outside the update_max_tr | ||
600 | * so it needs to be defined outside of the | ||
601 | * CONFIG_TRACER_MAX_TRACE. | ||
602 | */ | ||
603 | static arch_spinlock_t ftrace_max_lock = | ||
604 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; | ||
605 | |||
606 | unsigned long __read_mostly tracing_thresh; | ||
607 | |||
608 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
609 | unsigned long __read_mostly tracing_max_latency; | ||
610 | |||
611 | /* | ||
612 | * Copy the new maximum trace into the separate maximum-trace | ||
613 | * structure. (this way the maximum trace is permanently saved, | ||
614 | * for later retrieval via /sys/kernel/debug/tracing/latency_trace) | ||
615 | */ | ||
616 | static void | ||
617 | __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) | ||
618 | { | ||
619 | struct trace_array_cpu *data = tr->data[cpu]; | ||
620 | struct trace_array_cpu *max_data; | ||
621 | |||
622 | max_tr.cpu = cpu; | ||
623 | max_tr.time_start = data->preempt_timestamp; | ||
624 | |||
625 | max_data = max_tr.data[cpu]; | ||
626 | max_data->saved_latency = tracing_max_latency; | ||
627 | max_data->critical_start = data->critical_start; | ||
628 | max_data->critical_end = data->critical_end; | ||
629 | |||
630 | memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN); | ||
631 | max_data->pid = tsk->pid; | ||
632 | max_data->uid = task_uid(tsk); | ||
633 | max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; | ||
634 | max_data->policy = tsk->policy; | ||
635 | max_data->rt_priority = tsk->rt_priority; | ||
636 | |||
637 | /* record this tasks comm */ | ||
638 | tracing_record_cmdline(tsk); | ||
639 | } | ||
640 | |||
414 | /** | 641 | /** |
415 | * update_max_tr - snapshot all trace buffers from global_trace to max_tr | 642 | * update_max_tr - snapshot all trace buffers from global_trace to max_tr |
416 | * @tr: tracer | 643 | * @tr: tracer |
@@ -425,18 +652,21 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) | |||
425 | { | 652 | { |
426 | struct ring_buffer *buf = tr->buffer; | 653 | struct ring_buffer *buf = tr->buffer; |
427 | 654 | ||
655 | if (trace_stop_count) | ||
656 | return; | ||
657 | |||
428 | WARN_ON_ONCE(!irqs_disabled()); | 658 | WARN_ON_ONCE(!irqs_disabled()); |
429 | __raw_spin_lock(&ftrace_max_lock); | 659 | if (!current_trace->use_max_tr) { |
660 | WARN_ON_ONCE(1); | ||
661 | return; | ||
662 | } | ||
663 | arch_spin_lock(&ftrace_max_lock); | ||
430 | 664 | ||
431 | tr->buffer = max_tr.buffer; | 665 | tr->buffer = max_tr.buffer; |
432 | max_tr.buffer = buf; | 666 | max_tr.buffer = buf; |
433 | 667 | ||
434 | ftrace_disable_cpu(); | ||
435 | ring_buffer_reset(tr->buffer); | ||
436 | ftrace_enable_cpu(); | ||
437 | |||
438 | __update_max_tr(tr, tsk, cpu); | 668 | __update_max_tr(tr, tsk, cpu); |
439 | __raw_spin_unlock(&ftrace_max_lock); | 669 | arch_spin_unlock(&ftrace_max_lock); |
440 | } | 670 | } |
441 | 671 | ||
442 | /** | 672 | /** |
@@ -452,21 +682,40 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) | |||
452 | { | 682 | { |
453 | int ret; | 683 | int ret; |
454 | 684 | ||
685 | if (trace_stop_count) | ||
686 | return; | ||
687 | |||
455 | WARN_ON_ONCE(!irqs_disabled()); | 688 | WARN_ON_ONCE(!irqs_disabled()); |
456 | __raw_spin_lock(&ftrace_max_lock); | 689 | if (!current_trace->use_max_tr) { |
690 | WARN_ON_ONCE(1); | ||
691 | return; | ||
692 | } | ||
693 | |||
694 | arch_spin_lock(&ftrace_max_lock); | ||
457 | 695 | ||
458 | ftrace_disable_cpu(); | 696 | ftrace_disable_cpu(); |
459 | 697 | ||
460 | ring_buffer_reset(max_tr.buffer); | ||
461 | ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); | 698 | ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); |
462 | 699 | ||
700 | if (ret == -EBUSY) { | ||
701 | /* | ||
702 | * We failed to swap the buffer due to a commit taking | ||
703 | * place on this CPU. We fail to record, but we reset | ||
704 | * the max trace buffer (no one writes directly to it) | ||
705 | * and flag that it failed. | ||
706 | */ | ||
707 | trace_array_printk(&max_tr, _THIS_IP_, | ||
708 | "Failed to swap buffers due to commit in progress\n"); | ||
709 | } | ||
710 | |||
463 | ftrace_enable_cpu(); | 711 | ftrace_enable_cpu(); |
464 | 712 | ||
465 | WARN_ON_ONCE(ret && ret != -EAGAIN); | 713 | WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); |
466 | 714 | ||
467 | __update_max_tr(tr, tsk, cpu); | 715 | __update_max_tr(tr, tsk, cpu); |
468 | __raw_spin_unlock(&ftrace_max_lock); | 716 | arch_spin_unlock(&ftrace_max_lock); |
469 | } | 717 | } |
718 | #endif /* CONFIG_TRACER_MAX_TRACE */ | ||
470 | 719 | ||
471 | /** | 720 | /** |
472 | * register_tracer - register a tracer with the ftrace system. | 721 | * register_tracer - register a tracer with the ftrace system. |
@@ -479,7 +728,6 @@ __releases(kernel_lock) | |||
479 | __acquires(kernel_lock) | 728 | __acquires(kernel_lock) |
480 | { | 729 | { |
481 | struct tracer *t; | 730 | struct tracer *t; |
482 | int len; | ||
483 | int ret = 0; | 731 | int ret = 0; |
484 | 732 | ||
485 | if (!type->name) { | 733 | if (!type->name) { |
@@ -487,13 +735,11 @@ __acquires(kernel_lock) | |||
487 | return -1; | 735 | return -1; |
488 | } | 736 | } |
489 | 737 | ||
490 | /* | 738 | if (strlen(type->name) >= MAX_TRACER_SIZE) { |
491 | * When this gets called we hold the BKL which means that | 739 | pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); |
492 | * preemption is disabled. Various trace selftests however | 740 | return -1; |
493 | * need to disable and enable preemption for successful tests. | 741 | } |
494 | * So we drop the BKL here and grab it after the tests again. | 742 | |
495 | */ | ||
496 | unlock_kernel(); | ||
497 | mutex_lock(&trace_types_lock); | 743 | mutex_lock(&trace_types_lock); |
498 | 744 | ||
499 | tracing_selftest_running = true; | 745 | tracing_selftest_running = true; |
@@ -501,7 +747,7 @@ __acquires(kernel_lock) | |||
501 | for (t = trace_types; t; t = t->next) { | 747 | for (t = trace_types; t; t = t->next) { |
502 | if (strcmp(type->name, t->name) == 0) { | 748 | if (strcmp(type->name, t->name) == 0) { |
503 | /* already found */ | 749 | /* already found */ |
504 | pr_info("Trace %s already registered\n", | 750 | pr_info("Tracer %s already registered\n", |
505 | type->name); | 751 | type->name); |
506 | ret = -1; | 752 | ret = -1; |
507 | goto out; | 753 | goto out; |
@@ -523,7 +769,6 @@ __acquires(kernel_lock) | |||
523 | if (type->selftest && !tracing_selftest_disabled) { | 769 | if (type->selftest && !tracing_selftest_disabled) { |
524 | struct tracer *saved_tracer = current_trace; | 770 | struct tracer *saved_tracer = current_trace; |
525 | struct trace_array *tr = &global_trace; | 771 | struct trace_array *tr = &global_trace; |
526 | int i; | ||
527 | 772 | ||
528 | /* | 773 | /* |
529 | * Run a selftest on this tracer. | 774 | * Run a selftest on this tracer. |
@@ -532,8 +777,7 @@ __acquires(kernel_lock) | |||
532 | * internal tracing to verify that everything is in order. | 777 | * internal tracing to verify that everything is in order. |
533 | * If we fail, we do not register this tracer. | 778 | * If we fail, we do not register this tracer. |
534 | */ | 779 | */ |
535 | for_each_tracing_cpu(i) | 780 | tracing_reset_online_cpus(tr); |
536 | tracing_reset(tr, i); | ||
537 | 781 | ||
538 | current_trace = type; | 782 | current_trace = type; |
539 | /* the test is responsible for initializing and enabling */ | 783 | /* the test is responsible for initializing and enabling */ |
@@ -546,8 +790,7 @@ __acquires(kernel_lock) | |||
546 | goto out; | 790 | goto out; |
547 | } | 791 | } |
548 | /* Only reset on passing, to avoid touching corrupted buffers */ | 792 | /* Only reset on passing, to avoid touching corrupted buffers */ |
549 | for_each_tracing_cpu(i) | 793 | tracing_reset_online_cpus(tr); |
550 | tracing_reset(tr, i); | ||
551 | 794 | ||
552 | printk(KERN_CONT "PASSED\n"); | 795 | printk(KERN_CONT "PASSED\n"); |
553 | } | 796 | } |
@@ -555,9 +798,6 @@ __acquires(kernel_lock) | |||
555 | 798 | ||
556 | type->next = trace_types; | 799 | type->next = trace_types; |
557 | trace_types = type; | 800 | trace_types = type; |
558 | len = strlen(type->name); | ||
559 | if (len > max_tracer_type_len) | ||
560 | max_tracer_type_len = len; | ||
561 | 801 | ||
562 | out: | 802 | out: |
563 | tracing_selftest_running = false; | 803 | tracing_selftest_running = false; |
@@ -566,7 +806,7 @@ __acquires(kernel_lock) | |||
566 | if (ret || !default_bootup_tracer) | 806 | if (ret || !default_bootup_tracer) |
567 | goto out_unlock; | 807 | goto out_unlock; |
568 | 808 | ||
569 | if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE)) | 809 | if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) |
570 | goto out_unlock; | 810 | goto out_unlock; |
571 | 811 | ||
572 | printk(KERN_INFO "Starting tracer '%s'\n", type->name); | 812 | printk(KERN_INFO "Starting tracer '%s'\n", type->name); |
@@ -581,21 +821,19 @@ __acquires(kernel_lock) | |||
581 | #endif | 821 | #endif |
582 | 822 | ||
583 | out_unlock: | 823 | out_unlock: |
584 | lock_kernel(); | ||
585 | return ret; | 824 | return ret; |
586 | } | 825 | } |
587 | 826 | ||
588 | void unregister_tracer(struct tracer *type) | 827 | void unregister_tracer(struct tracer *type) |
589 | { | 828 | { |
590 | struct tracer **t; | 829 | struct tracer **t; |
591 | int len; | ||
592 | 830 | ||
593 | mutex_lock(&trace_types_lock); | 831 | mutex_lock(&trace_types_lock); |
594 | for (t = &trace_types; *t; t = &(*t)->next) { | 832 | for (t = &trace_types; *t; t = &(*t)->next) { |
595 | if (*t == type) | 833 | if (*t == type) |
596 | goto found; | 834 | goto found; |
597 | } | 835 | } |
598 | pr_info("Trace %s not registered\n", type->name); | 836 | pr_info("Tracer %s not registered\n", type->name); |
599 | goto out; | 837 | goto out; |
600 | 838 | ||
601 | found: | 839 | found: |
@@ -608,35 +846,46 @@ void unregister_tracer(struct tracer *type) | |||
608 | current_trace->stop(&global_trace); | 846 | current_trace->stop(&global_trace); |
609 | current_trace = &nop_trace; | 847 | current_trace = &nop_trace; |
610 | } | 848 | } |
611 | 849 | out: | |
612 | if (strlen(type->name) != max_tracer_type_len) | ||
613 | goto out; | ||
614 | |||
615 | max_tracer_type_len = 0; | ||
616 | for (t = &trace_types; *t; t = &(*t)->next) { | ||
617 | len = strlen((*t)->name); | ||
618 | if (len > max_tracer_type_len) | ||
619 | max_tracer_type_len = len; | ||
620 | } | ||
621 | out: | ||
622 | mutex_unlock(&trace_types_lock); | 850 | mutex_unlock(&trace_types_lock); |
623 | } | 851 | } |
624 | 852 | ||
625 | void tracing_reset(struct trace_array *tr, int cpu) | 853 | static void __tracing_reset(struct ring_buffer *buffer, int cpu) |
626 | { | 854 | { |
627 | ftrace_disable_cpu(); | 855 | ftrace_disable_cpu(); |
628 | ring_buffer_reset_cpu(tr->buffer, cpu); | 856 | ring_buffer_reset_cpu(buffer, cpu); |
629 | ftrace_enable_cpu(); | 857 | ftrace_enable_cpu(); |
630 | } | 858 | } |
631 | 859 | ||
860 | void tracing_reset(struct trace_array *tr, int cpu) | ||
861 | { | ||
862 | struct ring_buffer *buffer = tr->buffer; | ||
863 | |||
864 | ring_buffer_record_disable(buffer); | ||
865 | |||
866 | /* Make sure all commits have finished */ | ||
867 | synchronize_sched(); | ||
868 | __tracing_reset(buffer, cpu); | ||
869 | |||
870 | ring_buffer_record_enable(buffer); | ||
871 | } | ||
872 | |||
632 | void tracing_reset_online_cpus(struct trace_array *tr) | 873 | void tracing_reset_online_cpus(struct trace_array *tr) |
633 | { | 874 | { |
875 | struct ring_buffer *buffer = tr->buffer; | ||
634 | int cpu; | 876 | int cpu; |
635 | 877 | ||
878 | ring_buffer_record_disable(buffer); | ||
879 | |||
880 | /* Make sure all commits have finished */ | ||
881 | synchronize_sched(); | ||
882 | |||
636 | tr->time_start = ftrace_now(tr->cpu); | 883 | tr->time_start = ftrace_now(tr->cpu); |
637 | 884 | ||
638 | for_each_online_cpu(cpu) | 885 | for_each_online_cpu(cpu) |
639 | tracing_reset(tr, cpu); | 886 | __tracing_reset(buffer, cpu); |
887 | |||
888 | ring_buffer_record_enable(buffer); | ||
640 | } | 889 | } |
641 | 890 | ||
642 | void tracing_reset_current(int cpu) | 891 | void tracing_reset_current(int cpu) |
@@ -655,7 +904,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; | |||
655 | static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; | 904 | static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; |
656 | static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; | 905 | static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; |
657 | static int cmdline_idx; | 906 | static int cmdline_idx; |
658 | static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; | 907 | static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; |
659 | 908 | ||
660 | /* temporary disable recording */ | 909 | /* temporary disable recording */ |
661 | static atomic_t trace_record_cmdline_disabled __read_mostly; | 910 | static atomic_t trace_record_cmdline_disabled __read_mostly; |
@@ -667,8 +916,10 @@ static void trace_init_cmdlines(void) | |||
667 | cmdline_idx = 0; | 916 | cmdline_idx = 0; |
668 | } | 917 | } |
669 | 918 | ||
670 | static int trace_stop_count; | 919 | int is_tracing_stopped(void) |
671 | static DEFINE_SPINLOCK(tracing_start_lock); | 920 | { |
921 | return trace_stop_count; | ||
922 | } | ||
672 | 923 | ||
673 | /** | 924 | /** |
674 | * ftrace_off_permanent - disable all ftrace code permanently | 925 | * ftrace_off_permanent - disable all ftrace code permanently |
@@ -709,6 +960,8 @@ void tracing_start(void) | |||
709 | goto out; | 960 | goto out; |
710 | } | 961 | } |
711 | 962 | ||
963 | /* Prevent the buffers from switching */ | ||
964 | arch_spin_lock(&ftrace_max_lock); | ||
712 | 965 | ||
713 | buffer = global_trace.buffer; | 966 | buffer = global_trace.buffer; |
714 | if (buffer) | 967 | if (buffer) |
@@ -718,6 +971,8 @@ void tracing_start(void) | |||
718 | if (buffer) | 971 | if (buffer) |
719 | ring_buffer_record_enable(buffer); | 972 | ring_buffer_record_enable(buffer); |
720 | 973 | ||
974 | arch_spin_unlock(&ftrace_max_lock); | ||
975 | |||
721 | ftrace_start(); | 976 | ftrace_start(); |
722 | out: | 977 | out: |
723 | spin_unlock_irqrestore(&tracing_start_lock, flags); | 978 | spin_unlock_irqrestore(&tracing_start_lock, flags); |
@@ -739,6 +994,9 @@ void tracing_stop(void) | |||
739 | if (trace_stop_count++) | 994 | if (trace_stop_count++) |
740 | goto out; | 995 | goto out; |
741 | 996 | ||
997 | /* Prevent the buffers from switching */ | ||
998 | arch_spin_lock(&ftrace_max_lock); | ||
999 | |||
742 | buffer = global_trace.buffer; | 1000 | buffer = global_trace.buffer; |
743 | if (buffer) | 1001 | if (buffer) |
744 | ring_buffer_record_disable(buffer); | 1002 | ring_buffer_record_disable(buffer); |
@@ -747,6 +1005,8 @@ void tracing_stop(void) | |||
747 | if (buffer) | 1005 | if (buffer) |
748 | ring_buffer_record_disable(buffer); | 1006 | ring_buffer_record_disable(buffer); |
749 | 1007 | ||
1008 | arch_spin_unlock(&ftrace_max_lock); | ||
1009 | |||
750 | out: | 1010 | out: |
751 | spin_unlock_irqrestore(&tracing_start_lock, flags); | 1011 | spin_unlock_irqrestore(&tracing_start_lock, flags); |
752 | } | 1012 | } |
@@ -766,7 +1026,7 @@ static void trace_save_cmdline(struct task_struct *tsk) | |||
766 | * nor do we want to disable interrupts, | 1026 | * nor do we want to disable interrupts, |
767 | * so if we miss here, then better luck next time. | 1027 | * so if we miss here, then better luck next time. |
768 | */ | 1028 | */ |
769 | if (!__raw_spin_trylock(&trace_cmdline_lock)) | 1029 | if (!arch_spin_trylock(&trace_cmdline_lock)) |
770 | return; | 1030 | return; |
771 | 1031 | ||
772 | idx = map_pid_to_cmdline[tsk->pid]; | 1032 | idx = map_pid_to_cmdline[tsk->pid]; |
@@ -791,7 +1051,7 @@ static void trace_save_cmdline(struct task_struct *tsk) | |||
791 | 1051 | ||
792 | memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); | 1052 | memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); |
793 | 1053 | ||
794 | __raw_spin_unlock(&trace_cmdline_lock); | 1054 | arch_spin_unlock(&trace_cmdline_lock); |
795 | } | 1055 | } |
796 | 1056 | ||
797 | void trace_find_cmdline(int pid, char comm[]) | 1057 | void trace_find_cmdline(int pid, char comm[]) |
@@ -803,20 +1063,25 @@ void trace_find_cmdline(int pid, char comm[]) | |||
803 | return; | 1063 | return; |
804 | } | 1064 | } |
805 | 1065 | ||
1066 | if (WARN_ON_ONCE(pid < 0)) { | ||
1067 | strcpy(comm, "<XXX>"); | ||
1068 | return; | ||
1069 | } | ||
1070 | |||
806 | if (pid > PID_MAX_DEFAULT) { | 1071 | if (pid > PID_MAX_DEFAULT) { |
807 | strcpy(comm, "<...>"); | 1072 | strcpy(comm, "<...>"); |
808 | return; | 1073 | return; |
809 | } | 1074 | } |
810 | 1075 | ||
811 | preempt_disable(); | 1076 | preempt_disable(); |
812 | __raw_spin_lock(&trace_cmdline_lock); | 1077 | arch_spin_lock(&trace_cmdline_lock); |
813 | map = map_pid_to_cmdline[pid]; | 1078 | map = map_pid_to_cmdline[pid]; |
814 | if (map != NO_CMDLINE_MAP) | 1079 | if (map != NO_CMDLINE_MAP) |
815 | strcpy(comm, saved_cmdlines[map]); | 1080 | strcpy(comm, saved_cmdlines[map]); |
816 | else | 1081 | else |
817 | strcpy(comm, "<...>"); | 1082 | strcpy(comm, "<...>"); |
818 | 1083 | ||
819 | __raw_spin_unlock(&trace_cmdline_lock); | 1084 | arch_spin_unlock(&trace_cmdline_lock); |
820 | preempt_enable(); | 1085 | preempt_enable(); |
821 | } | 1086 | } |
822 | 1087 | ||
@@ -837,7 +1102,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | |||
837 | 1102 | ||
838 | entry->preempt_count = pc & 0xff; | 1103 | entry->preempt_count = pc & 0xff; |
839 | entry->pid = (tsk) ? tsk->pid : 0; | 1104 | entry->pid = (tsk) ? tsk->pid : 0; |
840 | entry->tgid = (tsk) ? tsk->tgid : 0; | 1105 | entry->lock_depth = (tsk) ? tsk->lock_depth : 0; |
841 | entry->flags = | 1106 | entry->flags = |
842 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT | 1107 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT |
843 | (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | | 1108 | (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | |
@@ -848,15 +1113,17 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | |||
848 | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | | 1113 | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | |
849 | (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); | 1114 | (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); |
850 | } | 1115 | } |
1116 | EXPORT_SYMBOL_GPL(tracing_generic_entry_update); | ||
851 | 1117 | ||
852 | struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, | 1118 | struct ring_buffer_event * |
853 | int type, | 1119 | trace_buffer_lock_reserve(struct ring_buffer *buffer, |
854 | unsigned long len, | 1120 | int type, |
855 | unsigned long flags, int pc) | 1121 | unsigned long len, |
1122 | unsigned long flags, int pc) | ||
856 | { | 1123 | { |
857 | struct ring_buffer_event *event; | 1124 | struct ring_buffer_event *event; |
858 | 1125 | ||
859 | event = ring_buffer_lock_reserve(tr->buffer, len); | 1126 | event = ring_buffer_lock_reserve(buffer, len); |
860 | if (event != NULL) { | 1127 | if (event != NULL) { |
861 | struct trace_entry *ent = ring_buffer_event_data(event); | 1128 | struct trace_entry *ent = ring_buffer_event_data(event); |
862 | 1129 | ||
@@ -866,58 +1133,60 @@ struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, | |||
866 | 1133 | ||
867 | return event; | 1134 | return event; |
868 | } | 1135 | } |
869 | static void ftrace_trace_stack(struct trace_array *tr, | ||
870 | unsigned long flags, int skip, int pc); | ||
871 | static void ftrace_trace_userstack(struct trace_array *tr, | ||
872 | unsigned long flags, int pc); | ||
873 | 1136 | ||
874 | static inline void __trace_buffer_unlock_commit(struct trace_array *tr, | 1137 | static inline void |
875 | struct ring_buffer_event *event, | 1138 | __trace_buffer_unlock_commit(struct ring_buffer *buffer, |
876 | unsigned long flags, int pc, | 1139 | struct ring_buffer_event *event, |
877 | int wake) | 1140 | unsigned long flags, int pc, |
1141 | int wake) | ||
878 | { | 1142 | { |
879 | ring_buffer_unlock_commit(tr->buffer, event); | 1143 | ring_buffer_unlock_commit(buffer, event); |
880 | 1144 | ||
881 | ftrace_trace_stack(tr, flags, 6, pc); | 1145 | ftrace_trace_stack(buffer, flags, 6, pc); |
882 | ftrace_trace_userstack(tr, flags, pc); | 1146 | ftrace_trace_userstack(buffer, flags, pc); |
883 | 1147 | ||
884 | if (wake) | 1148 | if (wake) |
885 | trace_wake_up(); | 1149 | trace_wake_up(); |
886 | } | 1150 | } |
887 | 1151 | ||
888 | void trace_buffer_unlock_commit(struct trace_array *tr, | 1152 | void trace_buffer_unlock_commit(struct ring_buffer *buffer, |
889 | struct ring_buffer_event *event, | 1153 | struct ring_buffer_event *event, |
890 | unsigned long flags, int pc) | 1154 | unsigned long flags, int pc) |
891 | { | 1155 | { |
892 | __trace_buffer_unlock_commit(tr, event, flags, pc, 1); | 1156 | __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); |
893 | } | 1157 | } |
894 | 1158 | ||
895 | struct ring_buffer_event * | 1159 | struct ring_buffer_event * |
896 | trace_current_buffer_lock_reserve(int type, unsigned long len, | 1160 | trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, |
1161 | int type, unsigned long len, | ||
897 | unsigned long flags, int pc) | 1162 | unsigned long flags, int pc) |
898 | { | 1163 | { |
899 | return trace_buffer_lock_reserve(&global_trace, | 1164 | *current_rb = global_trace.buffer; |
1165 | return trace_buffer_lock_reserve(*current_rb, | ||
900 | type, len, flags, pc); | 1166 | type, len, flags, pc); |
901 | } | 1167 | } |
902 | EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); | 1168 | EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); |
903 | 1169 | ||
904 | void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, | 1170 | void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, |
1171 | struct ring_buffer_event *event, | ||
905 | unsigned long flags, int pc) | 1172 | unsigned long flags, int pc) |
906 | { | 1173 | { |
907 | __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); | 1174 | __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); |
908 | } | 1175 | } |
909 | EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); | 1176 | EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); |
910 | 1177 | ||
911 | void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, | 1178 | void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, |
912 | unsigned long flags, int pc) | 1179 | struct ring_buffer_event *event, |
1180 | unsigned long flags, int pc) | ||
913 | { | 1181 | { |
914 | __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); | 1182 | __trace_buffer_unlock_commit(buffer, event, flags, pc, 0); |
915 | } | 1183 | } |
916 | EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); | 1184 | EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); |
917 | 1185 | ||
918 | void trace_current_buffer_discard_commit(struct ring_buffer_event *event) | 1186 | void trace_current_buffer_discard_commit(struct ring_buffer *buffer, |
1187 | struct ring_buffer_event *event) | ||
919 | { | 1188 | { |
920 | ring_buffer_discard_commit(global_trace.buffer, event); | 1189 | ring_buffer_discard_commit(buffer, event); |
921 | } | 1190 | } |
922 | EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit); | 1191 | EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit); |
923 | 1192 | ||
@@ -927,14 +1196,15 @@ trace_function(struct trace_array *tr, | |||
927 | int pc) | 1196 | int pc) |
928 | { | 1197 | { |
929 | struct ftrace_event_call *call = &event_function; | 1198 | struct ftrace_event_call *call = &event_function; |
1199 | struct ring_buffer *buffer = tr->buffer; | ||
930 | struct ring_buffer_event *event; | 1200 | struct ring_buffer_event *event; |
931 | struct ftrace_entry *entry; | 1201 | struct ftrace_entry *entry; |
932 | 1202 | ||
933 | /* If we are reading the ring buffer, don't trace */ | 1203 | /* If we are reading the ring buffer, don't trace */ |
934 | if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) | 1204 | if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) |
935 | return; | 1205 | return; |
936 | 1206 | ||
937 | event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry), | 1207 | event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), |
938 | flags, pc); | 1208 | flags, pc); |
939 | if (!event) | 1209 | if (!event) |
940 | return; | 1210 | return; |
@@ -942,57 +1212,9 @@ trace_function(struct trace_array *tr, | |||
942 | entry->ip = ip; | 1212 | entry->ip = ip; |
943 | entry->parent_ip = parent_ip; | 1213 | entry->parent_ip = parent_ip; |
944 | 1214 | ||
945 | if (!filter_check_discard(call, entry, tr->buffer, event)) | 1215 | if (!filter_check_discard(call, entry, buffer, event)) |
946 | ring_buffer_unlock_commit(tr->buffer, event); | 1216 | ring_buffer_unlock_commit(buffer, event); |
947 | } | ||
948 | |||
949 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
950 | static int __trace_graph_entry(struct trace_array *tr, | ||
951 | struct ftrace_graph_ent *trace, | ||
952 | unsigned long flags, | ||
953 | int pc) | ||
954 | { | ||
955 | struct ftrace_event_call *call = &event_funcgraph_entry; | ||
956 | struct ring_buffer_event *event; | ||
957 | struct ftrace_graph_ent_entry *entry; | ||
958 | |||
959 | if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) | ||
960 | return 0; | ||
961 | |||
962 | event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT, | ||
963 | sizeof(*entry), flags, pc); | ||
964 | if (!event) | ||
965 | return 0; | ||
966 | entry = ring_buffer_event_data(event); | ||
967 | entry->graph_ent = *trace; | ||
968 | if (!filter_current_check_discard(call, entry, event)) | ||
969 | ring_buffer_unlock_commit(global_trace.buffer, event); | ||
970 | |||
971 | return 1; | ||
972 | } | ||
973 | |||
974 | static void __trace_graph_return(struct trace_array *tr, | ||
975 | struct ftrace_graph_ret *trace, | ||
976 | unsigned long flags, | ||
977 | int pc) | ||
978 | { | ||
979 | struct ftrace_event_call *call = &event_funcgraph_exit; | ||
980 | struct ring_buffer_event *event; | ||
981 | struct ftrace_graph_ret_entry *entry; | ||
982 | |||
983 | if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) | ||
984 | return; | ||
985 | |||
986 | event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET, | ||
987 | sizeof(*entry), flags, pc); | ||
988 | if (!event) | ||
989 | return; | ||
990 | entry = ring_buffer_event_data(event); | ||
991 | entry->ret = *trace; | ||
992 | if (!filter_current_check_discard(call, entry, event)) | ||
993 | ring_buffer_unlock_commit(global_trace.buffer, event); | ||
994 | } | 1217 | } |
995 | #endif | ||
996 | 1218 | ||
997 | void | 1219 | void |
998 | ftrace(struct trace_array *tr, struct trace_array_cpu *data, | 1220 | ftrace(struct trace_array *tr, struct trace_array_cpu *data, |
@@ -1003,17 +1225,17 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, | |||
1003 | trace_function(tr, ip, parent_ip, flags, pc); | 1225 | trace_function(tr, ip, parent_ip, flags, pc); |
1004 | } | 1226 | } |
1005 | 1227 | ||
1006 | static void __ftrace_trace_stack(struct trace_array *tr, | 1228 | #ifdef CONFIG_STACKTRACE |
1229 | static void __ftrace_trace_stack(struct ring_buffer *buffer, | ||
1007 | unsigned long flags, | 1230 | unsigned long flags, |
1008 | int skip, int pc) | 1231 | int skip, int pc) |
1009 | { | 1232 | { |
1010 | #ifdef CONFIG_STACKTRACE | ||
1011 | struct ftrace_event_call *call = &event_kernel_stack; | 1233 | struct ftrace_event_call *call = &event_kernel_stack; |
1012 | struct ring_buffer_event *event; | 1234 | struct ring_buffer_event *event; |
1013 | struct stack_entry *entry; | 1235 | struct stack_entry *entry; |
1014 | struct stack_trace trace; | 1236 | struct stack_trace trace; |
1015 | 1237 | ||
1016 | event = trace_buffer_lock_reserve(tr, TRACE_STACK, | 1238 | event = trace_buffer_lock_reserve(buffer, TRACE_STACK, |
1017 | sizeof(*entry), flags, pc); | 1239 | sizeof(*entry), flags, pc); |
1018 | if (!event) | 1240 | if (!event) |
1019 | return; | 1241 | return; |
@@ -1026,32 +1248,46 @@ static void __ftrace_trace_stack(struct trace_array *tr, | |||
1026 | trace.entries = entry->caller; | 1248 | trace.entries = entry->caller; |
1027 | 1249 | ||
1028 | save_stack_trace(&trace); | 1250 | save_stack_trace(&trace); |
1029 | if (!filter_check_discard(call, entry, tr->buffer, event)) | 1251 | if (!filter_check_discard(call, entry, buffer, event)) |
1030 | ring_buffer_unlock_commit(tr->buffer, event); | 1252 | ring_buffer_unlock_commit(buffer, event); |
1031 | #endif | ||
1032 | } | 1253 | } |
1033 | 1254 | ||
1034 | static void ftrace_trace_stack(struct trace_array *tr, | 1255 | void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, |
1035 | unsigned long flags, | 1256 | int skip, int pc) |
1036 | int skip, int pc) | ||
1037 | { | 1257 | { |
1038 | if (!(trace_flags & TRACE_ITER_STACKTRACE)) | 1258 | if (!(trace_flags & TRACE_ITER_STACKTRACE)) |
1039 | return; | 1259 | return; |
1040 | 1260 | ||
1041 | __ftrace_trace_stack(tr, flags, skip, pc); | 1261 | __ftrace_trace_stack(buffer, flags, skip, pc); |
1042 | } | 1262 | } |
1043 | 1263 | ||
1044 | void __trace_stack(struct trace_array *tr, | 1264 | void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, |
1045 | unsigned long flags, | 1265 | int pc) |
1046 | int skip, int pc) | ||
1047 | { | 1266 | { |
1048 | __ftrace_trace_stack(tr, flags, skip, pc); | 1267 | __ftrace_trace_stack(tr->buffer, flags, skip, pc); |
1049 | } | 1268 | } |
1050 | 1269 | ||
1051 | static void ftrace_trace_userstack(struct trace_array *tr, | 1270 | /** |
1052 | unsigned long flags, int pc) | 1271 | * trace_dump_stack - record a stack back trace in the trace buffer |
1272 | */ | ||
1273 | void trace_dump_stack(void) | ||
1274 | { | ||
1275 | unsigned long flags; | ||
1276 | |||
1277 | if (tracing_disabled || tracing_selftest_running) | ||
1278 | return; | ||
1279 | |||
1280 | local_save_flags(flags); | ||
1281 | |||
1282 | /* skipping 3 traces, seems to get us at the caller of this function */ | ||
1283 | __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); | ||
1284 | } | ||
1285 | |||
1286 | static DEFINE_PER_CPU(int, user_stack_count); | ||
1287 | |||
1288 | void | ||
1289 | ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | ||
1053 | { | 1290 | { |
1054 | #ifdef CONFIG_STACKTRACE | ||
1055 | struct ftrace_event_call *call = &event_user_stack; | 1291 | struct ftrace_event_call *call = &event_user_stack; |
1056 | struct ring_buffer_event *event; | 1292 | struct ring_buffer_event *event; |
1057 | struct userstack_entry *entry; | 1293 | struct userstack_entry *entry; |
@@ -1060,12 +1296,30 @@ static void ftrace_trace_userstack(struct trace_array *tr, | |||
1060 | if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) | 1296 | if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) |
1061 | return; | 1297 | return; |
1062 | 1298 | ||
1063 | event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK, | 1299 | /* |
1300 | * NMIs can not handle page faults, even with fix ups. | ||
1301 | * The save user stack can (and often does) fault. | ||
1302 | */ | ||
1303 | if (unlikely(in_nmi())) | ||
1304 | return; | ||
1305 | |||
1306 | /* | ||
1307 | * prevent recursion, since the user stack tracing may | ||
1308 | * trigger other kernel events. | ||
1309 | */ | ||
1310 | preempt_disable(); | ||
1311 | if (__this_cpu_read(user_stack_count)) | ||
1312 | goto out; | ||
1313 | |||
1314 | __this_cpu_inc(user_stack_count); | ||
1315 | |||
1316 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, | ||
1064 | sizeof(*entry), flags, pc); | 1317 | sizeof(*entry), flags, pc); |
1065 | if (!event) | 1318 | if (!event) |
1066 | return; | 1319 | goto out_drop_count; |
1067 | entry = ring_buffer_event_data(event); | 1320 | entry = ring_buffer_event_data(event); |
1068 | 1321 | ||
1322 | entry->tgid = current->tgid; | ||
1069 | memset(&entry->caller, 0, sizeof(entry->caller)); | 1323 | memset(&entry->caller, 0, sizeof(entry->caller)); |
1070 | 1324 | ||
1071 | trace.nr_entries = 0; | 1325 | trace.nr_entries = 0; |
@@ -1074,9 +1328,13 @@ static void ftrace_trace_userstack(struct trace_array *tr, | |||
1074 | trace.entries = entry->caller; | 1328 | trace.entries = entry->caller; |
1075 | 1329 | ||
1076 | save_stack_trace_user(&trace); | 1330 | save_stack_trace_user(&trace); |
1077 | if (!filter_check_discard(call, entry, tr->buffer, event)) | 1331 | if (!filter_check_discard(call, entry, buffer, event)) |
1078 | ring_buffer_unlock_commit(tr->buffer, event); | 1332 | ring_buffer_unlock_commit(buffer, event); |
1079 | #endif | 1333 | |
1334 | out_drop_count: | ||
1335 | __this_cpu_dec(user_stack_count); | ||
1336 | out: | ||
1337 | preempt_enable(); | ||
1080 | } | 1338 | } |
1081 | 1339 | ||
1082 | #ifdef UNUSED | 1340 | #ifdef UNUSED |
@@ -1086,174 +1344,7 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags) | |||
1086 | } | 1344 | } |
1087 | #endif /* UNUSED */ | 1345 | #endif /* UNUSED */ |
1088 | 1346 | ||
1089 | static void | 1347 | #endif /* CONFIG_STACKTRACE */ |
1090 | ftrace_trace_special(void *__tr, | ||
1091 | unsigned long arg1, unsigned long arg2, unsigned long arg3, | ||
1092 | int pc) | ||
1093 | { | ||
1094 | struct ring_buffer_event *event; | ||
1095 | struct trace_array *tr = __tr; | ||
1096 | struct special_entry *entry; | ||
1097 | |||
1098 | event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL, | ||
1099 | sizeof(*entry), 0, pc); | ||
1100 | if (!event) | ||
1101 | return; | ||
1102 | entry = ring_buffer_event_data(event); | ||
1103 | entry->arg1 = arg1; | ||
1104 | entry->arg2 = arg2; | ||
1105 | entry->arg3 = arg3; | ||
1106 | trace_buffer_unlock_commit(tr, event, 0, pc); | ||
1107 | } | ||
1108 | |||
1109 | void | ||
1110 | __trace_special(void *__tr, void *__data, | ||
1111 | unsigned long arg1, unsigned long arg2, unsigned long arg3) | ||
1112 | { | ||
1113 | ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count()); | ||
1114 | } | ||
1115 | |||
1116 | void | ||
1117 | tracing_sched_switch_trace(struct trace_array *tr, | ||
1118 | struct task_struct *prev, | ||
1119 | struct task_struct *next, | ||
1120 | unsigned long flags, int pc) | ||
1121 | { | ||
1122 | struct ftrace_event_call *call = &event_context_switch; | ||
1123 | struct ring_buffer_event *event; | ||
1124 | struct ctx_switch_entry *entry; | ||
1125 | |||
1126 | event = trace_buffer_lock_reserve(tr, TRACE_CTX, | ||
1127 | sizeof(*entry), flags, pc); | ||
1128 | if (!event) | ||
1129 | return; | ||
1130 | entry = ring_buffer_event_data(event); | ||
1131 | entry->prev_pid = prev->pid; | ||
1132 | entry->prev_prio = prev->prio; | ||
1133 | entry->prev_state = prev->state; | ||
1134 | entry->next_pid = next->pid; | ||
1135 | entry->next_prio = next->prio; | ||
1136 | entry->next_state = next->state; | ||
1137 | entry->next_cpu = task_cpu(next); | ||
1138 | |||
1139 | if (!filter_check_discard(call, entry, tr->buffer, event)) | ||
1140 | trace_buffer_unlock_commit(tr, event, flags, pc); | ||
1141 | } | ||
1142 | |||
1143 | void | ||
1144 | tracing_sched_wakeup_trace(struct trace_array *tr, | ||
1145 | struct task_struct *wakee, | ||
1146 | struct task_struct *curr, | ||
1147 | unsigned long flags, int pc) | ||
1148 | { | ||
1149 | struct ftrace_event_call *call = &event_wakeup; | ||
1150 | struct ring_buffer_event *event; | ||
1151 | struct ctx_switch_entry *entry; | ||
1152 | |||
1153 | event = trace_buffer_lock_reserve(tr, TRACE_WAKE, | ||
1154 | sizeof(*entry), flags, pc); | ||
1155 | if (!event) | ||
1156 | return; | ||
1157 | entry = ring_buffer_event_data(event); | ||
1158 | entry->prev_pid = curr->pid; | ||
1159 | entry->prev_prio = curr->prio; | ||
1160 | entry->prev_state = curr->state; | ||
1161 | entry->next_pid = wakee->pid; | ||
1162 | entry->next_prio = wakee->prio; | ||
1163 | entry->next_state = wakee->state; | ||
1164 | entry->next_cpu = task_cpu(wakee); | ||
1165 | |||
1166 | if (!filter_check_discard(call, entry, tr->buffer, event)) | ||
1167 | ring_buffer_unlock_commit(tr->buffer, event); | ||
1168 | ftrace_trace_stack(tr, flags, 6, pc); | ||
1169 | ftrace_trace_userstack(tr, flags, pc); | ||
1170 | } | ||
1171 | |||
1172 | void | ||
1173 | ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) | ||
1174 | { | ||
1175 | struct trace_array *tr = &global_trace; | ||
1176 | struct trace_array_cpu *data; | ||
1177 | unsigned long flags; | ||
1178 | int cpu; | ||
1179 | int pc; | ||
1180 | |||
1181 | if (tracing_disabled) | ||
1182 | return; | ||
1183 | |||
1184 | pc = preempt_count(); | ||
1185 | local_irq_save(flags); | ||
1186 | cpu = raw_smp_processor_id(); | ||
1187 | data = tr->data[cpu]; | ||
1188 | |||
1189 | if (likely(atomic_inc_return(&data->disabled) == 1)) | ||
1190 | ftrace_trace_special(tr, arg1, arg2, arg3, pc); | ||
1191 | |||
1192 | atomic_dec(&data->disabled); | ||
1193 | local_irq_restore(flags); | ||
1194 | } | ||
1195 | |||
1196 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
1197 | int trace_graph_entry(struct ftrace_graph_ent *trace) | ||
1198 | { | ||
1199 | struct trace_array *tr = &global_trace; | ||
1200 | struct trace_array_cpu *data; | ||
1201 | unsigned long flags; | ||
1202 | long disabled; | ||
1203 | int ret; | ||
1204 | int cpu; | ||
1205 | int pc; | ||
1206 | |||
1207 | if (!ftrace_trace_task(current)) | ||
1208 | return 0; | ||
1209 | |||
1210 | if (!ftrace_graph_addr(trace->func)) | ||
1211 | return 0; | ||
1212 | |||
1213 | local_irq_save(flags); | ||
1214 | cpu = raw_smp_processor_id(); | ||
1215 | data = tr->data[cpu]; | ||
1216 | disabled = atomic_inc_return(&data->disabled); | ||
1217 | if (likely(disabled == 1)) { | ||
1218 | pc = preempt_count(); | ||
1219 | ret = __trace_graph_entry(tr, trace, flags, pc); | ||
1220 | } else { | ||
1221 | ret = 0; | ||
1222 | } | ||
1223 | /* Only do the atomic if it is not already set */ | ||
1224 | if (!test_tsk_trace_graph(current)) | ||
1225 | set_tsk_trace_graph(current); | ||
1226 | |||
1227 | atomic_dec(&data->disabled); | ||
1228 | local_irq_restore(flags); | ||
1229 | |||
1230 | return ret; | ||
1231 | } | ||
1232 | |||
1233 | void trace_graph_return(struct ftrace_graph_ret *trace) | ||
1234 | { | ||
1235 | struct trace_array *tr = &global_trace; | ||
1236 | struct trace_array_cpu *data; | ||
1237 | unsigned long flags; | ||
1238 | long disabled; | ||
1239 | int cpu; | ||
1240 | int pc; | ||
1241 | |||
1242 | local_irq_save(flags); | ||
1243 | cpu = raw_smp_processor_id(); | ||
1244 | data = tr->data[cpu]; | ||
1245 | disabled = atomic_inc_return(&data->disabled); | ||
1246 | if (likely(disabled == 1)) { | ||
1247 | pc = preempt_count(); | ||
1248 | __trace_graph_return(tr, trace, flags, pc); | ||
1249 | } | ||
1250 | if (!trace->depth) | ||
1251 | clear_tsk_trace_graph(current); | ||
1252 | atomic_dec(&data->disabled); | ||
1253 | local_irq_restore(flags); | ||
1254 | } | ||
1255 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | ||
1256 | |||
1257 | 1348 | ||
1258 | /** | 1349 | /** |
1259 | * trace_vbprintk - write binary msg to tracing buffer | 1350 | * trace_vbprintk - write binary msg to tracing buffer |
@@ -1261,18 +1352,18 @@ void trace_graph_return(struct ftrace_graph_ret *trace) | |||
1261 | */ | 1352 | */ |
1262 | int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) | 1353 | int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) |
1263 | { | 1354 | { |
1264 | static raw_spinlock_t trace_buf_lock = | 1355 | static arch_spinlock_t trace_buf_lock = |
1265 | (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 1356 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
1266 | static u32 trace_buf[TRACE_BUF_SIZE]; | 1357 | static u32 trace_buf[TRACE_BUF_SIZE]; |
1267 | 1358 | ||
1268 | struct ftrace_event_call *call = &event_bprint; | 1359 | struct ftrace_event_call *call = &event_bprint; |
1269 | struct ring_buffer_event *event; | 1360 | struct ring_buffer_event *event; |
1361 | struct ring_buffer *buffer; | ||
1270 | struct trace_array *tr = &global_trace; | 1362 | struct trace_array *tr = &global_trace; |
1271 | struct trace_array_cpu *data; | 1363 | struct trace_array_cpu *data; |
1272 | struct bprint_entry *entry; | 1364 | struct bprint_entry *entry; |
1273 | unsigned long flags; | 1365 | unsigned long flags; |
1274 | int disable; | 1366 | int disable; |
1275 | int resched; | ||
1276 | int cpu, len = 0, size, pc; | 1367 | int cpu, len = 0, size, pc; |
1277 | 1368 | ||
1278 | if (unlikely(tracing_selftest_running || tracing_disabled)) | 1369 | if (unlikely(tracing_selftest_running || tracing_disabled)) |
@@ -1282,7 +1373,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) | |||
1282 | pause_graph_tracing(); | 1373 | pause_graph_tracing(); |
1283 | 1374 | ||
1284 | pc = preempt_count(); | 1375 | pc = preempt_count(); |
1285 | resched = ftrace_preempt_disable(); | 1376 | preempt_disable_notrace(); |
1286 | cpu = raw_smp_processor_id(); | 1377 | cpu = raw_smp_processor_id(); |
1287 | data = tr->data[cpu]; | 1378 | data = tr->data[cpu]; |
1288 | 1379 | ||
@@ -1292,14 +1383,16 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) | |||
1292 | 1383 | ||
1293 | /* Lockdep uses trace_printk for lock tracing */ | 1384 | /* Lockdep uses trace_printk for lock tracing */ |
1294 | local_irq_save(flags); | 1385 | local_irq_save(flags); |
1295 | __raw_spin_lock(&trace_buf_lock); | 1386 | arch_spin_lock(&trace_buf_lock); |
1296 | len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); | 1387 | len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); |
1297 | 1388 | ||
1298 | if (len > TRACE_BUF_SIZE || len < 0) | 1389 | if (len > TRACE_BUF_SIZE || len < 0) |
1299 | goto out_unlock; | 1390 | goto out_unlock; |
1300 | 1391 | ||
1301 | size = sizeof(*entry) + sizeof(u32) * len; | 1392 | size = sizeof(*entry) + sizeof(u32) * len; |
1302 | event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc); | 1393 | buffer = tr->buffer; |
1394 | event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, | ||
1395 | flags, pc); | ||
1303 | if (!event) | 1396 | if (!event) |
1304 | goto out_unlock; | 1397 | goto out_unlock; |
1305 | entry = ring_buffer_event_data(event); | 1398 | entry = ring_buffer_event_data(event); |
@@ -1307,30 +1400,48 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) | |||
1307 | entry->fmt = fmt; | 1400 | entry->fmt = fmt; |
1308 | 1401 | ||
1309 | memcpy(entry->buf, trace_buf, sizeof(u32) * len); | 1402 | memcpy(entry->buf, trace_buf, sizeof(u32) * len); |
1310 | if (!filter_check_discard(call, entry, tr->buffer, event)) | 1403 | if (!filter_check_discard(call, entry, buffer, event)) { |
1311 | ring_buffer_unlock_commit(tr->buffer, event); | 1404 | ring_buffer_unlock_commit(buffer, event); |
1405 | ftrace_trace_stack(buffer, flags, 6, pc); | ||
1406 | } | ||
1312 | 1407 | ||
1313 | out_unlock: | 1408 | out_unlock: |
1314 | __raw_spin_unlock(&trace_buf_lock); | 1409 | arch_spin_unlock(&trace_buf_lock); |
1315 | local_irq_restore(flags); | 1410 | local_irq_restore(flags); |
1316 | 1411 | ||
1317 | out: | 1412 | out: |
1318 | atomic_dec_return(&data->disabled); | 1413 | atomic_dec_return(&data->disabled); |
1319 | ftrace_preempt_enable(resched); | 1414 | preempt_enable_notrace(); |
1320 | unpause_graph_tracing(); | 1415 | unpause_graph_tracing(); |
1321 | 1416 | ||
1322 | return len; | 1417 | return len; |
1323 | } | 1418 | } |
1324 | EXPORT_SYMBOL_GPL(trace_vbprintk); | 1419 | EXPORT_SYMBOL_GPL(trace_vbprintk); |
1325 | 1420 | ||
1326 | int trace_vprintk(unsigned long ip, const char *fmt, va_list args) | 1421 | int trace_array_printk(struct trace_array *tr, |
1422 | unsigned long ip, const char *fmt, ...) | ||
1327 | { | 1423 | { |
1328 | static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; | 1424 | int ret; |
1425 | va_list ap; | ||
1426 | |||
1427 | if (!(trace_flags & TRACE_ITER_PRINTK)) | ||
1428 | return 0; | ||
1429 | |||
1430 | va_start(ap, fmt); | ||
1431 | ret = trace_array_vprintk(tr, ip, fmt, ap); | ||
1432 | va_end(ap); | ||
1433 | return ret; | ||
1434 | } | ||
1435 | |||
1436 | int trace_array_vprintk(struct trace_array *tr, | ||
1437 | unsigned long ip, const char *fmt, va_list args) | ||
1438 | { | ||
1439 | static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED; | ||
1329 | static char trace_buf[TRACE_BUF_SIZE]; | 1440 | static char trace_buf[TRACE_BUF_SIZE]; |
1330 | 1441 | ||
1331 | struct ftrace_event_call *call = &event_print; | 1442 | struct ftrace_event_call *call = &event_print; |
1332 | struct ring_buffer_event *event; | 1443 | struct ring_buffer_event *event; |
1333 | struct trace_array *tr = &global_trace; | 1444 | struct ring_buffer *buffer; |
1334 | struct trace_array_cpu *data; | 1445 | struct trace_array_cpu *data; |
1335 | int cpu, len = 0, size, pc; | 1446 | int cpu, len = 0, size, pc; |
1336 | struct print_entry *entry; | 1447 | struct print_entry *entry; |
@@ -1351,26 +1462,27 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) | |||
1351 | 1462 | ||
1352 | pause_graph_tracing(); | 1463 | pause_graph_tracing(); |
1353 | raw_local_irq_save(irq_flags); | 1464 | raw_local_irq_save(irq_flags); |
1354 | __raw_spin_lock(&trace_buf_lock); | 1465 | arch_spin_lock(&trace_buf_lock); |
1355 | len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); | 1466 | len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); |
1356 | 1467 | ||
1357 | len = min(len, TRACE_BUF_SIZE-1); | ||
1358 | trace_buf[len] = 0; | ||
1359 | |||
1360 | size = sizeof(*entry) + len + 1; | 1468 | size = sizeof(*entry) + len + 1; |
1361 | event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); | 1469 | buffer = tr->buffer; |
1470 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, | ||
1471 | irq_flags, pc); | ||
1362 | if (!event) | 1472 | if (!event) |
1363 | goto out_unlock; | 1473 | goto out_unlock; |
1364 | entry = ring_buffer_event_data(event); | 1474 | entry = ring_buffer_event_data(event); |
1365 | entry->ip = ip; | 1475 | entry->ip = ip; |
1366 | 1476 | ||
1367 | memcpy(&entry->buf, trace_buf, len); | 1477 | memcpy(&entry->buf, trace_buf, len); |
1368 | entry->buf[len] = 0; | 1478 | entry->buf[len] = '\0'; |
1369 | if (!filter_check_discard(call, entry, tr->buffer, event)) | 1479 | if (!filter_check_discard(call, entry, buffer, event)) { |
1370 | ring_buffer_unlock_commit(tr->buffer, event); | 1480 | ring_buffer_unlock_commit(buffer, event); |
1481 | ftrace_trace_stack(buffer, irq_flags, 6, pc); | ||
1482 | } | ||
1371 | 1483 | ||
1372 | out_unlock: | 1484 | out_unlock: |
1373 | __raw_spin_unlock(&trace_buf_lock); | 1485 | arch_spin_unlock(&trace_buf_lock); |
1374 | raw_local_irq_restore(irq_flags); | 1486 | raw_local_irq_restore(irq_flags); |
1375 | unpause_graph_tracing(); | 1487 | unpause_graph_tracing(); |
1376 | out: | 1488 | out: |
@@ -1379,12 +1491,12 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) | |||
1379 | 1491 | ||
1380 | return len; | 1492 | return len; |
1381 | } | 1493 | } |
1382 | EXPORT_SYMBOL_GPL(trace_vprintk); | ||
1383 | 1494 | ||
1384 | enum trace_file_type { | 1495 | int trace_vprintk(unsigned long ip, const char *fmt, va_list args) |
1385 | TRACE_FILE_LAT_FMT = 1, | 1496 | { |
1386 | TRACE_FILE_ANNOTATE = 2, | 1497 | return trace_array_vprintk(&global_trace, ip, fmt, args); |
1387 | }; | 1498 | } |
1499 | EXPORT_SYMBOL_GPL(trace_vprintk); | ||
1388 | 1500 | ||
1389 | static void trace_iterator_increment(struct trace_iterator *iter) | 1501 | static void trace_iterator_increment(struct trace_iterator *iter) |
1390 | { | 1502 | { |
@@ -1399,7 +1511,8 @@ static void trace_iterator_increment(struct trace_iterator *iter) | |||
1399 | } | 1511 | } |
1400 | 1512 | ||
1401 | static struct trace_entry * | 1513 | static struct trace_entry * |
1402 | peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) | 1514 | peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, |
1515 | unsigned long *lost_events) | ||
1403 | { | 1516 | { |
1404 | struct ring_buffer_event *event; | 1517 | struct ring_buffer_event *event; |
1405 | struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; | 1518 | struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; |
@@ -1410,7 +1523,8 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) | |||
1410 | if (buf_iter) | 1523 | if (buf_iter) |
1411 | event = ring_buffer_iter_peek(buf_iter, ts); | 1524 | event = ring_buffer_iter_peek(buf_iter, ts); |
1412 | else | 1525 | else |
1413 | event = ring_buffer_peek(iter->tr->buffer, cpu, ts); | 1526 | event = ring_buffer_peek(iter->tr->buffer, cpu, ts, |
1527 | lost_events); | ||
1414 | 1528 | ||
1415 | ftrace_enable_cpu(); | 1529 | ftrace_enable_cpu(); |
1416 | 1530 | ||
@@ -1418,10 +1532,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) | |||
1418 | } | 1532 | } |
1419 | 1533 | ||
1420 | static struct trace_entry * | 1534 | static struct trace_entry * |
1421 | __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) | 1535 | __find_next_entry(struct trace_iterator *iter, int *ent_cpu, |
1536 | unsigned long *missing_events, u64 *ent_ts) | ||
1422 | { | 1537 | { |
1423 | struct ring_buffer *buffer = iter->tr->buffer; | 1538 | struct ring_buffer *buffer = iter->tr->buffer; |
1424 | struct trace_entry *ent, *next = NULL; | 1539 | struct trace_entry *ent, *next = NULL; |
1540 | unsigned long lost_events = 0, next_lost = 0; | ||
1425 | int cpu_file = iter->cpu_file; | 1541 | int cpu_file = iter->cpu_file; |
1426 | u64 next_ts = 0, ts; | 1542 | u64 next_ts = 0, ts; |
1427 | int next_cpu = -1; | 1543 | int next_cpu = -1; |
@@ -1434,7 +1550,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) | |||
1434 | if (cpu_file > TRACE_PIPE_ALL_CPU) { | 1550 | if (cpu_file > TRACE_PIPE_ALL_CPU) { |
1435 | if (ring_buffer_empty_cpu(buffer, cpu_file)) | 1551 | if (ring_buffer_empty_cpu(buffer, cpu_file)) |
1436 | return NULL; | 1552 | return NULL; |
1437 | ent = peek_next_entry(iter, cpu_file, ent_ts); | 1553 | ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); |
1438 | if (ent_cpu) | 1554 | if (ent_cpu) |
1439 | *ent_cpu = cpu_file; | 1555 | *ent_cpu = cpu_file; |
1440 | 1556 | ||
@@ -1446,7 +1562,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) | |||
1446 | if (ring_buffer_empty_cpu(buffer, cpu)) | 1562 | if (ring_buffer_empty_cpu(buffer, cpu)) |
1447 | continue; | 1563 | continue; |
1448 | 1564 | ||
1449 | ent = peek_next_entry(iter, cpu, &ts); | 1565 | ent = peek_next_entry(iter, cpu, &ts, &lost_events); |
1450 | 1566 | ||
1451 | /* | 1567 | /* |
1452 | * Pick the entry with the smallest timestamp: | 1568 | * Pick the entry with the smallest timestamp: |
@@ -1455,6 +1571,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) | |||
1455 | next = ent; | 1571 | next = ent; |
1456 | next_cpu = cpu; | 1572 | next_cpu = cpu; |
1457 | next_ts = ts; | 1573 | next_ts = ts; |
1574 | next_lost = lost_events; | ||
1458 | } | 1575 | } |
1459 | } | 1576 | } |
1460 | 1577 | ||
@@ -1464,6 +1581,9 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) | |||
1464 | if (ent_ts) | 1581 | if (ent_ts) |
1465 | *ent_ts = next_ts; | 1582 | *ent_ts = next_ts; |
1466 | 1583 | ||
1584 | if (missing_events) | ||
1585 | *missing_events = next_lost; | ||
1586 | |||
1467 | return next; | 1587 | return next; |
1468 | } | 1588 | } |
1469 | 1589 | ||
@@ -1471,13 +1591,14 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) | |||
1471 | struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, | 1591 | struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, |
1472 | int *ent_cpu, u64 *ent_ts) | 1592 | int *ent_cpu, u64 *ent_ts) |
1473 | { | 1593 | { |
1474 | return __find_next_entry(iter, ent_cpu, ent_ts); | 1594 | return __find_next_entry(iter, ent_cpu, NULL, ent_ts); |
1475 | } | 1595 | } |
1476 | 1596 | ||
1477 | /* Find the next real entry, and increment the iterator to the next entry */ | 1597 | /* Find the next real entry, and increment the iterator to the next entry */ |
1478 | static void *find_next_entry_inc(struct trace_iterator *iter) | 1598 | void *trace_find_next_entry_inc(struct trace_iterator *iter) |
1479 | { | 1599 | { |
1480 | iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts); | 1600 | iter->ent = __find_next_entry(iter, &iter->cpu, |
1601 | &iter->lost_events, &iter->ts); | ||
1481 | 1602 | ||
1482 | if (iter->ent) | 1603 | if (iter->ent) |
1483 | trace_iterator_increment(iter); | 1604 | trace_iterator_increment(iter); |
@@ -1489,7 +1610,8 @@ static void trace_consume(struct trace_iterator *iter) | |||
1489 | { | 1610 | { |
1490 | /* Don't allow ftrace to trace into the ring buffers */ | 1611 | /* Don't allow ftrace to trace into the ring buffers */ |
1491 | ftrace_disable_cpu(); | 1612 | ftrace_disable_cpu(); |
1492 | ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts); | 1613 | ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts, |
1614 | &iter->lost_events); | ||
1493 | ftrace_enable_cpu(); | 1615 | ftrace_enable_cpu(); |
1494 | } | 1616 | } |
1495 | 1617 | ||
@@ -1499,6 +1621,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos) | |||
1499 | int i = (int)*pos; | 1621 | int i = (int)*pos; |
1500 | void *ent; | 1622 | void *ent; |
1501 | 1623 | ||
1624 | WARN_ON_ONCE(iter->leftover); | ||
1625 | |||
1502 | (*pos)++; | 1626 | (*pos)++; |
1503 | 1627 | ||
1504 | /* can't go backwards */ | 1628 | /* can't go backwards */ |
@@ -1506,25 +1630,50 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos) | |||
1506 | return NULL; | 1630 | return NULL; |
1507 | 1631 | ||
1508 | if (iter->idx < 0) | 1632 | if (iter->idx < 0) |
1509 | ent = find_next_entry_inc(iter); | 1633 | ent = trace_find_next_entry_inc(iter); |
1510 | else | 1634 | else |
1511 | ent = iter; | 1635 | ent = iter; |
1512 | 1636 | ||
1513 | while (ent && iter->idx < i) | 1637 | while (ent && iter->idx < i) |
1514 | ent = find_next_entry_inc(iter); | 1638 | ent = trace_find_next_entry_inc(iter); |
1515 | 1639 | ||
1516 | iter->pos = *pos; | 1640 | iter->pos = *pos; |
1517 | 1641 | ||
1518 | return ent; | 1642 | return ent; |
1519 | } | 1643 | } |
1520 | 1644 | ||
1645 | void tracing_iter_reset(struct trace_iterator *iter, int cpu) | ||
1646 | { | ||
1647 | struct trace_array *tr = iter->tr; | ||
1648 | struct ring_buffer_event *event; | ||
1649 | struct ring_buffer_iter *buf_iter; | ||
1650 | unsigned long entries = 0; | ||
1651 | u64 ts; | ||
1652 | |||
1653 | tr->data[cpu]->skipped_entries = 0; | ||
1654 | |||
1655 | if (!iter->buffer_iter[cpu]) | ||
1656 | return; | ||
1657 | |||
1658 | buf_iter = iter->buffer_iter[cpu]; | ||
1659 | ring_buffer_iter_reset(buf_iter); | ||
1660 | |||
1661 | /* | ||
1662 | * We could have the case with the max latency tracers | ||
1663 | * that a reset never took place on a cpu. This is evident | ||
1664 | * by the timestamp being before the start of the buffer. | ||
1665 | */ | ||
1666 | while ((event = ring_buffer_iter_peek(buf_iter, &ts))) { | ||
1667 | if (ts >= iter->tr->time_start) | ||
1668 | break; | ||
1669 | entries++; | ||
1670 | ring_buffer_read(buf_iter, NULL); | ||
1671 | } | ||
1672 | |||
1673 | tr->data[cpu]->skipped_entries = entries; | ||
1674 | } | ||
1675 | |||
1521 | /* | 1676 | /* |
1522 | * No necessary locking here. The worst thing which can | ||
1523 | * happen is loosing events consumed at the same time | ||
1524 | * by a trace_pipe reader. | ||
1525 | * Other than that, we don't risk to crash the ring buffer | ||
1526 | * because it serializes the readers. | ||
1527 | * | ||
1528 | * The current tracer is copied to avoid a global locking | 1677 | * The current tracer is copied to avoid a global locking |
1529 | * all around. | 1678 | * all around. |
1530 | */ | 1679 | */ |
@@ -1556,28 +1705,40 @@ static void *s_start(struct seq_file *m, loff_t *pos) | |||
1556 | 1705 | ||
1557 | if (cpu_file == TRACE_PIPE_ALL_CPU) { | 1706 | if (cpu_file == TRACE_PIPE_ALL_CPU) { |
1558 | for_each_tracing_cpu(cpu) | 1707 | for_each_tracing_cpu(cpu) |
1559 | ring_buffer_iter_reset(iter->buffer_iter[cpu]); | 1708 | tracing_iter_reset(iter, cpu); |
1560 | } else | 1709 | } else |
1561 | ring_buffer_iter_reset(iter->buffer_iter[cpu_file]); | 1710 | tracing_iter_reset(iter, cpu_file); |
1562 | |||
1563 | 1711 | ||
1564 | ftrace_enable_cpu(); | 1712 | ftrace_enable_cpu(); |
1565 | 1713 | ||
1714 | iter->leftover = 0; | ||
1566 | for (p = iter; p && l < *pos; p = s_next(m, p, &l)) | 1715 | for (p = iter; p && l < *pos; p = s_next(m, p, &l)) |
1567 | ; | 1716 | ; |
1568 | 1717 | ||
1569 | } else { | 1718 | } else { |
1570 | l = *pos - 1; | 1719 | /* |
1571 | p = s_next(m, p, &l); | 1720 | * If we overflowed the seq_file before, then we want |
1721 | * to just reuse the trace_seq buffer again. | ||
1722 | */ | ||
1723 | if (iter->leftover) | ||
1724 | p = iter; | ||
1725 | else { | ||
1726 | l = *pos - 1; | ||
1727 | p = s_next(m, p, &l); | ||
1728 | } | ||
1572 | } | 1729 | } |
1573 | 1730 | ||
1574 | trace_event_read_lock(); | 1731 | trace_event_read_lock(); |
1732 | trace_access_lock(cpu_file); | ||
1575 | return p; | 1733 | return p; |
1576 | } | 1734 | } |
1577 | 1735 | ||
1578 | static void s_stop(struct seq_file *m, void *p) | 1736 | static void s_stop(struct seq_file *m, void *p) |
1579 | { | 1737 | { |
1738 | struct trace_iterator *iter = m->private; | ||
1739 | |||
1580 | atomic_dec(&trace_record_cmdline_disabled); | 1740 | atomic_dec(&trace_record_cmdline_disabled); |
1741 | trace_access_unlock(iter->cpu_file); | ||
1581 | trace_event_read_unlock(); | 1742 | trace_event_read_unlock(); |
1582 | } | 1743 | } |
1583 | 1744 | ||
@@ -1588,10 +1749,10 @@ static void print_lat_help_header(struct seq_file *m) | |||
1588 | seq_puts(m, "# | / _----=> need-resched \n"); | 1749 | seq_puts(m, "# | / _----=> need-resched \n"); |
1589 | seq_puts(m, "# || / _---=> hardirq/softirq \n"); | 1750 | seq_puts(m, "# || / _---=> hardirq/softirq \n"); |
1590 | seq_puts(m, "# ||| / _--=> preempt-depth \n"); | 1751 | seq_puts(m, "# ||| / _--=> preempt-depth \n"); |
1591 | seq_puts(m, "# |||| / \n"); | 1752 | seq_puts(m, "# |||| /_--=> lock-depth \n"); |
1592 | seq_puts(m, "# ||||| delay \n"); | 1753 | seq_puts(m, "# |||||/ delay \n"); |
1593 | seq_puts(m, "# cmd pid ||||| time | caller \n"); | 1754 | seq_puts(m, "# cmd pid |||||| time | caller \n"); |
1594 | seq_puts(m, "# \\ / ||||| \\ | / \n"); | 1755 | seq_puts(m, "# \\ / |||||| \\ | / \n"); |
1595 | } | 1756 | } |
1596 | 1757 | ||
1597 | static void print_func_help_header(struct seq_file *m) | 1758 | static void print_func_help_header(struct seq_file *m) |
@@ -1601,23 +1762,39 @@ static void print_func_help_header(struct seq_file *m) | |||
1601 | } | 1762 | } |
1602 | 1763 | ||
1603 | 1764 | ||
1604 | static void | 1765 | void |
1605 | print_trace_header(struct seq_file *m, struct trace_iterator *iter) | 1766 | print_trace_header(struct seq_file *m, struct trace_iterator *iter) |
1606 | { | 1767 | { |
1607 | unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); | 1768 | unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); |
1608 | struct trace_array *tr = iter->tr; | 1769 | struct trace_array *tr = iter->tr; |
1609 | struct trace_array_cpu *data = tr->data[tr->cpu]; | 1770 | struct trace_array_cpu *data = tr->data[tr->cpu]; |
1610 | struct tracer *type = current_trace; | 1771 | struct tracer *type = current_trace; |
1611 | unsigned long total; | 1772 | unsigned long entries = 0; |
1612 | unsigned long entries; | 1773 | unsigned long total = 0; |
1774 | unsigned long count; | ||
1613 | const char *name = "preemption"; | 1775 | const char *name = "preemption"; |
1776 | int cpu; | ||
1614 | 1777 | ||
1615 | if (type) | 1778 | if (type) |
1616 | name = type->name; | 1779 | name = type->name; |
1617 | 1780 | ||
1618 | entries = ring_buffer_entries(iter->tr->buffer); | 1781 | |
1619 | total = entries + | 1782 | for_each_tracing_cpu(cpu) { |
1620 | ring_buffer_overruns(iter->tr->buffer); | 1783 | count = ring_buffer_entries_cpu(tr->buffer, cpu); |
1784 | /* | ||
1785 | * If this buffer has skipped entries, then we hold all | ||
1786 | * entries for the trace and we need to ignore the | ||
1787 | * ones before the time stamp. | ||
1788 | */ | ||
1789 | if (tr->data[cpu]->skipped_entries) { | ||
1790 | count -= tr->data[cpu]->skipped_entries; | ||
1791 | /* total is the same as the entries */ | ||
1792 | total += count; | ||
1793 | } else | ||
1794 | total += count + | ||
1795 | ring_buffer_overrun_cpu(tr->buffer, cpu); | ||
1796 | entries += count; | ||
1797 | } | ||
1621 | 1798 | ||
1622 | seq_printf(m, "# %s latency trace v1.1.5 on %s\n", | 1799 | seq_printf(m, "# %s latency trace v1.1.5 on %s\n", |
1623 | name, UTS_RELEASE); | 1800 | name, UTS_RELEASE); |
@@ -1659,7 +1836,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) | |||
1659 | seq_puts(m, "\n# => ended at: "); | 1836 | seq_puts(m, "\n# => ended at: "); |
1660 | seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); | 1837 | seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); |
1661 | trace_print_seq(m, &iter->seq); | 1838 | trace_print_seq(m, &iter->seq); |
1662 | seq_puts(m, "#\n"); | 1839 | seq_puts(m, "\n#\n"); |
1663 | } | 1840 | } |
1664 | 1841 | ||
1665 | seq_puts(m, "#\n"); | 1842 | seq_puts(m, "#\n"); |
@@ -1678,6 +1855,9 @@ static void test_cpu_buff_start(struct trace_iterator *iter) | |||
1678 | if (cpumask_test_cpu(iter->cpu, iter->started)) | 1855 | if (cpumask_test_cpu(iter->cpu, iter->started)) |
1679 | return; | 1856 | return; |
1680 | 1857 | ||
1858 | if (iter->tr->data[iter->cpu]->skipped_entries) | ||
1859 | return; | ||
1860 | |||
1681 | cpumask_set_cpu(iter->cpu, iter->started); | 1861 | cpumask_set_cpu(iter->cpu, iter->started); |
1682 | 1862 | ||
1683 | /* Don't print started cpu buffer for the first entry of the trace */ | 1863 | /* Don't print started cpu buffer for the first entry of the trace */ |
@@ -1710,7 +1890,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) | |||
1710 | } | 1890 | } |
1711 | 1891 | ||
1712 | if (event) | 1892 | if (event) |
1713 | return event->trace(iter, sym_flags); | 1893 | return event->funcs->trace(iter, sym_flags, event); |
1714 | 1894 | ||
1715 | if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) | 1895 | if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) |
1716 | goto partial; | 1896 | goto partial; |
@@ -1736,7 +1916,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) | |||
1736 | 1916 | ||
1737 | event = ftrace_find_event(entry->type); | 1917 | event = ftrace_find_event(entry->type); |
1738 | if (event) | 1918 | if (event) |
1739 | return event->raw(iter, 0); | 1919 | return event->funcs->raw(iter, 0, event); |
1740 | 1920 | ||
1741 | if (!trace_seq_printf(s, "%d ?\n", entry->type)) | 1921 | if (!trace_seq_printf(s, "%d ?\n", entry->type)) |
1742 | goto partial; | 1922 | goto partial; |
@@ -1763,7 +1943,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) | |||
1763 | 1943 | ||
1764 | event = ftrace_find_event(entry->type); | 1944 | event = ftrace_find_event(entry->type); |
1765 | if (event) { | 1945 | if (event) { |
1766 | enum print_line_t ret = event->hex(iter, 0); | 1946 | enum print_line_t ret = event->funcs->hex(iter, 0, event); |
1767 | if (ret != TRACE_TYPE_HANDLED) | 1947 | if (ret != TRACE_TYPE_HANDLED) |
1768 | return ret; | 1948 | return ret; |
1769 | } | 1949 | } |
@@ -1788,10 +1968,11 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) | |||
1788 | } | 1968 | } |
1789 | 1969 | ||
1790 | event = ftrace_find_event(entry->type); | 1970 | event = ftrace_find_event(entry->type); |
1791 | return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED; | 1971 | return event ? event->funcs->binary(iter, 0, event) : |
1972 | TRACE_TYPE_HANDLED; | ||
1792 | } | 1973 | } |
1793 | 1974 | ||
1794 | static int trace_empty(struct trace_iterator *iter) | 1975 | int trace_empty(struct trace_iterator *iter) |
1795 | { | 1976 | { |
1796 | int cpu; | 1977 | int cpu; |
1797 | 1978 | ||
@@ -1822,10 +2003,14 @@ static int trace_empty(struct trace_iterator *iter) | |||
1822 | } | 2003 | } |
1823 | 2004 | ||
1824 | /* Called with trace_event_read_lock() held. */ | 2005 | /* Called with trace_event_read_lock() held. */ |
1825 | static enum print_line_t print_trace_line(struct trace_iterator *iter) | 2006 | enum print_line_t print_trace_line(struct trace_iterator *iter) |
1826 | { | 2007 | { |
1827 | enum print_line_t ret; | 2008 | enum print_line_t ret; |
1828 | 2009 | ||
2010 | if (iter->lost_events) | ||
2011 | trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", | ||
2012 | iter->cpu, iter->lost_events); | ||
2013 | |||
1829 | if (iter->trace && iter->trace->print_line) { | 2014 | if (iter->trace && iter->trace->print_line) { |
1830 | ret = iter->trace->print_line(iter); | 2015 | ret = iter->trace->print_line(iter); |
1831 | if (ret != TRACE_TYPE_UNHANDLED) | 2016 | if (ret != TRACE_TYPE_UNHANDLED) |
@@ -1854,9 +2039,27 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) | |||
1854 | return print_trace_fmt(iter); | 2039 | return print_trace_fmt(iter); |
1855 | } | 2040 | } |
1856 | 2041 | ||
2042 | void trace_default_header(struct seq_file *m) | ||
2043 | { | ||
2044 | struct trace_iterator *iter = m->private; | ||
2045 | |||
2046 | if (iter->iter_flags & TRACE_FILE_LAT_FMT) { | ||
2047 | /* print nothing if the buffers are empty */ | ||
2048 | if (trace_empty(iter)) | ||
2049 | return; | ||
2050 | print_trace_header(m, iter); | ||
2051 | if (!(trace_flags & TRACE_ITER_VERBOSE)) | ||
2052 | print_lat_help_header(m); | ||
2053 | } else { | ||
2054 | if (!(trace_flags & TRACE_ITER_VERBOSE)) | ||
2055 | print_func_help_header(m); | ||
2056 | } | ||
2057 | } | ||
2058 | |||
1857 | static int s_show(struct seq_file *m, void *v) | 2059 | static int s_show(struct seq_file *m, void *v) |
1858 | { | 2060 | { |
1859 | struct trace_iterator *iter = v; | 2061 | struct trace_iterator *iter = v; |
2062 | int ret; | ||
1860 | 2063 | ||
1861 | if (iter->ent == NULL) { | 2064 | if (iter->ent == NULL) { |
1862 | if (iter->tr) { | 2065 | if (iter->tr) { |
@@ -1865,26 +2068,36 @@ static int s_show(struct seq_file *m, void *v) | |||
1865 | } | 2068 | } |
1866 | if (iter->trace && iter->trace->print_header) | 2069 | if (iter->trace && iter->trace->print_header) |
1867 | iter->trace->print_header(m); | 2070 | iter->trace->print_header(m); |
1868 | else if (iter->iter_flags & TRACE_FILE_LAT_FMT) { | 2071 | else |
1869 | /* print nothing if the buffers are empty */ | 2072 | trace_default_header(m); |
1870 | if (trace_empty(iter)) | 2073 | |
1871 | return 0; | 2074 | } else if (iter->leftover) { |
1872 | print_trace_header(m, iter); | 2075 | /* |
1873 | if (!(trace_flags & TRACE_ITER_VERBOSE)) | 2076 | * If we filled the seq_file buffer earlier, we |
1874 | print_lat_help_header(m); | 2077 | * want to just show it now. |
1875 | } else { | 2078 | */ |
1876 | if (!(trace_flags & TRACE_ITER_VERBOSE)) | 2079 | ret = trace_print_seq(m, &iter->seq); |
1877 | print_func_help_header(m); | 2080 | |
1878 | } | 2081 | /* ret should this time be zero, but you never know */ |
2082 | iter->leftover = ret; | ||
2083 | |||
1879 | } else { | 2084 | } else { |
1880 | print_trace_line(iter); | 2085 | print_trace_line(iter); |
1881 | trace_print_seq(m, &iter->seq); | 2086 | ret = trace_print_seq(m, &iter->seq); |
2087 | /* | ||
2088 | * If we overflow the seq_file buffer, then it will | ||
2089 | * ask us for this data again at start up. | ||
2090 | * Use that instead. | ||
2091 | * ret is 0 if seq_file write succeeded. | ||
2092 | * -1 otherwise. | ||
2093 | */ | ||
2094 | iter->leftover = ret; | ||
1882 | } | 2095 | } |
1883 | 2096 | ||
1884 | return 0; | 2097 | return 0; |
1885 | } | 2098 | } |
1886 | 2099 | ||
1887 | static struct seq_operations tracer_seq_ops = { | 2100 | static const struct seq_operations tracer_seq_ops = { |
1888 | .start = s_start, | 2101 | .start = s_start, |
1889 | .next = s_next, | 2102 | .next = s_next, |
1890 | .stop = s_stop, | 2103 | .stop = s_stop, |
@@ -1919,11 +2132,9 @@ __tracing_open(struct inode *inode, struct file *file) | |||
1919 | if (current_trace) | 2132 | if (current_trace) |
1920 | *iter->trace = *current_trace; | 2133 | *iter->trace = *current_trace; |
1921 | 2134 | ||
1922 | if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) | 2135 | if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) |
1923 | goto fail; | 2136 | goto fail; |
1924 | 2137 | ||
1925 | cpumask_clear(iter->started); | ||
1926 | |||
1927 | if (current_trace && current_trace->print_max) | 2138 | if (current_trace && current_trace->print_max) |
1928 | iter->tr = &max_tr; | 2139 | iter->tr = &max_tr; |
1929 | else | 2140 | else |
@@ -1940,19 +2151,28 @@ __tracing_open(struct inode *inode, struct file *file) | |||
1940 | if (ring_buffer_overruns(iter->tr->buffer)) | 2151 | if (ring_buffer_overruns(iter->tr->buffer)) |
1941 | iter->iter_flags |= TRACE_FILE_ANNOTATE; | 2152 | iter->iter_flags |= TRACE_FILE_ANNOTATE; |
1942 | 2153 | ||
2154 | /* stop the trace while dumping */ | ||
2155 | tracing_stop(); | ||
2156 | |||
1943 | if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { | 2157 | if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { |
1944 | for_each_tracing_cpu(cpu) { | 2158 | for_each_tracing_cpu(cpu) { |
1945 | |||
1946 | iter->buffer_iter[cpu] = | 2159 | iter->buffer_iter[cpu] = |
1947 | ring_buffer_read_start(iter->tr->buffer, cpu); | 2160 | ring_buffer_read_prepare(iter->tr->buffer, cpu); |
2161 | } | ||
2162 | ring_buffer_read_prepare_sync(); | ||
2163 | for_each_tracing_cpu(cpu) { | ||
2164 | ring_buffer_read_start(iter->buffer_iter[cpu]); | ||
2165 | tracing_iter_reset(iter, cpu); | ||
1948 | } | 2166 | } |
1949 | } else { | 2167 | } else { |
1950 | cpu = iter->cpu_file; | 2168 | cpu = iter->cpu_file; |
1951 | iter->buffer_iter[cpu] = | 2169 | iter->buffer_iter[cpu] = |
1952 | ring_buffer_read_start(iter->tr->buffer, cpu); | 2170 | ring_buffer_read_prepare(iter->tr->buffer, cpu); |
2171 | ring_buffer_read_prepare_sync(); | ||
2172 | ring_buffer_read_start(iter->buffer_iter[cpu]); | ||
2173 | tracing_iter_reset(iter, cpu); | ||
1953 | } | 2174 | } |
1954 | 2175 | ||
1955 | /* TODO stop tracer */ | ||
1956 | ret = seq_open(file, &tracer_seq_ops); | 2176 | ret = seq_open(file, &tracer_seq_ops); |
1957 | if (ret < 0) { | 2177 | if (ret < 0) { |
1958 | fail_ret = ERR_PTR(ret); | 2178 | fail_ret = ERR_PTR(ret); |
@@ -1962,9 +2182,6 @@ __tracing_open(struct inode *inode, struct file *file) | |||
1962 | m = file->private_data; | 2182 | m = file->private_data; |
1963 | m->private = iter; | 2183 | m->private = iter; |
1964 | 2184 | ||
1965 | /* stop the trace while dumping */ | ||
1966 | tracing_stop(); | ||
1967 | |||
1968 | mutex_unlock(&trace_types_lock); | 2185 | mutex_unlock(&trace_types_lock); |
1969 | 2186 | ||
1970 | return iter; | 2187 | return iter; |
@@ -1975,6 +2192,7 @@ __tracing_open(struct inode *inode, struct file *file) | |||
1975 | ring_buffer_read_finish(iter->buffer_iter[cpu]); | 2192 | ring_buffer_read_finish(iter->buffer_iter[cpu]); |
1976 | } | 2193 | } |
1977 | free_cpumask_var(iter->started); | 2194 | free_cpumask_var(iter->started); |
2195 | tracing_start(); | ||
1978 | fail: | 2196 | fail: |
1979 | mutex_unlock(&trace_types_lock); | 2197 | mutex_unlock(&trace_types_lock); |
1980 | kfree(iter->trace); | 2198 | kfree(iter->trace); |
@@ -1994,7 +2212,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp) | |||
1994 | 2212 | ||
1995 | static int tracing_release(struct inode *inode, struct file *file) | 2213 | static int tracing_release(struct inode *inode, struct file *file) |
1996 | { | 2214 | { |
1997 | struct seq_file *m = (struct seq_file *)file->private_data; | 2215 | struct seq_file *m = file->private_data; |
1998 | struct trace_iterator *iter; | 2216 | struct trace_iterator *iter; |
1999 | int cpu; | 2217 | int cpu; |
2000 | 2218 | ||
@@ -2031,7 +2249,7 @@ static int tracing_open(struct inode *inode, struct file *file) | |||
2031 | 2249 | ||
2032 | /* If this file was open for write, then erase contents */ | 2250 | /* If this file was open for write, then erase contents */ |
2033 | if ((file->f_mode & FMODE_WRITE) && | 2251 | if ((file->f_mode & FMODE_WRITE) && |
2034 | !(file->f_flags & O_APPEND)) { | 2252 | (file->f_flags & O_TRUNC)) { |
2035 | long cpu = (long) inode->i_private; | 2253 | long cpu = (long) inode->i_private; |
2036 | 2254 | ||
2037 | if (cpu == TRACE_PIPE_ALL_CPU) | 2255 | if (cpu == TRACE_PIPE_ALL_CPU) |
@@ -2053,25 +2271,23 @@ static int tracing_open(struct inode *inode, struct file *file) | |||
2053 | static void * | 2271 | static void * |
2054 | t_next(struct seq_file *m, void *v, loff_t *pos) | 2272 | t_next(struct seq_file *m, void *v, loff_t *pos) |
2055 | { | 2273 | { |
2056 | struct tracer *t = m->private; | 2274 | struct tracer *t = v; |
2057 | 2275 | ||
2058 | (*pos)++; | 2276 | (*pos)++; |
2059 | 2277 | ||
2060 | if (t) | 2278 | if (t) |
2061 | t = t->next; | 2279 | t = t->next; |
2062 | 2280 | ||
2063 | m->private = t; | ||
2064 | |||
2065 | return t; | 2281 | return t; |
2066 | } | 2282 | } |
2067 | 2283 | ||
2068 | static void *t_start(struct seq_file *m, loff_t *pos) | 2284 | static void *t_start(struct seq_file *m, loff_t *pos) |
2069 | { | 2285 | { |
2070 | struct tracer *t = m->private; | 2286 | struct tracer *t; |
2071 | loff_t l = 0; | 2287 | loff_t l = 0; |
2072 | 2288 | ||
2073 | mutex_lock(&trace_types_lock); | 2289 | mutex_lock(&trace_types_lock); |
2074 | for (; t && l < *pos; t = t_next(m, t, &l)) | 2290 | for (t = trace_types; t && l < *pos; t = t_next(m, t, &l)) |
2075 | ; | 2291 | ; |
2076 | 2292 | ||
2077 | return t; | 2293 | return t; |
@@ -2098,7 +2314,7 @@ static int t_show(struct seq_file *m, void *v) | |||
2098 | return 0; | 2314 | return 0; |
2099 | } | 2315 | } |
2100 | 2316 | ||
2101 | static struct seq_operations show_traces_seq_ops = { | 2317 | static const struct seq_operations show_traces_seq_ops = { |
2102 | .start = t_start, | 2318 | .start = t_start, |
2103 | .next = t_next, | 2319 | .next = t_next, |
2104 | .stop = t_stop, | 2320 | .stop = t_stop, |
@@ -2107,18 +2323,10 @@ static struct seq_operations show_traces_seq_ops = { | |||
2107 | 2323 | ||
2108 | static int show_traces_open(struct inode *inode, struct file *file) | 2324 | static int show_traces_open(struct inode *inode, struct file *file) |
2109 | { | 2325 | { |
2110 | int ret; | ||
2111 | |||
2112 | if (tracing_disabled) | 2326 | if (tracing_disabled) |
2113 | return -ENODEV; | 2327 | return -ENODEV; |
2114 | 2328 | ||
2115 | ret = seq_open(file, &show_traces_seq_ops); | 2329 | return seq_open(file, &show_traces_seq_ops); |
2116 | if (!ret) { | ||
2117 | struct seq_file *m = file->private_data; | ||
2118 | m->private = trace_types; | ||
2119 | } | ||
2120 | |||
2121 | return ret; | ||
2122 | } | 2330 | } |
2123 | 2331 | ||
2124 | static ssize_t | 2332 | static ssize_t |
@@ -2128,11 +2336,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf, | |||
2128 | return count; | 2336 | return count; |
2129 | } | 2337 | } |
2130 | 2338 | ||
2339 | static loff_t tracing_seek(struct file *file, loff_t offset, int origin) | ||
2340 | { | ||
2341 | if (file->f_mode & FMODE_READ) | ||
2342 | return seq_lseek(file, offset, origin); | ||
2343 | else | ||
2344 | return 0; | ||
2345 | } | ||
2346 | |||
2131 | static const struct file_operations tracing_fops = { | 2347 | static const struct file_operations tracing_fops = { |
2132 | .open = tracing_open, | 2348 | .open = tracing_open, |
2133 | .read = seq_read, | 2349 | .read = seq_read, |
2134 | .write = tracing_write_stub, | 2350 | .write = tracing_write_stub, |
2135 | .llseek = seq_lseek, | 2351 | .llseek = tracing_seek, |
2136 | .release = tracing_release, | 2352 | .release = tracing_release, |
2137 | }; | 2353 | }; |
2138 | 2354 | ||
@@ -2140,6 +2356,7 @@ static const struct file_operations show_traces_fops = { | |||
2140 | .open = show_traces_open, | 2356 | .open = show_traces_open, |
2141 | .read = seq_read, | 2357 | .read = seq_read, |
2142 | .release = seq_release, | 2358 | .release = seq_release, |
2359 | .llseek = seq_lseek, | ||
2143 | }; | 2360 | }; |
2144 | 2361 | ||
2145 | /* | 2362 | /* |
@@ -2198,7 +2415,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, | |||
2198 | mutex_lock(&tracing_cpumask_update_lock); | 2415 | mutex_lock(&tracing_cpumask_update_lock); |
2199 | 2416 | ||
2200 | local_irq_disable(); | 2417 | local_irq_disable(); |
2201 | __raw_spin_lock(&ftrace_max_lock); | 2418 | arch_spin_lock(&ftrace_max_lock); |
2202 | for_each_tracing_cpu(cpu) { | 2419 | for_each_tracing_cpu(cpu) { |
2203 | /* | 2420 | /* |
2204 | * Increase/decrease the disabled counter if we are | 2421 | * Increase/decrease the disabled counter if we are |
@@ -2213,7 +2430,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, | |||
2213 | atomic_dec(&global_trace.data[cpu]->disabled); | 2430 | atomic_dec(&global_trace.data[cpu]->disabled); |
2214 | } | 2431 | } |
2215 | } | 2432 | } |
2216 | __raw_spin_unlock(&ftrace_max_lock); | 2433 | arch_spin_unlock(&ftrace_max_lock); |
2217 | local_irq_enable(); | 2434 | local_irq_enable(); |
2218 | 2435 | ||
2219 | cpumask_copy(tracing_cpumask, tracing_cpumask_new); | 2436 | cpumask_copy(tracing_cpumask, tracing_cpumask_new); |
@@ -2233,103 +2450,70 @@ static const struct file_operations tracing_cpumask_fops = { | |||
2233 | .open = tracing_open_generic, | 2450 | .open = tracing_open_generic, |
2234 | .read = tracing_cpumask_read, | 2451 | .read = tracing_cpumask_read, |
2235 | .write = tracing_cpumask_write, | 2452 | .write = tracing_cpumask_write, |
2453 | .llseek = generic_file_llseek, | ||
2236 | }; | 2454 | }; |
2237 | 2455 | ||
2238 | static ssize_t | 2456 | static int tracing_trace_options_show(struct seq_file *m, void *v) |
2239 | tracing_trace_options_read(struct file *filp, char __user *ubuf, | ||
2240 | size_t cnt, loff_t *ppos) | ||
2241 | { | 2457 | { |
2242 | struct tracer_opt *trace_opts; | 2458 | struct tracer_opt *trace_opts; |
2243 | u32 tracer_flags; | 2459 | u32 tracer_flags; |
2244 | int len = 0; | ||
2245 | char *buf; | ||
2246 | int r = 0; | ||
2247 | int i; | 2460 | int i; |
2248 | 2461 | ||
2249 | |||
2250 | /* calculate max size */ | ||
2251 | for (i = 0; trace_options[i]; i++) { | ||
2252 | len += strlen(trace_options[i]); | ||
2253 | len += 3; /* "no" and newline */ | ||
2254 | } | ||
2255 | |||
2256 | mutex_lock(&trace_types_lock); | 2462 | mutex_lock(&trace_types_lock); |
2257 | tracer_flags = current_trace->flags->val; | 2463 | tracer_flags = current_trace->flags->val; |
2258 | trace_opts = current_trace->flags->opts; | 2464 | trace_opts = current_trace->flags->opts; |
2259 | 2465 | ||
2260 | /* | ||
2261 | * Increase the size with names of options specific | ||
2262 | * of the current tracer. | ||
2263 | */ | ||
2264 | for (i = 0; trace_opts[i].name; i++) { | ||
2265 | len += strlen(trace_opts[i].name); | ||
2266 | len += 3; /* "no" and newline */ | ||
2267 | } | ||
2268 | |||
2269 | /* +2 for \n and \0 */ | ||
2270 | buf = kmalloc(len + 2, GFP_KERNEL); | ||
2271 | if (!buf) { | ||
2272 | mutex_unlock(&trace_types_lock); | ||
2273 | return -ENOMEM; | ||
2274 | } | ||
2275 | |||
2276 | for (i = 0; trace_options[i]; i++) { | 2466 | for (i = 0; trace_options[i]; i++) { |
2277 | if (trace_flags & (1 << i)) | 2467 | if (trace_flags & (1 << i)) |
2278 | r += sprintf(buf + r, "%s\n", trace_options[i]); | 2468 | seq_printf(m, "%s\n", trace_options[i]); |
2279 | else | 2469 | else |
2280 | r += sprintf(buf + r, "no%s\n", trace_options[i]); | 2470 | seq_printf(m, "no%s\n", trace_options[i]); |
2281 | } | 2471 | } |
2282 | 2472 | ||
2283 | for (i = 0; trace_opts[i].name; i++) { | 2473 | for (i = 0; trace_opts[i].name; i++) { |
2284 | if (tracer_flags & trace_opts[i].bit) | 2474 | if (tracer_flags & trace_opts[i].bit) |
2285 | r += sprintf(buf + r, "%s\n", | 2475 | seq_printf(m, "%s\n", trace_opts[i].name); |
2286 | trace_opts[i].name); | ||
2287 | else | 2476 | else |
2288 | r += sprintf(buf + r, "no%s\n", | 2477 | seq_printf(m, "no%s\n", trace_opts[i].name); |
2289 | trace_opts[i].name); | ||
2290 | } | 2478 | } |
2291 | mutex_unlock(&trace_types_lock); | 2479 | mutex_unlock(&trace_types_lock); |
2292 | 2480 | ||
2293 | WARN_ON(r >= len + 2); | 2481 | return 0; |
2482 | } | ||
2294 | 2483 | ||
2295 | r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | 2484 | static int __set_tracer_option(struct tracer *trace, |
2485 | struct tracer_flags *tracer_flags, | ||
2486 | struct tracer_opt *opts, int neg) | ||
2487 | { | ||
2488 | int ret; | ||
2296 | 2489 | ||
2297 | kfree(buf); | 2490 | ret = trace->set_flag(tracer_flags->val, opts->bit, !neg); |
2298 | return r; | 2491 | if (ret) |
2492 | return ret; | ||
2493 | |||
2494 | if (neg) | ||
2495 | tracer_flags->val &= ~opts->bit; | ||
2496 | else | ||
2497 | tracer_flags->val |= opts->bit; | ||
2498 | return 0; | ||
2299 | } | 2499 | } |
2300 | 2500 | ||
2301 | /* Try to assign a tracer specific option */ | 2501 | /* Try to assign a tracer specific option */ |
2302 | static int set_tracer_option(struct tracer *trace, char *cmp, int neg) | 2502 | static int set_tracer_option(struct tracer *trace, char *cmp, int neg) |
2303 | { | 2503 | { |
2304 | struct tracer_flags *trace_flags = trace->flags; | 2504 | struct tracer_flags *tracer_flags = trace->flags; |
2305 | struct tracer_opt *opts = NULL; | 2505 | struct tracer_opt *opts = NULL; |
2306 | int ret = 0, i = 0; | 2506 | int i; |
2307 | int len; | ||
2308 | 2507 | ||
2309 | for (i = 0; trace_flags->opts[i].name; i++) { | 2508 | for (i = 0; tracer_flags->opts[i].name; i++) { |
2310 | opts = &trace_flags->opts[i]; | 2509 | opts = &tracer_flags->opts[i]; |
2311 | len = strlen(opts->name); | ||
2312 | 2510 | ||
2313 | if (strncmp(cmp, opts->name, len) == 0) { | 2511 | if (strcmp(cmp, opts->name) == 0) |
2314 | ret = trace->set_flag(trace_flags->val, | 2512 | return __set_tracer_option(trace, trace->flags, |
2315 | opts->bit, !neg); | 2513 | opts, neg); |
2316 | break; | ||
2317 | } | ||
2318 | } | 2514 | } |
2319 | /* Not found */ | ||
2320 | if (!trace_flags->opts[i].name) | ||
2321 | return -EINVAL; | ||
2322 | |||
2323 | /* Refused to handle */ | ||
2324 | if (ret) | ||
2325 | return ret; | ||
2326 | 2515 | ||
2327 | if (neg) | 2516 | return -EINVAL; |
2328 | trace_flags->val &= ~opts->bit; | ||
2329 | else | ||
2330 | trace_flags->val |= opts->bit; | ||
2331 | |||
2332 | return 0; | ||
2333 | } | 2517 | } |
2334 | 2518 | ||
2335 | static void set_tracer_flags(unsigned int mask, int enabled) | 2519 | static void set_tracer_flags(unsigned int mask, int enabled) |
@@ -2343,21 +2527,8 @@ static void set_tracer_flags(unsigned int mask, int enabled) | |||
2343 | else | 2527 | else |
2344 | trace_flags &= ~mask; | 2528 | trace_flags &= ~mask; |
2345 | 2529 | ||
2346 | if (mask == TRACE_ITER_GLOBAL_CLK) { | 2530 | if (mask == TRACE_ITER_RECORD_CMD) |
2347 | u64 (*func)(void); | 2531 | trace_event_enable_cmd_record(enabled); |
2348 | |||
2349 | if (enabled) | ||
2350 | func = trace_clock_global; | ||
2351 | else | ||
2352 | func = trace_clock_local; | ||
2353 | |||
2354 | mutex_lock(&trace_types_lock); | ||
2355 | ring_buffer_set_clock(global_trace.buffer, func); | ||
2356 | |||
2357 | if (max_tr.buffer) | ||
2358 | ring_buffer_set_clock(max_tr.buffer, func); | ||
2359 | mutex_unlock(&trace_types_lock); | ||
2360 | } | ||
2361 | } | 2532 | } |
2362 | 2533 | ||
2363 | static ssize_t | 2534 | static ssize_t |
@@ -2365,7 +2536,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, | |||
2365 | size_t cnt, loff_t *ppos) | 2536 | size_t cnt, loff_t *ppos) |
2366 | { | 2537 | { |
2367 | char buf[64]; | 2538 | char buf[64]; |
2368 | char *cmp = buf; | 2539 | char *cmp; |
2369 | int neg = 0; | 2540 | int neg = 0; |
2370 | int ret; | 2541 | int ret; |
2371 | int i; | 2542 | int i; |
@@ -2377,16 +2548,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, | |||
2377 | return -EFAULT; | 2548 | return -EFAULT; |
2378 | 2549 | ||
2379 | buf[cnt] = 0; | 2550 | buf[cnt] = 0; |
2551 | cmp = strstrip(buf); | ||
2380 | 2552 | ||
2381 | if (strncmp(buf, "no", 2) == 0) { | 2553 | if (strncmp(cmp, "no", 2) == 0) { |
2382 | neg = 1; | 2554 | neg = 1; |
2383 | cmp += 2; | 2555 | cmp += 2; |
2384 | } | 2556 | } |
2385 | 2557 | ||
2386 | for (i = 0; trace_options[i]; i++) { | 2558 | for (i = 0; trace_options[i]; i++) { |
2387 | int len = strlen(trace_options[i]); | 2559 | if (strcmp(cmp, trace_options[i]) == 0) { |
2388 | |||
2389 | if (strncmp(cmp, trace_options[i], len) == 0) { | ||
2390 | set_tracer_flags(1 << i, !neg); | 2560 | set_tracer_flags(1 << i, !neg); |
2391 | break; | 2561 | break; |
2392 | } | 2562 | } |
@@ -2401,14 +2571,23 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, | |||
2401 | return ret; | 2571 | return ret; |
2402 | } | 2572 | } |
2403 | 2573 | ||
2404 | filp->f_pos += cnt; | 2574 | *ppos += cnt; |
2405 | 2575 | ||
2406 | return cnt; | 2576 | return cnt; |
2407 | } | 2577 | } |
2408 | 2578 | ||
2579 | static int tracing_trace_options_open(struct inode *inode, struct file *file) | ||
2580 | { | ||
2581 | if (tracing_disabled) | ||
2582 | return -ENODEV; | ||
2583 | return single_open(file, tracing_trace_options_show, NULL); | ||
2584 | } | ||
2585 | |||
2409 | static const struct file_operations tracing_iter_fops = { | 2586 | static const struct file_operations tracing_iter_fops = { |
2410 | .open = tracing_open_generic, | 2587 | .open = tracing_trace_options_open, |
2411 | .read = tracing_trace_options_read, | 2588 | .read = seq_read, |
2589 | .llseek = seq_lseek, | ||
2590 | .release = single_release, | ||
2412 | .write = tracing_trace_options_write, | 2591 | .write = tracing_trace_options_write, |
2413 | }; | 2592 | }; |
2414 | 2593 | ||
@@ -2441,6 +2620,7 @@ tracing_readme_read(struct file *filp, char __user *ubuf, | |||
2441 | static const struct file_operations tracing_readme_fops = { | 2620 | static const struct file_operations tracing_readme_fops = { |
2442 | .open = tracing_open_generic, | 2621 | .open = tracing_open_generic, |
2443 | .read = tracing_readme_read, | 2622 | .read = tracing_readme_read, |
2623 | .llseek = generic_file_llseek, | ||
2444 | }; | 2624 | }; |
2445 | 2625 | ||
2446 | static ssize_t | 2626 | static ssize_t |
@@ -2491,6 +2671,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf, | |||
2491 | static const struct file_operations tracing_saved_cmdlines_fops = { | 2671 | static const struct file_operations tracing_saved_cmdlines_fops = { |
2492 | .open = tracing_open_generic, | 2672 | .open = tracing_open_generic, |
2493 | .read = tracing_saved_cmdlines_read, | 2673 | .read = tracing_saved_cmdlines_read, |
2674 | .llseek = generic_file_llseek, | ||
2494 | }; | 2675 | }; |
2495 | 2676 | ||
2496 | static ssize_t | 2677 | static ssize_t |
@@ -2543,7 +2724,7 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, | |||
2543 | } | 2724 | } |
2544 | mutex_unlock(&trace_types_lock); | 2725 | mutex_unlock(&trace_types_lock); |
2545 | 2726 | ||
2546 | filp->f_pos += cnt; | 2727 | *ppos += cnt; |
2547 | 2728 | ||
2548 | return cnt; | 2729 | return cnt; |
2549 | } | 2730 | } |
@@ -2552,7 +2733,7 @@ static ssize_t | |||
2552 | tracing_set_trace_read(struct file *filp, char __user *ubuf, | 2733 | tracing_set_trace_read(struct file *filp, char __user *ubuf, |
2553 | size_t cnt, loff_t *ppos) | 2734 | size_t cnt, loff_t *ppos) |
2554 | { | 2735 | { |
2555 | char buf[max_tracer_type_len+2]; | 2736 | char buf[MAX_TRACER_SIZE+2]; |
2556 | int r; | 2737 | int r; |
2557 | 2738 | ||
2558 | mutex_lock(&trace_types_lock); | 2739 | mutex_lock(&trace_types_lock); |
@@ -2586,6 +2767,9 @@ static int tracing_resize_ring_buffer(unsigned long size) | |||
2586 | if (ret < 0) | 2767 | if (ret < 0) |
2587 | return ret; | 2768 | return ret; |
2588 | 2769 | ||
2770 | if (!current_trace->use_max_tr) | ||
2771 | goto out; | ||
2772 | |||
2589 | ret = ring_buffer_resize(max_tr.buffer, size); | 2773 | ret = ring_buffer_resize(max_tr.buffer, size); |
2590 | if (ret < 0) { | 2774 | if (ret < 0) { |
2591 | int r; | 2775 | int r; |
@@ -2613,11 +2797,14 @@ static int tracing_resize_ring_buffer(unsigned long size) | |||
2613 | return ret; | 2797 | return ret; |
2614 | } | 2798 | } |
2615 | 2799 | ||
2800 | max_tr.entries = size; | ||
2801 | out: | ||
2616 | global_trace.entries = size; | 2802 | global_trace.entries = size; |
2617 | 2803 | ||
2618 | return ret; | 2804 | return ret; |
2619 | } | 2805 | } |
2620 | 2806 | ||
2807 | |||
2621 | /** | 2808 | /** |
2622 | * tracing_update_buffers - used by tracing facility to expand ring buffers | 2809 | * tracing_update_buffers - used by tracing facility to expand ring buffers |
2623 | * | 2810 | * |
@@ -2678,12 +2865,26 @@ static int tracing_set_tracer(const char *buf) | |||
2678 | trace_branch_disable(); | 2865 | trace_branch_disable(); |
2679 | if (current_trace && current_trace->reset) | 2866 | if (current_trace && current_trace->reset) |
2680 | current_trace->reset(tr); | 2867 | current_trace->reset(tr); |
2681 | 2868 | if (current_trace && current_trace->use_max_tr) { | |
2869 | /* | ||
2870 | * We don't free the ring buffer. instead, resize it because | ||
2871 | * The max_tr ring buffer has some state (e.g. ring->clock) and | ||
2872 | * we want preserve it. | ||
2873 | */ | ||
2874 | ring_buffer_resize(max_tr.buffer, 1); | ||
2875 | max_tr.entries = 1; | ||
2876 | } | ||
2682 | destroy_trace_option_files(topts); | 2877 | destroy_trace_option_files(topts); |
2683 | 2878 | ||
2684 | current_trace = t; | 2879 | current_trace = t; |
2685 | 2880 | ||
2686 | topts = create_trace_option_files(current_trace); | 2881 | topts = create_trace_option_files(current_trace); |
2882 | if (current_trace->use_max_tr) { | ||
2883 | ret = ring_buffer_resize(max_tr.buffer, global_trace.entries); | ||
2884 | if (ret < 0) | ||
2885 | goto out; | ||
2886 | max_tr.entries = global_trace.entries; | ||
2887 | } | ||
2687 | 2888 | ||
2688 | if (t->init) { | 2889 | if (t->init) { |
2689 | ret = tracer_init(t, tr); | 2890 | ret = tracer_init(t, tr); |
@@ -2702,15 +2903,15 @@ static ssize_t | |||
2702 | tracing_set_trace_write(struct file *filp, const char __user *ubuf, | 2903 | tracing_set_trace_write(struct file *filp, const char __user *ubuf, |
2703 | size_t cnt, loff_t *ppos) | 2904 | size_t cnt, loff_t *ppos) |
2704 | { | 2905 | { |
2705 | char buf[max_tracer_type_len+1]; | 2906 | char buf[MAX_TRACER_SIZE+1]; |
2706 | int i; | 2907 | int i; |
2707 | size_t ret; | 2908 | size_t ret; |
2708 | int err; | 2909 | int err; |
2709 | 2910 | ||
2710 | ret = cnt; | 2911 | ret = cnt; |
2711 | 2912 | ||
2712 | if (cnt > max_tracer_type_len) | 2913 | if (cnt > MAX_TRACER_SIZE) |
2713 | cnt = max_tracer_type_len; | 2914 | cnt = MAX_TRACER_SIZE; |
2714 | 2915 | ||
2715 | if (copy_from_user(&buf, ubuf, cnt)) | 2916 | if (copy_from_user(&buf, ubuf, cnt)) |
2716 | return -EFAULT; | 2917 | return -EFAULT; |
@@ -2725,7 +2926,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, | |||
2725 | if (err) | 2926 | if (err) |
2726 | return err; | 2927 | return err; |
2727 | 2928 | ||
2728 | filp->f_pos += ret; | 2929 | *ppos += ret; |
2729 | 2930 | ||
2730 | return ret; | 2931 | return ret; |
2731 | } | 2932 | } |
@@ -2782,22 +2983,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
2782 | 2983 | ||
2783 | mutex_lock(&trace_types_lock); | 2984 | mutex_lock(&trace_types_lock); |
2784 | 2985 | ||
2785 | /* We only allow one reader per cpu */ | ||
2786 | if (cpu_file == TRACE_PIPE_ALL_CPU) { | ||
2787 | if (!cpumask_empty(tracing_reader_cpumask)) { | ||
2788 | ret = -EBUSY; | ||
2789 | goto out; | ||
2790 | } | ||
2791 | cpumask_setall(tracing_reader_cpumask); | ||
2792 | } else { | ||
2793 | if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask)) | ||
2794 | cpumask_set_cpu(cpu_file, tracing_reader_cpumask); | ||
2795 | else { | ||
2796 | ret = -EBUSY; | ||
2797 | goto out; | ||
2798 | } | ||
2799 | } | ||
2800 | |||
2801 | /* create a buffer to store the information to pass to userspace */ | 2986 | /* create a buffer to store the information to pass to userspace */ |
2802 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); | 2987 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); |
2803 | if (!iter) { | 2988 | if (!iter) { |
@@ -2836,6 +3021,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
2836 | if (iter->trace->pipe_open) | 3021 | if (iter->trace->pipe_open) |
2837 | iter->trace->pipe_open(iter); | 3022 | iter->trace->pipe_open(iter); |
2838 | 3023 | ||
3024 | nonseekable_open(inode, filp); | ||
2839 | out: | 3025 | out: |
2840 | mutex_unlock(&trace_types_lock); | 3026 | mutex_unlock(&trace_types_lock); |
2841 | return ret; | 3027 | return ret; |
@@ -2853,10 +3039,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) | |||
2853 | 3039 | ||
2854 | mutex_lock(&trace_types_lock); | 3040 | mutex_lock(&trace_types_lock); |
2855 | 3041 | ||
2856 | if (iter->cpu_file == TRACE_PIPE_ALL_CPU) | 3042 | if (iter->trace->pipe_close) |
2857 | cpumask_clear(tracing_reader_cpumask); | 3043 | iter->trace->pipe_close(iter); |
2858 | else | ||
2859 | cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); | ||
2860 | 3044 | ||
2861 | mutex_unlock(&trace_types_lock); | 3045 | mutex_unlock(&trace_types_lock); |
2862 | 3046 | ||
@@ -3016,7 +3200,8 @@ waitagain: | |||
3016 | iter->pos = -1; | 3200 | iter->pos = -1; |
3017 | 3201 | ||
3018 | trace_event_read_lock(); | 3202 | trace_event_read_lock(); |
3019 | while (find_next_entry_inc(iter) != NULL) { | 3203 | trace_access_lock(iter->cpu_file); |
3204 | while (trace_find_next_entry_inc(iter) != NULL) { | ||
3020 | enum print_line_t ret; | 3205 | enum print_line_t ret; |
3021 | int len = iter->seq.len; | 3206 | int len = iter->seq.len; |
3022 | 3207 | ||
@@ -3032,6 +3217,7 @@ waitagain: | |||
3032 | if (iter->seq.len >= cnt) | 3217 | if (iter->seq.len >= cnt) |
3033 | break; | 3218 | break; |
3034 | } | 3219 | } |
3220 | trace_access_unlock(iter->cpu_file); | ||
3035 | trace_event_read_unlock(); | 3221 | trace_event_read_unlock(); |
3036 | 3222 | ||
3037 | /* Now copy what we have to the user */ | 3223 | /* Now copy what we have to the user */ |
@@ -3064,7 +3250,7 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, | |||
3064 | __free_page(spd->pages[idx]); | 3250 | __free_page(spd->pages[idx]); |
3065 | } | 3251 | } |
3066 | 3252 | ||
3067 | static struct pipe_buf_operations tracing_pipe_buf_ops = { | 3253 | static const struct pipe_buf_operations tracing_pipe_buf_ops = { |
3068 | .can_merge = 0, | 3254 | .can_merge = 0, |
3069 | .map = generic_pipe_buf_map, | 3255 | .map = generic_pipe_buf_map, |
3070 | .unmap = generic_pipe_buf_unmap, | 3256 | .unmap = generic_pipe_buf_unmap, |
@@ -3095,9 +3281,10 @@ tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) | |||
3095 | break; | 3281 | break; |
3096 | } | 3282 | } |
3097 | 3283 | ||
3098 | trace_consume(iter); | 3284 | if (ret != TRACE_TYPE_NO_CONSUME) |
3285 | trace_consume(iter); | ||
3099 | rem -= count; | 3286 | rem -= count; |
3100 | if (!find_next_entry_inc(iter)) { | 3287 | if (!trace_find_next_entry_inc(iter)) { |
3101 | rem = 0; | 3288 | rem = 0; |
3102 | iter->ent = NULL; | 3289 | iter->ent = NULL; |
3103 | break; | 3290 | break; |
@@ -3113,12 +3300,12 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
3113 | size_t len, | 3300 | size_t len, |
3114 | unsigned int flags) | 3301 | unsigned int flags) |
3115 | { | 3302 | { |
3116 | struct page *pages[PIPE_BUFFERS]; | 3303 | struct page *pages_def[PIPE_DEF_BUFFERS]; |
3117 | struct partial_page partial[PIPE_BUFFERS]; | 3304 | struct partial_page partial_def[PIPE_DEF_BUFFERS]; |
3118 | struct trace_iterator *iter = filp->private_data; | 3305 | struct trace_iterator *iter = filp->private_data; |
3119 | struct splice_pipe_desc spd = { | 3306 | struct splice_pipe_desc spd = { |
3120 | .pages = pages, | 3307 | .pages = pages_def, |
3121 | .partial = partial, | 3308 | .partial = partial_def, |
3122 | .nr_pages = 0, /* This gets updated below. */ | 3309 | .nr_pages = 0, /* This gets updated below. */ |
3123 | .flags = flags, | 3310 | .flags = flags, |
3124 | .ops = &tracing_pipe_buf_ops, | 3311 | .ops = &tracing_pipe_buf_ops, |
@@ -3129,6 +3316,9 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
3129 | size_t rem; | 3316 | size_t rem; |
3130 | unsigned int i; | 3317 | unsigned int i; |
3131 | 3318 | ||
3319 | if (splice_grow_spd(pipe, &spd)) | ||
3320 | return -ENOMEM; | ||
3321 | |||
3132 | /* copy the tracer to avoid using a global lock all around */ | 3322 | /* copy the tracer to avoid using a global lock all around */ |
3133 | mutex_lock(&trace_types_lock); | 3323 | mutex_lock(&trace_types_lock); |
3134 | if (unlikely(old_tracer != current_trace && current_trace)) { | 3324 | if (unlikely(old_tracer != current_trace && current_trace)) { |
@@ -3150,46 +3340,50 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
3150 | if (ret <= 0) | 3340 | if (ret <= 0) |
3151 | goto out_err; | 3341 | goto out_err; |
3152 | 3342 | ||
3153 | if (!iter->ent && !find_next_entry_inc(iter)) { | 3343 | if (!iter->ent && !trace_find_next_entry_inc(iter)) { |
3154 | ret = -EFAULT; | 3344 | ret = -EFAULT; |
3155 | goto out_err; | 3345 | goto out_err; |
3156 | } | 3346 | } |
3157 | 3347 | ||
3158 | trace_event_read_lock(); | 3348 | trace_event_read_lock(); |
3349 | trace_access_lock(iter->cpu_file); | ||
3159 | 3350 | ||
3160 | /* Fill as many pages as possible. */ | 3351 | /* Fill as many pages as possible. */ |
3161 | for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { | 3352 | for (i = 0, rem = len; i < pipe->buffers && rem; i++) { |
3162 | pages[i] = alloc_page(GFP_KERNEL); | 3353 | spd.pages[i] = alloc_page(GFP_KERNEL); |
3163 | if (!pages[i]) | 3354 | if (!spd.pages[i]) |
3164 | break; | 3355 | break; |
3165 | 3356 | ||
3166 | rem = tracing_fill_pipe_page(rem, iter); | 3357 | rem = tracing_fill_pipe_page(rem, iter); |
3167 | 3358 | ||
3168 | /* Copy the data into the page, so we can start over. */ | 3359 | /* Copy the data into the page, so we can start over. */ |
3169 | ret = trace_seq_to_buffer(&iter->seq, | 3360 | ret = trace_seq_to_buffer(&iter->seq, |
3170 | page_address(pages[i]), | 3361 | page_address(spd.pages[i]), |
3171 | iter->seq.len); | 3362 | iter->seq.len); |
3172 | if (ret < 0) { | 3363 | if (ret < 0) { |
3173 | __free_page(pages[i]); | 3364 | __free_page(spd.pages[i]); |
3174 | break; | 3365 | break; |
3175 | } | 3366 | } |
3176 | partial[i].offset = 0; | 3367 | spd.partial[i].offset = 0; |
3177 | partial[i].len = iter->seq.len; | 3368 | spd.partial[i].len = iter->seq.len; |
3178 | 3369 | ||
3179 | trace_seq_init(&iter->seq); | 3370 | trace_seq_init(&iter->seq); |
3180 | } | 3371 | } |
3181 | 3372 | ||
3373 | trace_access_unlock(iter->cpu_file); | ||
3182 | trace_event_read_unlock(); | 3374 | trace_event_read_unlock(); |
3183 | mutex_unlock(&iter->mutex); | 3375 | mutex_unlock(&iter->mutex); |
3184 | 3376 | ||
3185 | spd.nr_pages = i; | 3377 | spd.nr_pages = i; |
3186 | 3378 | ||
3187 | return splice_to_pipe(pipe, &spd); | 3379 | ret = splice_to_pipe(pipe, &spd); |
3380 | out: | ||
3381 | splice_shrink_spd(pipe, &spd); | ||
3382 | return ret; | ||
3188 | 3383 | ||
3189 | out_err: | 3384 | out_err: |
3190 | mutex_unlock(&iter->mutex); | 3385 | mutex_unlock(&iter->mutex); |
3191 | 3386 | goto out; | |
3192 | return ret; | ||
3193 | } | 3387 | } |
3194 | 3388 | ||
3195 | static ssize_t | 3389 | static ssize_t |
@@ -3259,7 +3453,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, | |||
3259 | } | 3453 | } |
3260 | } | 3454 | } |
3261 | 3455 | ||
3262 | filp->f_pos += cnt; | 3456 | *ppos += cnt; |
3263 | 3457 | ||
3264 | /* If check pages failed, return ENOMEM */ | 3458 | /* If check pages failed, return ENOMEM */ |
3265 | if (tracing_disabled) | 3459 | if (tracing_disabled) |
@@ -3273,7 +3467,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, | |||
3273 | } | 3467 | } |
3274 | 3468 | ||
3275 | tracing_start(); | 3469 | tracing_start(); |
3276 | max_tr.entries = global_trace.entries; | ||
3277 | mutex_unlock(&trace_types_lock); | 3470 | mutex_unlock(&trace_types_lock); |
3278 | 3471 | ||
3279 | return cnt; | 3472 | return cnt; |
@@ -3294,7 +3487,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, | |||
3294 | size_t cnt, loff_t *fpos) | 3487 | size_t cnt, loff_t *fpos) |
3295 | { | 3488 | { |
3296 | char *buf; | 3489 | char *buf; |
3297 | char *end; | 3490 | size_t written; |
3298 | 3491 | ||
3299 | if (tracing_disabled) | 3492 | if (tracing_disabled) |
3300 | return -EINVAL; | 3493 | return -EINVAL; |
@@ -3302,7 +3495,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, | |||
3302 | if (cnt > TRACE_BUF_SIZE) | 3495 | if (cnt > TRACE_BUF_SIZE) |
3303 | cnt = TRACE_BUF_SIZE; | 3496 | cnt = TRACE_BUF_SIZE; |
3304 | 3497 | ||
3305 | buf = kmalloc(cnt + 1, GFP_KERNEL); | 3498 | buf = kmalloc(cnt + 2, GFP_KERNEL); |
3306 | if (buf == NULL) | 3499 | if (buf == NULL) |
3307 | return -ENOMEM; | 3500 | return -ENOMEM; |
3308 | 3501 | ||
@@ -3310,36 +3503,102 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, | |||
3310 | kfree(buf); | 3503 | kfree(buf); |
3311 | return -EFAULT; | 3504 | return -EFAULT; |
3312 | } | 3505 | } |
3506 | if (buf[cnt-1] != '\n') { | ||
3507 | buf[cnt] = '\n'; | ||
3508 | buf[cnt+1] = '\0'; | ||
3509 | } else | ||
3510 | buf[cnt] = '\0'; | ||
3313 | 3511 | ||
3314 | /* Cut from the first nil or newline. */ | 3512 | written = mark_printk("%s", buf); |
3315 | buf[cnt] = '\0'; | ||
3316 | end = strchr(buf, '\n'); | ||
3317 | if (end) | ||
3318 | *end = '\0'; | ||
3319 | |||
3320 | cnt = mark_printk("%s\n", buf); | ||
3321 | kfree(buf); | 3513 | kfree(buf); |
3514 | *fpos += written; | ||
3515 | |||
3516 | /* don't tell userspace we wrote more - it might confuse them */ | ||
3517 | if (written > cnt) | ||
3518 | written = cnt; | ||
3519 | |||
3520 | return written; | ||
3521 | } | ||
3522 | |||
3523 | static int tracing_clock_show(struct seq_file *m, void *v) | ||
3524 | { | ||
3525 | int i; | ||
3526 | |||
3527 | for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) | ||
3528 | seq_printf(m, | ||
3529 | "%s%s%s%s", i ? " " : "", | ||
3530 | i == trace_clock_id ? "[" : "", trace_clocks[i].name, | ||
3531 | i == trace_clock_id ? "]" : ""); | ||
3532 | seq_putc(m, '\n'); | ||
3533 | |||
3534 | return 0; | ||
3535 | } | ||
3536 | |||
3537 | static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, | ||
3538 | size_t cnt, loff_t *fpos) | ||
3539 | { | ||
3540 | char buf[64]; | ||
3541 | const char *clockstr; | ||
3542 | int i; | ||
3543 | |||
3544 | if (cnt >= sizeof(buf)) | ||
3545 | return -EINVAL; | ||
3546 | |||
3547 | if (copy_from_user(&buf, ubuf, cnt)) | ||
3548 | return -EFAULT; | ||
3549 | |||
3550 | buf[cnt] = 0; | ||
3551 | |||
3552 | clockstr = strstrip(buf); | ||
3553 | |||
3554 | for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) { | ||
3555 | if (strcmp(trace_clocks[i].name, clockstr) == 0) | ||
3556 | break; | ||
3557 | } | ||
3558 | if (i == ARRAY_SIZE(trace_clocks)) | ||
3559 | return -EINVAL; | ||
3560 | |||
3561 | trace_clock_id = i; | ||
3562 | |||
3563 | mutex_lock(&trace_types_lock); | ||
3564 | |||
3565 | ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func); | ||
3566 | if (max_tr.buffer) | ||
3567 | ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func); | ||
3568 | |||
3569 | mutex_unlock(&trace_types_lock); | ||
3570 | |||
3322 | *fpos += cnt; | 3571 | *fpos += cnt; |
3323 | 3572 | ||
3324 | return cnt; | 3573 | return cnt; |
3325 | } | 3574 | } |
3326 | 3575 | ||
3576 | static int tracing_clock_open(struct inode *inode, struct file *file) | ||
3577 | { | ||
3578 | if (tracing_disabled) | ||
3579 | return -ENODEV; | ||
3580 | return single_open(file, tracing_clock_show, NULL); | ||
3581 | } | ||
3582 | |||
3327 | static const struct file_operations tracing_max_lat_fops = { | 3583 | static const struct file_operations tracing_max_lat_fops = { |
3328 | .open = tracing_open_generic, | 3584 | .open = tracing_open_generic, |
3329 | .read = tracing_max_lat_read, | 3585 | .read = tracing_max_lat_read, |
3330 | .write = tracing_max_lat_write, | 3586 | .write = tracing_max_lat_write, |
3587 | .llseek = generic_file_llseek, | ||
3331 | }; | 3588 | }; |
3332 | 3589 | ||
3333 | static const struct file_operations tracing_ctrl_fops = { | 3590 | static const struct file_operations tracing_ctrl_fops = { |
3334 | .open = tracing_open_generic, | 3591 | .open = tracing_open_generic, |
3335 | .read = tracing_ctrl_read, | 3592 | .read = tracing_ctrl_read, |
3336 | .write = tracing_ctrl_write, | 3593 | .write = tracing_ctrl_write, |
3594 | .llseek = generic_file_llseek, | ||
3337 | }; | 3595 | }; |
3338 | 3596 | ||
3339 | static const struct file_operations set_tracer_fops = { | 3597 | static const struct file_operations set_tracer_fops = { |
3340 | .open = tracing_open_generic, | 3598 | .open = tracing_open_generic, |
3341 | .read = tracing_set_trace_read, | 3599 | .read = tracing_set_trace_read, |
3342 | .write = tracing_set_trace_write, | 3600 | .write = tracing_set_trace_write, |
3601 | .llseek = generic_file_llseek, | ||
3343 | }; | 3602 | }; |
3344 | 3603 | ||
3345 | static const struct file_operations tracing_pipe_fops = { | 3604 | static const struct file_operations tracing_pipe_fops = { |
@@ -3348,17 +3607,28 @@ static const struct file_operations tracing_pipe_fops = { | |||
3348 | .read = tracing_read_pipe, | 3607 | .read = tracing_read_pipe, |
3349 | .splice_read = tracing_splice_read_pipe, | 3608 | .splice_read = tracing_splice_read_pipe, |
3350 | .release = tracing_release_pipe, | 3609 | .release = tracing_release_pipe, |
3610 | .llseek = no_llseek, | ||
3351 | }; | 3611 | }; |
3352 | 3612 | ||
3353 | static const struct file_operations tracing_entries_fops = { | 3613 | static const struct file_operations tracing_entries_fops = { |
3354 | .open = tracing_open_generic, | 3614 | .open = tracing_open_generic, |
3355 | .read = tracing_entries_read, | 3615 | .read = tracing_entries_read, |
3356 | .write = tracing_entries_write, | 3616 | .write = tracing_entries_write, |
3617 | .llseek = generic_file_llseek, | ||
3357 | }; | 3618 | }; |
3358 | 3619 | ||
3359 | static const struct file_operations tracing_mark_fops = { | 3620 | static const struct file_operations tracing_mark_fops = { |
3360 | .open = tracing_open_generic, | 3621 | .open = tracing_open_generic, |
3361 | .write = tracing_mark_write, | 3622 | .write = tracing_mark_write, |
3623 | .llseek = generic_file_llseek, | ||
3624 | }; | ||
3625 | |||
3626 | static const struct file_operations trace_clock_fops = { | ||
3627 | .open = tracing_clock_open, | ||
3628 | .read = seq_read, | ||
3629 | .llseek = seq_lseek, | ||
3630 | .release = single_release, | ||
3631 | .write = tracing_clock_write, | ||
3362 | }; | 3632 | }; |
3363 | 3633 | ||
3364 | struct ftrace_buffer_info { | 3634 | struct ftrace_buffer_info { |
@@ -3396,7 +3666,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
3396 | size_t count, loff_t *ppos) | 3666 | size_t count, loff_t *ppos) |
3397 | { | 3667 | { |
3398 | struct ftrace_buffer_info *info = filp->private_data; | 3668 | struct ftrace_buffer_info *info = filp->private_data; |
3399 | unsigned int pos; | ||
3400 | ssize_t ret; | 3669 | ssize_t ret; |
3401 | size_t size; | 3670 | size_t size; |
3402 | 3671 | ||
@@ -3414,18 +3683,15 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
3414 | 3683 | ||
3415 | info->read = 0; | 3684 | info->read = 0; |
3416 | 3685 | ||
3686 | trace_access_lock(info->cpu); | ||
3417 | ret = ring_buffer_read_page(info->tr->buffer, | 3687 | ret = ring_buffer_read_page(info->tr->buffer, |
3418 | &info->spare, | 3688 | &info->spare, |
3419 | count, | 3689 | count, |
3420 | info->cpu, 0); | 3690 | info->cpu, 0); |
3691 | trace_access_unlock(info->cpu); | ||
3421 | if (ret < 0) | 3692 | if (ret < 0) |
3422 | return 0; | 3693 | return 0; |
3423 | 3694 | ||
3424 | pos = ring_buffer_page_len(info->spare); | ||
3425 | |||
3426 | if (pos < PAGE_SIZE) | ||
3427 | memset(info->spare + pos, 0, PAGE_SIZE - pos); | ||
3428 | |||
3429 | read: | 3695 | read: |
3430 | size = PAGE_SIZE - info->read; | 3696 | size = PAGE_SIZE - info->read; |
3431 | if (size > count) | 3697 | if (size > count) |
@@ -3487,7 +3753,7 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, | |||
3487 | } | 3753 | } |
3488 | 3754 | ||
3489 | /* Pipe buffer operations for a buffer. */ | 3755 | /* Pipe buffer operations for a buffer. */ |
3490 | static struct pipe_buf_operations buffer_pipe_buf_ops = { | 3756 | static const struct pipe_buf_operations buffer_pipe_buf_ops = { |
3491 | .can_merge = 0, | 3757 | .can_merge = 0, |
3492 | .map = generic_pipe_buf_map, | 3758 | .map = generic_pipe_buf_map, |
3493 | .unmap = generic_pipe_buf_unmap, | 3759 | .unmap = generic_pipe_buf_unmap, |
@@ -3520,11 +3786,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3520 | unsigned int flags) | 3786 | unsigned int flags) |
3521 | { | 3787 | { |
3522 | struct ftrace_buffer_info *info = file->private_data; | 3788 | struct ftrace_buffer_info *info = file->private_data; |
3523 | struct partial_page partial[PIPE_BUFFERS]; | 3789 | struct partial_page partial_def[PIPE_DEF_BUFFERS]; |
3524 | struct page *pages[PIPE_BUFFERS]; | 3790 | struct page *pages_def[PIPE_DEF_BUFFERS]; |
3525 | struct splice_pipe_desc spd = { | 3791 | struct splice_pipe_desc spd = { |
3526 | .pages = pages, | 3792 | .pages = pages_def, |
3527 | .partial = partial, | 3793 | .partial = partial_def, |
3528 | .flags = flags, | 3794 | .flags = flags, |
3529 | .ops = &buffer_pipe_buf_ops, | 3795 | .ops = &buffer_pipe_buf_ops, |
3530 | .spd_release = buffer_spd_release, | 3796 | .spd_release = buffer_spd_release, |
@@ -3533,21 +3799,28 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3533 | int entries, size, i; | 3799 | int entries, size, i; |
3534 | size_t ret; | 3800 | size_t ret; |
3535 | 3801 | ||
3802 | if (splice_grow_spd(pipe, &spd)) | ||
3803 | return -ENOMEM; | ||
3804 | |||
3536 | if (*ppos & (PAGE_SIZE - 1)) { | 3805 | if (*ppos & (PAGE_SIZE - 1)) { |
3537 | WARN_ONCE(1, "Ftrace: previous read must page-align\n"); | 3806 | WARN_ONCE(1, "Ftrace: previous read must page-align\n"); |
3538 | return -EINVAL; | 3807 | ret = -EINVAL; |
3808 | goto out; | ||
3539 | } | 3809 | } |
3540 | 3810 | ||
3541 | if (len & (PAGE_SIZE - 1)) { | 3811 | if (len & (PAGE_SIZE - 1)) { |
3542 | WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); | 3812 | WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); |
3543 | if (len < PAGE_SIZE) | 3813 | if (len < PAGE_SIZE) { |
3544 | return -EINVAL; | 3814 | ret = -EINVAL; |
3815 | goto out; | ||
3816 | } | ||
3545 | len &= PAGE_MASK; | 3817 | len &= PAGE_MASK; |
3546 | } | 3818 | } |
3547 | 3819 | ||
3820 | trace_access_lock(info->cpu); | ||
3548 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); | 3821 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); |
3549 | 3822 | ||
3550 | for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { | 3823 | for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) { |
3551 | struct page *page; | 3824 | struct page *page; |
3552 | int r; | 3825 | int r; |
3553 | 3826 | ||
@@ -3592,6 +3865,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3592 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); | 3865 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); |
3593 | } | 3866 | } |
3594 | 3867 | ||
3868 | trace_access_unlock(info->cpu); | ||
3595 | spd.nr_pages = i; | 3869 | spd.nr_pages = i; |
3596 | 3870 | ||
3597 | /* did we read anything? */ | 3871 | /* did we read anything? */ |
@@ -3601,11 +3875,12 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3601 | else | 3875 | else |
3602 | ret = 0; | 3876 | ret = 0; |
3603 | /* TODO: block */ | 3877 | /* TODO: block */ |
3604 | return ret; | 3878 | goto out; |
3605 | } | 3879 | } |
3606 | 3880 | ||
3607 | ret = splice_to_pipe(pipe, &spd); | 3881 | ret = splice_to_pipe(pipe, &spd); |
3608 | 3882 | splice_shrink_spd(pipe, &spd); | |
3883 | out: | ||
3609 | return ret; | 3884 | return ret; |
3610 | } | 3885 | } |
3611 | 3886 | ||
@@ -3628,7 +3903,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, | |||
3628 | 3903 | ||
3629 | s = kmalloc(sizeof(*s), GFP_KERNEL); | 3904 | s = kmalloc(sizeof(*s), GFP_KERNEL); |
3630 | if (!s) | 3905 | if (!s) |
3631 | return ENOMEM; | 3906 | return -ENOMEM; |
3632 | 3907 | ||
3633 | trace_seq_init(s); | 3908 | trace_seq_init(s); |
3634 | 3909 | ||
@@ -3641,9 +3916,6 @@ tracing_stats_read(struct file *filp, char __user *ubuf, | |||
3641 | cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); | 3916 | cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); |
3642 | trace_seq_printf(s, "commit overrun: %ld\n", cnt); | 3917 | trace_seq_printf(s, "commit overrun: %ld\n", cnt); |
3643 | 3918 | ||
3644 | cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu); | ||
3645 | trace_seq_printf(s, "nmi dropped: %ld\n", cnt); | ||
3646 | |||
3647 | count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); | 3919 | count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); |
3648 | 3920 | ||
3649 | kfree(s); | 3921 | kfree(s); |
@@ -3654,6 +3926,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, | |||
3654 | static const struct file_operations tracing_stats_fops = { | 3926 | static const struct file_operations tracing_stats_fops = { |
3655 | .open = tracing_open_generic, | 3927 | .open = tracing_open_generic, |
3656 | .read = tracing_stats_read, | 3928 | .read = tracing_stats_read, |
3929 | .llseek = generic_file_llseek, | ||
3657 | }; | 3930 | }; |
3658 | 3931 | ||
3659 | #ifdef CONFIG_DYNAMIC_FTRACE | 3932 | #ifdef CONFIG_DYNAMIC_FTRACE |
@@ -3690,6 +3963,7 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf, | |||
3690 | static const struct file_operations tracing_dyn_info_fops = { | 3963 | static const struct file_operations tracing_dyn_info_fops = { |
3691 | .open = tracing_open_generic, | 3964 | .open = tracing_open_generic, |
3692 | .read = tracing_read_dyn_info, | 3965 | .read = tracing_read_dyn_info, |
3966 | .llseek = generic_file_llseek, | ||
3693 | }; | 3967 | }; |
3694 | #endif | 3968 | #endif |
3695 | 3969 | ||
@@ -3746,13 +4020,9 @@ static void tracing_init_debugfs_percpu(long cpu) | |||
3746 | { | 4020 | { |
3747 | struct dentry *d_percpu = tracing_dentry_percpu(); | 4021 | struct dentry *d_percpu = tracing_dentry_percpu(); |
3748 | struct dentry *d_cpu; | 4022 | struct dentry *d_cpu; |
3749 | /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ | 4023 | char cpu_dir[30]; /* 30 characters should be more than enough */ |
3750 | char cpu_dir[7]; | ||
3751 | |||
3752 | if (cpu > 999 || cpu < 0) | ||
3753 | return; | ||
3754 | 4024 | ||
3755 | sprintf(cpu_dir, "cpu%ld", cpu); | 4025 | snprintf(cpu_dir, 30, "cpu%ld", cpu); |
3756 | d_cpu = debugfs_create_dir(cpu_dir, d_percpu); | 4026 | d_cpu = debugfs_create_dir(cpu_dir, d_percpu); |
3757 | if (!d_cpu) { | 4027 | if (!d_cpu) { |
3758 | pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); | 4028 | pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); |
@@ -3821,39 +4091,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
3821 | if (ret < 0) | 4091 | if (ret < 0) |
3822 | return ret; | 4092 | return ret; |
3823 | 4093 | ||
3824 | ret = 0; | 4094 | if (val != 0 && val != 1) |
3825 | switch (val) { | 4095 | return -EINVAL; |
3826 | case 0: | ||
3827 | /* do nothing if already cleared */ | ||
3828 | if (!(topt->flags->val & topt->opt->bit)) | ||
3829 | break; | ||
3830 | |||
3831 | mutex_lock(&trace_types_lock); | ||
3832 | if (current_trace->set_flag) | ||
3833 | ret = current_trace->set_flag(topt->flags->val, | ||
3834 | topt->opt->bit, 0); | ||
3835 | mutex_unlock(&trace_types_lock); | ||
3836 | if (ret) | ||
3837 | return ret; | ||
3838 | topt->flags->val &= ~topt->opt->bit; | ||
3839 | break; | ||
3840 | case 1: | ||
3841 | /* do nothing if already set */ | ||
3842 | if (topt->flags->val & topt->opt->bit) | ||
3843 | break; | ||
3844 | 4096 | ||
4097 | if (!!(topt->flags->val & topt->opt->bit) != val) { | ||
3845 | mutex_lock(&trace_types_lock); | 4098 | mutex_lock(&trace_types_lock); |
3846 | if (current_trace->set_flag) | 4099 | ret = __set_tracer_option(current_trace, topt->flags, |
3847 | ret = current_trace->set_flag(topt->flags->val, | 4100 | topt->opt, !val); |
3848 | topt->opt->bit, 1); | ||
3849 | mutex_unlock(&trace_types_lock); | 4101 | mutex_unlock(&trace_types_lock); |
3850 | if (ret) | 4102 | if (ret) |
3851 | return ret; | 4103 | return ret; |
3852 | topt->flags->val |= topt->opt->bit; | ||
3853 | break; | ||
3854 | |||
3855 | default: | ||
3856 | return -EINVAL; | ||
3857 | } | 4104 | } |
3858 | 4105 | ||
3859 | *ppos += cnt; | 4106 | *ppos += cnt; |
@@ -3866,6 +4113,7 @@ static const struct file_operations trace_options_fops = { | |||
3866 | .open = tracing_open_generic, | 4113 | .open = tracing_open_generic, |
3867 | .read = trace_options_read, | 4114 | .read = trace_options_read, |
3868 | .write = trace_options_write, | 4115 | .write = trace_options_write, |
4116 | .llseek = generic_file_llseek, | ||
3869 | }; | 4117 | }; |
3870 | 4118 | ||
3871 | static ssize_t | 4119 | static ssize_t |
@@ -3904,17 +4152,9 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
3904 | if (ret < 0) | 4152 | if (ret < 0) |
3905 | return ret; | 4153 | return ret; |
3906 | 4154 | ||
3907 | switch (val) { | 4155 | if (val != 0 && val != 1) |
3908 | case 0: | ||
3909 | trace_flags &= ~(1 << index); | ||
3910 | break; | ||
3911 | case 1: | ||
3912 | trace_flags |= 1 << index; | ||
3913 | break; | ||
3914 | |||
3915 | default: | ||
3916 | return -EINVAL; | 4156 | return -EINVAL; |
3917 | } | 4157 | set_tracer_flags(1 << index, val); |
3918 | 4158 | ||
3919 | *ppos += cnt; | 4159 | *ppos += cnt; |
3920 | 4160 | ||
@@ -3925,6 +4165,7 @@ static const struct file_operations trace_options_core_fops = { | |||
3925 | .open = tracing_open_generic, | 4165 | .open = tracing_open_generic, |
3926 | .read = trace_options_core_read, | 4166 | .read = trace_options_core_read, |
3927 | .write = trace_options_core_write, | 4167 | .write = trace_options_core_write, |
4168 | .llseek = generic_file_llseek, | ||
3928 | }; | 4169 | }; |
3929 | 4170 | ||
3930 | struct dentry *trace_create_file(const char *name, | 4171 | struct dentry *trace_create_file(const char *name, |
@@ -4062,6 +4303,8 @@ static __init int tracer_init_debugfs(void) | |||
4062 | struct dentry *d_tracer; | 4303 | struct dentry *d_tracer; |
4063 | int cpu; | 4304 | int cpu; |
4064 | 4305 | ||
4306 | trace_access_lock_init(); | ||
4307 | |||
4065 | d_tracer = tracing_init_dentry(); | 4308 | d_tracer = tracing_init_dentry(); |
4066 | 4309 | ||
4067 | trace_create_file("tracing_enabled", 0644, d_tracer, | 4310 | trace_create_file("tracing_enabled", 0644, d_tracer, |
@@ -4082,8 +4325,10 @@ static __init int tracer_init_debugfs(void) | |||
4082 | trace_create_file("current_tracer", 0644, d_tracer, | 4325 | trace_create_file("current_tracer", 0644, d_tracer, |
4083 | &global_trace, &set_tracer_fops); | 4326 | &global_trace, &set_tracer_fops); |
4084 | 4327 | ||
4328 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
4085 | trace_create_file("tracing_max_latency", 0644, d_tracer, | 4329 | trace_create_file("tracing_max_latency", 0644, d_tracer, |
4086 | &tracing_max_latency, &tracing_max_lat_fops); | 4330 | &tracing_max_latency, &tracing_max_lat_fops); |
4331 | #endif | ||
4087 | 4332 | ||
4088 | trace_create_file("tracing_thresh", 0644, d_tracer, | 4333 | trace_create_file("tracing_thresh", 0644, d_tracer, |
4089 | &tracing_thresh, &tracing_max_lat_fops); | 4334 | &tracing_thresh, &tracing_max_lat_fops); |
@@ -4103,13 +4348,13 @@ static __init int tracer_init_debugfs(void) | |||
4103 | trace_create_file("saved_cmdlines", 0444, d_tracer, | 4348 | trace_create_file("saved_cmdlines", 0444, d_tracer, |
4104 | NULL, &tracing_saved_cmdlines_fops); | 4349 | NULL, &tracing_saved_cmdlines_fops); |
4105 | 4350 | ||
4351 | trace_create_file("trace_clock", 0644, d_tracer, NULL, | ||
4352 | &trace_clock_fops); | ||
4353 | |||
4106 | #ifdef CONFIG_DYNAMIC_FTRACE | 4354 | #ifdef CONFIG_DYNAMIC_FTRACE |
4107 | trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, | 4355 | trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, |
4108 | &ftrace_update_tot_cnt, &tracing_dyn_info_fops); | 4356 | &ftrace_update_tot_cnt, &tracing_dyn_info_fops); |
4109 | #endif | 4357 | #endif |
4110 | #ifdef CONFIG_SYSPROF_TRACER | ||
4111 | init_tracer_sysprof_debugfs(d_tracer); | ||
4112 | #endif | ||
4113 | 4358 | ||
4114 | create_trace_options_dir(); | 4359 | create_trace_options_dir(); |
4115 | 4360 | ||
@@ -4123,7 +4368,7 @@ static int trace_panic_handler(struct notifier_block *this, | |||
4123 | unsigned long event, void *unused) | 4368 | unsigned long event, void *unused) |
4124 | { | 4369 | { |
4125 | if (ftrace_dump_on_oops) | 4370 | if (ftrace_dump_on_oops) |
4126 | ftrace_dump(); | 4371 | ftrace_dump(ftrace_dump_on_oops); |
4127 | return NOTIFY_OK; | 4372 | return NOTIFY_OK; |
4128 | } | 4373 | } |
4129 | 4374 | ||
@@ -4140,7 +4385,7 @@ static int trace_die_handler(struct notifier_block *self, | |||
4140 | switch (val) { | 4385 | switch (val) { |
4141 | case DIE_OOPS: | 4386 | case DIE_OOPS: |
4142 | if (ftrace_dump_on_oops) | 4387 | if (ftrace_dump_on_oops) |
4143 | ftrace_dump(); | 4388 | ftrace_dump(ftrace_dump_on_oops); |
4144 | break; | 4389 | break; |
4145 | default: | 4390 | default: |
4146 | break; | 4391 | break; |
@@ -4166,7 +4411,7 @@ static struct notifier_block trace_die_notifier = { | |||
4166 | */ | 4411 | */ |
4167 | #define KERN_TRACE KERN_EMERG | 4412 | #define KERN_TRACE KERN_EMERG |
4168 | 4413 | ||
4169 | static void | 4414 | void |
4170 | trace_printk_seq(struct trace_seq *s) | 4415 | trace_printk_seq(struct trace_seq *s) |
4171 | { | 4416 | { |
4172 | /* Probably should print a warning here. */ | 4417 | /* Probably should print a warning here. */ |
@@ -4181,10 +4426,18 @@ trace_printk_seq(struct trace_seq *s) | |||
4181 | trace_seq_init(s); | 4426 | trace_seq_init(s); |
4182 | } | 4427 | } |
4183 | 4428 | ||
4184 | static void __ftrace_dump(bool disable_tracing) | 4429 | void trace_init_global_iter(struct trace_iterator *iter) |
4430 | { | ||
4431 | iter->tr = &global_trace; | ||
4432 | iter->trace = current_trace; | ||
4433 | iter->cpu_file = TRACE_PIPE_ALL_CPU; | ||
4434 | } | ||
4435 | |||
4436 | static void | ||
4437 | __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) | ||
4185 | { | 4438 | { |
4186 | static raw_spinlock_t ftrace_dump_lock = | 4439 | static arch_spinlock_t ftrace_dump_lock = |
4187 | (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 4440 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
4188 | /* use static because iter can be a bit big for the stack */ | 4441 | /* use static because iter can be a bit big for the stack */ |
4189 | static struct trace_iterator iter; | 4442 | static struct trace_iterator iter; |
4190 | unsigned int old_userobj; | 4443 | unsigned int old_userobj; |
@@ -4194,7 +4447,7 @@ static void __ftrace_dump(bool disable_tracing) | |||
4194 | 4447 | ||
4195 | /* only one dump */ | 4448 | /* only one dump */ |
4196 | local_irq_save(flags); | 4449 | local_irq_save(flags); |
4197 | __raw_spin_lock(&ftrace_dump_lock); | 4450 | arch_spin_lock(&ftrace_dump_lock); |
4198 | if (dump_ran) | 4451 | if (dump_ran) |
4199 | goto out; | 4452 | goto out; |
4200 | 4453 | ||
@@ -4205,8 +4458,10 @@ static void __ftrace_dump(bool disable_tracing) | |||
4205 | if (disable_tracing) | 4458 | if (disable_tracing) |
4206 | ftrace_kill(); | 4459 | ftrace_kill(); |
4207 | 4460 | ||
4461 | trace_init_global_iter(&iter); | ||
4462 | |||
4208 | for_each_tracing_cpu(cpu) { | 4463 | for_each_tracing_cpu(cpu) { |
4209 | atomic_inc(&global_trace.data[cpu]->disabled); | 4464 | atomic_inc(&iter.tr->data[cpu]->disabled); |
4210 | } | 4465 | } |
4211 | 4466 | ||
4212 | old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ; | 4467 | old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ; |
@@ -4214,12 +4469,25 @@ static void __ftrace_dump(bool disable_tracing) | |||
4214 | /* don't look at user memory in panic mode */ | 4469 | /* don't look at user memory in panic mode */ |
4215 | trace_flags &= ~TRACE_ITER_SYM_USEROBJ; | 4470 | trace_flags &= ~TRACE_ITER_SYM_USEROBJ; |
4216 | 4471 | ||
4217 | printk(KERN_TRACE "Dumping ftrace buffer:\n"); | ||
4218 | |||
4219 | /* Simulate the iterator */ | 4472 | /* Simulate the iterator */ |
4220 | iter.tr = &global_trace; | 4473 | iter.tr = &global_trace; |
4221 | iter.trace = current_trace; | 4474 | iter.trace = current_trace; |
4222 | iter.cpu_file = TRACE_PIPE_ALL_CPU; | 4475 | |
4476 | switch (oops_dump_mode) { | ||
4477 | case DUMP_ALL: | ||
4478 | iter.cpu_file = TRACE_PIPE_ALL_CPU; | ||
4479 | break; | ||
4480 | case DUMP_ORIG: | ||
4481 | iter.cpu_file = raw_smp_processor_id(); | ||
4482 | break; | ||
4483 | case DUMP_NONE: | ||
4484 | goto out_enable; | ||
4485 | default: | ||
4486 | printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n"); | ||
4487 | iter.cpu_file = TRACE_PIPE_ALL_CPU; | ||
4488 | } | ||
4489 | |||
4490 | printk(KERN_TRACE "Dumping ftrace buffer:\n"); | ||
4223 | 4491 | ||
4224 | /* | 4492 | /* |
4225 | * We need to stop all tracing on all CPUS to read the | 4493 | * We need to stop all tracing on all CPUS to read the |
@@ -4242,9 +4510,12 @@ static void __ftrace_dump(bool disable_tracing) | |||
4242 | iter.iter_flags |= TRACE_FILE_LAT_FMT; | 4510 | iter.iter_flags |= TRACE_FILE_LAT_FMT; |
4243 | iter.pos = -1; | 4511 | iter.pos = -1; |
4244 | 4512 | ||
4245 | if (find_next_entry_inc(&iter) != NULL) { | 4513 | if (trace_find_next_entry_inc(&iter) != NULL) { |
4246 | print_trace_line(&iter); | 4514 | int ret; |
4247 | trace_consume(&iter); | 4515 | |
4516 | ret = print_trace_line(&iter); | ||
4517 | if (ret != TRACE_TYPE_NO_CONSUME) | ||
4518 | trace_consume(&iter); | ||
4248 | } | 4519 | } |
4249 | 4520 | ||
4250 | trace_printk_seq(&iter.seq); | 4521 | trace_printk_seq(&iter.seq); |
@@ -4255,30 +4526,30 @@ static void __ftrace_dump(bool disable_tracing) | |||
4255 | else | 4526 | else |
4256 | printk(KERN_TRACE "---------------------------------\n"); | 4527 | printk(KERN_TRACE "---------------------------------\n"); |
4257 | 4528 | ||
4529 | out_enable: | ||
4258 | /* Re-enable tracing if requested */ | 4530 | /* Re-enable tracing if requested */ |
4259 | if (!disable_tracing) { | 4531 | if (!disable_tracing) { |
4260 | trace_flags |= old_userobj; | 4532 | trace_flags |= old_userobj; |
4261 | 4533 | ||
4262 | for_each_tracing_cpu(cpu) { | 4534 | for_each_tracing_cpu(cpu) { |
4263 | atomic_dec(&global_trace.data[cpu]->disabled); | 4535 | atomic_dec(&iter.tr->data[cpu]->disabled); |
4264 | } | 4536 | } |
4265 | tracing_on(); | 4537 | tracing_on(); |
4266 | } | 4538 | } |
4267 | 4539 | ||
4268 | out: | 4540 | out: |
4269 | __raw_spin_unlock(&ftrace_dump_lock); | 4541 | arch_spin_unlock(&ftrace_dump_lock); |
4270 | local_irq_restore(flags); | 4542 | local_irq_restore(flags); |
4271 | } | 4543 | } |
4272 | 4544 | ||
4273 | /* By default: disable tracing after the dump */ | 4545 | /* By default: disable tracing after the dump */ |
4274 | void ftrace_dump(void) | 4546 | void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) |
4275 | { | 4547 | { |
4276 | __ftrace_dump(true); | 4548 | __ftrace_dump(true, oops_dump_mode); |
4277 | } | 4549 | } |
4278 | 4550 | ||
4279 | __init static int tracer_alloc_buffers(void) | 4551 | __init static int tracer_alloc_buffers(void) |
4280 | { | 4552 | { |
4281 | struct trace_array_cpu *data; | ||
4282 | int ring_buf_size; | 4553 | int ring_buf_size; |
4283 | int i; | 4554 | int i; |
4284 | int ret = -ENOMEM; | 4555 | int ret = -ENOMEM; |
@@ -4289,9 +4560,6 @@ __init static int tracer_alloc_buffers(void) | |||
4289 | if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) | 4560 | if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) |
4290 | goto out_free_buffer_mask; | 4561 | goto out_free_buffer_mask; |
4291 | 4562 | ||
4292 | if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) | ||
4293 | goto out_free_tracing_cpumask; | ||
4294 | |||
4295 | /* To save memory, keep the ring buffer size to its minimum */ | 4563 | /* To save memory, keep the ring buffer size to its minimum */ |
4296 | if (ring_buffer_expanded) | 4564 | if (ring_buffer_expanded) |
4297 | ring_buf_size = trace_buf_size; | 4565 | ring_buf_size = trace_buf_size; |
@@ -4300,7 +4568,6 @@ __init static int tracer_alloc_buffers(void) | |||
4300 | 4568 | ||
4301 | cpumask_copy(tracing_buffer_mask, cpu_possible_mask); | 4569 | cpumask_copy(tracing_buffer_mask, cpu_possible_mask); |
4302 | cpumask_copy(tracing_cpumask, cpu_all_mask); | 4570 | cpumask_copy(tracing_cpumask, cpu_all_mask); |
4303 | cpumask_clear(tracing_reader_cpumask); | ||
4304 | 4571 | ||
4305 | /* TODO: make the number of buffers hot pluggable with CPUS */ | 4572 | /* TODO: make the number of buffers hot pluggable with CPUS */ |
4306 | global_trace.buffer = ring_buffer_alloc(ring_buf_size, | 4573 | global_trace.buffer = ring_buffer_alloc(ring_buf_size, |
@@ -4314,31 +4581,26 @@ __init static int tracer_alloc_buffers(void) | |||
4314 | 4581 | ||
4315 | 4582 | ||
4316 | #ifdef CONFIG_TRACER_MAX_TRACE | 4583 | #ifdef CONFIG_TRACER_MAX_TRACE |
4317 | max_tr.buffer = ring_buffer_alloc(ring_buf_size, | 4584 | max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS); |
4318 | TRACE_BUFFER_FLAGS); | ||
4319 | if (!max_tr.buffer) { | 4585 | if (!max_tr.buffer) { |
4320 | printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); | 4586 | printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); |
4321 | WARN_ON(1); | 4587 | WARN_ON(1); |
4322 | ring_buffer_free(global_trace.buffer); | 4588 | ring_buffer_free(global_trace.buffer); |
4323 | goto out_free_cpumask; | 4589 | goto out_free_cpumask; |
4324 | } | 4590 | } |
4325 | max_tr.entries = ring_buffer_size(max_tr.buffer); | 4591 | max_tr.entries = 1; |
4326 | WARN_ON(max_tr.entries != global_trace.entries); | ||
4327 | #endif | 4592 | #endif |
4328 | 4593 | ||
4329 | /* Allocate the first page for all buffers */ | 4594 | /* Allocate the first page for all buffers */ |
4330 | for_each_tracing_cpu(i) { | 4595 | for_each_tracing_cpu(i) { |
4331 | data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); | 4596 | global_trace.data[i] = &per_cpu(global_trace_cpu, i); |
4332 | max_tr.data[i] = &per_cpu(max_data, i); | 4597 | max_tr.data[i] = &per_cpu(max_tr_data, i); |
4333 | } | 4598 | } |
4334 | 4599 | ||
4335 | trace_init_cmdlines(); | 4600 | trace_init_cmdlines(); |
4336 | 4601 | ||
4337 | register_tracer(&nop_trace); | 4602 | register_tracer(&nop_trace); |
4338 | current_trace = &nop_trace; | 4603 | current_trace = &nop_trace; |
4339 | #ifdef CONFIG_BOOT_TRACER | ||
4340 | register_tracer(&boot_tracer); | ||
4341 | #endif | ||
4342 | /* All seems OK, enable tracing */ | 4604 | /* All seems OK, enable tracing */ |
4343 | tracing_disabled = 0; | 4605 | tracing_disabled = 0; |
4344 | 4606 | ||
@@ -4350,8 +4612,6 @@ __init static int tracer_alloc_buffers(void) | |||
4350 | return 0; | 4612 | return 0; |
4351 | 4613 | ||
4352 | out_free_cpumask: | 4614 | out_free_cpumask: |
4353 | free_cpumask_var(tracing_reader_cpumask); | ||
4354 | out_free_tracing_cpumask: | ||
4355 | free_cpumask_var(tracing_cpumask); | 4615 | free_cpumask_var(tracing_cpumask); |
4356 | out_free_buffer_mask: | 4616 | out_free_buffer_mask: |
4357 | free_cpumask_var(tracing_buffer_mask); | 4617 | free_cpumask_var(tracing_buffer_mask); |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6e735d4771f8..9021f8c0c0c3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -7,11 +7,9 @@ | |||
7 | #include <linux/clocksource.h> | 7 | #include <linux/clocksource.h> |
8 | #include <linux/ring_buffer.h> | 8 | #include <linux/ring_buffer.h> |
9 | #include <linux/mmiotrace.h> | 9 | #include <linux/mmiotrace.h> |
10 | #include <linux/tracepoint.h> | ||
10 | #include <linux/ftrace.h> | 11 | #include <linux/ftrace.h> |
11 | #include <trace/boot.h> | 12 | #include <linux/hw_breakpoint.h> |
12 | #include <linux/kmemtrace.h> | ||
13 | #include <trace/power.h> | ||
14 | |||
15 | #include <linux/trace_seq.h> | 13 | #include <linux/trace_seq.h> |
16 | #include <linux/ftrace_event.h> | 14 | #include <linux/ftrace_event.h> |
17 | 15 | ||
@@ -24,177 +22,58 @@ enum trace_type { | |||
24 | TRACE_STACK, | 22 | TRACE_STACK, |
25 | TRACE_PRINT, | 23 | TRACE_PRINT, |
26 | TRACE_BPRINT, | 24 | TRACE_BPRINT, |
27 | TRACE_SPECIAL, | ||
28 | TRACE_MMIO_RW, | 25 | TRACE_MMIO_RW, |
29 | TRACE_MMIO_MAP, | 26 | TRACE_MMIO_MAP, |
30 | TRACE_BRANCH, | 27 | TRACE_BRANCH, |
31 | TRACE_BOOT_CALL, | ||
32 | TRACE_BOOT_RET, | ||
33 | TRACE_GRAPH_RET, | 28 | TRACE_GRAPH_RET, |
34 | TRACE_GRAPH_ENT, | 29 | TRACE_GRAPH_ENT, |
35 | TRACE_USER_STACK, | 30 | TRACE_USER_STACK, |
36 | TRACE_HW_BRANCHES, | ||
37 | TRACE_SYSCALL_ENTER, | ||
38 | TRACE_SYSCALL_EXIT, | ||
39 | TRACE_KMEM_ALLOC, | ||
40 | TRACE_KMEM_FREE, | ||
41 | TRACE_POWER, | ||
42 | TRACE_BLK, | 31 | TRACE_BLK, |
43 | 32 | ||
44 | __TRACE_LAST_TYPE, | 33 | __TRACE_LAST_TYPE, |
45 | }; | 34 | }; |
46 | 35 | ||
47 | /* | ||
48 | * Function trace entry - function address and parent function addres: | ||
49 | */ | ||
50 | struct ftrace_entry { | ||
51 | struct trace_entry ent; | ||
52 | unsigned long ip; | ||
53 | unsigned long parent_ip; | ||
54 | }; | ||
55 | |||
56 | /* Function call entry */ | ||
57 | struct ftrace_graph_ent_entry { | ||
58 | struct trace_entry ent; | ||
59 | struct ftrace_graph_ent graph_ent; | ||
60 | }; | ||
61 | |||
62 | /* Function return entry */ | ||
63 | struct ftrace_graph_ret_entry { | ||
64 | struct trace_entry ent; | ||
65 | struct ftrace_graph_ret ret; | ||
66 | }; | ||
67 | extern struct tracer boot_tracer; | ||
68 | |||
69 | /* | ||
70 | * Context switch trace entry - which task (and prio) we switched from/to: | ||
71 | */ | ||
72 | struct ctx_switch_entry { | ||
73 | struct trace_entry ent; | ||
74 | unsigned int prev_pid; | ||
75 | unsigned char prev_prio; | ||
76 | unsigned char prev_state; | ||
77 | unsigned int next_pid; | ||
78 | unsigned char next_prio; | ||
79 | unsigned char next_state; | ||
80 | unsigned int next_cpu; | ||
81 | }; | ||
82 | |||
83 | /* | ||
84 | * Special (free-form) trace entry: | ||
85 | */ | ||
86 | struct special_entry { | ||
87 | struct trace_entry ent; | ||
88 | unsigned long arg1; | ||
89 | unsigned long arg2; | ||
90 | unsigned long arg3; | ||
91 | }; | ||
92 | |||
93 | /* | ||
94 | * Stack-trace entry: | ||
95 | */ | ||
96 | |||
97 | #define FTRACE_STACK_ENTRIES 8 | ||
98 | |||
99 | struct stack_entry { | ||
100 | struct trace_entry ent; | ||
101 | unsigned long caller[FTRACE_STACK_ENTRIES]; | ||
102 | }; | ||
103 | |||
104 | struct userstack_entry { | ||
105 | struct trace_entry ent; | ||
106 | unsigned long caller[FTRACE_STACK_ENTRIES]; | ||
107 | }; | ||
108 | |||
109 | /* | ||
110 | * trace_printk entry: | ||
111 | */ | ||
112 | struct bprint_entry { | ||
113 | struct trace_entry ent; | ||
114 | unsigned long ip; | ||
115 | const char *fmt; | ||
116 | u32 buf[]; | ||
117 | }; | ||
118 | |||
119 | struct print_entry { | ||
120 | struct trace_entry ent; | ||
121 | unsigned long ip; | ||
122 | char buf[]; | ||
123 | }; | ||
124 | |||
125 | #define TRACE_OLD_SIZE 88 | ||
126 | 36 | ||
127 | struct trace_field_cont { | 37 | #undef __field |
128 | unsigned char type; | 38 | #define __field(type, item) type item; |
129 | /* Temporary till we get rid of this completely */ | ||
130 | char buf[TRACE_OLD_SIZE - 1]; | ||
131 | }; | ||
132 | 39 | ||
133 | struct trace_mmiotrace_rw { | 40 | #undef __field_struct |
134 | struct trace_entry ent; | 41 | #define __field_struct(type, item) __field(type, item) |
135 | struct mmiotrace_rw rw; | ||
136 | }; | ||
137 | 42 | ||
138 | struct trace_mmiotrace_map { | 43 | #undef __field_desc |
139 | struct trace_entry ent; | 44 | #define __field_desc(type, container, item) |
140 | struct mmiotrace_map map; | ||
141 | }; | ||
142 | 45 | ||
143 | struct trace_boot_call { | 46 | #undef __array |
144 | struct trace_entry ent; | 47 | #define __array(type, item, size) type item[size]; |
145 | struct boot_trace_call boot_call; | ||
146 | }; | ||
147 | 48 | ||
148 | struct trace_boot_ret { | 49 | #undef __array_desc |
149 | struct trace_entry ent; | 50 | #define __array_desc(type, container, item, size) |
150 | struct boot_trace_ret boot_ret; | ||
151 | }; | ||
152 | 51 | ||
153 | #define TRACE_FUNC_SIZE 30 | 52 | #undef __dynamic_array |
154 | #define TRACE_FILE_SIZE 20 | 53 | #define __dynamic_array(type, item) type item[]; |
155 | struct trace_branch { | ||
156 | struct trace_entry ent; | ||
157 | unsigned line; | ||
158 | char func[TRACE_FUNC_SIZE+1]; | ||
159 | char file[TRACE_FILE_SIZE+1]; | ||
160 | char correct; | ||
161 | }; | ||
162 | 54 | ||
163 | struct hw_branch_entry { | 55 | #undef F_STRUCT |
164 | struct trace_entry ent; | 56 | #define F_STRUCT(args...) args |
165 | u64 from; | ||
166 | u64 to; | ||
167 | }; | ||
168 | 57 | ||
169 | struct trace_power { | 58 | #undef FTRACE_ENTRY |
170 | struct trace_entry ent; | 59 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ |
171 | struct power_trace state_data; | 60 | struct struct_name { \ |
172 | }; | 61 | struct trace_entry ent; \ |
62 | tstruct \ | ||
63 | } | ||
173 | 64 | ||
174 | enum kmemtrace_type_id { | 65 | #undef TP_ARGS |
175 | KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ | 66 | #define TP_ARGS(args...) args |
176 | KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ | ||
177 | KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ | ||
178 | }; | ||
179 | 67 | ||
180 | struct kmemtrace_alloc_entry { | 68 | #undef FTRACE_ENTRY_DUP |
181 | struct trace_entry ent; | 69 | #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) |
182 | enum kmemtrace_type_id type_id; | ||
183 | unsigned long call_site; | ||
184 | const void *ptr; | ||
185 | size_t bytes_req; | ||
186 | size_t bytes_alloc; | ||
187 | gfp_t gfp_flags; | ||
188 | int node; | ||
189 | }; | ||
190 | 70 | ||
191 | struct kmemtrace_free_entry { | 71 | #include "trace_entries.h" |
192 | struct trace_entry ent; | ||
193 | enum kmemtrace_type_id type_id; | ||
194 | unsigned long call_site; | ||
195 | const void *ptr; | ||
196 | }; | ||
197 | 72 | ||
73 | /* | ||
74 | * syscalls are special, and need special handling, this is why | ||
75 | * they are not included in trace_entries.h | ||
76 | */ | ||
198 | struct syscall_trace_enter { | 77 | struct syscall_trace_enter { |
199 | struct trace_entry ent; | 78 | struct trace_entry ent; |
200 | int nr; | 79 | int nr; |
@@ -204,16 +83,26 @@ struct syscall_trace_enter { | |||
204 | struct syscall_trace_exit { | 83 | struct syscall_trace_exit { |
205 | struct trace_entry ent; | 84 | struct trace_entry ent; |
206 | int nr; | 85 | int nr; |
207 | unsigned long ret; | 86 | long ret; |
208 | }; | 87 | }; |
209 | 88 | ||
89 | struct kprobe_trace_entry_head { | ||
90 | struct trace_entry ent; | ||
91 | unsigned long ip; | ||
92 | }; | ||
93 | |||
94 | struct kretprobe_trace_entry_head { | ||
95 | struct trace_entry ent; | ||
96 | unsigned long func; | ||
97 | unsigned long ret_ip; | ||
98 | }; | ||
210 | 99 | ||
211 | /* | 100 | /* |
212 | * trace_flag_type is an enumeration that holds different | 101 | * trace_flag_type is an enumeration that holds different |
213 | * states when a trace occurs. These are: | 102 | * states when a trace occurs. These are: |
214 | * IRQS_OFF - interrupts were disabled | 103 | * IRQS_OFF - interrupts were disabled |
215 | * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags | 104 | * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags |
216 | * NEED_RESCED - reschedule is requested | 105 | * NEED_RESCHED - reschedule is requested |
217 | * HARDIRQ - inside an interrupt handler | 106 | * HARDIRQ - inside an interrupt handler |
218 | * SOFTIRQ - inside a softirq handler | 107 | * SOFTIRQ - inside a softirq handler |
219 | */ | 108 | */ |
@@ -236,9 +125,6 @@ struct trace_array_cpu { | |||
236 | atomic_t disabled; | 125 | atomic_t disabled; |
237 | void *buffer_page; /* ring buffer spare */ | 126 | void *buffer_page; /* ring buffer spare */ |
238 | 127 | ||
239 | /* these fields get copied into max-trace: */ | ||
240 | unsigned long trace_idx; | ||
241 | unsigned long overrun; | ||
242 | unsigned long saved_latency; | 128 | unsigned long saved_latency; |
243 | unsigned long critical_start; | 129 | unsigned long critical_start; |
244 | unsigned long critical_end; | 130 | unsigned long critical_end; |
@@ -246,6 +132,7 @@ struct trace_array_cpu { | |||
246 | unsigned long nice; | 132 | unsigned long nice; |
247 | unsigned long policy; | 133 | unsigned long policy; |
248 | unsigned long rt_priority; | 134 | unsigned long rt_priority; |
135 | unsigned long skipped_entries; | ||
249 | cycle_t preempt_timestamp; | 136 | cycle_t preempt_timestamp; |
250 | pid_t pid; | 137 | pid_t pid; |
251 | uid_t uid; | 138 | uid_t uid; |
@@ -301,28 +188,15 @@ extern void __ftrace_bad_type(void); | |||
301 | IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ | 188 | IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ |
302 | IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ | 189 | IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ |
303 | IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ | 190 | IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ |
304 | IF_ASSIGN(var, ent, struct special_entry, 0); \ | ||
305 | IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ | 191 | IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ |
306 | TRACE_MMIO_RW); \ | 192 | TRACE_MMIO_RW); \ |
307 | IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ | 193 | IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ |
308 | TRACE_MMIO_MAP); \ | 194 | TRACE_MMIO_MAP); \ |
309 | IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ | ||
310 | IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ | ||
311 | IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ | 195 | IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ |
312 | IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ | 196 | IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ |
313 | TRACE_GRAPH_ENT); \ | 197 | TRACE_GRAPH_ENT); \ |
314 | IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ | 198 | IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ |
315 | TRACE_GRAPH_RET); \ | 199 | TRACE_GRAPH_RET); \ |
316 | IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ | ||
317 | IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ | ||
318 | IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ | ||
319 | TRACE_KMEM_ALLOC); \ | ||
320 | IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ | ||
321 | TRACE_KMEM_FREE); \ | ||
322 | IF_ASSIGN(var, ent, struct syscall_trace_enter, \ | ||
323 | TRACE_SYSCALL_ENTER); \ | ||
324 | IF_ASSIGN(var, ent, struct syscall_trace_exit, \ | ||
325 | TRACE_SYSCALL_EXIT); \ | ||
326 | __ftrace_bad_type(); \ | 200 | __ftrace_bad_type(); \ |
327 | } while (0) | 201 | } while (0) |
328 | 202 | ||
@@ -360,6 +234,7 @@ struct tracer_flags { | |||
360 | * @pipe_open: called when the trace_pipe file is opened | 234 | * @pipe_open: called when the trace_pipe file is opened |
361 | * @wait_pipe: override how the user waits for traces on trace_pipe | 235 | * @wait_pipe: override how the user waits for traces on trace_pipe |
362 | * @close: called when the trace file is released | 236 | * @close: called when the trace file is released |
237 | * @pipe_close: called when the trace_pipe file is released | ||
363 | * @read: override the default read callback on trace_pipe | 238 | * @read: override the default read callback on trace_pipe |
364 | * @splice_read: override the default splice_read callback on trace_pipe | 239 | * @splice_read: override the default splice_read callback on trace_pipe |
365 | * @selftest: selftest to run on boot (see trace_selftest.c) | 240 | * @selftest: selftest to run on boot (see trace_selftest.c) |
@@ -378,6 +253,7 @@ struct tracer { | |||
378 | void (*pipe_open)(struct trace_iterator *iter); | 253 | void (*pipe_open)(struct trace_iterator *iter); |
379 | void (*wait_pipe)(struct trace_iterator *iter); | 254 | void (*wait_pipe)(struct trace_iterator *iter); |
380 | void (*close)(struct trace_iterator *iter); | 255 | void (*close)(struct trace_iterator *iter); |
256 | void (*pipe_close)(struct trace_iterator *iter); | ||
381 | ssize_t (*read)(struct trace_iterator *iter, | 257 | ssize_t (*read)(struct trace_iterator *iter, |
382 | struct file *filp, char __user *ubuf, | 258 | struct file *filp, char __user *ubuf, |
383 | size_t cnt, loff_t *ppos); | 259 | size_t cnt, loff_t *ppos); |
@@ -398,7 +274,7 @@ struct tracer { | |||
398 | struct tracer *next; | 274 | struct tracer *next; |
399 | int print_max; | 275 | int print_max; |
400 | struct tracer_flags *flags; | 276 | struct tracer_flags *flags; |
401 | struct tracer_stat *stats; | 277 | int use_max_tr; |
402 | }; | 278 | }; |
403 | 279 | ||
404 | 280 | ||
@@ -419,16 +295,16 @@ struct dentry *trace_create_file(const char *name, | |||
419 | const struct file_operations *fops); | 295 | const struct file_operations *fops); |
420 | 296 | ||
421 | struct dentry *tracing_init_dentry(void); | 297 | struct dentry *tracing_init_dentry(void); |
422 | void init_tracer_sysprof_debugfs(struct dentry *d_tracer); | ||
423 | 298 | ||
424 | struct ring_buffer_event; | 299 | struct ring_buffer_event; |
425 | 300 | ||
426 | struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, | 301 | struct ring_buffer_event * |
427 | int type, | 302 | trace_buffer_lock_reserve(struct ring_buffer *buffer, |
428 | unsigned long len, | 303 | int type, |
429 | unsigned long flags, | 304 | unsigned long len, |
430 | int pc); | 305 | unsigned long flags, |
431 | void trace_buffer_unlock_commit(struct trace_array *tr, | 306 | int pc); |
307 | void trace_buffer_unlock_commit(struct ring_buffer *buffer, | ||
432 | struct ring_buffer_event *event, | 308 | struct ring_buffer_event *event, |
433 | unsigned long flags, int pc); | 309 | unsigned long flags, int pc); |
434 | 310 | ||
@@ -438,9 +314,13 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, | |||
438 | struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, | 314 | struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, |
439 | int *ent_cpu, u64 *ent_ts); | 315 | int *ent_cpu, u64 *ent_ts); |
440 | 316 | ||
441 | void tracing_generic_entry_update(struct trace_entry *entry, | 317 | int trace_empty(struct trace_iterator *iter); |
442 | unsigned long flags, | 318 | |
443 | int pc); | 319 | void *trace_find_next_entry_inc(struct trace_iterator *iter); |
320 | |||
321 | void trace_init_global_iter(struct trace_iterator *iter); | ||
322 | |||
323 | void tracing_iter_reset(struct trace_iterator *iter, int cpu); | ||
444 | 324 | ||
445 | void default_wait_pipe(struct trace_iterator *iter); | 325 | void default_wait_pipe(struct trace_iterator *iter); |
446 | void poll_wait_pipe(struct trace_iterator *iter); | 326 | void poll_wait_pipe(struct trace_iterator *iter); |
@@ -459,18 +339,21 @@ void tracing_sched_wakeup_trace(struct trace_array *tr, | |||
459 | struct task_struct *wakee, | 339 | struct task_struct *wakee, |
460 | struct task_struct *cur, | 340 | struct task_struct *cur, |
461 | unsigned long flags, int pc); | 341 | unsigned long flags, int pc); |
462 | void trace_special(struct trace_array *tr, | ||
463 | struct trace_array_cpu *data, | ||
464 | unsigned long arg1, | ||
465 | unsigned long arg2, | ||
466 | unsigned long arg3, int pc); | ||
467 | void trace_function(struct trace_array *tr, | 342 | void trace_function(struct trace_array *tr, |
468 | unsigned long ip, | 343 | unsigned long ip, |
469 | unsigned long parent_ip, | 344 | unsigned long parent_ip, |
470 | unsigned long flags, int pc); | 345 | unsigned long flags, int pc); |
346 | void trace_graph_function(struct trace_array *tr, | ||
347 | unsigned long ip, | ||
348 | unsigned long parent_ip, | ||
349 | unsigned long flags, int pc); | ||
350 | void trace_default_header(struct seq_file *m); | ||
351 | void print_trace_header(struct seq_file *m, struct trace_iterator *iter); | ||
352 | int trace_empty(struct trace_iterator *iter); | ||
471 | 353 | ||
472 | void trace_graph_return(struct ftrace_graph_ret *trace); | 354 | void trace_graph_return(struct ftrace_graph_ret *trace); |
473 | int trace_graph_entry(struct ftrace_graph_ent *trace); | 355 | int trace_graph_entry(struct ftrace_graph_ent *trace); |
356 | void set_graph_array(struct trace_array *tr); | ||
474 | 357 | ||
475 | void tracing_start_cmdline_record(void); | 358 | void tracing_start_cmdline_record(void); |
476 | void tracing_stop_cmdline_record(void); | 359 | void tracing_stop_cmdline_record(void); |
@@ -479,35 +362,56 @@ void tracing_stop_sched_switch_record(void); | |||
479 | void tracing_start_sched_switch_record(void); | 362 | void tracing_start_sched_switch_record(void); |
480 | int register_tracer(struct tracer *type); | 363 | int register_tracer(struct tracer *type); |
481 | void unregister_tracer(struct tracer *type); | 364 | void unregister_tracer(struct tracer *type); |
365 | int is_tracing_stopped(void); | ||
366 | enum trace_file_type { | ||
367 | TRACE_FILE_LAT_FMT = 1, | ||
368 | TRACE_FILE_ANNOTATE = 2, | ||
369 | }; | ||
370 | |||
371 | extern cpumask_var_t __read_mostly tracing_buffer_mask; | ||
372 | |||
373 | #define for_each_tracing_cpu(cpu) \ | ||
374 | for_each_cpu(cpu, tracing_buffer_mask) | ||
482 | 375 | ||
483 | extern unsigned long nsecs_to_usecs(unsigned long nsecs); | 376 | extern unsigned long nsecs_to_usecs(unsigned long nsecs); |
484 | 377 | ||
485 | extern unsigned long tracing_max_latency; | ||
486 | extern unsigned long tracing_thresh; | 378 | extern unsigned long tracing_thresh; |
487 | 379 | ||
380 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
381 | extern unsigned long tracing_max_latency; | ||
382 | |||
488 | void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); | 383 | void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); |
489 | void update_max_tr_single(struct trace_array *tr, | 384 | void update_max_tr_single(struct trace_array *tr, |
490 | struct task_struct *tsk, int cpu); | 385 | struct task_struct *tsk, int cpu); |
386 | #endif /* CONFIG_TRACER_MAX_TRACE */ | ||
491 | 387 | ||
492 | void __trace_stack(struct trace_array *tr, | 388 | #ifdef CONFIG_STACKTRACE |
493 | unsigned long flags, | 389 | void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, |
494 | int skip, int pc); | 390 | int skip, int pc); |
495 | 391 | ||
496 | extern cycle_t ftrace_now(int cpu); | 392 | void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, |
393 | int pc); | ||
497 | 394 | ||
498 | #ifdef CONFIG_CONTEXT_SWITCH_TRACER | 395 | void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, |
499 | typedef void | 396 | int pc); |
500 | (*tracer_switch_func_t)(void *private, | 397 | #else |
501 | void *__rq, | 398 | static inline void ftrace_trace_stack(struct ring_buffer *buffer, |
502 | struct task_struct *prev, | 399 | unsigned long flags, int skip, int pc) |
503 | struct task_struct *next); | 400 | { |
504 | 401 | } | |
505 | struct tracer_switch_ops { | 402 | |
506 | tracer_switch_func_t func; | 403 | static inline void ftrace_trace_userstack(struct ring_buffer *buffer, |
507 | void *private; | 404 | unsigned long flags, int pc) |
508 | struct tracer_switch_ops *next; | 405 | { |
509 | }; | 406 | } |
510 | #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ | 407 | |
408 | static inline void __trace_stack(struct trace_array *tr, unsigned long flags, | ||
409 | int skip, int pc) | ||
410 | { | ||
411 | } | ||
412 | #endif /* CONFIG_STACKTRACE */ | ||
413 | |||
414 | extern cycle_t ftrace_now(int cpu); | ||
511 | 415 | ||
512 | extern void trace_find_cmdline(int pid, char comm[]); | 416 | extern void trace_find_cmdline(int pid, char comm[]); |
513 | 417 | ||
@@ -517,6 +421,10 @@ extern unsigned long ftrace_update_tot_cnt; | |||
517 | extern int DYN_FTRACE_TEST_NAME(void); | 421 | extern int DYN_FTRACE_TEST_NAME(void); |
518 | #endif | 422 | #endif |
519 | 423 | ||
424 | extern int ring_buffer_expanded; | ||
425 | extern bool tracing_selftest_disabled; | ||
426 | DECLARE_PER_CPU(int, ftrace_cpu_disabled); | ||
427 | |||
520 | #ifdef CONFIG_FTRACE_STARTUP_TEST | 428 | #ifdef CONFIG_FTRACE_STARTUP_TEST |
521 | extern int trace_selftest_startup_function(struct tracer *trace, | 429 | extern int trace_selftest_startup_function(struct tracer *trace, |
522 | struct trace_array *tr); | 430 | struct trace_array *tr); |
@@ -534,12 +442,8 @@ extern int trace_selftest_startup_nop(struct tracer *trace, | |||
534 | struct trace_array *tr); | 442 | struct trace_array *tr); |
535 | extern int trace_selftest_startup_sched_switch(struct tracer *trace, | 443 | extern int trace_selftest_startup_sched_switch(struct tracer *trace, |
536 | struct trace_array *tr); | 444 | struct trace_array *tr); |
537 | extern int trace_selftest_startup_sysprof(struct tracer *trace, | ||
538 | struct trace_array *tr); | ||
539 | extern int trace_selftest_startup_branch(struct tracer *trace, | 445 | extern int trace_selftest_startup_branch(struct tracer *trace, |
540 | struct trace_array *tr); | 446 | struct trace_array *tr); |
541 | extern int trace_selftest_startup_hw_branches(struct tracer *trace, | ||
542 | struct trace_array *tr); | ||
543 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ | 447 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ |
544 | 448 | ||
545 | extern void *head_page(struct trace_array_cpu *data); | 449 | extern void *head_page(struct trace_array_cpu *data); |
@@ -548,18 +452,48 @@ extern int | |||
548 | trace_vbprintk(unsigned long ip, const char *fmt, va_list args); | 452 | trace_vbprintk(unsigned long ip, const char *fmt, va_list args); |
549 | extern int | 453 | extern int |
550 | trace_vprintk(unsigned long ip, const char *fmt, va_list args); | 454 | trace_vprintk(unsigned long ip, const char *fmt, va_list args); |
455 | extern int | ||
456 | trace_array_vprintk(struct trace_array *tr, | ||
457 | unsigned long ip, const char *fmt, va_list args); | ||
458 | int trace_array_printk(struct trace_array *tr, | ||
459 | unsigned long ip, const char *fmt, ...); | ||
460 | void trace_printk_seq(struct trace_seq *s); | ||
461 | enum print_line_t print_trace_line(struct trace_iterator *iter); | ||
551 | 462 | ||
552 | extern unsigned long trace_flags; | 463 | extern unsigned long trace_flags; |
553 | 464 | ||
465 | extern int trace_clock_id; | ||
466 | |||
554 | /* Standard output formatting function used for function return traces */ | 467 | /* Standard output formatting function used for function return traces */ |
555 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 468 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
556 | extern enum print_line_t print_graph_function(struct trace_iterator *iter); | 469 | |
470 | /* Flag options */ | ||
471 | #define TRACE_GRAPH_PRINT_OVERRUN 0x1 | ||
472 | #define TRACE_GRAPH_PRINT_CPU 0x2 | ||
473 | #define TRACE_GRAPH_PRINT_OVERHEAD 0x4 | ||
474 | #define TRACE_GRAPH_PRINT_PROC 0x8 | ||
475 | #define TRACE_GRAPH_PRINT_DURATION 0x10 | ||
476 | #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 | ||
477 | |||
478 | extern enum print_line_t | ||
479 | print_graph_function_flags(struct trace_iterator *iter, u32 flags); | ||
480 | extern void print_graph_headers_flags(struct seq_file *s, u32 flags); | ||
557 | extern enum print_line_t | 481 | extern enum print_line_t |
558 | trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); | 482 | trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); |
483 | extern void graph_trace_open(struct trace_iterator *iter); | ||
484 | extern void graph_trace_close(struct trace_iterator *iter); | ||
485 | extern int __trace_graph_entry(struct trace_array *tr, | ||
486 | struct ftrace_graph_ent *trace, | ||
487 | unsigned long flags, int pc); | ||
488 | extern void __trace_graph_return(struct trace_array *tr, | ||
489 | struct ftrace_graph_ret *trace, | ||
490 | unsigned long flags, int pc); | ||
491 | |||
559 | 492 | ||
560 | #ifdef CONFIG_DYNAMIC_FTRACE | 493 | #ifdef CONFIG_DYNAMIC_FTRACE |
561 | /* TODO: make this variable */ | 494 | /* TODO: make this variable */ |
562 | #define FTRACE_GRAPH_MAX_FUNCS 32 | 495 | #define FTRACE_GRAPH_MAX_FUNCS 32 |
496 | extern int ftrace_graph_filter_enabled; | ||
563 | extern int ftrace_graph_count; | 497 | extern int ftrace_graph_count; |
564 | extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; | 498 | extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; |
565 | 499 | ||
@@ -567,7 +501,7 @@ static inline int ftrace_graph_addr(unsigned long addr) | |||
567 | { | 501 | { |
568 | int i; | 502 | int i; |
569 | 503 | ||
570 | if (!ftrace_graph_count || test_tsk_trace_graph(current)) | 504 | if (!ftrace_graph_filter_enabled) |
571 | return 1; | 505 | return 1; |
572 | 506 | ||
573 | for (i = 0; i < ftrace_graph_count; i++) { | 507 | for (i = 0; i < ftrace_graph_count; i++) { |
@@ -578,10 +512,6 @@ static inline int ftrace_graph_addr(unsigned long addr) | |||
578 | return 0; | 512 | return 0; |
579 | } | 513 | } |
580 | #else | 514 | #else |
581 | static inline int ftrace_trace_addr(unsigned long addr) | ||
582 | { | ||
583 | return 1; | ||
584 | } | ||
585 | static inline int ftrace_graph_addr(unsigned long addr) | 515 | static inline int ftrace_graph_addr(unsigned long addr) |
586 | { | 516 | { |
587 | return 1; | 517 | return 1; |
@@ -589,21 +519,63 @@ static inline int ftrace_graph_addr(unsigned long addr) | |||
589 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 519 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
590 | #else /* CONFIG_FUNCTION_GRAPH_TRACER */ | 520 | #else /* CONFIG_FUNCTION_GRAPH_TRACER */ |
591 | static inline enum print_line_t | 521 | static inline enum print_line_t |
592 | print_graph_function(struct trace_iterator *iter) | 522 | print_graph_function_flags(struct trace_iterator *iter, u32 flags) |
593 | { | 523 | { |
594 | return TRACE_TYPE_UNHANDLED; | 524 | return TRACE_TYPE_UNHANDLED; |
595 | } | 525 | } |
596 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | 526 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ |
597 | 527 | ||
598 | extern struct pid *ftrace_pid_trace; | 528 | extern struct list_head ftrace_pids; |
599 | 529 | ||
530 | #ifdef CONFIG_FUNCTION_TRACER | ||
600 | static inline int ftrace_trace_task(struct task_struct *task) | 531 | static inline int ftrace_trace_task(struct task_struct *task) |
601 | { | 532 | { |
602 | if (!ftrace_pid_trace) | 533 | if (list_empty(&ftrace_pids)) |
603 | return 1; | 534 | return 1; |
604 | 535 | ||
605 | return test_tsk_trace_trace(task); | 536 | return test_tsk_trace_trace(task); |
606 | } | 537 | } |
538 | #else | ||
539 | static inline int ftrace_trace_task(struct task_struct *task) | ||
540 | { | ||
541 | return 1; | ||
542 | } | ||
543 | #endif | ||
544 | |||
545 | /* | ||
546 | * struct trace_parser - servers for reading the user input separated by spaces | ||
547 | * @cont: set if the input is not complete - no final space char was found | ||
548 | * @buffer: holds the parsed user input | ||
549 | * @idx: user input length | ||
550 | * @size: buffer size | ||
551 | */ | ||
552 | struct trace_parser { | ||
553 | bool cont; | ||
554 | char *buffer; | ||
555 | unsigned idx; | ||
556 | unsigned size; | ||
557 | }; | ||
558 | |||
559 | static inline bool trace_parser_loaded(struct trace_parser *parser) | ||
560 | { | ||
561 | return (parser->idx != 0); | ||
562 | } | ||
563 | |||
564 | static inline bool trace_parser_cont(struct trace_parser *parser) | ||
565 | { | ||
566 | return parser->cont; | ||
567 | } | ||
568 | |||
569 | static inline void trace_parser_clear(struct trace_parser *parser) | ||
570 | { | ||
571 | parser->cont = false; | ||
572 | parser->idx = 0; | ||
573 | } | ||
574 | |||
575 | extern int trace_parser_get_init(struct trace_parser *parser, int size); | ||
576 | extern void trace_parser_put(struct trace_parser *parser); | ||
577 | extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, | ||
578 | size_t cnt, loff_t *ppos); | ||
607 | 579 | ||
608 | /* | 580 | /* |
609 | * trace_iterator_flags is an enumeration that defines bit | 581 | * trace_iterator_flags is an enumeration that defines bit |
@@ -622,19 +594,18 @@ enum trace_iterator_flags { | |||
622 | TRACE_ITER_BIN = 0x40, | 594 | TRACE_ITER_BIN = 0x40, |
623 | TRACE_ITER_BLOCK = 0x80, | 595 | TRACE_ITER_BLOCK = 0x80, |
624 | TRACE_ITER_STACKTRACE = 0x100, | 596 | TRACE_ITER_STACKTRACE = 0x100, |
625 | TRACE_ITER_SCHED_TREE = 0x200, | 597 | TRACE_ITER_PRINTK = 0x200, |
626 | TRACE_ITER_PRINTK = 0x400, | 598 | TRACE_ITER_PREEMPTONLY = 0x400, |
627 | TRACE_ITER_PREEMPTONLY = 0x800, | 599 | TRACE_ITER_BRANCH = 0x800, |
628 | TRACE_ITER_BRANCH = 0x1000, | 600 | TRACE_ITER_ANNOTATE = 0x1000, |
629 | TRACE_ITER_ANNOTATE = 0x2000, | 601 | TRACE_ITER_USERSTACKTRACE = 0x2000, |
630 | TRACE_ITER_USERSTACKTRACE = 0x4000, | 602 | TRACE_ITER_SYM_USEROBJ = 0x4000, |
631 | TRACE_ITER_SYM_USEROBJ = 0x8000, | 603 | TRACE_ITER_PRINTK_MSGONLY = 0x8000, |
632 | TRACE_ITER_PRINTK_MSGONLY = 0x10000, | 604 | TRACE_ITER_CONTEXT_INFO = 0x10000, /* Print pid/cpu/time */ |
633 | TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ | 605 | TRACE_ITER_LATENCY_FMT = 0x20000, |
634 | TRACE_ITER_LATENCY_FMT = 0x40000, | 606 | TRACE_ITER_SLEEP_TIME = 0x40000, |
635 | TRACE_ITER_GLOBAL_CLK = 0x80000, | 607 | TRACE_ITER_GRAPH_TIME = 0x80000, |
636 | TRACE_ITER_SLEEP_TIME = 0x100000, | 608 | TRACE_ITER_RECORD_CMD = 0x100000, |
637 | TRACE_ITER_GRAPH_TIME = 0x200000, | ||
638 | }; | 609 | }; |
639 | 610 | ||
640 | /* | 611 | /* |
@@ -646,54 +617,6 @@ enum trace_iterator_flags { | |||
646 | 617 | ||
647 | extern struct tracer nop_trace; | 618 | extern struct tracer nop_trace; |
648 | 619 | ||
649 | /** | ||
650 | * ftrace_preempt_disable - disable preemption scheduler safe | ||
651 | * | ||
652 | * When tracing can happen inside the scheduler, there exists | ||
653 | * cases that the tracing might happen before the need_resched | ||
654 | * flag is checked. If this happens and the tracer calls | ||
655 | * preempt_enable (after a disable), a schedule might take place | ||
656 | * causing an infinite recursion. | ||
657 | * | ||
658 | * To prevent this, we read the need_resched flag before | ||
659 | * disabling preemption. When we want to enable preemption we | ||
660 | * check the flag, if it is set, then we call preempt_enable_no_resched. | ||
661 | * Otherwise, we call preempt_enable. | ||
662 | * | ||
663 | * The rational for doing the above is that if need_resched is set | ||
664 | * and we have yet to reschedule, we are either in an atomic location | ||
665 | * (where we do not need to check for scheduling) or we are inside | ||
666 | * the scheduler and do not want to resched. | ||
667 | */ | ||
668 | static inline int ftrace_preempt_disable(void) | ||
669 | { | ||
670 | int resched; | ||
671 | |||
672 | resched = need_resched(); | ||
673 | preempt_disable_notrace(); | ||
674 | |||
675 | return resched; | ||
676 | } | ||
677 | |||
678 | /** | ||
679 | * ftrace_preempt_enable - enable preemption scheduler safe | ||
680 | * @resched: the return value from ftrace_preempt_disable | ||
681 | * | ||
682 | * This is a scheduler safe way to enable preemption and not miss | ||
683 | * any preemption checks. The disabled saved the state of preemption. | ||
684 | * If resched is set, then we are either inside an atomic or | ||
685 | * are inside the scheduler (we would have already scheduled | ||
686 | * otherwise). In this case, we do not want to call normal | ||
687 | * preempt_enable, but preempt_enable_no_resched instead. | ||
688 | */ | ||
689 | static inline void ftrace_preempt_enable(int resched) | ||
690 | { | ||
691 | if (resched) | ||
692 | preempt_enable_no_resched_notrace(); | ||
693 | else | ||
694 | preempt_enable_notrace(); | ||
695 | } | ||
696 | |||
697 | #ifdef CONFIG_BRANCH_TRACER | 620 | #ifdef CONFIG_BRANCH_TRACER |
698 | extern int enable_branch_tracing(struct trace_array *tr); | 621 | extern int enable_branch_tracing(struct trace_array *tr); |
699 | extern void disable_branch_tracing(void); | 622 | extern void disable_branch_tracing(void); |
@@ -731,6 +654,7 @@ struct ftrace_event_field { | |||
731 | struct list_head link; | 654 | struct list_head link; |
732 | char *name; | 655 | char *name; |
733 | char *type; | 656 | char *type; |
657 | int filter_type; | ||
734 | int offset; | 658 | int offset; |
735 | int size; | 659 | int size; |
736 | int is_signed; | 660 | int is_signed; |
@@ -746,26 +670,47 @@ struct event_subsystem { | |||
746 | struct list_head list; | 670 | struct list_head list; |
747 | const char *name; | 671 | const char *name; |
748 | struct dentry *entry; | 672 | struct dentry *entry; |
749 | void *filter; | 673 | struct event_filter *filter; |
674 | int nr_events; | ||
750 | }; | 675 | }; |
751 | 676 | ||
752 | struct filter_pred; | 677 | struct filter_pred; |
678 | struct regex; | ||
753 | 679 | ||
754 | typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, | 680 | typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, |
755 | int val1, int val2); | 681 | int val1, int val2); |
756 | 682 | ||
683 | typedef int (*regex_match_func)(char *str, struct regex *r, int len); | ||
684 | |||
685 | enum regex_type { | ||
686 | MATCH_FULL = 0, | ||
687 | MATCH_FRONT_ONLY, | ||
688 | MATCH_MIDDLE_ONLY, | ||
689 | MATCH_END_ONLY, | ||
690 | }; | ||
691 | |||
692 | struct regex { | ||
693 | char pattern[MAX_FILTER_STR_VAL]; | ||
694 | int len; | ||
695 | int field_len; | ||
696 | regex_match_func match; | ||
697 | }; | ||
698 | |||
757 | struct filter_pred { | 699 | struct filter_pred { |
758 | filter_pred_fn_t fn; | 700 | filter_pred_fn_t fn; |
759 | u64 val; | 701 | u64 val; |
760 | char str_val[MAX_FILTER_STR_VAL]; | 702 | struct regex regex; |
761 | int str_len; | 703 | char *field_name; |
762 | char *field_name; | 704 | int offset; |
763 | int offset; | 705 | int not; |
764 | int not; | 706 | int op; |
765 | int op; | 707 | int pop_n; |
766 | int pop_n; | ||
767 | }; | 708 | }; |
768 | 709 | ||
710 | extern struct list_head ftrace_common_fields; | ||
711 | |||
712 | extern enum regex_type | ||
713 | filter_parse_regex(char *buff, int len, char **search, int *not); | ||
769 | extern void print_event_filter(struct ftrace_event_call *call, | 714 | extern void print_event_filter(struct ftrace_event_call *call, |
770 | struct trace_seq *s); | 715 | struct trace_seq *s); |
771 | extern int apply_event_filter(struct ftrace_event_call *call, | 716 | extern int apply_event_filter(struct ftrace_event_call *call, |
@@ -774,13 +719,18 @@ extern int apply_subsystem_event_filter(struct event_subsystem *system, | |||
774 | char *filter_string); | 719 | char *filter_string); |
775 | extern void print_subsystem_event_filter(struct event_subsystem *system, | 720 | extern void print_subsystem_event_filter(struct event_subsystem *system, |
776 | struct trace_seq *s); | 721 | struct trace_seq *s); |
722 | extern int filter_assign_type(const char *type); | ||
723 | |||
724 | struct list_head * | ||
725 | trace_get_fields(struct ftrace_event_call *event_call); | ||
777 | 726 | ||
778 | static inline int | 727 | static inline int |
779 | filter_check_discard(struct ftrace_event_call *call, void *rec, | 728 | filter_check_discard(struct ftrace_event_call *call, void *rec, |
780 | struct ring_buffer *buffer, | 729 | struct ring_buffer *buffer, |
781 | struct ring_buffer_event *event) | 730 | struct ring_buffer_event *event) |
782 | { | 731 | { |
783 | if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { | 732 | if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && |
733 | !filter_match_preds(call->filter, rec)) { | ||
784 | ring_buffer_discard_commit(buffer, event); | 734 | ring_buffer_discard_commit(buffer, event); |
785 | return 1; | 735 | return 1; |
786 | } | 736 | } |
@@ -788,46 +738,7 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, | |||
788 | return 0; | 738 | return 0; |
789 | } | 739 | } |
790 | 740 | ||
791 | #define DEFINE_COMPARISON_PRED(type) \ | 741 | extern void trace_event_enable_cmd_record(bool enable); |
792 | static int filter_pred_##type(struct filter_pred *pred, void *event, \ | ||
793 | int val1, int val2) \ | ||
794 | { \ | ||
795 | type *addr = (type *)(event + pred->offset); \ | ||
796 | type val = (type)pred->val; \ | ||
797 | int match = 0; \ | ||
798 | \ | ||
799 | switch (pred->op) { \ | ||
800 | case OP_LT: \ | ||
801 | match = (*addr < val); \ | ||
802 | break; \ | ||
803 | case OP_LE: \ | ||
804 | match = (*addr <= val); \ | ||
805 | break; \ | ||
806 | case OP_GT: \ | ||
807 | match = (*addr > val); \ | ||
808 | break; \ | ||
809 | case OP_GE: \ | ||
810 | match = (*addr >= val); \ | ||
811 | break; \ | ||
812 | default: \ | ||
813 | break; \ | ||
814 | } \ | ||
815 | \ | ||
816 | return match; \ | ||
817 | } | ||
818 | |||
819 | #define DEFINE_EQUALITY_PRED(size) \ | ||
820 | static int filter_pred_##size(struct filter_pred *pred, void *event, \ | ||
821 | int val1, int val2) \ | ||
822 | { \ | ||
823 | u##size *addr = (u##size *)(event + pred->offset); \ | ||
824 | u##size val = (u##size)pred->val; \ | ||
825 | int match; \ | ||
826 | \ | ||
827 | match = (val == *addr) ^ pred->not; \ | ||
828 | \ | ||
829 | return match; \ | ||
830 | } | ||
831 | 742 | ||
832 | extern struct mutex event_mutex; | 743 | extern struct mutex event_mutex; |
833 | extern struct list_head ftrace_events; | 744 | extern struct list_head ftrace_events; |
@@ -835,11 +746,13 @@ extern struct list_head ftrace_events; | |||
835 | extern const char *__start___trace_bprintk_fmt[]; | 746 | extern const char *__start___trace_bprintk_fmt[]; |
836 | extern const char *__stop___trace_bprintk_fmt[]; | 747 | extern const char *__stop___trace_bprintk_fmt[]; |
837 | 748 | ||
838 | #undef TRACE_EVENT_FORMAT | 749 | #undef FTRACE_ENTRY |
839 | #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ | 750 | #define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ |
840 | extern struct ftrace_event_call event_##call; | 751 | extern struct ftrace_event_call \ |
841 | #undef TRACE_EVENT_FORMAT_NOFILTER | 752 | __attribute__((__aligned__(4))) event_##call; |
842 | #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt) | 753 | #undef FTRACE_ENTRY_DUP |
843 | #include "trace_event_types.h" | 754 | #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ |
755 | FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) | ||
756 | #include "trace_entries.h" | ||
844 | 757 | ||
845 | #endif /* _LINUX_KERNEL_TRACE_H */ | 758 | #endif /* _LINUX_KERNEL_TRACE_H */ |
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c deleted file mode 100644 index a29ef23ffb47..000000000000 --- a/kernel/trace/trace_boot.c +++ /dev/null | |||
@@ -1,179 +0,0 @@ | |||
1 | /* | ||
2 | * ring buffer based initcalls tracer | ||
3 | * | ||
4 | * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> | ||
5 | * | ||
6 | */ | ||
7 | |||
8 | #include <linux/init.h> | ||
9 | #include <linux/debugfs.h> | ||
10 | #include <linux/ftrace.h> | ||
11 | #include <linux/kallsyms.h> | ||
12 | #include <linux/time.h> | ||
13 | |||
14 | #include "trace.h" | ||
15 | #include "trace_output.h" | ||
16 | |||
17 | static struct trace_array *boot_trace; | ||
18 | static bool pre_initcalls_finished; | ||
19 | |||
20 | /* Tells the boot tracer that the pre_smp_initcalls are finished. | ||
21 | * So we are ready . | ||
22 | * It doesn't enable sched events tracing however. | ||
23 | * You have to call enable_boot_trace to do so. | ||
24 | */ | ||
25 | void start_boot_trace(void) | ||
26 | { | ||
27 | pre_initcalls_finished = true; | ||
28 | } | ||
29 | |||
30 | void enable_boot_trace(void) | ||
31 | { | ||
32 | if (boot_trace && pre_initcalls_finished) | ||
33 | tracing_start_sched_switch_record(); | ||
34 | } | ||
35 | |||
36 | void disable_boot_trace(void) | ||
37 | { | ||
38 | if (boot_trace && pre_initcalls_finished) | ||
39 | tracing_stop_sched_switch_record(); | ||
40 | } | ||
41 | |||
42 | static int boot_trace_init(struct trace_array *tr) | ||
43 | { | ||
44 | int cpu; | ||
45 | boot_trace = tr; | ||
46 | |||
47 | if (!tr) | ||
48 | return 0; | ||
49 | |||
50 | for_each_cpu(cpu, cpu_possible_mask) | ||
51 | tracing_reset(tr, cpu); | ||
52 | |||
53 | tracing_sched_switch_assign_trace(tr); | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | static enum print_line_t | ||
58 | initcall_call_print_line(struct trace_iterator *iter) | ||
59 | { | ||
60 | struct trace_entry *entry = iter->ent; | ||
61 | struct trace_seq *s = &iter->seq; | ||
62 | struct trace_boot_call *field; | ||
63 | struct boot_trace_call *call; | ||
64 | u64 ts; | ||
65 | unsigned long nsec_rem; | ||
66 | int ret; | ||
67 | |||
68 | trace_assign_type(field, entry); | ||
69 | call = &field->boot_call; | ||
70 | ts = iter->ts; | ||
71 | nsec_rem = do_div(ts, NSEC_PER_SEC); | ||
72 | |||
73 | ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", | ||
74 | (unsigned long)ts, nsec_rem, call->func, call->caller); | ||
75 | |||
76 | if (!ret) | ||
77 | return TRACE_TYPE_PARTIAL_LINE; | ||
78 | else | ||
79 | return TRACE_TYPE_HANDLED; | ||
80 | } | ||
81 | |||
82 | static enum print_line_t | ||
83 | initcall_ret_print_line(struct trace_iterator *iter) | ||
84 | { | ||
85 | struct trace_entry *entry = iter->ent; | ||
86 | struct trace_seq *s = &iter->seq; | ||
87 | struct trace_boot_ret *field; | ||
88 | struct boot_trace_ret *init_ret; | ||
89 | u64 ts; | ||
90 | unsigned long nsec_rem; | ||
91 | int ret; | ||
92 | |||
93 | trace_assign_type(field, entry); | ||
94 | init_ret = &field->boot_ret; | ||
95 | ts = iter->ts; | ||
96 | nsec_rem = do_div(ts, NSEC_PER_SEC); | ||
97 | |||
98 | ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " | ||
99 | "returned %d after %llu msecs\n", | ||
100 | (unsigned long) ts, | ||
101 | nsec_rem, | ||
102 | init_ret->func, init_ret->result, init_ret->duration); | ||
103 | |||
104 | if (!ret) | ||
105 | return TRACE_TYPE_PARTIAL_LINE; | ||
106 | else | ||
107 | return TRACE_TYPE_HANDLED; | ||
108 | } | ||
109 | |||
110 | static enum print_line_t initcall_print_line(struct trace_iterator *iter) | ||
111 | { | ||
112 | struct trace_entry *entry = iter->ent; | ||
113 | |||
114 | switch (entry->type) { | ||
115 | case TRACE_BOOT_CALL: | ||
116 | return initcall_call_print_line(iter); | ||
117 | case TRACE_BOOT_RET: | ||
118 | return initcall_ret_print_line(iter); | ||
119 | default: | ||
120 | return TRACE_TYPE_UNHANDLED; | ||
121 | } | ||
122 | } | ||
123 | |||
124 | struct tracer boot_tracer __read_mostly = | ||
125 | { | ||
126 | .name = "initcall", | ||
127 | .init = boot_trace_init, | ||
128 | .reset = tracing_reset_online_cpus, | ||
129 | .print_line = initcall_print_line, | ||
130 | }; | ||
131 | |||
132 | void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) | ||
133 | { | ||
134 | struct ring_buffer_event *event; | ||
135 | struct trace_boot_call *entry; | ||
136 | struct trace_array *tr = boot_trace; | ||
137 | |||
138 | if (!tr || !pre_initcalls_finished) | ||
139 | return; | ||
140 | |||
141 | /* Get its name now since this function could | ||
142 | * disappear because it is in the .init section. | ||
143 | */ | ||
144 | sprint_symbol(bt->func, (unsigned long)fn); | ||
145 | preempt_disable(); | ||
146 | |||
147 | event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL, | ||
148 | sizeof(*entry), 0, 0); | ||
149 | if (!event) | ||
150 | goto out; | ||
151 | entry = ring_buffer_event_data(event); | ||
152 | entry->boot_call = *bt; | ||
153 | trace_buffer_unlock_commit(tr, event, 0, 0); | ||
154 | out: | ||
155 | preempt_enable(); | ||
156 | } | ||
157 | |||
158 | void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) | ||
159 | { | ||
160 | struct ring_buffer_event *event; | ||
161 | struct trace_boot_ret *entry; | ||
162 | struct trace_array *tr = boot_trace; | ||
163 | |||
164 | if (!tr || !pre_initcalls_finished) | ||
165 | return; | ||
166 | |||
167 | sprint_symbol(bt->func, (unsigned long)fn); | ||
168 | preempt_disable(); | ||
169 | |||
170 | event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET, | ||
171 | sizeof(*entry), 0, 0); | ||
172 | if (!event) | ||
173 | goto out; | ||
174 | entry = ring_buffer_event_data(event); | ||
175 | entry->boot_ret = *bt; | ||
176 | trace_buffer_unlock_commit(tr, event, 0, 0); | ||
177 | out: | ||
178 | preempt_enable(); | ||
179 | } | ||
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 7a7a9fd249a9..8d3538b4ea5f 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c | |||
@@ -34,6 +34,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) | |||
34 | struct trace_array *tr = branch_tracer; | 34 | struct trace_array *tr = branch_tracer; |
35 | struct ring_buffer_event *event; | 35 | struct ring_buffer_event *event; |
36 | struct trace_branch *entry; | 36 | struct trace_branch *entry; |
37 | struct ring_buffer *buffer; | ||
37 | unsigned long flags; | 38 | unsigned long flags; |
38 | int cpu, pc; | 39 | int cpu, pc; |
39 | const char *p; | 40 | const char *p; |
@@ -54,7 +55,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) | |||
54 | goto out; | 55 | goto out; |
55 | 56 | ||
56 | pc = preempt_count(); | 57 | pc = preempt_count(); |
57 | event = trace_buffer_lock_reserve(tr, TRACE_BRANCH, | 58 | buffer = tr->buffer; |
59 | event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH, | ||
58 | sizeof(*entry), flags, pc); | 60 | sizeof(*entry), flags, pc); |
59 | if (!event) | 61 | if (!event) |
60 | goto out; | 62 | goto out; |
@@ -74,8 +76,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) | |||
74 | entry->line = f->line; | 76 | entry->line = f->line; |
75 | entry->correct = val == expect; | 77 | entry->correct = val == expect; |
76 | 78 | ||
77 | if (!filter_check_discard(call, entry, tr->buffer, event)) | 79 | if (!filter_check_discard(call, entry, buffer, event)) |
78 | ring_buffer_unlock_commit(tr->buffer, event); | 80 | ring_buffer_unlock_commit(buffer, event); |
79 | 81 | ||
80 | out: | 82 | out: |
81 | atomic_dec(&tr->data[cpu]->disabled); | 83 | atomic_dec(&tr->data[cpu]->disabled); |
@@ -141,7 +143,7 @@ static void branch_trace_reset(struct trace_array *tr) | |||
141 | } | 143 | } |
142 | 144 | ||
143 | static enum print_line_t trace_branch_print(struct trace_iterator *iter, | 145 | static enum print_line_t trace_branch_print(struct trace_iterator *iter, |
144 | int flags) | 146 | int flags, struct trace_event *event) |
145 | { | 147 | { |
146 | struct trace_branch *field; | 148 | struct trace_branch *field; |
147 | 149 | ||
@@ -165,9 +167,13 @@ static void branch_print_header(struct seq_file *s) | |||
165 | " |\n"); | 167 | " |\n"); |
166 | } | 168 | } |
167 | 169 | ||
170 | static struct trace_event_functions trace_branch_funcs = { | ||
171 | .trace = trace_branch_print, | ||
172 | }; | ||
173 | |||
168 | static struct trace_event trace_branch_event = { | 174 | static struct trace_event trace_branch_event = { |
169 | .type = TRACE_BRANCH, | 175 | .type = TRACE_BRANCH, |
170 | .trace = trace_branch_print, | 176 | .funcs = &trace_branch_funcs, |
171 | }; | 177 | }; |
172 | 178 | ||
173 | static struct tracer branch_trace __read_mostly = | 179 | static struct tracer branch_trace __read_mostly = |
@@ -305,8 +311,23 @@ static int annotated_branch_stat_cmp(void *p1, void *p2) | |||
305 | return -1; | 311 | return -1; |
306 | if (percent_a > percent_b) | 312 | if (percent_a > percent_b) |
307 | return 1; | 313 | return 1; |
308 | else | 314 | |
309 | return 0; | 315 | if (a->incorrect < b->incorrect) |
316 | return -1; | ||
317 | if (a->incorrect > b->incorrect) | ||
318 | return 1; | ||
319 | |||
320 | /* | ||
321 | * Since the above shows worse (incorrect) cases | ||
322 | * first, we continue that by showing best (correct) | ||
323 | * cases last. | ||
324 | */ | ||
325 | if (a->correct > b->correct) | ||
326 | return -1; | ||
327 | if (a->correct < b->correct) | ||
328 | return 1; | ||
329 | |||
330 | return 0; | ||
310 | } | 331 | } |
311 | 332 | ||
312 | static struct tracer_stat annotated_branch_stats = { | 333 | static struct tracer_stat annotated_branch_stats = { |
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index b588fd81f7f9..685a67d55db0 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c | |||
@@ -13,6 +13,7 @@ | |||
13 | * Tracer plugins will chose a default from these clocks. | 13 | * Tracer plugins will chose a default from these clocks. |
14 | */ | 14 | */ |
15 | #include <linux/spinlock.h> | 15 | #include <linux/spinlock.h> |
16 | #include <linux/irqflags.h> | ||
16 | #include <linux/hardirq.h> | 17 | #include <linux/hardirq.h> |
17 | #include <linux/module.h> | 18 | #include <linux/module.h> |
18 | #include <linux/percpu.h> | 19 | #include <linux/percpu.h> |
@@ -20,6 +21,8 @@ | |||
20 | #include <linux/ktime.h> | 21 | #include <linux/ktime.h> |
21 | #include <linux/trace_clock.h> | 22 | #include <linux/trace_clock.h> |
22 | 23 | ||
24 | #include "trace.h" | ||
25 | |||
23 | /* | 26 | /* |
24 | * trace_clock_local(): the simplest and least coherent tracing clock. | 27 | * trace_clock_local(): the simplest and least coherent tracing clock. |
25 | * | 28 | * |
@@ -28,7 +31,6 @@ | |||
28 | */ | 31 | */ |
29 | u64 notrace trace_clock_local(void) | 32 | u64 notrace trace_clock_local(void) |
30 | { | 33 | { |
31 | unsigned long flags; | ||
32 | u64 clock; | 34 | u64 clock; |
33 | 35 | ||
34 | /* | 36 | /* |
@@ -36,9 +38,9 @@ u64 notrace trace_clock_local(void) | |||
36 | * lockless clock. It is not guaranteed to be coherent across | 38 | * lockless clock. It is not guaranteed to be coherent across |
37 | * CPUs, nor across CPU idle events. | 39 | * CPUs, nor across CPU idle events. |
38 | */ | 40 | */ |
39 | raw_local_irq_save(flags); | 41 | preempt_disable_notrace(); |
40 | clock = sched_clock(); | 42 | clock = sched_clock(); |
41 | raw_local_irq_restore(flags); | 43 | preempt_enable_notrace(); |
42 | 44 | ||
43 | return clock; | 45 | return clock; |
44 | } | 46 | } |
@@ -53,7 +55,7 @@ u64 notrace trace_clock_local(void) | |||
53 | */ | 55 | */ |
54 | u64 notrace trace_clock(void) | 56 | u64 notrace trace_clock(void) |
55 | { | 57 | { |
56 | return cpu_clock(raw_smp_processor_id()); | 58 | return local_clock(); |
57 | } | 59 | } |
58 | 60 | ||
59 | 61 | ||
@@ -66,10 +68,14 @@ u64 notrace trace_clock(void) | |||
66 | * Used by plugins that need globally coherent timestamps. | 68 | * Used by plugins that need globally coherent timestamps. |
67 | */ | 69 | */ |
68 | 70 | ||
69 | static u64 prev_trace_clock_time; | 71 | /* keep prev_time and lock in the same cacheline. */ |
70 | 72 | static struct { | |
71 | static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp = | 73 | u64 prev_time; |
72 | (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 74 | arch_spinlock_t lock; |
75 | } trace_clock_struct ____cacheline_aligned_in_smp = | ||
76 | { | ||
77 | .lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED, | ||
78 | }; | ||
73 | 79 | ||
74 | u64 notrace trace_clock_global(void) | 80 | u64 notrace trace_clock_global(void) |
75 | { | 81 | { |
@@ -77,7 +83,7 @@ u64 notrace trace_clock_global(void) | |||
77 | int this_cpu; | 83 | int this_cpu; |
78 | u64 now; | 84 | u64 now; |
79 | 85 | ||
80 | raw_local_irq_save(flags); | 86 | local_irq_save(flags); |
81 | 87 | ||
82 | this_cpu = raw_smp_processor_id(); | 88 | this_cpu = raw_smp_processor_id(); |
83 | now = cpu_clock(this_cpu); | 89 | now = cpu_clock(this_cpu); |
@@ -88,22 +94,22 @@ u64 notrace trace_clock_global(void) | |||
88 | if (unlikely(in_nmi())) | 94 | if (unlikely(in_nmi())) |
89 | goto out; | 95 | goto out; |
90 | 96 | ||
91 | __raw_spin_lock(&trace_clock_lock); | 97 | arch_spin_lock(&trace_clock_struct.lock); |
92 | 98 | ||
93 | /* | 99 | /* |
94 | * TODO: if this happens often then maybe we should reset | 100 | * TODO: if this happens often then maybe we should reset |
95 | * my_scd->clock to prev_trace_clock_time+1, to make sure | 101 | * my_scd->clock to prev_time+1, to make sure |
96 | * we start ticking with the local clock from now on? | 102 | * we start ticking with the local clock from now on? |
97 | */ | 103 | */ |
98 | if ((s64)(now - prev_trace_clock_time) < 0) | 104 | if ((s64)(now - trace_clock_struct.prev_time) < 0) |
99 | now = prev_trace_clock_time + 1; | 105 | now = trace_clock_struct.prev_time + 1; |
100 | 106 | ||
101 | prev_trace_clock_time = now; | 107 | trace_clock_struct.prev_time = now; |
102 | 108 | ||
103 | __raw_spin_unlock(&trace_clock_lock); | 109 | arch_spin_unlock(&trace_clock_struct.lock); |
104 | 110 | ||
105 | out: | 111 | out: |
106 | raw_local_irq_restore(flags); | 112 | local_irq_restore(flags); |
107 | 113 | ||
108 | return now; | 114 | return now; |
109 | } | 115 | } |
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h new file mode 100644 index 000000000000..e3dfecaf13e6 --- /dev/null +++ b/kernel/trace/trace_entries.h | |||
@@ -0,0 +1,276 @@ | |||
1 | /* | ||
2 | * This file defines the trace event structures that go into the ring | ||
3 | * buffer directly. They are created via macros so that changes for them | ||
4 | * appear in the format file. Using macros will automate this process. | ||
5 | * | ||
6 | * The macro used to create a ftrace data structure is: | ||
7 | * | ||
8 | * FTRACE_ENTRY( name, struct_name, id, structure, print ) | ||
9 | * | ||
10 | * @name: the name used the event name, as well as the name of | ||
11 | * the directory that holds the format file. | ||
12 | * | ||
13 | * @struct_name: the name of the structure that is created. | ||
14 | * | ||
15 | * @id: The event identifier that is used to detect what event | ||
16 | * this is from the ring buffer. | ||
17 | * | ||
18 | * @structure: the structure layout | ||
19 | * | ||
20 | * - __field( type, item ) | ||
21 | * This is equivalent to declaring | ||
22 | * type item; | ||
23 | * in the structure. | ||
24 | * - __array( type, item, size ) | ||
25 | * This is equivalent to declaring | ||
26 | * type item[size]; | ||
27 | * in the structure. | ||
28 | * | ||
29 | * * for structures within structures, the format of the internal | ||
30 | * structure is layed out. This allows the internal structure | ||
31 | * to be deciphered for the format file. Although these macros | ||
32 | * may become out of sync with the internal structure, they | ||
33 | * will create a compile error if it happens. Since the | ||
34 | * internel structures are just tracing helpers, this is not | ||
35 | * an issue. | ||
36 | * | ||
37 | * When an internal structure is used, it should use: | ||
38 | * | ||
39 | * __field_struct( type, item ) | ||
40 | * | ||
41 | * instead of __field. This will prevent it from being shown in | ||
42 | * the output file. The fields in the structure should use. | ||
43 | * | ||
44 | * __field_desc( type, container, item ) | ||
45 | * __array_desc( type, container, item, len ) | ||
46 | * | ||
47 | * type, item and len are the same as __field and __array, but | ||
48 | * container is added. This is the name of the item in | ||
49 | * __field_struct that this is describing. | ||
50 | * | ||
51 | * | ||
52 | * @print: the print format shown to users in the format file. | ||
53 | */ | ||
54 | |||
55 | /* | ||
56 | * Function trace entry - function address and parent function addres: | ||
57 | */ | ||
58 | FTRACE_ENTRY(function, ftrace_entry, | ||
59 | |||
60 | TRACE_FN, | ||
61 | |||
62 | F_STRUCT( | ||
63 | __field( unsigned long, ip ) | ||
64 | __field( unsigned long, parent_ip ) | ||
65 | ), | ||
66 | |||
67 | F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip) | ||
68 | ); | ||
69 | |||
70 | /* Function call entry */ | ||
71 | FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry, | ||
72 | |||
73 | TRACE_GRAPH_ENT, | ||
74 | |||
75 | F_STRUCT( | ||
76 | __field_struct( struct ftrace_graph_ent, graph_ent ) | ||
77 | __field_desc( unsigned long, graph_ent, func ) | ||
78 | __field_desc( int, graph_ent, depth ) | ||
79 | ), | ||
80 | |||
81 | F_printk("--> %lx (%d)", __entry->func, __entry->depth) | ||
82 | ); | ||
83 | |||
84 | /* Function return entry */ | ||
85 | FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry, | ||
86 | |||
87 | TRACE_GRAPH_RET, | ||
88 | |||
89 | F_STRUCT( | ||
90 | __field_struct( struct ftrace_graph_ret, ret ) | ||
91 | __field_desc( unsigned long, ret, func ) | ||
92 | __field_desc( unsigned long long, ret, calltime) | ||
93 | __field_desc( unsigned long long, ret, rettime ) | ||
94 | __field_desc( unsigned long, ret, overrun ) | ||
95 | __field_desc( int, ret, depth ) | ||
96 | ), | ||
97 | |||
98 | F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d", | ||
99 | __entry->func, __entry->depth, | ||
100 | __entry->calltime, __entry->rettime, | ||
101 | __entry->depth) | ||
102 | ); | ||
103 | |||
104 | /* | ||
105 | * Context switch trace entry - which task (and prio) we switched from/to: | ||
106 | * | ||
107 | * This is used for both wakeup and context switches. We only want | ||
108 | * to create one structure, but we need two outputs for it. | ||
109 | */ | ||
110 | #define FTRACE_CTX_FIELDS \ | ||
111 | __field( unsigned int, prev_pid ) \ | ||
112 | __field( unsigned char, prev_prio ) \ | ||
113 | __field( unsigned char, prev_state ) \ | ||
114 | __field( unsigned int, next_pid ) \ | ||
115 | __field( unsigned char, next_prio ) \ | ||
116 | __field( unsigned char, next_state ) \ | ||
117 | __field( unsigned int, next_cpu ) | ||
118 | |||
119 | FTRACE_ENTRY(context_switch, ctx_switch_entry, | ||
120 | |||
121 | TRACE_CTX, | ||
122 | |||
123 | F_STRUCT( | ||
124 | FTRACE_CTX_FIELDS | ||
125 | ), | ||
126 | |||
127 | F_printk("%u:%u:%u ==> %u:%u:%u [%03u]", | ||
128 | __entry->prev_pid, __entry->prev_prio, __entry->prev_state, | ||
129 | __entry->next_pid, __entry->next_prio, __entry->next_state, | ||
130 | __entry->next_cpu | ||
131 | ) | ||
132 | ); | ||
133 | |||
134 | /* | ||
135 | * FTRACE_ENTRY_DUP only creates the format file, it will not | ||
136 | * create another structure. | ||
137 | */ | ||
138 | FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry, | ||
139 | |||
140 | TRACE_WAKE, | ||
141 | |||
142 | F_STRUCT( | ||
143 | FTRACE_CTX_FIELDS | ||
144 | ), | ||
145 | |||
146 | F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]", | ||
147 | __entry->prev_pid, __entry->prev_prio, __entry->prev_state, | ||
148 | __entry->next_pid, __entry->next_prio, __entry->next_state, | ||
149 | __entry->next_cpu | ||
150 | ) | ||
151 | ); | ||
152 | |||
153 | /* | ||
154 | * Stack-trace entry: | ||
155 | */ | ||
156 | |||
157 | #define FTRACE_STACK_ENTRIES 8 | ||
158 | |||
159 | FTRACE_ENTRY(kernel_stack, stack_entry, | ||
160 | |||
161 | TRACE_STACK, | ||
162 | |||
163 | F_STRUCT( | ||
164 | __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) | ||
165 | ), | ||
166 | |||
167 | F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" | ||
168 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", | ||
169 | __entry->caller[0], __entry->caller[1], __entry->caller[2], | ||
170 | __entry->caller[3], __entry->caller[4], __entry->caller[5], | ||
171 | __entry->caller[6], __entry->caller[7]) | ||
172 | ); | ||
173 | |||
174 | FTRACE_ENTRY(user_stack, userstack_entry, | ||
175 | |||
176 | TRACE_USER_STACK, | ||
177 | |||
178 | F_STRUCT( | ||
179 | __field( unsigned int, tgid ) | ||
180 | __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) | ||
181 | ), | ||
182 | |||
183 | F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" | ||
184 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", | ||
185 | __entry->caller[0], __entry->caller[1], __entry->caller[2], | ||
186 | __entry->caller[3], __entry->caller[4], __entry->caller[5], | ||
187 | __entry->caller[6], __entry->caller[7]) | ||
188 | ); | ||
189 | |||
190 | /* | ||
191 | * trace_printk entry: | ||
192 | */ | ||
193 | FTRACE_ENTRY(bprint, bprint_entry, | ||
194 | |||
195 | TRACE_BPRINT, | ||
196 | |||
197 | F_STRUCT( | ||
198 | __field( unsigned long, ip ) | ||
199 | __field( const char *, fmt ) | ||
200 | __dynamic_array( u32, buf ) | ||
201 | ), | ||
202 | |||
203 | F_printk("%08lx fmt:%p", | ||
204 | __entry->ip, __entry->fmt) | ||
205 | ); | ||
206 | |||
207 | FTRACE_ENTRY(print, print_entry, | ||
208 | |||
209 | TRACE_PRINT, | ||
210 | |||
211 | F_STRUCT( | ||
212 | __field( unsigned long, ip ) | ||
213 | __dynamic_array( char, buf ) | ||
214 | ), | ||
215 | |||
216 | F_printk("%08lx %s", | ||
217 | __entry->ip, __entry->buf) | ||
218 | ); | ||
219 | |||
220 | FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, | ||
221 | |||
222 | TRACE_MMIO_RW, | ||
223 | |||
224 | F_STRUCT( | ||
225 | __field_struct( struct mmiotrace_rw, rw ) | ||
226 | __field_desc( resource_size_t, rw, phys ) | ||
227 | __field_desc( unsigned long, rw, value ) | ||
228 | __field_desc( unsigned long, rw, pc ) | ||
229 | __field_desc( int, rw, map_id ) | ||
230 | __field_desc( unsigned char, rw, opcode ) | ||
231 | __field_desc( unsigned char, rw, width ) | ||
232 | ), | ||
233 | |||
234 | F_printk("%lx %lx %lx %d %x %x", | ||
235 | (unsigned long)__entry->phys, __entry->value, __entry->pc, | ||
236 | __entry->map_id, __entry->opcode, __entry->width) | ||
237 | ); | ||
238 | |||
239 | FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, | ||
240 | |||
241 | TRACE_MMIO_MAP, | ||
242 | |||
243 | F_STRUCT( | ||
244 | __field_struct( struct mmiotrace_map, map ) | ||
245 | __field_desc( resource_size_t, map, phys ) | ||
246 | __field_desc( unsigned long, map, virt ) | ||
247 | __field_desc( unsigned long, map, len ) | ||
248 | __field_desc( int, map, map_id ) | ||
249 | __field_desc( unsigned char, map, opcode ) | ||
250 | ), | ||
251 | |||
252 | F_printk("%lx %lx %lx %d %x", | ||
253 | (unsigned long)__entry->phys, __entry->virt, __entry->len, | ||
254 | __entry->map_id, __entry->opcode) | ||
255 | ); | ||
256 | |||
257 | |||
258 | #define TRACE_FUNC_SIZE 30 | ||
259 | #define TRACE_FILE_SIZE 20 | ||
260 | |||
261 | FTRACE_ENTRY(branch, trace_branch, | ||
262 | |||
263 | TRACE_BRANCH, | ||
264 | |||
265 | F_STRUCT( | ||
266 | __field( unsigned int, line ) | ||
267 | __array( char, func, TRACE_FUNC_SIZE+1 ) | ||
268 | __array( char, file, TRACE_FILE_SIZE+1 ) | ||
269 | __field( char, correct ) | ||
270 | ), | ||
271 | |||
272 | F_printk("%u:%s:%s (%u)", | ||
273 | __entry->line, | ||
274 | __entry->func, __entry->file, __entry->correct) | ||
275 | ); | ||
276 | |||
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c new file mode 100644 index 000000000000..19a359d5e6d5 --- /dev/null +++ b/kernel/trace/trace_event_perf.c | |||
@@ -0,0 +1,216 @@ | |||
1 | /* | ||
2 | * trace event based perf event profiling/tracing | ||
3 | * | ||
4 | * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com> | ||
5 | * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com> | ||
6 | */ | ||
7 | |||
8 | #include <linux/module.h> | ||
9 | #include <linux/kprobes.h> | ||
10 | #include "trace.h" | ||
11 | |||
12 | static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; | ||
13 | |||
14 | /* | ||
15 | * Force it to be aligned to unsigned long to avoid misaligned accesses | ||
16 | * suprises | ||
17 | */ | ||
18 | typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) | ||
19 | perf_trace_t; | ||
20 | |||
21 | /* Count the events in use (per event id, not per instance) */ | ||
22 | static int total_ref_count; | ||
23 | |||
24 | static int perf_trace_event_perm(struct ftrace_event_call *tp_event, | ||
25 | struct perf_event *p_event) | ||
26 | { | ||
27 | /* No tracing, just counting, so no obvious leak */ | ||
28 | if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) | ||
29 | return 0; | ||
30 | |||
31 | /* Some events are ok to be traced by non-root users... */ | ||
32 | if (p_event->attach_state == PERF_ATTACH_TASK) { | ||
33 | if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY) | ||
34 | return 0; | ||
35 | } | ||
36 | |||
37 | /* | ||
38 | * ...otherwise raw tracepoint data can be a severe data leak, | ||
39 | * only allow root to have these. | ||
40 | */ | ||
41 | if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) | ||
42 | return -EPERM; | ||
43 | |||
44 | return 0; | ||
45 | } | ||
46 | |||
47 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, | ||
48 | struct perf_event *p_event) | ||
49 | { | ||
50 | struct hlist_head __percpu *list; | ||
51 | int ret; | ||
52 | int cpu; | ||
53 | |||
54 | ret = perf_trace_event_perm(tp_event, p_event); | ||
55 | if (ret) | ||
56 | return ret; | ||
57 | |||
58 | p_event->tp_event = tp_event; | ||
59 | if (tp_event->perf_refcount++ > 0) | ||
60 | return 0; | ||
61 | |||
62 | ret = -ENOMEM; | ||
63 | |||
64 | list = alloc_percpu(struct hlist_head); | ||
65 | if (!list) | ||
66 | goto fail; | ||
67 | |||
68 | for_each_possible_cpu(cpu) | ||
69 | INIT_HLIST_HEAD(per_cpu_ptr(list, cpu)); | ||
70 | |||
71 | tp_event->perf_events = list; | ||
72 | |||
73 | if (!total_ref_count) { | ||
74 | char __percpu *buf; | ||
75 | int i; | ||
76 | |||
77 | for (i = 0; i < PERF_NR_CONTEXTS; i++) { | ||
78 | buf = (char __percpu *)alloc_percpu(perf_trace_t); | ||
79 | if (!buf) | ||
80 | goto fail; | ||
81 | |||
82 | perf_trace_buf[i] = buf; | ||
83 | } | ||
84 | } | ||
85 | |||
86 | ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER); | ||
87 | if (ret) | ||
88 | goto fail; | ||
89 | |||
90 | total_ref_count++; | ||
91 | return 0; | ||
92 | |||
93 | fail: | ||
94 | if (!total_ref_count) { | ||
95 | int i; | ||
96 | |||
97 | for (i = 0; i < PERF_NR_CONTEXTS; i++) { | ||
98 | free_percpu(perf_trace_buf[i]); | ||
99 | perf_trace_buf[i] = NULL; | ||
100 | } | ||
101 | } | ||
102 | |||
103 | if (!--tp_event->perf_refcount) { | ||
104 | free_percpu(tp_event->perf_events); | ||
105 | tp_event->perf_events = NULL; | ||
106 | } | ||
107 | |||
108 | return ret; | ||
109 | } | ||
110 | |||
111 | int perf_trace_init(struct perf_event *p_event) | ||
112 | { | ||
113 | struct ftrace_event_call *tp_event; | ||
114 | int event_id = p_event->attr.config; | ||
115 | int ret = -EINVAL; | ||
116 | |||
117 | mutex_lock(&event_mutex); | ||
118 | list_for_each_entry(tp_event, &ftrace_events, list) { | ||
119 | if (tp_event->event.type == event_id && | ||
120 | tp_event->class && tp_event->class->reg && | ||
121 | try_module_get(tp_event->mod)) { | ||
122 | ret = perf_trace_event_init(tp_event, p_event); | ||
123 | if (ret) | ||
124 | module_put(tp_event->mod); | ||
125 | break; | ||
126 | } | ||
127 | } | ||
128 | mutex_unlock(&event_mutex); | ||
129 | |||
130 | return ret; | ||
131 | } | ||
132 | |||
133 | int perf_trace_add(struct perf_event *p_event, int flags) | ||
134 | { | ||
135 | struct ftrace_event_call *tp_event = p_event->tp_event; | ||
136 | struct hlist_head __percpu *pcpu_list; | ||
137 | struct hlist_head *list; | ||
138 | |||
139 | pcpu_list = tp_event->perf_events; | ||
140 | if (WARN_ON_ONCE(!pcpu_list)) | ||
141 | return -EINVAL; | ||
142 | |||
143 | if (!(flags & PERF_EF_START)) | ||
144 | p_event->hw.state = PERF_HES_STOPPED; | ||
145 | |||
146 | list = this_cpu_ptr(pcpu_list); | ||
147 | hlist_add_head_rcu(&p_event->hlist_entry, list); | ||
148 | |||
149 | return 0; | ||
150 | } | ||
151 | |||
152 | void perf_trace_del(struct perf_event *p_event, int flags) | ||
153 | { | ||
154 | hlist_del_rcu(&p_event->hlist_entry); | ||
155 | } | ||
156 | |||
157 | void perf_trace_destroy(struct perf_event *p_event) | ||
158 | { | ||
159 | struct ftrace_event_call *tp_event = p_event->tp_event; | ||
160 | int i; | ||
161 | |||
162 | mutex_lock(&event_mutex); | ||
163 | if (--tp_event->perf_refcount > 0) | ||
164 | goto out; | ||
165 | |||
166 | tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER); | ||
167 | |||
168 | /* | ||
169 | * Ensure our callback won't be called anymore. The buffers | ||
170 | * will be freed after that. | ||
171 | */ | ||
172 | tracepoint_synchronize_unregister(); | ||
173 | |||
174 | free_percpu(tp_event->perf_events); | ||
175 | tp_event->perf_events = NULL; | ||
176 | |||
177 | if (!--total_ref_count) { | ||
178 | for (i = 0; i < PERF_NR_CONTEXTS; i++) { | ||
179 | free_percpu(perf_trace_buf[i]); | ||
180 | perf_trace_buf[i] = NULL; | ||
181 | } | ||
182 | } | ||
183 | out: | ||
184 | module_put(tp_event->mod); | ||
185 | mutex_unlock(&event_mutex); | ||
186 | } | ||
187 | |||
188 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | ||
189 | struct pt_regs *regs, int *rctxp) | ||
190 | { | ||
191 | struct trace_entry *entry; | ||
192 | unsigned long flags; | ||
193 | char *raw_data; | ||
194 | int pc; | ||
195 | |||
196 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); | ||
197 | |||
198 | pc = preempt_count(); | ||
199 | |||
200 | *rctxp = perf_swevent_get_recursion_context(); | ||
201 | if (*rctxp < 0) | ||
202 | return NULL; | ||
203 | |||
204 | raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]); | ||
205 | |||
206 | /* zero the dead bytes from align to not leak stack to user */ | ||
207 | memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); | ||
208 | |||
209 | entry = (struct trace_entry *)raw_data; | ||
210 | local_save_flags(flags); | ||
211 | tracing_generic_entry_update(entry, flags, pc); | ||
212 | entry->type = type; | ||
213 | |||
214 | return raw_data; | ||
215 | } | ||
216 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); | ||
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c deleted file mode 100644 index 5b5895afecfe..000000000000 --- a/kernel/trace/trace_event_profile.c +++ /dev/null | |||
@@ -1,39 +0,0 @@ | |||
1 | /* | ||
2 | * trace event based perf counter profiling | ||
3 | * | ||
4 | * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com> | ||
5 | * | ||
6 | */ | ||
7 | |||
8 | #include "trace.h" | ||
9 | |||
10 | int ftrace_profile_enable(int event_id) | ||
11 | { | ||
12 | struct ftrace_event_call *event; | ||
13 | int ret = -EINVAL; | ||
14 | |||
15 | mutex_lock(&event_mutex); | ||
16 | list_for_each_entry(event, &ftrace_events, list) { | ||
17 | if (event->id == event_id) { | ||
18 | ret = event->profile_enable(event); | ||
19 | break; | ||
20 | } | ||
21 | } | ||
22 | mutex_unlock(&event_mutex); | ||
23 | |||
24 | return ret; | ||
25 | } | ||
26 | |||
27 | void ftrace_profile_disable(int event_id) | ||
28 | { | ||
29 | struct ftrace_event_call *event; | ||
30 | |||
31 | mutex_lock(&event_mutex); | ||
32 | list_for_each_entry(event, &ftrace_events, list) { | ||
33 | if (event->id == event_id) { | ||
34 | event->profile_disable(event); | ||
35 | break; | ||
36 | } | ||
37 | } | ||
38 | mutex_unlock(&event_mutex); | ||
39 | } | ||
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h deleted file mode 100644 index 5e32e375134d..000000000000 --- a/kernel/trace/trace_event_types.h +++ /dev/null | |||
@@ -1,175 +0,0 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM ftrace | ||
3 | |||
4 | /* | ||
5 | * We cheat and use the proto type field as the ID | ||
6 | * and args as the entry type (minus 'struct') | ||
7 | */ | ||
8 | TRACE_EVENT_FORMAT(function, TRACE_FN, ftrace_entry, ignore, | ||
9 | TRACE_STRUCT( | ||
10 | TRACE_FIELD(unsigned long, ip, ip) | ||
11 | TRACE_FIELD(unsigned long, parent_ip, parent_ip) | ||
12 | ), | ||
13 | TP_RAW_FMT(" %lx <-- %lx") | ||
14 | ); | ||
15 | |||
16 | TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT, | ||
17 | ftrace_graph_ent_entry, ignore, | ||
18 | TRACE_STRUCT( | ||
19 | TRACE_FIELD(unsigned long, graph_ent.func, func) | ||
20 | TRACE_FIELD(int, graph_ent.depth, depth) | ||
21 | ), | ||
22 | TP_RAW_FMT("--> %lx (%d)") | ||
23 | ); | ||
24 | |||
25 | TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET, | ||
26 | ftrace_graph_ret_entry, ignore, | ||
27 | TRACE_STRUCT( | ||
28 | TRACE_FIELD(unsigned long, ret.func, func) | ||
29 | TRACE_FIELD(int, ret.depth, depth) | ||
30 | ), | ||
31 | TP_RAW_FMT("<-- %lx (%d)") | ||
32 | ); | ||
33 | |||
34 | TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore, | ||
35 | TRACE_STRUCT( | ||
36 | TRACE_FIELD(unsigned int, prev_pid, prev_pid) | ||
37 | TRACE_FIELD(unsigned char, prev_prio, prev_prio) | ||
38 | TRACE_FIELD(unsigned char, prev_state, prev_state) | ||
39 | TRACE_FIELD(unsigned int, next_pid, next_pid) | ||
40 | TRACE_FIELD(unsigned char, next_prio, next_prio) | ||
41 | TRACE_FIELD(unsigned char, next_state, next_state) | ||
42 | TRACE_FIELD(unsigned int, next_cpu, next_cpu) | ||
43 | ), | ||
44 | TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") | ||
45 | ); | ||
46 | |||
47 | TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, | ||
48 | TRACE_STRUCT( | ||
49 | TRACE_FIELD(unsigned int, prev_pid, prev_pid) | ||
50 | TRACE_FIELD(unsigned char, prev_prio, prev_prio) | ||
51 | TRACE_FIELD(unsigned char, prev_state, prev_state) | ||
52 | TRACE_FIELD(unsigned int, next_pid, next_pid) | ||
53 | TRACE_FIELD(unsigned char, next_prio, next_prio) | ||
54 | TRACE_FIELD(unsigned char, next_state, next_state) | ||
55 | TRACE_FIELD(unsigned int, next_cpu, next_cpu) | ||
56 | ), | ||
57 | TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") | ||
58 | ); | ||
59 | |||
60 | TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore, | ||
61 | TRACE_STRUCT( | ||
62 | TRACE_FIELD(unsigned long, arg1, arg1) | ||
63 | TRACE_FIELD(unsigned long, arg2, arg2) | ||
64 | TRACE_FIELD(unsigned long, arg3, arg3) | ||
65 | ), | ||
66 | TP_RAW_FMT("(%08lx) (%08lx) (%08lx)") | ||
67 | ); | ||
68 | |||
69 | /* | ||
70 | * Stack-trace entry: | ||
71 | */ | ||
72 | |||
73 | /* #define FTRACE_STACK_ENTRIES 8 */ | ||
74 | |||
75 | TRACE_EVENT_FORMAT(kernel_stack, TRACE_STACK, stack_entry, ignore, | ||
76 | TRACE_STRUCT( | ||
77 | TRACE_FIELD(unsigned long, caller[0], stack0) | ||
78 | TRACE_FIELD(unsigned long, caller[1], stack1) | ||
79 | TRACE_FIELD(unsigned long, caller[2], stack2) | ||
80 | TRACE_FIELD(unsigned long, caller[3], stack3) | ||
81 | TRACE_FIELD(unsigned long, caller[4], stack4) | ||
82 | TRACE_FIELD(unsigned long, caller[5], stack5) | ||
83 | TRACE_FIELD(unsigned long, caller[6], stack6) | ||
84 | TRACE_FIELD(unsigned long, caller[7], stack7) | ||
85 | ), | ||
86 | TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" | ||
87 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") | ||
88 | ); | ||
89 | |||
90 | TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore, | ||
91 | TRACE_STRUCT( | ||
92 | TRACE_FIELD(unsigned long, caller[0], stack0) | ||
93 | TRACE_FIELD(unsigned long, caller[1], stack1) | ||
94 | TRACE_FIELD(unsigned long, caller[2], stack2) | ||
95 | TRACE_FIELD(unsigned long, caller[3], stack3) | ||
96 | TRACE_FIELD(unsigned long, caller[4], stack4) | ||
97 | TRACE_FIELD(unsigned long, caller[5], stack5) | ||
98 | TRACE_FIELD(unsigned long, caller[6], stack6) | ||
99 | TRACE_FIELD(unsigned long, caller[7], stack7) | ||
100 | ), | ||
101 | TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" | ||
102 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") | ||
103 | ); | ||
104 | |||
105 | TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore, | ||
106 | TRACE_STRUCT( | ||
107 | TRACE_FIELD(unsigned long, ip, ip) | ||
108 | TRACE_FIELD(char *, fmt, fmt) | ||
109 | TRACE_FIELD_ZERO_CHAR(buf) | ||
110 | ), | ||
111 | TP_RAW_FMT("%08lx (%d) fmt:%p %s") | ||
112 | ); | ||
113 | |||
114 | TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, | ||
115 | TRACE_STRUCT( | ||
116 | TRACE_FIELD(unsigned long, ip, ip) | ||
117 | TRACE_FIELD_ZERO_CHAR(buf) | ||
118 | ), | ||
119 | TP_RAW_FMT("%08lx (%d) fmt:%p %s") | ||
120 | ); | ||
121 | |||
122 | TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, | ||
123 | TRACE_STRUCT( | ||
124 | TRACE_FIELD(unsigned int, line, line) | ||
125 | TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, | ||
126 | TRACE_FUNC_SIZE+1, func) | ||
127 | TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, | ||
128 | TRACE_FUNC_SIZE+1, file) | ||
129 | TRACE_FIELD(char, correct, correct) | ||
130 | ), | ||
131 | TP_RAW_FMT("%u:%s:%s (%u)") | ||
132 | ); | ||
133 | |||
134 | TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore, | ||
135 | TRACE_STRUCT( | ||
136 | TRACE_FIELD(u64, from, from) | ||
137 | TRACE_FIELD(u64, to, to) | ||
138 | ), | ||
139 | TP_RAW_FMT("from: %llx to: %llx") | ||
140 | ); | ||
141 | |||
142 | TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, | ||
143 | TRACE_STRUCT( | ||
144 | TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1) | ||
145 | TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1) | ||
146 | TRACE_FIELD(int, state_data.type, type) | ||
147 | TRACE_FIELD(int, state_data.state, state) | ||
148 | ), | ||
149 | TP_RAW_FMT("%llx->%llx type:%u state:%u") | ||
150 | ); | ||
151 | |||
152 | TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore, | ||
153 | TRACE_STRUCT( | ||
154 | TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id) | ||
155 | TRACE_FIELD(unsigned long, call_site, call_site) | ||
156 | TRACE_FIELD(const void *, ptr, ptr) | ||
157 | TRACE_FIELD(size_t, bytes_req, bytes_req) | ||
158 | TRACE_FIELD(size_t, bytes_alloc, bytes_alloc) | ||
159 | TRACE_FIELD(gfp_t, gfp_flags, gfp_flags) | ||
160 | TRACE_FIELD(int, node, node) | ||
161 | ), | ||
162 | TP_RAW_FMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu" | ||
163 | " flags:%x node:%d") | ||
164 | ); | ||
165 | |||
166 | TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore, | ||
167 | TRACE_STRUCT( | ||
168 | TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id) | ||
169 | TRACE_FIELD(unsigned long, call_site, call_site) | ||
170 | TRACE_FIELD(const void *, ptr, ptr) | ||
171 | ), | ||
172 | TP_RAW_FMT("type:%u call_site:%lx ptr:%p") | ||
173 | ); | ||
174 | |||
175 | #undef TRACE_SYSTEM | ||
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index aa08be69a1b6..35fde09b81de 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -15,18 +15,38 @@ | |||
15 | #include <linux/uaccess.h> | 15 | #include <linux/uaccess.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/ctype.h> | 17 | #include <linux/ctype.h> |
18 | #include <linux/slab.h> | ||
18 | #include <linux/delay.h> | 19 | #include <linux/delay.h> |
19 | 20 | ||
21 | #include <asm/setup.h> | ||
22 | |||
20 | #include "trace_output.h" | 23 | #include "trace_output.h" |
21 | 24 | ||
25 | #undef TRACE_SYSTEM | ||
22 | #define TRACE_SYSTEM "TRACE_SYSTEM" | 26 | #define TRACE_SYSTEM "TRACE_SYSTEM" |
23 | 27 | ||
24 | DEFINE_MUTEX(event_mutex); | 28 | DEFINE_MUTEX(event_mutex); |
25 | 29 | ||
30 | DEFINE_MUTEX(event_storage_mutex); | ||
31 | EXPORT_SYMBOL_GPL(event_storage_mutex); | ||
32 | |||
33 | char event_storage[EVENT_STORAGE_SIZE]; | ||
34 | EXPORT_SYMBOL_GPL(event_storage); | ||
35 | |||
26 | LIST_HEAD(ftrace_events); | 36 | LIST_HEAD(ftrace_events); |
37 | LIST_HEAD(ftrace_common_fields); | ||
27 | 38 | ||
28 | int trace_define_field(struct ftrace_event_call *call, char *type, | 39 | struct list_head * |
29 | char *name, int offset, int size, int is_signed) | 40 | trace_get_fields(struct ftrace_event_call *event_call) |
41 | { | ||
42 | if (!event_call->class->get_fields) | ||
43 | return &event_call->class->fields; | ||
44 | return event_call->class->get_fields(event_call); | ||
45 | } | ||
46 | |||
47 | static int __trace_define_field(struct list_head *head, const char *type, | ||
48 | const char *name, int offset, int size, | ||
49 | int is_signed, int filter_type) | ||
30 | { | 50 | { |
31 | struct ftrace_event_field *field; | 51 | struct ftrace_event_field *field; |
32 | 52 | ||
@@ -42,31 +62,72 @@ int trace_define_field(struct ftrace_event_call *call, char *type, | |||
42 | if (!field->type) | 62 | if (!field->type) |
43 | goto err; | 63 | goto err; |
44 | 64 | ||
65 | if (filter_type == FILTER_OTHER) | ||
66 | field->filter_type = filter_assign_type(type); | ||
67 | else | ||
68 | field->filter_type = filter_type; | ||
69 | |||
45 | field->offset = offset; | 70 | field->offset = offset; |
46 | field->size = size; | 71 | field->size = size; |
47 | field->is_signed = is_signed; | 72 | field->is_signed = is_signed; |
48 | list_add(&field->link, &call->fields); | 73 | |
74 | list_add(&field->link, head); | ||
49 | 75 | ||
50 | return 0; | 76 | return 0; |
51 | 77 | ||
52 | err: | 78 | err: |
53 | if (field) { | 79 | if (field) |
54 | kfree(field->name); | 80 | kfree(field->name); |
55 | kfree(field->type); | ||
56 | } | ||
57 | kfree(field); | 81 | kfree(field); |
58 | 82 | ||
59 | return -ENOMEM; | 83 | return -ENOMEM; |
60 | } | 84 | } |
85 | |||
86 | int trace_define_field(struct ftrace_event_call *call, const char *type, | ||
87 | const char *name, int offset, int size, int is_signed, | ||
88 | int filter_type) | ||
89 | { | ||
90 | struct list_head *head; | ||
91 | |||
92 | if (WARN_ON(!call->class)) | ||
93 | return 0; | ||
94 | |||
95 | head = trace_get_fields(call); | ||
96 | return __trace_define_field(head, type, name, offset, size, | ||
97 | is_signed, filter_type); | ||
98 | } | ||
61 | EXPORT_SYMBOL_GPL(trace_define_field); | 99 | EXPORT_SYMBOL_GPL(trace_define_field); |
62 | 100 | ||
63 | #ifdef CONFIG_MODULES | 101 | #define __common_field(type, item) \ |
102 | ret = __trace_define_field(&ftrace_common_fields, #type, \ | ||
103 | "common_" #item, \ | ||
104 | offsetof(typeof(ent), item), \ | ||
105 | sizeof(ent.item), \ | ||
106 | is_signed_type(type), FILTER_OTHER); \ | ||
107 | if (ret) \ | ||
108 | return ret; | ||
109 | |||
110 | static int trace_define_common_fields(void) | ||
111 | { | ||
112 | int ret; | ||
113 | struct trace_entry ent; | ||
114 | |||
115 | __common_field(unsigned short, type); | ||
116 | __common_field(unsigned char, flags); | ||
117 | __common_field(unsigned char, preempt_count); | ||
118 | __common_field(int, pid); | ||
119 | __common_field(int, lock_depth); | ||
120 | |||
121 | return ret; | ||
122 | } | ||
64 | 123 | ||
65 | static void trace_destroy_fields(struct ftrace_event_call *call) | 124 | void trace_destroy_fields(struct ftrace_event_call *call) |
66 | { | 125 | { |
67 | struct ftrace_event_field *field, *next; | 126 | struct ftrace_event_field *field, *next; |
127 | struct list_head *head; | ||
68 | 128 | ||
69 | list_for_each_entry_safe(field, next, &call->fields, link) { | 129 | head = trace_get_fields(call); |
130 | list_for_each_entry_safe(field, next, head, link) { | ||
70 | list_del(&field->link); | 131 | list_del(&field->link); |
71 | kfree(field->type); | 132 | kfree(field->type); |
72 | kfree(field->name); | 133 | kfree(field->name); |
@@ -74,27 +135,102 @@ static void trace_destroy_fields(struct ftrace_event_call *call) | |||
74 | } | 135 | } |
75 | } | 136 | } |
76 | 137 | ||
77 | #endif /* CONFIG_MODULES */ | 138 | int trace_event_raw_init(struct ftrace_event_call *call) |
139 | { | ||
140 | int id; | ||
141 | |||
142 | id = register_ftrace_event(&call->event); | ||
143 | if (!id) | ||
144 | return -ENODEV; | ||
145 | |||
146 | return 0; | ||
147 | } | ||
148 | EXPORT_SYMBOL_GPL(trace_event_raw_init); | ||
149 | |||
150 | int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type) | ||
151 | { | ||
152 | switch (type) { | ||
153 | case TRACE_REG_REGISTER: | ||
154 | return tracepoint_probe_register(call->name, | ||
155 | call->class->probe, | ||
156 | call); | ||
157 | case TRACE_REG_UNREGISTER: | ||
158 | tracepoint_probe_unregister(call->name, | ||
159 | call->class->probe, | ||
160 | call); | ||
161 | return 0; | ||
162 | |||
163 | #ifdef CONFIG_PERF_EVENTS | ||
164 | case TRACE_REG_PERF_REGISTER: | ||
165 | return tracepoint_probe_register(call->name, | ||
166 | call->class->perf_probe, | ||
167 | call); | ||
168 | case TRACE_REG_PERF_UNREGISTER: | ||
169 | tracepoint_probe_unregister(call->name, | ||
170 | call->class->perf_probe, | ||
171 | call); | ||
172 | return 0; | ||
173 | #endif | ||
174 | } | ||
175 | return 0; | ||
176 | } | ||
177 | EXPORT_SYMBOL_GPL(ftrace_event_reg); | ||
178 | |||
179 | void trace_event_enable_cmd_record(bool enable) | ||
180 | { | ||
181 | struct ftrace_event_call *call; | ||
182 | |||
183 | mutex_lock(&event_mutex); | ||
184 | list_for_each_entry(call, &ftrace_events, list) { | ||
185 | if (!(call->flags & TRACE_EVENT_FL_ENABLED)) | ||
186 | continue; | ||
187 | |||
188 | if (enable) { | ||
189 | tracing_start_cmdline_record(); | ||
190 | call->flags |= TRACE_EVENT_FL_RECORDED_CMD; | ||
191 | } else { | ||
192 | tracing_stop_cmdline_record(); | ||
193 | call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; | ||
194 | } | ||
195 | } | ||
196 | mutex_unlock(&event_mutex); | ||
197 | } | ||
78 | 198 | ||
79 | static void ftrace_event_enable_disable(struct ftrace_event_call *call, | 199 | static int ftrace_event_enable_disable(struct ftrace_event_call *call, |
80 | int enable) | 200 | int enable) |
81 | { | 201 | { |
202 | int ret = 0; | ||
203 | |||
82 | switch (enable) { | 204 | switch (enable) { |
83 | case 0: | 205 | case 0: |
84 | if (call->enabled) { | 206 | if (call->flags & TRACE_EVENT_FL_ENABLED) { |
85 | call->enabled = 0; | 207 | call->flags &= ~TRACE_EVENT_FL_ENABLED; |
86 | tracing_stop_cmdline_record(); | 208 | if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) { |
87 | call->unregfunc(); | 209 | tracing_stop_cmdline_record(); |
210 | call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; | ||
211 | } | ||
212 | call->class->reg(call, TRACE_REG_UNREGISTER); | ||
88 | } | 213 | } |
89 | break; | 214 | break; |
90 | case 1: | 215 | case 1: |
91 | if (!call->enabled) { | 216 | if (!(call->flags & TRACE_EVENT_FL_ENABLED)) { |
92 | call->enabled = 1; | 217 | if (trace_flags & TRACE_ITER_RECORD_CMD) { |
93 | tracing_start_cmdline_record(); | 218 | tracing_start_cmdline_record(); |
94 | call->regfunc(); | 219 | call->flags |= TRACE_EVENT_FL_RECORDED_CMD; |
220 | } | ||
221 | ret = call->class->reg(call, TRACE_REG_REGISTER); | ||
222 | if (ret) { | ||
223 | tracing_stop_cmdline_record(); | ||
224 | pr_info("event trace: Could not enable event " | ||
225 | "%s\n", call->name); | ||
226 | break; | ||
227 | } | ||
228 | call->flags |= TRACE_EVENT_FL_ENABLED; | ||
95 | } | 229 | } |
96 | break; | 230 | break; |
97 | } | 231 | } |
232 | |||
233 | return ret; | ||
98 | } | 234 | } |
99 | 235 | ||
100 | static void ftrace_clear_events(void) | 236 | static void ftrace_clear_events(void) |
@@ -120,15 +256,15 @@ static int __ftrace_set_clr_event(const char *match, const char *sub, | |||
120 | mutex_lock(&event_mutex); | 256 | mutex_lock(&event_mutex); |
121 | list_for_each_entry(call, &ftrace_events, list) { | 257 | list_for_each_entry(call, &ftrace_events, list) { |
122 | 258 | ||
123 | if (!call->name || !call->regfunc) | 259 | if (!call->name || !call->class || !call->class->reg) |
124 | continue; | 260 | continue; |
125 | 261 | ||
126 | if (match && | 262 | if (match && |
127 | strcmp(match, call->name) != 0 && | 263 | strcmp(match, call->name) != 0 && |
128 | strcmp(match, call->system) != 0) | 264 | strcmp(match, call->class->system) != 0) |
129 | continue; | 265 | continue; |
130 | 266 | ||
131 | if (sub && strcmp(sub, call->system) != 0) | 267 | if (sub && strcmp(sub, call->class->system) != 0) |
132 | continue; | 268 | continue; |
133 | 269 | ||
134 | if (event && strcmp(event, call->name) != 0) | 270 | if (event && strcmp(event, call->name) != 0) |
@@ -198,73 +334,38 @@ static ssize_t | |||
198 | ftrace_event_write(struct file *file, const char __user *ubuf, | 334 | ftrace_event_write(struct file *file, const char __user *ubuf, |
199 | size_t cnt, loff_t *ppos) | 335 | size_t cnt, loff_t *ppos) |
200 | { | 336 | { |
201 | size_t read = 0; | 337 | struct trace_parser parser; |
202 | int i, set = 1; | 338 | ssize_t read, ret; |
203 | ssize_t ret; | ||
204 | char *buf; | ||
205 | char ch; | ||
206 | 339 | ||
207 | if (!cnt || cnt < 0) | 340 | if (!cnt) |
208 | return 0; | 341 | return 0; |
209 | 342 | ||
210 | ret = tracing_update_buffers(); | 343 | ret = tracing_update_buffers(); |
211 | if (ret < 0) | 344 | if (ret < 0) |
212 | return ret; | 345 | return ret; |
213 | 346 | ||
214 | ret = get_user(ch, ubuf++); | 347 | if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1)) |
215 | if (ret) | ||
216 | return ret; | ||
217 | read++; | ||
218 | cnt--; | ||
219 | |||
220 | /* skip white space */ | ||
221 | while (cnt && isspace(ch)) { | ||
222 | ret = get_user(ch, ubuf++); | ||
223 | if (ret) | ||
224 | return ret; | ||
225 | read++; | ||
226 | cnt--; | ||
227 | } | ||
228 | |||
229 | /* Only white space found? */ | ||
230 | if (isspace(ch)) { | ||
231 | file->f_pos += read; | ||
232 | ret = read; | ||
233 | return ret; | ||
234 | } | ||
235 | |||
236 | buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL); | ||
237 | if (!buf) | ||
238 | return -ENOMEM; | 348 | return -ENOMEM; |
239 | 349 | ||
240 | if (cnt > EVENT_BUF_SIZE) | 350 | read = trace_get_user(&parser, ubuf, cnt, ppos); |
241 | cnt = EVENT_BUF_SIZE; | ||
242 | 351 | ||
243 | i = 0; | 352 | if (read >= 0 && trace_parser_loaded((&parser))) { |
244 | while (cnt && !isspace(ch)) { | 353 | int set = 1; |
245 | if (!i && ch == '!') | 354 | |
355 | if (*parser.buffer == '!') | ||
246 | set = 0; | 356 | set = 0; |
247 | else | ||
248 | buf[i++] = ch; | ||
249 | 357 | ||
250 | ret = get_user(ch, ubuf++); | 358 | parser.buffer[parser.idx] = 0; |
359 | |||
360 | ret = ftrace_set_clr_event(parser.buffer + !set, set); | ||
251 | if (ret) | 361 | if (ret) |
252 | goto out_free; | 362 | goto out_put; |
253 | read++; | ||
254 | cnt--; | ||
255 | } | 363 | } |
256 | buf[i] = 0; | ||
257 | |||
258 | file->f_pos += read; | ||
259 | |||
260 | ret = ftrace_set_clr_event(buf, set); | ||
261 | if (ret) | ||
262 | goto out_free; | ||
263 | 364 | ||
264 | ret = read; | 365 | ret = read; |
265 | 366 | ||
266 | out_free: | 367 | out_put: |
267 | kfree(buf); | 368 | trace_parser_put(&parser); |
268 | 369 | ||
269 | return ret; | 370 | return ret; |
270 | } | 371 | } |
@@ -272,78 +373,75 @@ ftrace_event_write(struct file *file, const char __user *ubuf, | |||
272 | static void * | 373 | static void * |
273 | t_next(struct seq_file *m, void *v, loff_t *pos) | 374 | t_next(struct seq_file *m, void *v, loff_t *pos) |
274 | { | 375 | { |
275 | struct list_head *list = m->private; | 376 | struct ftrace_event_call *call = v; |
276 | struct ftrace_event_call *call; | ||
277 | 377 | ||
278 | (*pos)++; | 378 | (*pos)++; |
279 | 379 | ||
280 | for (;;) { | 380 | list_for_each_entry_continue(call, &ftrace_events, list) { |
281 | if (list == &ftrace_events) | ||
282 | return NULL; | ||
283 | |||
284 | call = list_entry(list, struct ftrace_event_call, list); | ||
285 | |||
286 | /* | 381 | /* |
287 | * The ftrace subsystem is for showing formats only. | 382 | * The ftrace subsystem is for showing formats only. |
288 | * They can not be enabled or disabled via the event files. | 383 | * They can not be enabled or disabled via the event files. |
289 | */ | 384 | */ |
290 | if (call->regfunc) | 385 | if (call->class && call->class->reg) |
291 | break; | 386 | return call; |
292 | |||
293 | list = list->next; | ||
294 | } | 387 | } |
295 | 388 | ||
296 | m->private = list->next; | 389 | return NULL; |
297 | |||
298 | return call; | ||
299 | } | 390 | } |
300 | 391 | ||
301 | static void *t_start(struct seq_file *m, loff_t *pos) | 392 | static void *t_start(struct seq_file *m, loff_t *pos) |
302 | { | 393 | { |
394 | struct ftrace_event_call *call; | ||
395 | loff_t l; | ||
396 | |||
303 | mutex_lock(&event_mutex); | 397 | mutex_lock(&event_mutex); |
304 | if (*pos == 0) | 398 | |
305 | m->private = ftrace_events.next; | 399 | call = list_entry(&ftrace_events, struct ftrace_event_call, list); |
306 | return t_next(m, NULL, pos); | 400 | for (l = 0; l <= *pos; ) { |
401 | call = t_next(m, call, &l); | ||
402 | if (!call) | ||
403 | break; | ||
404 | } | ||
405 | return call; | ||
307 | } | 406 | } |
308 | 407 | ||
309 | static void * | 408 | static void * |
310 | s_next(struct seq_file *m, void *v, loff_t *pos) | 409 | s_next(struct seq_file *m, void *v, loff_t *pos) |
311 | { | 410 | { |
312 | struct list_head *list = m->private; | 411 | struct ftrace_event_call *call = v; |
313 | struct ftrace_event_call *call; | ||
314 | 412 | ||
315 | (*pos)++; | 413 | (*pos)++; |
316 | 414 | ||
317 | retry: | 415 | list_for_each_entry_continue(call, &ftrace_events, list) { |
318 | if (list == &ftrace_events) | 416 | if (call->flags & TRACE_EVENT_FL_ENABLED) |
319 | return NULL; | 417 | return call; |
320 | |||
321 | call = list_entry(list, struct ftrace_event_call, list); | ||
322 | |||
323 | if (!call->enabled) { | ||
324 | list = list->next; | ||
325 | goto retry; | ||
326 | } | 418 | } |
327 | 419 | ||
328 | m->private = list->next; | 420 | return NULL; |
329 | |||
330 | return call; | ||
331 | } | 421 | } |
332 | 422 | ||
333 | static void *s_start(struct seq_file *m, loff_t *pos) | 423 | static void *s_start(struct seq_file *m, loff_t *pos) |
334 | { | 424 | { |
425 | struct ftrace_event_call *call; | ||
426 | loff_t l; | ||
427 | |||
335 | mutex_lock(&event_mutex); | 428 | mutex_lock(&event_mutex); |
336 | if (*pos == 0) | 429 | |
337 | m->private = ftrace_events.next; | 430 | call = list_entry(&ftrace_events, struct ftrace_event_call, list); |
338 | return s_next(m, NULL, pos); | 431 | for (l = 0; l <= *pos; ) { |
432 | call = s_next(m, call, &l); | ||
433 | if (!call) | ||
434 | break; | ||
435 | } | ||
436 | return call; | ||
339 | } | 437 | } |
340 | 438 | ||
341 | static int t_show(struct seq_file *m, void *v) | 439 | static int t_show(struct seq_file *m, void *v) |
342 | { | 440 | { |
343 | struct ftrace_event_call *call = v; | 441 | struct ftrace_event_call *call = v; |
344 | 442 | ||
345 | if (strcmp(call->system, TRACE_SYSTEM) != 0) | 443 | if (strcmp(call->class->system, TRACE_SYSTEM) != 0) |
346 | seq_printf(m, "%s:", call->system); | 444 | seq_printf(m, "%s:", call->class->system); |
347 | seq_printf(m, "%s\n", call->name); | 445 | seq_printf(m, "%s\n", call->name); |
348 | 446 | ||
349 | return 0; | 447 | return 0; |
@@ -360,7 +458,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file) | |||
360 | const struct seq_operations *seq_ops; | 458 | const struct seq_operations *seq_ops; |
361 | 459 | ||
362 | if ((file->f_mode & FMODE_WRITE) && | 460 | if ((file->f_mode & FMODE_WRITE) && |
363 | !(file->f_flags & O_APPEND)) | 461 | (file->f_flags & O_TRUNC)) |
364 | ftrace_clear_events(); | 462 | ftrace_clear_events(); |
365 | 463 | ||
366 | seq_ops = inode->i_private; | 464 | seq_ops = inode->i_private; |
@@ -374,7 +472,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, | |||
374 | struct ftrace_event_call *call = filp->private_data; | 472 | struct ftrace_event_call *call = filp->private_data; |
375 | char *buf; | 473 | char *buf; |
376 | 474 | ||
377 | if (call->enabled) | 475 | if (call->flags & TRACE_EVENT_FL_ENABLED) |
378 | buf = "1\n"; | 476 | buf = "1\n"; |
379 | else | 477 | else |
380 | buf = "0\n"; | 478 | buf = "0\n"; |
@@ -411,7 +509,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
411 | case 0: | 509 | case 0: |
412 | case 1: | 510 | case 1: |
413 | mutex_lock(&event_mutex); | 511 | mutex_lock(&event_mutex); |
414 | ftrace_event_enable_disable(call, val); | 512 | ret = ftrace_event_enable_disable(call, val); |
415 | mutex_unlock(&event_mutex); | 513 | mutex_unlock(&event_mutex); |
416 | break; | 514 | break; |
417 | 515 | ||
@@ -421,7 +519,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
421 | 519 | ||
422 | *ppos += cnt; | 520 | *ppos += cnt; |
423 | 521 | ||
424 | return cnt; | 522 | return ret ? ret : cnt; |
425 | } | 523 | } |
426 | 524 | ||
427 | static ssize_t | 525 | static ssize_t |
@@ -437,10 +535,10 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, | |||
437 | 535 | ||
438 | mutex_lock(&event_mutex); | 536 | mutex_lock(&event_mutex); |
439 | list_for_each_entry(call, &ftrace_events, list) { | 537 | list_for_each_entry(call, &ftrace_events, list) { |
440 | if (!call->name || !call->regfunc) | 538 | if (!call->name || !call->class || !call->class->reg) |
441 | continue; | 539 | continue; |
442 | 540 | ||
443 | if (system && strcmp(call->system, system) != 0) | 541 | if (system && strcmp(call->class->system, system) != 0) |
444 | continue; | 542 | continue; |
445 | 543 | ||
446 | /* | 544 | /* |
@@ -448,7 +546,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, | |||
448 | * or if all events or cleared, or if we have | 546 | * or if all events or cleared, or if we have |
449 | * a mixture. | 547 | * a mixture. |
450 | */ | 548 | */ |
451 | set |= (1 << !!call->enabled); | 549 | set |= (1 << !!(call->flags & TRACE_EVENT_FL_ENABLED)); |
452 | 550 | ||
453 | /* | 551 | /* |
454 | * If we have a mixture, no need to look further. | 552 | * If we have a mixture, no need to look further. |
@@ -506,74 +604,146 @@ out: | |||
506 | return ret; | 604 | return ret; |
507 | } | 605 | } |
508 | 606 | ||
509 | extern char *__bad_type_size(void); | 607 | enum { |
608 | FORMAT_HEADER = 1, | ||
609 | FORMAT_FIELD_SEPERATOR = 2, | ||
610 | FORMAT_PRINTFMT = 3, | ||
611 | }; | ||
612 | |||
613 | static void *f_next(struct seq_file *m, void *v, loff_t *pos) | ||
614 | { | ||
615 | struct ftrace_event_call *call = m->private; | ||
616 | struct ftrace_event_field *field; | ||
617 | struct list_head *common_head = &ftrace_common_fields; | ||
618 | struct list_head *head = trace_get_fields(call); | ||
619 | |||
620 | (*pos)++; | ||
621 | |||
622 | switch ((unsigned long)v) { | ||
623 | case FORMAT_HEADER: | ||
624 | if (unlikely(list_empty(common_head))) | ||
625 | return NULL; | ||
626 | |||
627 | field = list_entry(common_head->prev, | ||
628 | struct ftrace_event_field, link); | ||
629 | return field; | ||
630 | |||
631 | case FORMAT_FIELD_SEPERATOR: | ||
632 | if (unlikely(list_empty(head))) | ||
633 | return NULL; | ||
634 | |||
635 | field = list_entry(head->prev, struct ftrace_event_field, link); | ||
636 | return field; | ||
510 | 637 | ||
511 | #undef FIELD | 638 | case FORMAT_PRINTFMT: |
512 | #define FIELD(type, name) \ | 639 | /* all done */ |
513 | sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ | 640 | return NULL; |
514 | #type, "common_" #name, offsetof(typeof(field), name), \ | 641 | } |
515 | sizeof(field.name) | 642 | |
643 | field = v; | ||
644 | if (field->link.prev == common_head) | ||
645 | return (void *)FORMAT_FIELD_SEPERATOR; | ||
646 | else if (field->link.prev == head) | ||
647 | return (void *)FORMAT_PRINTFMT; | ||
516 | 648 | ||
517 | static int trace_write_header(struct trace_seq *s) | 649 | field = list_entry(field->link.prev, struct ftrace_event_field, link); |
650 | |||
651 | return field; | ||
652 | } | ||
653 | |||
654 | static void *f_start(struct seq_file *m, loff_t *pos) | ||
518 | { | 655 | { |
519 | struct trace_entry field; | 656 | loff_t l = 0; |
520 | 657 | void *p; | |
521 | /* struct trace_entry */ | 658 | |
522 | return trace_seq_printf(s, | 659 | /* Start by showing the header */ |
523 | "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" | 660 | if (!*pos) |
524 | "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" | 661 | return (void *)FORMAT_HEADER; |
525 | "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" | 662 | |
526 | "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" | 663 | p = (void *)FORMAT_HEADER; |
527 | "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" | 664 | do { |
528 | "\n", | 665 | p = f_next(m, p, &l); |
529 | FIELD(unsigned short, type), | 666 | } while (p && l < *pos); |
530 | FIELD(unsigned char, flags), | 667 | |
531 | FIELD(unsigned char, preempt_count), | 668 | return p; |
532 | FIELD(int, pid), | ||
533 | FIELD(int, tgid)); | ||
534 | } | 669 | } |
535 | 670 | ||
536 | static ssize_t | 671 | static int f_show(struct seq_file *m, void *v) |
537 | event_format_read(struct file *filp, char __user *ubuf, size_t cnt, | ||
538 | loff_t *ppos) | ||
539 | { | 672 | { |
540 | struct ftrace_event_call *call = filp->private_data; | 673 | struct ftrace_event_call *call = m->private; |
541 | struct trace_seq *s; | 674 | struct ftrace_event_field *field; |
542 | char *buf; | 675 | const char *array_descriptor; |
543 | int r; | ||
544 | 676 | ||
545 | if (*ppos) | 677 | switch ((unsigned long)v) { |
678 | case FORMAT_HEADER: | ||
679 | seq_printf(m, "name: %s\n", call->name); | ||
680 | seq_printf(m, "ID: %d\n", call->event.type); | ||
681 | seq_printf(m, "format:\n"); | ||
546 | return 0; | 682 | return 0; |
547 | 683 | ||
548 | s = kmalloc(sizeof(*s), GFP_KERNEL); | 684 | case FORMAT_FIELD_SEPERATOR: |
549 | if (!s) | 685 | seq_putc(m, '\n'); |
550 | return -ENOMEM; | 686 | return 0; |
551 | 687 | ||
552 | trace_seq_init(s); | 688 | case FORMAT_PRINTFMT: |
689 | seq_printf(m, "\nprint fmt: %s\n", | ||
690 | call->print_fmt); | ||
691 | return 0; | ||
692 | } | ||
553 | 693 | ||
554 | /* If any of the first writes fail, so will the show_format. */ | 694 | field = v; |
555 | 695 | ||
556 | trace_seq_printf(s, "name: %s\n", call->name); | 696 | /* |
557 | trace_seq_printf(s, "ID: %d\n", call->id); | 697 | * Smartly shows the array type(except dynamic array). |
558 | trace_seq_printf(s, "format:\n"); | 698 | * Normal: |
559 | trace_write_header(s); | 699 | * field:TYPE VAR |
700 | * If TYPE := TYPE[LEN], it is shown: | ||
701 | * field:TYPE VAR[LEN] | ||
702 | */ | ||
703 | array_descriptor = strchr(field->type, '['); | ||
560 | 704 | ||
561 | r = call->show_format(s); | 705 | if (!strncmp(field->type, "__data_loc", 10)) |
562 | if (!r) { | 706 | array_descriptor = NULL; |
563 | /* | ||
564 | * ug! The format output is bigger than a PAGE!! | ||
565 | */ | ||
566 | buf = "FORMAT TOO BIG\n"; | ||
567 | r = simple_read_from_buffer(ubuf, cnt, ppos, | ||
568 | buf, strlen(buf)); | ||
569 | goto out; | ||
570 | } | ||
571 | 707 | ||
572 | r = simple_read_from_buffer(ubuf, cnt, ppos, | 708 | if (!array_descriptor) |
573 | s->buffer, s->len); | 709 | seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", |
574 | out: | 710 | field->type, field->name, field->offset, |
575 | kfree(s); | 711 | field->size, !!field->is_signed); |
576 | return r; | 712 | else |
713 | seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", | ||
714 | (int)(array_descriptor - field->type), | ||
715 | field->type, field->name, | ||
716 | array_descriptor, field->offset, | ||
717 | field->size, !!field->is_signed); | ||
718 | |||
719 | return 0; | ||
720 | } | ||
721 | |||
722 | static void f_stop(struct seq_file *m, void *p) | ||
723 | { | ||
724 | } | ||
725 | |||
726 | static const struct seq_operations trace_format_seq_ops = { | ||
727 | .start = f_start, | ||
728 | .next = f_next, | ||
729 | .stop = f_stop, | ||
730 | .show = f_show, | ||
731 | }; | ||
732 | |||
733 | static int trace_format_open(struct inode *inode, struct file *file) | ||
734 | { | ||
735 | struct ftrace_event_call *call = inode->i_private; | ||
736 | struct seq_file *m; | ||
737 | int ret; | ||
738 | |||
739 | ret = seq_open(file, &trace_format_seq_ops); | ||
740 | if (ret < 0) | ||
741 | return ret; | ||
742 | |||
743 | m = file->private_data; | ||
744 | m->private = call; | ||
745 | |||
746 | return 0; | ||
577 | } | 747 | } |
578 | 748 | ||
579 | static ssize_t | 749 | static ssize_t |
@@ -591,7 +761,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) | |||
591 | return -ENOMEM; | 761 | return -ENOMEM; |
592 | 762 | ||
593 | trace_seq_init(s); | 763 | trace_seq_init(s); |
594 | trace_seq_printf(s, "%d\n", call->id); | 764 | trace_seq_printf(s, "%d\n", call->event.type); |
595 | 765 | ||
596 | r = simple_read_from_buffer(ubuf, cnt, ppos, | 766 | r = simple_read_from_buffer(ubuf, cnt, ppos, |
597 | s->buffer, s->len); | 767 | s->buffer, s->len); |
@@ -768,39 +938,47 @@ static const struct file_operations ftrace_enable_fops = { | |||
768 | .open = tracing_open_generic, | 938 | .open = tracing_open_generic, |
769 | .read = event_enable_read, | 939 | .read = event_enable_read, |
770 | .write = event_enable_write, | 940 | .write = event_enable_write, |
941 | .llseek = default_llseek, | ||
771 | }; | 942 | }; |
772 | 943 | ||
773 | static const struct file_operations ftrace_event_format_fops = { | 944 | static const struct file_operations ftrace_event_format_fops = { |
774 | .open = tracing_open_generic, | 945 | .open = trace_format_open, |
775 | .read = event_format_read, | 946 | .read = seq_read, |
947 | .llseek = seq_lseek, | ||
948 | .release = seq_release, | ||
776 | }; | 949 | }; |
777 | 950 | ||
778 | static const struct file_operations ftrace_event_id_fops = { | 951 | static const struct file_operations ftrace_event_id_fops = { |
779 | .open = tracing_open_generic, | 952 | .open = tracing_open_generic, |
780 | .read = event_id_read, | 953 | .read = event_id_read, |
954 | .llseek = default_llseek, | ||
781 | }; | 955 | }; |
782 | 956 | ||
783 | static const struct file_operations ftrace_event_filter_fops = { | 957 | static const struct file_operations ftrace_event_filter_fops = { |
784 | .open = tracing_open_generic, | 958 | .open = tracing_open_generic, |
785 | .read = event_filter_read, | 959 | .read = event_filter_read, |
786 | .write = event_filter_write, | 960 | .write = event_filter_write, |
961 | .llseek = default_llseek, | ||
787 | }; | 962 | }; |
788 | 963 | ||
789 | static const struct file_operations ftrace_subsystem_filter_fops = { | 964 | static const struct file_operations ftrace_subsystem_filter_fops = { |
790 | .open = tracing_open_generic, | 965 | .open = tracing_open_generic, |
791 | .read = subsystem_filter_read, | 966 | .read = subsystem_filter_read, |
792 | .write = subsystem_filter_write, | 967 | .write = subsystem_filter_write, |
968 | .llseek = default_llseek, | ||
793 | }; | 969 | }; |
794 | 970 | ||
795 | static const struct file_operations ftrace_system_enable_fops = { | 971 | static const struct file_operations ftrace_system_enable_fops = { |
796 | .open = tracing_open_generic, | 972 | .open = tracing_open_generic, |
797 | .read = system_enable_read, | 973 | .read = system_enable_read, |
798 | .write = system_enable_write, | 974 | .write = system_enable_write, |
975 | .llseek = default_llseek, | ||
799 | }; | 976 | }; |
800 | 977 | ||
801 | static const struct file_operations ftrace_show_header_fops = { | 978 | static const struct file_operations ftrace_show_header_fops = { |
802 | .open = tracing_open_generic, | 979 | .open = tracing_open_generic, |
803 | .read = show_header, | 980 | .read = show_header, |
981 | .llseek = default_llseek, | ||
804 | }; | 982 | }; |
805 | 983 | ||
806 | static struct dentry *event_trace_events_dir(void) | 984 | static struct dentry *event_trace_events_dir(void) |
@@ -833,8 +1011,10 @@ event_subsystem_dir(const char *name, struct dentry *d_events) | |||
833 | 1011 | ||
834 | /* First see if we did not already create this dir */ | 1012 | /* First see if we did not already create this dir */ |
835 | list_for_each_entry(system, &event_subsystems, list) { | 1013 | list_for_each_entry(system, &event_subsystems, list) { |
836 | if (strcmp(system->name, name) == 0) | 1014 | if (strcmp(system->name, name) == 0) { |
1015 | system->nr_events++; | ||
837 | return system->entry; | 1016 | return system->entry; |
1017 | } | ||
838 | } | 1018 | } |
839 | 1019 | ||
840 | /* need to create new entry */ | 1020 | /* need to create new entry */ |
@@ -853,6 +1033,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) | |||
853 | return d_events; | 1033 | return d_events; |
854 | } | 1034 | } |
855 | 1035 | ||
1036 | system->nr_events = 1; | ||
856 | system->name = kstrdup(name, GFP_KERNEL); | 1037 | system->name = kstrdup(name, GFP_KERNEL); |
857 | if (!system->name) { | 1038 | if (!system->name) { |
858 | debugfs_remove(system->entry); | 1039 | debugfs_remove(system->entry); |
@@ -880,9 +1061,9 @@ event_subsystem_dir(const char *name, struct dentry *d_events) | |||
880 | "'%s/filter' entry\n", name); | 1061 | "'%s/filter' entry\n", name); |
881 | } | 1062 | } |
882 | 1063 | ||
883 | entry = trace_create_file("enable", 0644, system->entry, | 1064 | trace_create_file("enable", 0644, system->entry, |
884 | (void *)system->name, | 1065 | (void *)system->name, |
885 | &ftrace_system_enable_fops); | 1066 | &ftrace_system_enable_fops); |
886 | 1067 | ||
887 | return system->entry; | 1068 | return system->entry; |
888 | } | 1069 | } |
@@ -894,24 +1075,15 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, | |||
894 | const struct file_operations *filter, | 1075 | const struct file_operations *filter, |
895 | const struct file_operations *format) | 1076 | const struct file_operations *format) |
896 | { | 1077 | { |
897 | struct dentry *entry; | 1078 | struct list_head *head; |
898 | int ret; | 1079 | int ret; |
899 | 1080 | ||
900 | /* | 1081 | /* |
901 | * If the trace point header did not define TRACE_SYSTEM | 1082 | * If the trace point header did not define TRACE_SYSTEM |
902 | * then the system would be called "TRACE_SYSTEM". | 1083 | * then the system would be called "TRACE_SYSTEM". |
903 | */ | 1084 | */ |
904 | if (strcmp(call->system, TRACE_SYSTEM) != 0) | 1085 | if (strcmp(call->class->system, TRACE_SYSTEM) != 0) |
905 | d_events = event_subsystem_dir(call->system, d_events); | 1086 | d_events = event_subsystem_dir(call->class->system, d_events); |
906 | |||
907 | if (call->raw_init) { | ||
908 | ret = call->raw_init(); | ||
909 | if (ret < 0) { | ||
910 | pr_warning("Could not initialize trace point" | ||
911 | " events/%s\n", call->name); | ||
912 | return ret; | ||
913 | } | ||
914 | } | ||
915 | 1087 | ||
916 | call->dir = debugfs_create_dir(call->name, d_events); | 1088 | call->dir = debugfs_create_dir(call->name, d_events); |
917 | if (!call->dir) { | 1089 | if (!call->dir) { |
@@ -920,35 +1092,138 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, | |||
920 | return -1; | 1092 | return -1; |
921 | } | 1093 | } |
922 | 1094 | ||
923 | if (call->regfunc) | 1095 | if (call->class->reg) |
924 | entry = trace_create_file("enable", 0644, call->dir, call, | 1096 | trace_create_file("enable", 0644, call->dir, call, |
925 | enable); | 1097 | enable); |
926 | 1098 | ||
927 | if (call->id) | 1099 | #ifdef CONFIG_PERF_EVENTS |
928 | entry = trace_create_file("id", 0444, call->dir, call, | 1100 | if (call->event.type && call->class->reg) |
929 | id); | 1101 | trace_create_file("id", 0444, call->dir, call, |
1102 | id); | ||
1103 | #endif | ||
930 | 1104 | ||
931 | if (call->define_fields) { | 1105 | /* |
932 | ret = call->define_fields(); | 1106 | * Other events may have the same class. Only update |
1107 | * the fields if they are not already defined. | ||
1108 | */ | ||
1109 | head = trace_get_fields(call); | ||
1110 | if (list_empty(head)) { | ||
1111 | ret = call->class->define_fields(call); | ||
933 | if (ret < 0) { | 1112 | if (ret < 0) { |
934 | pr_warning("Could not initialize trace point" | 1113 | pr_warning("Could not initialize trace point" |
935 | " events/%s\n", call->name); | 1114 | " events/%s\n", call->name); |
936 | return ret; | 1115 | return ret; |
937 | } | 1116 | } |
938 | entry = trace_create_file("filter", 0644, call->dir, call, | ||
939 | filter); | ||
940 | } | 1117 | } |
1118 | trace_create_file("filter", 0644, call->dir, call, | ||
1119 | filter); | ||
941 | 1120 | ||
942 | /* A trace may not want to export its format */ | 1121 | trace_create_file("format", 0444, call->dir, call, |
943 | if (!call->show_format) | 1122 | format); |
944 | return 0; | ||
945 | |||
946 | entry = trace_create_file("format", 0444, call->dir, call, | ||
947 | format); | ||
948 | 1123 | ||
949 | return 0; | 1124 | return 0; |
950 | } | 1125 | } |
951 | 1126 | ||
1127 | static int | ||
1128 | __trace_add_event_call(struct ftrace_event_call *call, struct module *mod, | ||
1129 | const struct file_operations *id, | ||
1130 | const struct file_operations *enable, | ||
1131 | const struct file_operations *filter, | ||
1132 | const struct file_operations *format) | ||
1133 | { | ||
1134 | struct dentry *d_events; | ||
1135 | int ret; | ||
1136 | |||
1137 | /* The linker may leave blanks */ | ||
1138 | if (!call->name) | ||
1139 | return -EINVAL; | ||
1140 | |||
1141 | if (call->class->raw_init) { | ||
1142 | ret = call->class->raw_init(call); | ||
1143 | if (ret < 0) { | ||
1144 | if (ret != -ENOSYS) | ||
1145 | pr_warning("Could not initialize trace events/%s\n", | ||
1146 | call->name); | ||
1147 | return ret; | ||
1148 | } | ||
1149 | } | ||
1150 | |||
1151 | d_events = event_trace_events_dir(); | ||
1152 | if (!d_events) | ||
1153 | return -ENOENT; | ||
1154 | |||
1155 | ret = event_create_dir(call, d_events, id, enable, filter, format); | ||
1156 | if (!ret) | ||
1157 | list_add(&call->list, &ftrace_events); | ||
1158 | call->mod = mod; | ||
1159 | |||
1160 | return ret; | ||
1161 | } | ||
1162 | |||
1163 | /* Add an additional event_call dynamically */ | ||
1164 | int trace_add_event_call(struct ftrace_event_call *call) | ||
1165 | { | ||
1166 | int ret; | ||
1167 | mutex_lock(&event_mutex); | ||
1168 | ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops, | ||
1169 | &ftrace_enable_fops, | ||
1170 | &ftrace_event_filter_fops, | ||
1171 | &ftrace_event_format_fops); | ||
1172 | mutex_unlock(&event_mutex); | ||
1173 | return ret; | ||
1174 | } | ||
1175 | |||
1176 | static void remove_subsystem_dir(const char *name) | ||
1177 | { | ||
1178 | struct event_subsystem *system; | ||
1179 | |||
1180 | if (strcmp(name, TRACE_SYSTEM) == 0) | ||
1181 | return; | ||
1182 | |||
1183 | list_for_each_entry(system, &event_subsystems, list) { | ||
1184 | if (strcmp(system->name, name) == 0) { | ||
1185 | if (!--system->nr_events) { | ||
1186 | struct event_filter *filter = system->filter; | ||
1187 | |||
1188 | debugfs_remove_recursive(system->entry); | ||
1189 | list_del(&system->list); | ||
1190 | if (filter) { | ||
1191 | kfree(filter->filter_string); | ||
1192 | kfree(filter); | ||
1193 | } | ||
1194 | kfree(system->name); | ||
1195 | kfree(system); | ||
1196 | } | ||
1197 | break; | ||
1198 | } | ||
1199 | } | ||
1200 | } | ||
1201 | |||
1202 | /* | ||
1203 | * Must be called under locking both of event_mutex and trace_event_mutex. | ||
1204 | */ | ||
1205 | static void __trace_remove_event_call(struct ftrace_event_call *call) | ||
1206 | { | ||
1207 | ftrace_event_enable_disable(call, 0); | ||
1208 | if (call->event.funcs) | ||
1209 | __unregister_ftrace_event(&call->event); | ||
1210 | debugfs_remove_recursive(call->dir); | ||
1211 | list_del(&call->list); | ||
1212 | trace_destroy_fields(call); | ||
1213 | destroy_preds(call); | ||
1214 | remove_subsystem_dir(call->class->system); | ||
1215 | } | ||
1216 | |||
1217 | /* Remove an event_call */ | ||
1218 | void trace_remove_event_call(struct ftrace_event_call *call) | ||
1219 | { | ||
1220 | mutex_lock(&event_mutex); | ||
1221 | down_write(&trace_event_mutex); | ||
1222 | __trace_remove_event_call(call); | ||
1223 | up_write(&trace_event_mutex); | ||
1224 | mutex_unlock(&event_mutex); | ||
1225 | } | ||
1226 | |||
952 | #define for_each_event(event, start, end) \ | 1227 | #define for_each_event(event, start, end) \ |
953 | for (event = start; \ | 1228 | for (event = start; \ |
954 | (unsigned long)event < (unsigned long)end; \ | 1229 | (unsigned long)event < (unsigned long)end; \ |
@@ -1010,7 +1285,6 @@ static void trace_module_add_events(struct module *mod) | |||
1010 | { | 1285 | { |
1011 | struct ftrace_module_file_ops *file_ops = NULL; | 1286 | struct ftrace_module_file_ops *file_ops = NULL; |
1012 | struct ftrace_event_call *call, *start, *end; | 1287 | struct ftrace_event_call *call, *start, *end; |
1013 | struct dentry *d_events; | ||
1014 | 1288 | ||
1015 | start = mod->trace_events; | 1289 | start = mod->trace_events; |
1016 | end = mod->trace_events + mod->num_trace_events; | 1290 | end = mod->trace_events + mod->num_trace_events; |
@@ -1018,29 +1292,14 @@ static void trace_module_add_events(struct module *mod) | |||
1018 | if (start == end) | 1292 | if (start == end) |
1019 | return; | 1293 | return; |
1020 | 1294 | ||
1021 | d_events = event_trace_events_dir(); | 1295 | file_ops = trace_create_file_ops(mod); |
1022 | if (!d_events) | 1296 | if (!file_ops) |
1023 | return; | 1297 | return; |
1024 | 1298 | ||
1025 | for_each_event(call, start, end) { | 1299 | for_each_event(call, start, end) { |
1026 | /* The linker may leave blanks */ | 1300 | __trace_add_event_call(call, mod, |
1027 | if (!call->name) | 1301 | &file_ops->id, &file_ops->enable, |
1028 | continue; | 1302 | &file_ops->filter, &file_ops->format); |
1029 | |||
1030 | /* | ||
1031 | * This module has events, create file ops for this module | ||
1032 | * if not already done. | ||
1033 | */ | ||
1034 | if (!file_ops) { | ||
1035 | file_ops = trace_create_file_ops(mod); | ||
1036 | if (!file_ops) | ||
1037 | return; | ||
1038 | } | ||
1039 | call->mod = mod; | ||
1040 | list_add(&call->list, &ftrace_events); | ||
1041 | event_create_dir(call, d_events, | ||
1042 | &file_ops->id, &file_ops->enable, | ||
1043 | &file_ops->filter, &file_ops->format); | ||
1044 | } | 1303 | } |
1045 | } | 1304 | } |
1046 | 1305 | ||
@@ -1054,13 +1313,7 @@ static void trace_module_remove_events(struct module *mod) | |||
1054 | list_for_each_entry_safe(call, p, &ftrace_events, list) { | 1313 | list_for_each_entry_safe(call, p, &ftrace_events, list) { |
1055 | if (call->mod == mod) { | 1314 | if (call->mod == mod) { |
1056 | found = true; | 1315 | found = true; |
1057 | ftrace_event_enable_disable(call, 0); | 1316 | __trace_remove_event_call(call); |
1058 | if (call->event) | ||
1059 | __unregister_ftrace_event(call->event); | ||
1060 | debugfs_remove_recursive(call->dir); | ||
1061 | list_del(&call->list); | ||
1062 | trace_destroy_fields(call); | ||
1063 | destroy_preds(call); | ||
1064 | } | 1317 | } |
1065 | } | 1318 | } |
1066 | 1319 | ||
@@ -1109,7 +1362,7 @@ static int trace_module_notify(struct notifier_block *self, | |||
1109 | } | 1362 | } |
1110 | #endif /* CONFIG_MODULES */ | 1363 | #endif /* CONFIG_MODULES */ |
1111 | 1364 | ||
1112 | struct notifier_block trace_module_nb = { | 1365 | static struct notifier_block trace_module_nb = { |
1113 | .notifier_call = trace_module_notify, | 1366 | .notifier_call = trace_module_notify, |
1114 | .priority = 0, | 1367 | .priority = 0, |
1115 | }; | 1368 | }; |
@@ -1117,6 +1370,18 @@ struct notifier_block trace_module_nb = { | |||
1117 | extern struct ftrace_event_call __start_ftrace_events[]; | 1370 | extern struct ftrace_event_call __start_ftrace_events[]; |
1118 | extern struct ftrace_event_call __stop_ftrace_events[]; | 1371 | extern struct ftrace_event_call __stop_ftrace_events[]; |
1119 | 1372 | ||
1373 | static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; | ||
1374 | |||
1375 | static __init int setup_trace_event(char *str) | ||
1376 | { | ||
1377 | strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE); | ||
1378 | ring_buffer_expanded = 1; | ||
1379 | tracing_selftest_disabled = 1; | ||
1380 | |||
1381 | return 1; | ||
1382 | } | ||
1383 | __setup("trace_event=", setup_trace_event); | ||
1384 | |||
1120 | static __init int event_trace_init(void) | 1385 | static __init int event_trace_init(void) |
1121 | { | 1386 | { |
1122 | struct ftrace_event_call *call; | 1387 | struct ftrace_event_call *call; |
@@ -1124,6 +1389,8 @@ static __init int event_trace_init(void) | |||
1124 | struct dentry *entry; | 1389 | struct dentry *entry; |
1125 | struct dentry *d_events; | 1390 | struct dentry *d_events; |
1126 | int ret; | 1391 | int ret; |
1392 | char *buf = bootup_event_buf; | ||
1393 | char *token; | ||
1127 | 1394 | ||
1128 | d_tracer = tracing_init_dentry(); | 1395 | d_tracer = tracing_init_dentry(); |
1129 | if (!d_tracer) | 1396 | if (!d_tracer) |
@@ -1159,14 +1426,27 @@ static __init int event_trace_init(void) | |||
1159 | trace_create_file("enable", 0644, d_events, | 1426 | trace_create_file("enable", 0644, d_events, |
1160 | NULL, &ftrace_system_enable_fops); | 1427 | NULL, &ftrace_system_enable_fops); |
1161 | 1428 | ||
1429 | if (trace_define_common_fields()) | ||
1430 | pr_warning("tracing: Failed to allocate common fields"); | ||
1431 | |||
1162 | for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { | 1432 | for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { |
1163 | /* The linker may leave blanks */ | 1433 | __trace_add_event_call(call, NULL, &ftrace_event_id_fops, |
1164 | if (!call->name) | 1434 | &ftrace_enable_fops, |
1435 | &ftrace_event_filter_fops, | ||
1436 | &ftrace_event_format_fops); | ||
1437 | } | ||
1438 | |||
1439 | while (true) { | ||
1440 | token = strsep(&buf, ","); | ||
1441 | |||
1442 | if (!token) | ||
1443 | break; | ||
1444 | if (!*token) | ||
1165 | continue; | 1445 | continue; |
1166 | list_add(&call->list, &ftrace_events); | 1446 | |
1167 | event_create_dir(call, d_events, &ftrace_event_id_fops, | 1447 | ret = ftrace_set_clr_event(token, 1); |
1168 | &ftrace_enable_fops, &ftrace_event_filter_fops, | 1448 | if (ret) |
1169 | &ftrace_event_format_fops); | 1449 | pr_warning("Failed to enable trace event: %s\n", token); |
1170 | } | 1450 | } |
1171 | 1451 | ||
1172 | ret = register_module_notifier(&trace_module_nb); | 1452 | ret = register_module_notifier(&trace_module_nb); |
@@ -1241,17 +1521,29 @@ static __init void event_trace_self_tests(void) | |||
1241 | 1521 | ||
1242 | list_for_each_entry(call, &ftrace_events, list) { | 1522 | list_for_each_entry(call, &ftrace_events, list) { |
1243 | 1523 | ||
1244 | /* Only test those that have a regfunc */ | 1524 | /* Only test those that have a probe */ |
1245 | if (!call->regfunc) | 1525 | if (!call->class || !call->class->probe) |
1246 | continue; | 1526 | continue; |
1247 | 1527 | ||
1528 | /* | ||
1529 | * Testing syscall events here is pretty useless, but | ||
1530 | * we still do it if configured. But this is time consuming. | ||
1531 | * What we really need is a user thread to perform the | ||
1532 | * syscalls as we test. | ||
1533 | */ | ||
1534 | #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS | ||
1535 | if (call->class->system && | ||
1536 | strcmp(call->class->system, "syscalls") == 0) | ||
1537 | continue; | ||
1538 | #endif | ||
1539 | |||
1248 | pr_info("Testing event %s: ", call->name); | 1540 | pr_info("Testing event %s: ", call->name); |
1249 | 1541 | ||
1250 | /* | 1542 | /* |
1251 | * If an event is already enabled, someone is using | 1543 | * If an event is already enabled, someone is using |
1252 | * it and the self test should not be on. | 1544 | * it and the self test should not be on. |
1253 | */ | 1545 | */ |
1254 | if (call->enabled) { | 1546 | if (call->flags & TRACE_EVENT_FL_ENABLED) { |
1255 | pr_warning("Enabled event during self test!\n"); | 1547 | pr_warning("Enabled event during self test!\n"); |
1256 | WARN_ON_ONCE(1); | 1548 | WARN_ON_ONCE(1); |
1257 | continue; | 1549 | continue; |
@@ -1318,30 +1610,31 @@ static __init void event_trace_self_tests(void) | |||
1318 | 1610 | ||
1319 | #ifdef CONFIG_FUNCTION_TRACER | 1611 | #ifdef CONFIG_FUNCTION_TRACER |
1320 | 1612 | ||
1321 | static DEFINE_PER_CPU(atomic_t, test_event_disable); | 1613 | static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); |
1322 | 1614 | ||
1323 | static void | 1615 | static void |
1324 | function_test_events_call(unsigned long ip, unsigned long parent_ip) | 1616 | function_test_events_call(unsigned long ip, unsigned long parent_ip) |
1325 | { | 1617 | { |
1326 | struct ring_buffer_event *event; | 1618 | struct ring_buffer_event *event; |
1619 | struct ring_buffer *buffer; | ||
1327 | struct ftrace_entry *entry; | 1620 | struct ftrace_entry *entry; |
1328 | unsigned long flags; | 1621 | unsigned long flags; |
1329 | long disabled; | 1622 | long disabled; |
1330 | int resched; | ||
1331 | int cpu; | 1623 | int cpu; |
1332 | int pc; | 1624 | int pc; |
1333 | 1625 | ||
1334 | pc = preempt_count(); | 1626 | pc = preempt_count(); |
1335 | resched = ftrace_preempt_disable(); | 1627 | preempt_disable_notrace(); |
1336 | cpu = raw_smp_processor_id(); | 1628 | cpu = raw_smp_processor_id(); |
1337 | disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu)); | 1629 | disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); |
1338 | 1630 | ||
1339 | if (disabled != 1) | 1631 | if (disabled != 1) |
1340 | goto out; | 1632 | goto out; |
1341 | 1633 | ||
1342 | local_save_flags(flags); | 1634 | local_save_flags(flags); |
1343 | 1635 | ||
1344 | event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry), | 1636 | event = trace_current_buffer_lock_reserve(&buffer, |
1637 | TRACE_FN, sizeof(*entry), | ||
1345 | flags, pc); | 1638 | flags, pc); |
1346 | if (!event) | 1639 | if (!event) |
1347 | goto out; | 1640 | goto out; |
@@ -1349,11 +1642,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip) | |||
1349 | entry->ip = ip; | 1642 | entry->ip = ip; |
1350 | entry->parent_ip = parent_ip; | 1643 | entry->parent_ip = parent_ip; |
1351 | 1644 | ||
1352 | trace_nowake_buffer_unlock_commit(event, flags, pc); | 1645 | trace_nowake_buffer_unlock_commit(buffer, event, flags, pc); |
1353 | 1646 | ||
1354 | out: | 1647 | out: |
1355 | atomic_dec(&per_cpu(test_event_disable, cpu)); | 1648 | atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); |
1356 | ftrace_preempt_enable(resched); | 1649 | preempt_enable_notrace(); |
1357 | } | 1650 | } |
1358 | 1651 | ||
1359 | static struct ftrace_ops trace_ops __initdata = | 1652 | static struct ftrace_ops trace_ops __initdata = |
@@ -1376,10 +1669,10 @@ static __init void event_trace_self_test_with_function(void) | |||
1376 | 1669 | ||
1377 | static __init int event_trace_self_tests_init(void) | 1670 | static __init int event_trace_self_tests_init(void) |
1378 | { | 1671 | { |
1379 | 1672 | if (!tracing_selftest_disabled) { | |
1380 | event_trace_self_tests(); | 1673 | event_trace_self_tests(); |
1381 | 1674 | event_trace_self_test_with_function(); | |
1382 | event_trace_self_test_with_function(); | 1675 | } |
1383 | 1676 | ||
1384 | return 0; | 1677 | return 0; |
1385 | } | 1678 | } |
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 936c621bbf46..36d40104b17f 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -18,11 +18,11 @@ | |||
18 | * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> | 18 | * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/debugfs.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <linux/module.h> | 21 | #include <linux/module.h> |
24 | #include <linux/ctype.h> | 22 | #include <linux/ctype.h> |
25 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/perf_event.h> | ||
25 | #include <linux/slab.h> | ||
26 | 26 | ||
27 | #include "trace.h" | 27 | #include "trace.h" |
28 | #include "trace_output.h" | 28 | #include "trace_output.h" |
@@ -31,6 +31,7 @@ enum filter_op_ids | |||
31 | { | 31 | { |
32 | OP_OR, | 32 | OP_OR, |
33 | OP_AND, | 33 | OP_AND, |
34 | OP_GLOB, | ||
34 | OP_NE, | 35 | OP_NE, |
35 | OP_EQ, | 36 | OP_EQ, |
36 | OP_LT, | 37 | OP_LT, |
@@ -48,16 +49,17 @@ struct filter_op { | |||
48 | }; | 49 | }; |
49 | 50 | ||
50 | static struct filter_op filter_ops[] = { | 51 | static struct filter_op filter_ops[] = { |
51 | { OP_OR, "||", 1 }, | 52 | { OP_OR, "||", 1 }, |
52 | { OP_AND, "&&", 2 }, | 53 | { OP_AND, "&&", 2 }, |
53 | { OP_NE, "!=", 4 }, | 54 | { OP_GLOB, "~", 4 }, |
54 | { OP_EQ, "==", 4 }, | 55 | { OP_NE, "!=", 4 }, |
55 | { OP_LT, "<", 5 }, | 56 | { OP_EQ, "==", 4 }, |
56 | { OP_LE, "<=", 5 }, | 57 | { OP_LT, "<", 5 }, |
57 | { OP_GT, ">", 5 }, | 58 | { OP_LE, "<=", 5 }, |
58 | { OP_GE, ">=", 5 }, | 59 | { OP_GT, ">", 5 }, |
59 | { OP_NONE, "OP_NONE", 0 }, | 60 | { OP_GE, ">=", 5 }, |
60 | { OP_OPEN_PAREN, "(", 0 }, | 61 | { OP_NONE, "OP_NONE", 0 }, |
62 | { OP_OPEN_PAREN, "(", 0 }, | ||
61 | }; | 63 | }; |
62 | 64 | ||
63 | enum { | 65 | enum { |
@@ -121,6 +123,47 @@ struct filter_parse_state { | |||
121 | } operand; | 123 | } operand; |
122 | }; | 124 | }; |
123 | 125 | ||
126 | #define DEFINE_COMPARISON_PRED(type) \ | ||
127 | static int filter_pred_##type(struct filter_pred *pred, void *event, \ | ||
128 | int val1, int val2) \ | ||
129 | { \ | ||
130 | type *addr = (type *)(event + pred->offset); \ | ||
131 | type val = (type)pred->val; \ | ||
132 | int match = 0; \ | ||
133 | \ | ||
134 | switch (pred->op) { \ | ||
135 | case OP_LT: \ | ||
136 | match = (*addr < val); \ | ||
137 | break; \ | ||
138 | case OP_LE: \ | ||
139 | match = (*addr <= val); \ | ||
140 | break; \ | ||
141 | case OP_GT: \ | ||
142 | match = (*addr > val); \ | ||
143 | break; \ | ||
144 | case OP_GE: \ | ||
145 | match = (*addr >= val); \ | ||
146 | break; \ | ||
147 | default: \ | ||
148 | break; \ | ||
149 | } \ | ||
150 | \ | ||
151 | return match; \ | ||
152 | } | ||
153 | |||
154 | #define DEFINE_EQUALITY_PRED(size) \ | ||
155 | static int filter_pred_##size(struct filter_pred *pred, void *event, \ | ||
156 | int val1, int val2) \ | ||
157 | { \ | ||
158 | u##size *addr = (u##size *)(event + pred->offset); \ | ||
159 | u##size val = (u##size)pred->val; \ | ||
160 | int match; \ | ||
161 | \ | ||
162 | match = (val == *addr) ^ pred->not; \ | ||
163 | \ | ||
164 | return match; \ | ||
165 | } | ||
166 | |||
124 | DEFINE_COMPARISON_PRED(s64); | 167 | DEFINE_COMPARISON_PRED(s64); |
125 | DEFINE_COMPARISON_PRED(u64); | 168 | DEFINE_COMPARISON_PRED(u64); |
126 | DEFINE_COMPARISON_PRED(s32); | 169 | DEFINE_COMPARISON_PRED(s32); |
@@ -156,9 +199,24 @@ static int filter_pred_string(struct filter_pred *pred, void *event, | |||
156 | char *addr = (char *)(event + pred->offset); | 199 | char *addr = (char *)(event + pred->offset); |
157 | int cmp, match; | 200 | int cmp, match; |
158 | 201 | ||
159 | cmp = strncmp(addr, pred->str_val, pred->str_len); | 202 | cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len); |
160 | 203 | ||
161 | match = (!cmp) ^ pred->not; | 204 | match = cmp ^ pred->not; |
205 | |||
206 | return match; | ||
207 | } | ||
208 | |||
209 | /* Filter predicate for char * pointers */ | ||
210 | static int filter_pred_pchar(struct filter_pred *pred, void *event, | ||
211 | int val1, int val2) | ||
212 | { | ||
213 | char **addr = (char **)(event + pred->offset); | ||
214 | int cmp, match; | ||
215 | int len = strlen(*addr) + 1; /* including tailing '\0' */ | ||
216 | |||
217 | cmp = pred->regex.match(*addr, &pred->regex, len); | ||
218 | |||
219 | match = cmp ^ pred->not; | ||
162 | 220 | ||
163 | return match; | 221 | return match; |
164 | } | 222 | } |
@@ -176,13 +234,15 @@ static int filter_pred_string(struct filter_pred *pred, void *event, | |||
176 | static int filter_pred_strloc(struct filter_pred *pred, void *event, | 234 | static int filter_pred_strloc(struct filter_pred *pred, void *event, |
177 | int val1, int val2) | 235 | int val1, int val2) |
178 | { | 236 | { |
179 | unsigned short str_loc = *(unsigned short *)(event + pred->offset); | 237 | u32 str_item = *(u32 *)(event + pred->offset); |
238 | int str_loc = str_item & 0xffff; | ||
239 | int str_len = str_item >> 16; | ||
180 | char *addr = (char *)(event + str_loc); | 240 | char *addr = (char *)(event + str_loc); |
181 | int cmp, match; | 241 | int cmp, match; |
182 | 242 | ||
183 | cmp = strncmp(addr, pred->str_val, pred->str_len); | 243 | cmp = pred->regex.match(addr, &pred->regex, str_len); |
184 | 244 | ||
185 | match = (!cmp) ^ pred->not; | 245 | match = cmp ^ pred->not; |
186 | 246 | ||
187 | return match; | 247 | return match; |
188 | } | 248 | } |
@@ -193,10 +253,133 @@ static int filter_pred_none(struct filter_pred *pred, void *event, | |||
193 | return 0; | 253 | return 0; |
194 | } | 254 | } |
195 | 255 | ||
256 | /* | ||
257 | * regex_match_foo - Basic regex callbacks | ||
258 | * | ||
259 | * @str: the string to be searched | ||
260 | * @r: the regex structure containing the pattern string | ||
261 | * @len: the length of the string to be searched (including '\0') | ||
262 | * | ||
263 | * Note: | ||
264 | * - @str might not be NULL-terminated if it's of type DYN_STRING | ||
265 | * or STATIC_STRING | ||
266 | */ | ||
267 | |||
268 | static int regex_match_full(char *str, struct regex *r, int len) | ||
269 | { | ||
270 | if (strncmp(str, r->pattern, len) == 0) | ||
271 | return 1; | ||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | static int regex_match_front(char *str, struct regex *r, int len) | ||
276 | { | ||
277 | if (strncmp(str, r->pattern, r->len) == 0) | ||
278 | return 1; | ||
279 | return 0; | ||
280 | } | ||
281 | |||
282 | static int regex_match_middle(char *str, struct regex *r, int len) | ||
283 | { | ||
284 | if (strnstr(str, r->pattern, len)) | ||
285 | return 1; | ||
286 | return 0; | ||
287 | } | ||
288 | |||
289 | static int regex_match_end(char *str, struct regex *r, int len) | ||
290 | { | ||
291 | int strlen = len - 1; | ||
292 | |||
293 | if (strlen >= r->len && | ||
294 | memcmp(str + strlen - r->len, r->pattern, r->len) == 0) | ||
295 | return 1; | ||
296 | return 0; | ||
297 | } | ||
298 | |||
299 | /** | ||
300 | * filter_parse_regex - parse a basic regex | ||
301 | * @buff: the raw regex | ||
302 | * @len: length of the regex | ||
303 | * @search: will point to the beginning of the string to compare | ||
304 | * @not: tell whether the match will have to be inverted | ||
305 | * | ||
306 | * This passes in a buffer containing a regex and this function will | ||
307 | * set search to point to the search part of the buffer and | ||
308 | * return the type of search it is (see enum above). | ||
309 | * This does modify buff. | ||
310 | * | ||
311 | * Returns enum type. | ||
312 | * search returns the pointer to use for comparison. | ||
313 | * not returns 1 if buff started with a '!' | ||
314 | * 0 otherwise. | ||
315 | */ | ||
316 | enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not) | ||
317 | { | ||
318 | int type = MATCH_FULL; | ||
319 | int i; | ||
320 | |||
321 | if (buff[0] == '!') { | ||
322 | *not = 1; | ||
323 | buff++; | ||
324 | len--; | ||
325 | } else | ||
326 | *not = 0; | ||
327 | |||
328 | *search = buff; | ||
329 | |||
330 | for (i = 0; i < len; i++) { | ||
331 | if (buff[i] == '*') { | ||
332 | if (!i) { | ||
333 | *search = buff + 1; | ||
334 | type = MATCH_END_ONLY; | ||
335 | } else { | ||
336 | if (type == MATCH_END_ONLY) | ||
337 | type = MATCH_MIDDLE_ONLY; | ||
338 | else | ||
339 | type = MATCH_FRONT_ONLY; | ||
340 | buff[i] = 0; | ||
341 | break; | ||
342 | } | ||
343 | } | ||
344 | } | ||
345 | |||
346 | return type; | ||
347 | } | ||
348 | |||
349 | static void filter_build_regex(struct filter_pred *pred) | ||
350 | { | ||
351 | struct regex *r = &pred->regex; | ||
352 | char *search; | ||
353 | enum regex_type type = MATCH_FULL; | ||
354 | int not = 0; | ||
355 | |||
356 | if (pred->op == OP_GLOB) { | ||
357 | type = filter_parse_regex(r->pattern, r->len, &search, ¬); | ||
358 | r->len = strlen(search); | ||
359 | memmove(r->pattern, search, r->len+1); | ||
360 | } | ||
361 | |||
362 | switch (type) { | ||
363 | case MATCH_FULL: | ||
364 | r->match = regex_match_full; | ||
365 | break; | ||
366 | case MATCH_FRONT_ONLY: | ||
367 | r->match = regex_match_front; | ||
368 | break; | ||
369 | case MATCH_MIDDLE_ONLY: | ||
370 | r->match = regex_match_middle; | ||
371 | break; | ||
372 | case MATCH_END_ONLY: | ||
373 | r->match = regex_match_end; | ||
374 | break; | ||
375 | } | ||
376 | |||
377 | pred->not ^= not; | ||
378 | } | ||
379 | |||
196 | /* return 1 if event matches, 0 otherwise (discard) */ | 380 | /* return 1 if event matches, 0 otherwise (discard) */ |
197 | int filter_match_preds(struct ftrace_event_call *call, void *rec) | 381 | int filter_match_preds(struct event_filter *filter, void *rec) |
198 | { | 382 | { |
199 | struct event_filter *filter = call->filter; | ||
200 | int match, top = 0, val1 = 0, val2 = 0; | 383 | int match, top = 0, val1 = 0, val2 = 0; |
201 | int stack[MAX_FILTER_PRED]; | 384 | int stack[MAX_FILTER_PRED]; |
202 | struct filter_pred *pred; | 385 | struct filter_pred *pred; |
@@ -293,7 +476,7 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) | |||
293 | struct event_filter *filter = call->filter; | 476 | struct event_filter *filter = call->filter; |
294 | 477 | ||
295 | mutex_lock(&event_mutex); | 478 | mutex_lock(&event_mutex); |
296 | if (filter->filter_string) | 479 | if (filter && filter->filter_string) |
297 | trace_seq_printf(s, "%s\n", filter->filter_string); | 480 | trace_seq_printf(s, "%s\n", filter->filter_string); |
298 | else | 481 | else |
299 | trace_seq_printf(s, "none\n"); | 482 | trace_seq_printf(s, "none\n"); |
@@ -306,7 +489,7 @@ void print_subsystem_event_filter(struct event_subsystem *system, | |||
306 | struct event_filter *filter = system->filter; | 489 | struct event_filter *filter = system->filter; |
307 | 490 | ||
308 | mutex_lock(&event_mutex); | 491 | mutex_lock(&event_mutex); |
309 | if (filter->filter_string) | 492 | if (filter && filter->filter_string) |
310 | trace_seq_printf(s, "%s\n", filter->filter_string); | 493 | trace_seq_printf(s, "%s\n", filter->filter_string); |
311 | else | 494 | else |
312 | trace_seq_printf(s, "none\n"); | 495 | trace_seq_printf(s, "none\n"); |
@@ -314,11 +497,11 @@ void print_subsystem_event_filter(struct event_subsystem *system, | |||
314 | } | 497 | } |
315 | 498 | ||
316 | static struct ftrace_event_field * | 499 | static struct ftrace_event_field * |
317 | find_event_field(struct ftrace_event_call *call, char *name) | 500 | __find_event_field(struct list_head *head, char *name) |
318 | { | 501 | { |
319 | struct ftrace_event_field *field; | 502 | struct ftrace_event_field *field; |
320 | 503 | ||
321 | list_for_each_entry(field, &call->fields, link) { | 504 | list_for_each_entry(field, head, link) { |
322 | if (!strcmp(field->name, name)) | 505 | if (!strcmp(field->name, name)) |
323 | return field; | 506 | return field; |
324 | } | 507 | } |
@@ -326,6 +509,20 @@ find_event_field(struct ftrace_event_call *call, char *name) | |||
326 | return NULL; | 509 | return NULL; |
327 | } | 510 | } |
328 | 511 | ||
512 | static struct ftrace_event_field * | ||
513 | find_event_field(struct ftrace_event_call *call, char *name) | ||
514 | { | ||
515 | struct ftrace_event_field *field; | ||
516 | struct list_head *head; | ||
517 | |||
518 | field = __find_event_field(&ftrace_common_fields, name); | ||
519 | if (field) | ||
520 | return field; | ||
521 | |||
522 | head = trace_get_fields(call); | ||
523 | return __find_event_field(head, name); | ||
524 | } | ||
525 | |||
329 | static void filter_free_pred(struct filter_pred *pred) | 526 | static void filter_free_pred(struct filter_pred *pred) |
330 | { | 527 | { |
331 | if (!pred) | 528 | if (!pred) |
@@ -339,7 +536,7 @@ static void filter_clear_pred(struct filter_pred *pred) | |||
339 | { | 536 | { |
340 | kfree(pred->field_name); | 537 | kfree(pred->field_name); |
341 | pred->field_name = NULL; | 538 | pred->field_name = NULL; |
342 | pred->str_len = 0; | 539 | pred->regex.len = 0; |
343 | } | 540 | } |
344 | 541 | ||
345 | static int filter_set_pred(struct filter_pred *dest, | 542 | static int filter_set_pred(struct filter_pred *dest, |
@@ -362,18 +559,20 @@ static void filter_disable_preds(struct ftrace_event_call *call) | |||
362 | struct event_filter *filter = call->filter; | 559 | struct event_filter *filter = call->filter; |
363 | int i; | 560 | int i; |
364 | 561 | ||
365 | call->filter_active = 0; | 562 | call->flags &= ~TRACE_EVENT_FL_FILTERED; |
366 | filter->n_preds = 0; | 563 | filter->n_preds = 0; |
367 | 564 | ||
368 | for (i = 0; i < MAX_FILTER_PRED; i++) | 565 | for (i = 0; i < MAX_FILTER_PRED; i++) |
369 | filter->preds[i]->fn = filter_pred_none; | 566 | filter->preds[i]->fn = filter_pred_none; |
370 | } | 567 | } |
371 | 568 | ||
372 | void destroy_preds(struct ftrace_event_call *call) | 569 | static void __free_preds(struct event_filter *filter) |
373 | { | 570 | { |
374 | struct event_filter *filter = call->filter; | ||
375 | int i; | 571 | int i; |
376 | 572 | ||
573 | if (!filter) | ||
574 | return; | ||
575 | |||
377 | for (i = 0; i < MAX_FILTER_PRED; i++) { | 576 | for (i = 0; i < MAX_FILTER_PRED; i++) { |
378 | if (filter->preds[i]) | 577 | if (filter->preds[i]) |
379 | filter_free_pred(filter->preds[i]); | 578 | filter_free_pred(filter->preds[i]); |
@@ -381,20 +580,25 @@ void destroy_preds(struct ftrace_event_call *call) | |||
381 | kfree(filter->preds); | 580 | kfree(filter->preds); |
382 | kfree(filter->filter_string); | 581 | kfree(filter->filter_string); |
383 | kfree(filter); | 582 | kfree(filter); |
583 | } | ||
584 | |||
585 | void destroy_preds(struct ftrace_event_call *call) | ||
586 | { | ||
587 | __free_preds(call->filter); | ||
384 | call->filter = NULL; | 588 | call->filter = NULL; |
589 | call->flags &= ~TRACE_EVENT_FL_FILTERED; | ||
385 | } | 590 | } |
386 | 591 | ||
387 | int init_preds(struct ftrace_event_call *call) | 592 | static struct event_filter *__alloc_preds(void) |
388 | { | 593 | { |
389 | struct event_filter *filter; | 594 | struct event_filter *filter; |
390 | struct filter_pred *pred; | 595 | struct filter_pred *pred; |
391 | int i; | 596 | int i; |
392 | 597 | ||
393 | filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL); | 598 | filter = kzalloc(sizeof(*filter), GFP_KERNEL); |
394 | if (!call->filter) | 599 | if (!filter) |
395 | return -ENOMEM; | 600 | return ERR_PTR(-ENOMEM); |
396 | 601 | ||
397 | call->filter_active = 0; | ||
398 | filter->n_preds = 0; | 602 | filter->n_preds = 0; |
399 | 603 | ||
400 | filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); | 604 | filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); |
@@ -409,46 +613,62 @@ int init_preds(struct ftrace_event_call *call) | |||
409 | filter->preds[i] = pred; | 613 | filter->preds[i] = pred; |
410 | } | 614 | } |
411 | 615 | ||
412 | return 0; | 616 | return filter; |
413 | 617 | ||
414 | oom: | 618 | oom: |
415 | destroy_preds(call); | 619 | __free_preds(filter); |
620 | return ERR_PTR(-ENOMEM); | ||
621 | } | ||
622 | |||
623 | static int init_preds(struct ftrace_event_call *call) | ||
624 | { | ||
625 | if (call->filter) | ||
626 | return 0; | ||
627 | |||
628 | call->flags &= ~TRACE_EVENT_FL_FILTERED; | ||
629 | call->filter = __alloc_preds(); | ||
630 | if (IS_ERR(call->filter)) | ||
631 | return PTR_ERR(call->filter); | ||
416 | 632 | ||
417 | return -ENOMEM; | 633 | return 0; |
418 | } | 634 | } |
419 | EXPORT_SYMBOL_GPL(init_preds); | ||
420 | 635 | ||
421 | static void filter_free_subsystem_preds(struct event_subsystem *system) | 636 | static int init_subsystem_preds(struct event_subsystem *system) |
422 | { | 637 | { |
423 | struct event_filter *filter = system->filter; | ||
424 | struct ftrace_event_call *call; | 638 | struct ftrace_event_call *call; |
425 | int i; | 639 | int err; |
426 | 640 | ||
427 | if (filter->n_preds) { | 641 | list_for_each_entry(call, &ftrace_events, list) { |
428 | for (i = 0; i < filter->n_preds; i++) | 642 | if (strcmp(call->class->system, system->name) != 0) |
429 | filter_free_pred(filter->preds[i]); | 643 | continue; |
430 | kfree(filter->preds); | 644 | |
431 | filter->preds = NULL; | 645 | err = init_preds(call); |
432 | filter->n_preds = 0; | 646 | if (err) |
647 | return err; | ||
433 | } | 648 | } |
434 | 649 | ||
650 | return 0; | ||
651 | } | ||
652 | |||
653 | static void filter_free_subsystem_preds(struct event_subsystem *system) | ||
654 | { | ||
655 | struct ftrace_event_call *call; | ||
656 | |||
435 | list_for_each_entry(call, &ftrace_events, list) { | 657 | list_for_each_entry(call, &ftrace_events, list) { |
436 | if (!call->define_fields) | 658 | if (strcmp(call->class->system, system->name) != 0) |
437 | continue; | 659 | continue; |
438 | 660 | ||
439 | if (!strcmp(call->system, system->name)) { | 661 | filter_disable_preds(call); |
440 | filter_disable_preds(call); | 662 | remove_filter_string(call->filter); |
441 | remove_filter_string(call->filter); | ||
442 | } | ||
443 | } | 663 | } |
444 | } | 664 | } |
445 | 665 | ||
446 | static int filter_add_pred_fn(struct filter_parse_state *ps, | 666 | static int filter_add_pred_fn(struct filter_parse_state *ps, |
447 | struct ftrace_event_call *call, | 667 | struct ftrace_event_call *call, |
668 | struct event_filter *filter, | ||
448 | struct filter_pred *pred, | 669 | struct filter_pred *pred, |
449 | filter_pred_fn_t fn) | 670 | filter_pred_fn_t fn) |
450 | { | 671 | { |
451 | struct event_filter *filter = call->filter; | ||
452 | int idx, err; | 672 | int idx, err; |
453 | 673 | ||
454 | if (filter->n_preds == MAX_FILTER_PRED) { | 674 | if (filter->n_preds == MAX_FILTER_PRED) { |
@@ -463,17 +683,11 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, | |||
463 | return err; | 683 | return err; |
464 | 684 | ||
465 | filter->n_preds++; | 685 | filter->n_preds++; |
466 | call->filter_active = 1; | ||
467 | 686 | ||
468 | return 0; | 687 | return 0; |
469 | } | 688 | } |
470 | 689 | ||
471 | enum { | 690 | int filter_assign_type(const char *type) |
472 | FILTER_STATIC_STRING = 1, | ||
473 | FILTER_DYN_STRING | ||
474 | }; | ||
475 | |||
476 | static int is_string_field(const char *type) | ||
477 | { | 691 | { |
478 | if (strstr(type, "__data_loc") && strstr(type, "char")) | 692 | if (strstr(type, "__data_loc") && strstr(type, "char")) |
479 | return FILTER_DYN_STRING; | 693 | return FILTER_DYN_STRING; |
@@ -481,12 +695,22 @@ static int is_string_field(const char *type) | |||
481 | if (strchr(type, '[') && strstr(type, "char")) | 695 | if (strchr(type, '[') && strstr(type, "char")) |
482 | return FILTER_STATIC_STRING; | 696 | return FILTER_STATIC_STRING; |
483 | 697 | ||
484 | return 0; | 698 | return FILTER_OTHER; |
699 | } | ||
700 | |||
701 | static bool is_string_field(struct ftrace_event_field *field) | ||
702 | { | ||
703 | return field->filter_type == FILTER_DYN_STRING || | ||
704 | field->filter_type == FILTER_STATIC_STRING || | ||
705 | field->filter_type == FILTER_PTR_STRING; | ||
485 | } | 706 | } |
486 | 707 | ||
487 | static int is_legal_op(struct ftrace_event_field *field, int op) | 708 | static int is_legal_op(struct ftrace_event_field *field, int op) |
488 | { | 709 | { |
489 | if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE)) | 710 | if (is_string_field(field) && |
711 | (op != OP_EQ && op != OP_NE && op != OP_GLOB)) | ||
712 | return 0; | ||
713 | if (!is_string_field(field) && op == OP_GLOB) | ||
490 | return 0; | 714 | return 0; |
491 | 715 | ||
492 | return 1; | 716 | return 1; |
@@ -537,22 +761,25 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size, | |||
537 | 761 | ||
538 | static int filter_add_pred(struct filter_parse_state *ps, | 762 | static int filter_add_pred(struct filter_parse_state *ps, |
539 | struct ftrace_event_call *call, | 763 | struct ftrace_event_call *call, |
540 | struct filter_pred *pred) | 764 | struct event_filter *filter, |
765 | struct filter_pred *pred, | ||
766 | bool dry_run) | ||
541 | { | 767 | { |
542 | struct ftrace_event_field *field; | 768 | struct ftrace_event_field *field; |
543 | filter_pred_fn_t fn; | 769 | filter_pred_fn_t fn; |
544 | unsigned long long val; | 770 | unsigned long long val; |
545 | int string_type; | ||
546 | int ret; | 771 | int ret; |
547 | 772 | ||
548 | pred->fn = filter_pred_none; | 773 | pred->fn = filter_pred_none; |
549 | 774 | ||
550 | if (pred->op == OP_AND) { | 775 | if (pred->op == OP_AND) { |
551 | pred->pop_n = 2; | 776 | pred->pop_n = 2; |
552 | return filter_add_pred_fn(ps, call, pred, filter_pred_and); | 777 | fn = filter_pred_and; |
778 | goto add_pred_fn; | ||
553 | } else if (pred->op == OP_OR) { | 779 | } else if (pred->op == OP_OR) { |
554 | pred->pop_n = 2; | 780 | pred->pop_n = 2; |
555 | return filter_add_pred_fn(ps, call, pred, filter_pred_or); | 781 | fn = filter_pred_or; |
782 | goto add_pred_fn; | ||
556 | } | 783 | } |
557 | 784 | ||
558 | field = find_event_field(call, pred->field_name); | 785 | field = find_event_field(call, pred->field_name); |
@@ -568,83 +795,42 @@ static int filter_add_pred(struct filter_parse_state *ps, | |||
568 | return -EINVAL; | 795 | return -EINVAL; |
569 | } | 796 | } |
570 | 797 | ||
571 | string_type = is_string_field(field->type); | 798 | if (is_string_field(field)) { |
572 | if (string_type) { | 799 | filter_build_regex(pred); |
573 | if (string_type == FILTER_STATIC_STRING) | 800 | |
801 | if (field->filter_type == FILTER_STATIC_STRING) { | ||
574 | fn = filter_pred_string; | 802 | fn = filter_pred_string; |
575 | else | 803 | pred->regex.field_len = field->size; |
804 | } else if (field->filter_type == FILTER_DYN_STRING) | ||
576 | fn = filter_pred_strloc; | 805 | fn = filter_pred_strloc; |
577 | pred->str_len = field->size; | 806 | else |
578 | if (pred->op == OP_NE) | 807 | fn = filter_pred_pchar; |
579 | pred->not = 1; | ||
580 | return filter_add_pred_fn(ps, call, pred, fn); | ||
581 | } else { | 808 | } else { |
582 | if (field->is_signed) | 809 | if (field->is_signed) |
583 | ret = strict_strtoll(pred->str_val, 0, &val); | 810 | ret = strict_strtoll(pred->regex.pattern, 0, &val); |
584 | else | 811 | else |
585 | ret = strict_strtoull(pred->str_val, 0, &val); | 812 | ret = strict_strtoull(pred->regex.pattern, 0, &val); |
586 | if (ret) { | 813 | if (ret) { |
587 | parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); | 814 | parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); |
588 | return -EINVAL; | 815 | return -EINVAL; |
589 | } | 816 | } |
590 | pred->val = val; | 817 | pred->val = val; |
591 | } | ||
592 | 818 | ||
593 | fn = select_comparison_fn(pred->op, field->size, field->is_signed); | 819 | fn = select_comparison_fn(pred->op, field->size, |
594 | if (!fn) { | 820 | field->is_signed); |
595 | parse_error(ps, FILT_ERR_INVALID_OP, 0); | 821 | if (!fn) { |
596 | return -EINVAL; | 822 | parse_error(ps, FILT_ERR_INVALID_OP, 0); |
823 | return -EINVAL; | ||
824 | } | ||
597 | } | 825 | } |
598 | 826 | ||
599 | if (pred->op == OP_NE) | 827 | if (pred->op == OP_NE) |
600 | pred->not = 1; | 828 | pred->not = 1; |
601 | 829 | ||
602 | return filter_add_pred_fn(ps, call, pred, fn); | 830 | add_pred_fn: |
603 | } | 831 | if (!dry_run) |
604 | 832 | return filter_add_pred_fn(ps, call, filter, pred, fn); | |
605 | static int filter_add_subsystem_pred(struct filter_parse_state *ps, | 833 | return 0; |
606 | struct event_subsystem *system, | ||
607 | struct filter_pred *pred, | ||
608 | char *filter_string) | ||
609 | { | ||
610 | struct event_filter *filter = system->filter; | ||
611 | struct ftrace_event_call *call; | ||
612 | int err = 0; | ||
613 | |||
614 | if (!filter->preds) { | ||
615 | filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), | ||
616 | GFP_KERNEL); | ||
617 | |||
618 | if (!filter->preds) | ||
619 | return -ENOMEM; | ||
620 | } | ||
621 | |||
622 | if (filter->n_preds == MAX_FILTER_PRED) { | ||
623 | parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); | ||
624 | return -ENOSPC; | ||
625 | } | ||
626 | |||
627 | filter->preds[filter->n_preds] = pred; | ||
628 | filter->n_preds++; | ||
629 | |||
630 | list_for_each_entry(call, &ftrace_events, list) { | ||
631 | |||
632 | if (!call->define_fields) | ||
633 | continue; | ||
634 | |||
635 | if (strcmp(call->system, system->name)) | ||
636 | continue; | ||
637 | |||
638 | err = filter_add_pred(ps, call, pred); | ||
639 | if (err) { | ||
640 | filter_free_subsystem_preds(system); | ||
641 | parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); | ||
642 | goto out; | ||
643 | } | ||
644 | replace_filter_string(call->filter, filter_string); | ||
645 | } | ||
646 | out: | ||
647 | return err; | ||
648 | } | 834 | } |
649 | 835 | ||
650 | static void parse_init(struct filter_parse_state *ps, | 836 | static void parse_init(struct filter_parse_state *ps, |
@@ -844,8 +1030,9 @@ static void postfix_clear(struct filter_parse_state *ps) | |||
844 | 1030 | ||
845 | while (!list_empty(&ps->postfix)) { | 1031 | while (!list_empty(&ps->postfix)) { |
846 | elt = list_first_entry(&ps->postfix, struct postfix_elt, list); | 1032 | elt = list_first_entry(&ps->postfix, struct postfix_elt, list); |
847 | kfree(elt->operand); | ||
848 | list_del(&elt->list); | 1033 | list_del(&elt->list); |
1034 | kfree(elt->operand); | ||
1035 | kfree(elt); | ||
849 | } | 1036 | } |
850 | } | 1037 | } |
851 | 1038 | ||
@@ -955,8 +1142,8 @@ static struct filter_pred *create_pred(int op, char *operand1, char *operand2) | |||
955 | return NULL; | 1142 | return NULL; |
956 | } | 1143 | } |
957 | 1144 | ||
958 | strcpy(pred->str_val, operand2); | 1145 | strcpy(pred->regex.pattern, operand2); |
959 | pred->str_len = strlen(operand2); | 1146 | pred->regex.len = strlen(pred->regex.pattern); |
960 | 1147 | ||
961 | pred->op = op; | 1148 | pred->op = op; |
962 | 1149 | ||
@@ -1000,15 +1187,17 @@ static int check_preds(struct filter_parse_state *ps) | |||
1000 | return 0; | 1187 | return 0; |
1001 | } | 1188 | } |
1002 | 1189 | ||
1003 | static int replace_preds(struct event_subsystem *system, | 1190 | static int replace_preds(struct ftrace_event_call *call, |
1004 | struct ftrace_event_call *call, | 1191 | struct event_filter *filter, |
1005 | struct filter_parse_state *ps, | 1192 | struct filter_parse_state *ps, |
1006 | char *filter_string) | 1193 | char *filter_string, |
1194 | bool dry_run) | ||
1007 | { | 1195 | { |
1008 | char *operand1 = NULL, *operand2 = NULL; | 1196 | char *operand1 = NULL, *operand2 = NULL; |
1009 | struct filter_pred *pred; | 1197 | struct filter_pred *pred; |
1010 | struct postfix_elt *elt; | 1198 | struct postfix_elt *elt; |
1011 | int err; | 1199 | int err; |
1200 | int n_preds = 0; | ||
1012 | 1201 | ||
1013 | err = check_preds(ps); | 1202 | err = check_preds(ps); |
1014 | if (err) | 1203 | if (err) |
@@ -1027,19 +1216,14 @@ static int replace_preds(struct event_subsystem *system, | |||
1027 | continue; | 1216 | continue; |
1028 | } | 1217 | } |
1029 | 1218 | ||
1219 | if (n_preds++ == MAX_FILTER_PRED) { | ||
1220 | parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); | ||
1221 | return -ENOSPC; | ||
1222 | } | ||
1223 | |||
1030 | if (elt->op == OP_AND || elt->op == OP_OR) { | 1224 | if (elt->op == OP_AND || elt->op == OP_OR) { |
1031 | pred = create_logical_pred(elt->op); | 1225 | pred = create_logical_pred(elt->op); |
1032 | if (call) { | 1226 | goto add_pred; |
1033 | err = filter_add_pred(ps, call, pred); | ||
1034 | filter_free_pred(pred); | ||
1035 | } else | ||
1036 | err = filter_add_subsystem_pred(ps, system, | ||
1037 | pred, filter_string); | ||
1038 | if (err) | ||
1039 | return err; | ||
1040 | |||
1041 | operand1 = operand2 = NULL; | ||
1042 | continue; | ||
1043 | } | 1227 | } |
1044 | 1228 | ||
1045 | if (!operand1 || !operand2) { | 1229 | if (!operand1 || !operand2) { |
@@ -1048,12 +1232,11 @@ static int replace_preds(struct event_subsystem *system, | |||
1048 | } | 1232 | } |
1049 | 1233 | ||
1050 | pred = create_pred(elt->op, operand1, operand2); | 1234 | pred = create_pred(elt->op, operand1, operand2); |
1051 | if (call) { | 1235 | add_pred: |
1052 | err = filter_add_pred(ps, call, pred); | 1236 | if (!pred) |
1053 | filter_free_pred(pred); | 1237 | return -ENOMEM; |
1054 | } else | 1238 | err = filter_add_pred(ps, call, filter, pred, dry_run); |
1055 | err = filter_add_subsystem_pred(ps, system, pred, | 1239 | filter_free_pred(pred); |
1056 | filter_string); | ||
1057 | if (err) | 1240 | if (err) |
1058 | return err; | 1241 | return err; |
1059 | 1242 | ||
@@ -1063,19 +1246,59 @@ static int replace_preds(struct event_subsystem *system, | |||
1063 | return 0; | 1246 | return 0; |
1064 | } | 1247 | } |
1065 | 1248 | ||
1066 | int apply_event_filter(struct ftrace_event_call *call, char *filter_string) | 1249 | static int replace_system_preds(struct event_subsystem *system, |
1250 | struct filter_parse_state *ps, | ||
1251 | char *filter_string) | ||
1067 | { | 1252 | { |
1253 | struct ftrace_event_call *call; | ||
1254 | bool fail = true; | ||
1068 | int err; | 1255 | int err; |
1069 | 1256 | ||
1257 | list_for_each_entry(call, &ftrace_events, list) { | ||
1258 | struct event_filter *filter = call->filter; | ||
1259 | |||
1260 | if (strcmp(call->class->system, system->name) != 0) | ||
1261 | continue; | ||
1262 | |||
1263 | /* try to see if the filter can be applied */ | ||
1264 | err = replace_preds(call, filter, ps, filter_string, true); | ||
1265 | if (err) | ||
1266 | continue; | ||
1267 | |||
1268 | /* really apply the filter */ | ||
1269 | filter_disable_preds(call); | ||
1270 | err = replace_preds(call, filter, ps, filter_string, false); | ||
1271 | if (err) | ||
1272 | filter_disable_preds(call); | ||
1273 | else { | ||
1274 | call->flags |= TRACE_EVENT_FL_FILTERED; | ||
1275 | replace_filter_string(filter, filter_string); | ||
1276 | } | ||
1277 | fail = false; | ||
1278 | } | ||
1279 | |||
1280 | if (fail) { | ||
1281 | parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); | ||
1282 | return -EINVAL; | ||
1283 | } | ||
1284 | return 0; | ||
1285 | } | ||
1286 | |||
1287 | int apply_event_filter(struct ftrace_event_call *call, char *filter_string) | ||
1288 | { | ||
1289 | int err; | ||
1070 | struct filter_parse_state *ps; | 1290 | struct filter_parse_state *ps; |
1071 | 1291 | ||
1072 | mutex_lock(&event_mutex); | 1292 | mutex_lock(&event_mutex); |
1073 | 1293 | ||
1294 | err = init_preds(call); | ||
1295 | if (err) | ||
1296 | goto out_unlock; | ||
1297 | |||
1074 | if (!strcmp(strstrip(filter_string), "0")) { | 1298 | if (!strcmp(strstrip(filter_string), "0")) { |
1075 | filter_disable_preds(call); | 1299 | filter_disable_preds(call); |
1076 | remove_filter_string(call->filter); | 1300 | remove_filter_string(call->filter); |
1077 | mutex_unlock(&event_mutex); | 1301 | goto out_unlock; |
1078 | return 0; | ||
1079 | } | 1302 | } |
1080 | 1303 | ||
1081 | err = -ENOMEM; | 1304 | err = -ENOMEM; |
@@ -1093,10 +1316,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) | |||
1093 | goto out; | 1316 | goto out; |
1094 | } | 1317 | } |
1095 | 1318 | ||
1096 | err = replace_preds(NULL, call, ps, filter_string); | 1319 | err = replace_preds(call, call->filter, ps, filter_string, false); |
1097 | if (err) | 1320 | if (err) |
1098 | append_filter_err(ps, call->filter); | 1321 | append_filter_err(ps, call->filter); |
1099 | 1322 | else | |
1323 | call->flags |= TRACE_EVENT_FL_FILTERED; | ||
1100 | out: | 1324 | out: |
1101 | filter_opstack_clear(ps); | 1325 | filter_opstack_clear(ps); |
1102 | postfix_clear(ps); | 1326 | postfix_clear(ps); |
@@ -1111,16 +1335,18 @@ int apply_subsystem_event_filter(struct event_subsystem *system, | |||
1111 | char *filter_string) | 1335 | char *filter_string) |
1112 | { | 1336 | { |
1113 | int err; | 1337 | int err; |
1114 | |||
1115 | struct filter_parse_state *ps; | 1338 | struct filter_parse_state *ps; |
1116 | 1339 | ||
1117 | mutex_lock(&event_mutex); | 1340 | mutex_lock(&event_mutex); |
1118 | 1341 | ||
1342 | err = init_subsystem_preds(system); | ||
1343 | if (err) | ||
1344 | goto out_unlock; | ||
1345 | |||
1119 | if (!strcmp(strstrip(filter_string), "0")) { | 1346 | if (!strcmp(strstrip(filter_string), "0")) { |
1120 | filter_free_subsystem_preds(system); | 1347 | filter_free_subsystem_preds(system); |
1121 | remove_filter_string(system->filter); | 1348 | remove_filter_string(system->filter); |
1122 | mutex_unlock(&event_mutex); | 1349 | goto out_unlock; |
1123 | return 0; | ||
1124 | } | 1350 | } |
1125 | 1351 | ||
1126 | err = -ENOMEM; | 1352 | err = -ENOMEM; |
@@ -1128,7 +1354,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system, | |||
1128 | if (!ps) | 1354 | if (!ps) |
1129 | goto out_unlock; | 1355 | goto out_unlock; |
1130 | 1356 | ||
1131 | filter_free_subsystem_preds(system); | ||
1132 | replace_filter_string(system->filter, filter_string); | 1357 | replace_filter_string(system->filter, filter_string); |
1133 | 1358 | ||
1134 | parse_init(ps, filter_ops, filter_string); | 1359 | parse_init(ps, filter_ops, filter_string); |
@@ -1138,7 +1363,7 @@ int apply_subsystem_event_filter(struct event_subsystem *system, | |||
1138 | goto out; | 1363 | goto out; |
1139 | } | 1364 | } |
1140 | 1365 | ||
1141 | err = replace_preds(system, NULL, ps, filter_string); | 1366 | err = replace_system_preds(system, ps, filter_string); |
1142 | if (err) | 1367 | if (err) |
1143 | append_filter_err(ps, system->filter); | 1368 | append_filter_err(ps, system->filter); |
1144 | 1369 | ||
@@ -1152,3 +1377,73 @@ out_unlock: | |||
1152 | return err; | 1377 | return err; |
1153 | } | 1378 | } |
1154 | 1379 | ||
1380 | #ifdef CONFIG_PERF_EVENTS | ||
1381 | |||
1382 | void ftrace_profile_free_filter(struct perf_event *event) | ||
1383 | { | ||
1384 | struct event_filter *filter = event->filter; | ||
1385 | |||
1386 | event->filter = NULL; | ||
1387 | __free_preds(filter); | ||
1388 | } | ||
1389 | |||
1390 | int ftrace_profile_set_filter(struct perf_event *event, int event_id, | ||
1391 | char *filter_str) | ||
1392 | { | ||
1393 | int err; | ||
1394 | struct event_filter *filter; | ||
1395 | struct filter_parse_state *ps; | ||
1396 | struct ftrace_event_call *call = NULL; | ||
1397 | |||
1398 | mutex_lock(&event_mutex); | ||
1399 | |||
1400 | list_for_each_entry(call, &ftrace_events, list) { | ||
1401 | if (call->event.type == event_id) | ||
1402 | break; | ||
1403 | } | ||
1404 | |||
1405 | err = -EINVAL; | ||
1406 | if (&call->list == &ftrace_events) | ||
1407 | goto out_unlock; | ||
1408 | |||
1409 | err = -EEXIST; | ||
1410 | if (event->filter) | ||
1411 | goto out_unlock; | ||
1412 | |||
1413 | filter = __alloc_preds(); | ||
1414 | if (IS_ERR(filter)) { | ||
1415 | err = PTR_ERR(filter); | ||
1416 | goto out_unlock; | ||
1417 | } | ||
1418 | |||
1419 | err = -ENOMEM; | ||
1420 | ps = kzalloc(sizeof(*ps), GFP_KERNEL); | ||
1421 | if (!ps) | ||
1422 | goto free_preds; | ||
1423 | |||
1424 | parse_init(ps, filter_ops, filter_str); | ||
1425 | err = filter_parse(ps); | ||
1426 | if (err) | ||
1427 | goto free_ps; | ||
1428 | |||
1429 | err = replace_preds(call, filter, ps, filter_str, false); | ||
1430 | if (!err) | ||
1431 | event->filter = filter; | ||
1432 | |||
1433 | free_ps: | ||
1434 | filter_opstack_clear(ps); | ||
1435 | postfix_clear(ps); | ||
1436 | kfree(ps); | ||
1437 | |||
1438 | free_preds: | ||
1439 | if (err) | ||
1440 | __free_preds(filter); | ||
1441 | |||
1442 | out_unlock: | ||
1443 | mutex_unlock(&event_mutex); | ||
1444 | |||
1445 | return err; | ||
1446 | } | ||
1447 | |||
1448 | #endif /* CONFIG_PERF_EVENTS */ | ||
1449 | |||
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index d06cf898dc86..4b74d71705c0 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
@@ -15,192 +15,159 @@ | |||
15 | 15 | ||
16 | #include "trace_output.h" | 16 | #include "trace_output.h" |
17 | 17 | ||
18 | #undef TRACE_SYSTEM | ||
19 | #define TRACE_SYSTEM ftrace | ||
18 | 20 | ||
19 | #undef TRACE_STRUCT | 21 | /* not needed for this file */ |
20 | #define TRACE_STRUCT(args...) args | 22 | #undef __field_struct |
21 | 23 | #define __field_struct(type, item) | |
22 | extern void __bad_type_size(void); | ||
23 | |||
24 | #undef TRACE_FIELD | ||
25 | #define TRACE_FIELD(type, item, assign) \ | ||
26 | if (sizeof(type) != sizeof(field.item)) \ | ||
27 | __bad_type_size(); \ | ||
28 | ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ | ||
29 | "offset:%u;\tsize:%u;\n", \ | ||
30 | (unsigned int)offsetof(typeof(field), item), \ | ||
31 | (unsigned int)sizeof(field.item)); \ | ||
32 | if (!ret) \ | ||
33 | return 0; | ||
34 | |||
35 | |||
36 | #undef TRACE_FIELD_SPECIAL | ||
37 | #define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ | ||
38 | ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ | ||
39 | "offset:%u;\tsize:%u;\n", \ | ||
40 | (unsigned int)offsetof(typeof(field), item), \ | ||
41 | (unsigned int)sizeof(field.item)); \ | ||
42 | if (!ret) \ | ||
43 | return 0; | ||
44 | |||
45 | #undef TRACE_FIELD_ZERO_CHAR | ||
46 | #define TRACE_FIELD_ZERO_CHAR(item) \ | ||
47 | ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \ | ||
48 | "offset:%u;\tsize:0;\n", \ | ||
49 | (unsigned int)offsetof(typeof(field), item)); \ | ||
50 | if (!ret) \ | ||
51 | return 0; | ||
52 | |||
53 | #undef TRACE_FIELD_SIGN | ||
54 | #define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ | ||
55 | TRACE_FIELD(type, item, assign) | ||
56 | |||
57 | #undef TP_RAW_FMT | ||
58 | #define TP_RAW_FMT(args...) args | ||
59 | |||
60 | #undef TRACE_EVENT_FORMAT | ||
61 | #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ | ||
62 | static int \ | ||
63 | ftrace_format_##call(struct trace_seq *s) \ | ||
64 | { \ | ||
65 | struct args field; \ | ||
66 | int ret; \ | ||
67 | \ | ||
68 | tstruct; \ | ||
69 | \ | ||
70 | trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ | ||
71 | \ | ||
72 | return ret; \ | ||
73 | } | ||
74 | 24 | ||
75 | #undef TRACE_EVENT_FORMAT_NOFILTER | 25 | #undef __field |
76 | #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ | 26 | #define __field(type, item) type item; |
77 | tpfmt) \ | ||
78 | static int \ | ||
79 | ftrace_format_##call(struct trace_seq *s) \ | ||
80 | { \ | ||
81 | struct args field; \ | ||
82 | int ret; \ | ||
83 | \ | ||
84 | tstruct; \ | ||
85 | \ | ||
86 | trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ | ||
87 | \ | ||
88 | return ret; \ | ||
89 | } | ||
90 | 27 | ||
91 | #include "trace_event_types.h" | 28 | #undef __field_desc |
29 | #define __field_desc(type, container, item) type item; | ||
92 | 30 | ||
93 | #undef TRACE_ZERO_CHAR | 31 | #undef __array |
94 | #define TRACE_ZERO_CHAR(arg) | 32 | #define __array(type, item, size) type item[size]; |
95 | 33 | ||
96 | #undef TRACE_FIELD | 34 | #undef __array_desc |
97 | #define TRACE_FIELD(type, item, assign)\ | 35 | #define __array_desc(type, container, item, size) type item[size]; |
98 | entry->item = assign; | ||
99 | 36 | ||
100 | #undef TRACE_FIELD | 37 | #undef __dynamic_array |
101 | #define TRACE_FIELD(type, item, assign)\ | 38 | #define __dynamic_array(type, item) type item[]; |
102 | entry->item = assign; | ||
103 | 39 | ||
104 | #undef TRACE_FIELD_SIGN | 40 | #undef F_STRUCT |
105 | #define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ | 41 | #define F_STRUCT(args...) args |
106 | TRACE_FIELD(type, item, assign) | ||
107 | 42 | ||
108 | #undef TP_CMD | 43 | #undef F_printk |
109 | #define TP_CMD(cmd...) cmd | 44 | #define F_printk(fmt, args...) fmt, args |
110 | 45 | ||
111 | #undef TRACE_ENTRY | 46 | #undef FTRACE_ENTRY |
112 | #define TRACE_ENTRY entry | 47 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ |
48 | struct ____ftrace_##name { \ | ||
49 | tstruct \ | ||
50 | }; \ | ||
51 | static void __always_unused ____ftrace_check_##name(void) \ | ||
52 | { \ | ||
53 | struct ____ftrace_##name *__entry = NULL; \ | ||
54 | \ | ||
55 | /* force compile-time check on F_printk() */ \ | ||
56 | printk(print); \ | ||
57 | } | ||
113 | 58 | ||
114 | #undef TRACE_FIELD_SPECIAL | 59 | #undef FTRACE_ENTRY_DUP |
115 | #define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ | 60 | #define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \ |
116 | cmd; | 61 | FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) |
117 | 62 | ||
118 | #undef TRACE_EVENT_FORMAT | 63 | #include "trace_entries.h" |
119 | #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ | ||
120 | int ftrace_define_fields_##call(void); \ | ||
121 | static int ftrace_raw_init_event_##call(void); \ | ||
122 | \ | ||
123 | struct ftrace_event_call __used \ | ||
124 | __attribute__((__aligned__(4))) \ | ||
125 | __attribute__((section("_ftrace_events"))) event_##call = { \ | ||
126 | .name = #call, \ | ||
127 | .id = proto, \ | ||
128 | .system = __stringify(TRACE_SYSTEM), \ | ||
129 | .raw_init = ftrace_raw_init_event_##call, \ | ||
130 | .show_format = ftrace_format_##call, \ | ||
131 | .define_fields = ftrace_define_fields_##call, \ | ||
132 | }; \ | ||
133 | static int ftrace_raw_init_event_##call(void) \ | ||
134 | { \ | ||
135 | INIT_LIST_HEAD(&event_##call.fields); \ | ||
136 | init_preds(&event_##call); \ | ||
137 | return 0; \ | ||
138 | } \ | ||
139 | |||
140 | #undef TRACE_EVENT_FORMAT_NOFILTER | ||
141 | #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ | ||
142 | tpfmt) \ | ||
143 | \ | ||
144 | struct ftrace_event_call __used \ | ||
145 | __attribute__((__aligned__(4))) \ | ||
146 | __attribute__((section("_ftrace_events"))) event_##call = { \ | ||
147 | .name = #call, \ | ||
148 | .id = proto, \ | ||
149 | .system = __stringify(TRACE_SYSTEM), \ | ||
150 | .show_format = ftrace_format_##call, \ | ||
151 | }; | ||
152 | 64 | ||
153 | #include "trace_event_types.h" | 65 | #undef __field |
154 | 66 | #define __field(type, item) \ | |
155 | #undef TRACE_FIELD | ||
156 | #define TRACE_FIELD(type, item, assign) \ | ||
157 | ret = trace_define_field(event_call, #type, #item, \ | 67 | ret = trace_define_field(event_call, #type, #item, \ |
158 | offsetof(typeof(field), item), \ | 68 | offsetof(typeof(field), item), \ |
159 | sizeof(field.item), is_signed_type(type)); \ | 69 | sizeof(field.item), \ |
70 | is_signed_type(type), FILTER_OTHER); \ | ||
160 | if (ret) \ | 71 | if (ret) \ |
161 | return ret; | 72 | return ret; |
162 | 73 | ||
163 | #undef TRACE_FIELD_SPECIAL | 74 | #undef __field_desc |
164 | #define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ | 75 | #define __field_desc(type, container, item) \ |
165 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ | 76 | ret = trace_define_field(event_call, #type, #item, \ |
77 | offsetof(typeof(field), \ | ||
78 | container.item), \ | ||
79 | sizeof(field.container.item), \ | ||
80 | is_signed_type(type), FILTER_OTHER); \ | ||
81 | if (ret) \ | ||
82 | return ret; | ||
83 | |||
84 | #undef __array | ||
85 | #define __array(type, item, len) \ | ||
86 | do { \ | ||
87 | BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ | ||
88 | mutex_lock(&event_storage_mutex); \ | ||
89 | snprintf(event_storage, sizeof(event_storage), \ | ||
90 | "%s[%d]", #type, len); \ | ||
91 | ret = trace_define_field(event_call, event_storage, #item, \ | ||
166 | offsetof(typeof(field), item), \ | 92 | offsetof(typeof(field), item), \ |
167 | sizeof(field.item), 0); \ | 93 | sizeof(field.item), \ |
94 | is_signed_type(type), FILTER_OTHER); \ | ||
95 | mutex_unlock(&event_storage_mutex); \ | ||
96 | if (ret) \ | ||
97 | return ret; \ | ||
98 | } while (0); | ||
99 | |||
100 | #undef __array_desc | ||
101 | #define __array_desc(type, container, item, len) \ | ||
102 | BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ | ||
103 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ | ||
104 | offsetof(typeof(field), \ | ||
105 | container.item), \ | ||
106 | sizeof(field.container.item), \ | ||
107 | is_signed_type(type), FILTER_OTHER); \ | ||
168 | if (ret) \ | 108 | if (ret) \ |
169 | return ret; | 109 | return ret; |
170 | 110 | ||
171 | #undef TRACE_FIELD_SIGN | 111 | #undef __dynamic_array |
172 | #define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ | 112 | #define __dynamic_array(type, item) \ |
173 | ret = trace_define_field(event_call, #type, #item, \ | 113 | ret = trace_define_field(event_call, #type, #item, \ |
174 | offsetof(typeof(field), item), \ | 114 | offsetof(typeof(field), item), \ |
175 | sizeof(field.item), is_signed); \ | 115 | 0, is_signed_type(type), FILTER_OTHER);\ |
176 | if (ret) \ | 116 | if (ret) \ |
177 | return ret; | 117 | return ret; |
178 | 118 | ||
179 | #undef TRACE_FIELD_ZERO_CHAR | 119 | #undef FTRACE_ENTRY |
180 | #define TRACE_FIELD_ZERO_CHAR(item) | 120 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ |
181 | |||
182 | #undef TRACE_EVENT_FORMAT | ||
183 | #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ | ||
184 | int \ | 121 | int \ |
185 | ftrace_define_fields_##call(void) \ | 122 | ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ |
186 | { \ | 123 | { \ |
187 | struct ftrace_event_call *event_call = &event_##call; \ | 124 | struct struct_name field; \ |
188 | struct args field; \ | ||
189 | int ret; \ | 125 | int ret; \ |
190 | \ | 126 | \ |
191 | __common_field(unsigned char, type, 0); \ | ||
192 | __common_field(unsigned char, flags, 0); \ | ||
193 | __common_field(unsigned char, preempt_count, 0); \ | ||
194 | __common_field(int, pid, 1); \ | ||
195 | __common_field(int, tgid, 1); \ | ||
196 | \ | ||
197 | tstruct; \ | 127 | tstruct; \ |
198 | \ | 128 | \ |
199 | return ret; \ | 129 | return ret; \ |
200 | } | 130 | } |
201 | 131 | ||
202 | #undef TRACE_EVENT_FORMAT_NOFILTER | 132 | #include "trace_entries.h" |
203 | #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ | 133 | |
204 | tpfmt) | 134 | #undef __entry |
135 | #define __entry REC | ||
136 | |||
137 | #undef __field | ||
138 | #define __field(type, item) | ||
139 | |||
140 | #undef __field_desc | ||
141 | #define __field_desc(type, container, item) | ||
142 | |||
143 | #undef __array | ||
144 | #define __array(type, item, len) | ||
145 | |||
146 | #undef __array_desc | ||
147 | #define __array_desc(type, container, item, len) | ||
148 | |||
149 | #undef __dynamic_array | ||
150 | #define __dynamic_array(type, item) | ||
151 | |||
152 | #undef F_printk | ||
153 | #define F_printk(fmt, args...) #fmt ", " __stringify(args) | ||
154 | |||
155 | #undef FTRACE_ENTRY | ||
156 | #define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \ | ||
157 | \ | ||
158 | struct ftrace_event_class event_class_ftrace_##call = { \ | ||
159 | .system = __stringify(TRACE_SYSTEM), \ | ||
160 | .define_fields = ftrace_define_fields_##call, \ | ||
161 | .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ | ||
162 | }; \ | ||
163 | \ | ||
164 | struct ftrace_event_call __used \ | ||
165 | __attribute__((__aligned__(4))) \ | ||
166 | __attribute__((section("_ftrace_events"))) event_##call = { \ | ||
167 | .name = #call, \ | ||
168 | .event.type = etype, \ | ||
169 | .class = &event_class_ftrace_##call, \ | ||
170 | .print_fmt = print, \ | ||
171 | }; \ | ||
205 | 172 | ||
206 | #include "trace_event_types.h" | 173 | #include "trace_entries.h" |
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 90f134764837..16aee4d44e8f 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c | |||
@@ -54,14 +54,14 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) | |||
54 | struct trace_array_cpu *data; | 54 | struct trace_array_cpu *data; |
55 | unsigned long flags; | 55 | unsigned long flags; |
56 | long disabled; | 56 | long disabled; |
57 | int cpu, resched; | 57 | int cpu; |
58 | int pc; | 58 | int pc; |
59 | 59 | ||
60 | if (unlikely(!ftrace_function_enabled)) | 60 | if (unlikely(!ftrace_function_enabled)) |
61 | return; | 61 | return; |
62 | 62 | ||
63 | pc = preempt_count(); | 63 | pc = preempt_count(); |
64 | resched = ftrace_preempt_disable(); | 64 | preempt_disable_notrace(); |
65 | local_save_flags(flags); | 65 | local_save_flags(flags); |
66 | cpu = raw_smp_processor_id(); | 66 | cpu = raw_smp_processor_id(); |
67 | data = tr->data[cpu]; | 67 | data = tr->data[cpu]; |
@@ -71,7 +71,7 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) | |||
71 | trace_function(tr, ip, parent_ip, flags, pc); | 71 | trace_function(tr, ip, parent_ip, flags, pc); |
72 | 72 | ||
73 | atomic_dec(&data->disabled); | 73 | atomic_dec(&data->disabled); |
74 | ftrace_preempt_enable(resched); | 74 | preempt_enable_notrace(); |
75 | } | 75 | } |
76 | 76 | ||
77 | static void | 77 | static void |
@@ -288,11 +288,9 @@ static int | |||
288 | ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, | 288 | ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, |
289 | struct ftrace_probe_ops *ops, void *data) | 289 | struct ftrace_probe_ops *ops, void *data) |
290 | { | 290 | { |
291 | char str[KSYM_SYMBOL_LEN]; | ||
292 | long count = (long)data; | 291 | long count = (long)data; |
293 | 292 | ||
294 | kallsyms_lookup(ip, NULL, NULL, NULL, str); | 293 | seq_printf(m, "%ps:", (void *)ip); |
295 | seq_printf(m, "%s:", str); | ||
296 | 294 | ||
297 | if (ops == &traceon_probe_ops) | 295 | if (ops == &traceon_probe_ops) |
298 | seq_printf(m, "traceon"); | 296 | seq_printf(m, "traceon"); |
@@ -302,8 +300,7 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, | |||
302 | if (count == -1) | 300 | if (count == -1) |
303 | seq_printf(m, ":unlimited\n"); | 301 | seq_printf(m, ":unlimited\n"); |
304 | else | 302 | else |
305 | seq_printf(m, ":count=%ld", count); | 303 | seq_printf(m, ":count=%ld\n", count); |
306 | seq_putc(m, '\n'); | ||
307 | 304 | ||
308 | return 0; | 305 | return 0; |
309 | } | 306 | } |
@@ -364,7 +361,7 @@ ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable) | |||
364 | out_reg: | 361 | out_reg: |
365 | ret = register_ftrace_function_probe(glob, ops, count); | 362 | ret = register_ftrace_function_probe(glob, ops, count); |
366 | 363 | ||
367 | return ret; | 364 | return ret < 0 ? ret : 0; |
368 | } | 365 | } |
369 | 366 | ||
370 | static struct ftrace_func_command ftrace_traceon_cmd = { | 367 | static struct ftrace_func_command ftrace_traceon_cmd = { |
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index d2249abafb53..76b05980225c 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -9,14 +9,31 @@ | |||
9 | #include <linux/debugfs.h> | 9 | #include <linux/debugfs.h> |
10 | #include <linux/uaccess.h> | 10 | #include <linux/uaccess.h> |
11 | #include <linux/ftrace.h> | 11 | #include <linux/ftrace.h> |
12 | #include <linux/slab.h> | ||
12 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
13 | 14 | ||
14 | #include "trace.h" | 15 | #include "trace.h" |
15 | #include "trace_output.h" | 16 | #include "trace_output.h" |
16 | 17 | ||
17 | struct fgraph_data { | 18 | /* When set, irq functions will be ignored */ |
19 | static int ftrace_graph_skip_irqs; | ||
20 | |||
21 | struct fgraph_cpu_data { | ||
18 | pid_t last_pid; | 22 | pid_t last_pid; |
19 | int depth; | 23 | int depth; |
24 | int depth_irq; | ||
25 | int ignore; | ||
26 | unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; | ||
27 | }; | ||
28 | |||
29 | struct fgraph_data { | ||
30 | struct fgraph_cpu_data __percpu *cpu_data; | ||
31 | |||
32 | /* Place to preserve last processed entry. */ | ||
33 | struct ftrace_graph_ent_entry ent; | ||
34 | struct ftrace_graph_ret_entry ret; | ||
35 | int failed; | ||
36 | int cpu; | ||
20 | }; | 37 | }; |
21 | 38 | ||
22 | #define TRACE_GRAPH_INDENT 2 | 39 | #define TRACE_GRAPH_INDENT 2 |
@@ -27,7 +44,8 @@ struct fgraph_data { | |||
27 | #define TRACE_GRAPH_PRINT_OVERHEAD 0x4 | 44 | #define TRACE_GRAPH_PRINT_OVERHEAD 0x4 |
28 | #define TRACE_GRAPH_PRINT_PROC 0x8 | 45 | #define TRACE_GRAPH_PRINT_PROC 0x8 |
29 | #define TRACE_GRAPH_PRINT_DURATION 0x10 | 46 | #define TRACE_GRAPH_PRINT_DURATION 0x10 |
30 | #define TRACE_GRAPH_PRINT_ABS_TIME 0X20 | 47 | #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 |
48 | #define TRACE_GRAPH_PRINT_IRQS 0x40 | ||
31 | 49 | ||
32 | static struct tracer_opt trace_opts[] = { | 50 | static struct tracer_opt trace_opts[] = { |
33 | /* Display overruns? (for self-debug purpose) */ | 51 | /* Display overruns? (for self-debug purpose) */ |
@@ -42,17 +60,19 @@ static struct tracer_opt trace_opts[] = { | |||
42 | { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, | 60 | { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, |
43 | /* Display absolute time of an entry */ | 61 | /* Display absolute time of an entry */ |
44 | { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, | 62 | { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, |
63 | /* Display interrupts */ | ||
64 | { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) }, | ||
45 | { } /* Empty entry */ | 65 | { } /* Empty entry */ |
46 | }; | 66 | }; |
47 | 67 | ||
48 | static struct tracer_flags tracer_flags = { | 68 | static struct tracer_flags tracer_flags = { |
49 | /* Don't display overruns and proc by default */ | 69 | /* Don't display overruns and proc by default */ |
50 | .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | | 70 | .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | |
51 | TRACE_GRAPH_PRINT_DURATION, | 71 | TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS, |
52 | .opts = trace_opts | 72 | .opts = trace_opts |
53 | }; | 73 | }; |
54 | 74 | ||
55 | /* pid on the last trace processed */ | 75 | static struct trace_array *graph_array; |
56 | 76 | ||
57 | 77 | ||
58 | /* Add a function return address to the trace stack on thread info.*/ | 78 | /* Add a function return address to the trace stack on thread info.*/ |
@@ -124,7 +144,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, | |||
124 | if (unlikely(current->ret_stack[index].fp != frame_pointer)) { | 144 | if (unlikely(current->ret_stack[index].fp != frame_pointer)) { |
125 | ftrace_graph_stop(); | 145 | ftrace_graph_stop(); |
126 | WARN(1, "Bad frame pointer: expected %lx, received %lx\n" | 146 | WARN(1, "Bad frame pointer: expected %lx, received %lx\n" |
127 | " from func %pF return to %lx\n", | 147 | " from func %ps return to %lx\n", |
128 | current->ret_stack[index].fp, | 148 | current->ret_stack[index].fp, |
129 | frame_pointer, | 149 | frame_pointer, |
130 | (void *)current->ret_stack[index].func, | 150 | (void *)current->ret_stack[index].func, |
@@ -166,10 +186,183 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) | |||
166 | return ret; | 186 | return ret; |
167 | } | 187 | } |
168 | 188 | ||
189 | int __trace_graph_entry(struct trace_array *tr, | ||
190 | struct ftrace_graph_ent *trace, | ||
191 | unsigned long flags, | ||
192 | int pc) | ||
193 | { | ||
194 | struct ftrace_event_call *call = &event_funcgraph_entry; | ||
195 | struct ring_buffer_event *event; | ||
196 | struct ring_buffer *buffer = tr->buffer; | ||
197 | struct ftrace_graph_ent_entry *entry; | ||
198 | |||
199 | if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) | ||
200 | return 0; | ||
201 | |||
202 | event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, | ||
203 | sizeof(*entry), flags, pc); | ||
204 | if (!event) | ||
205 | return 0; | ||
206 | entry = ring_buffer_event_data(event); | ||
207 | entry->graph_ent = *trace; | ||
208 | if (!filter_current_check_discard(buffer, call, entry, event)) | ||
209 | ring_buffer_unlock_commit(buffer, event); | ||
210 | |||
211 | return 1; | ||
212 | } | ||
213 | |||
214 | static inline int ftrace_graph_ignore_irqs(void) | ||
215 | { | ||
216 | if (!ftrace_graph_skip_irqs) | ||
217 | return 0; | ||
218 | |||
219 | return in_irq(); | ||
220 | } | ||
221 | |||
222 | int trace_graph_entry(struct ftrace_graph_ent *trace) | ||
223 | { | ||
224 | struct trace_array *tr = graph_array; | ||
225 | struct trace_array_cpu *data; | ||
226 | unsigned long flags; | ||
227 | long disabled; | ||
228 | int ret; | ||
229 | int cpu; | ||
230 | int pc; | ||
231 | |||
232 | if (!ftrace_trace_task(current)) | ||
233 | return 0; | ||
234 | |||
235 | /* trace it when it is-nested-in or is a function enabled. */ | ||
236 | if (!(trace->depth || ftrace_graph_addr(trace->func)) || | ||
237 | ftrace_graph_ignore_irqs()) | ||
238 | return 0; | ||
239 | |||
240 | local_irq_save(flags); | ||
241 | cpu = raw_smp_processor_id(); | ||
242 | data = tr->data[cpu]; | ||
243 | disabled = atomic_inc_return(&data->disabled); | ||
244 | if (likely(disabled == 1)) { | ||
245 | pc = preempt_count(); | ||
246 | ret = __trace_graph_entry(tr, trace, flags, pc); | ||
247 | } else { | ||
248 | ret = 0; | ||
249 | } | ||
250 | |||
251 | atomic_dec(&data->disabled); | ||
252 | local_irq_restore(flags); | ||
253 | |||
254 | return ret; | ||
255 | } | ||
256 | |||
257 | int trace_graph_thresh_entry(struct ftrace_graph_ent *trace) | ||
258 | { | ||
259 | if (tracing_thresh) | ||
260 | return 1; | ||
261 | else | ||
262 | return trace_graph_entry(trace); | ||
263 | } | ||
264 | |||
265 | static void | ||
266 | __trace_graph_function(struct trace_array *tr, | ||
267 | unsigned long ip, unsigned long flags, int pc) | ||
268 | { | ||
269 | u64 time = trace_clock_local(); | ||
270 | struct ftrace_graph_ent ent = { | ||
271 | .func = ip, | ||
272 | .depth = 0, | ||
273 | }; | ||
274 | struct ftrace_graph_ret ret = { | ||
275 | .func = ip, | ||
276 | .depth = 0, | ||
277 | .calltime = time, | ||
278 | .rettime = time, | ||
279 | }; | ||
280 | |||
281 | __trace_graph_entry(tr, &ent, flags, pc); | ||
282 | __trace_graph_return(tr, &ret, flags, pc); | ||
283 | } | ||
284 | |||
285 | void | ||
286 | trace_graph_function(struct trace_array *tr, | ||
287 | unsigned long ip, unsigned long parent_ip, | ||
288 | unsigned long flags, int pc) | ||
289 | { | ||
290 | __trace_graph_function(tr, ip, flags, pc); | ||
291 | } | ||
292 | |||
293 | void __trace_graph_return(struct trace_array *tr, | ||
294 | struct ftrace_graph_ret *trace, | ||
295 | unsigned long flags, | ||
296 | int pc) | ||
297 | { | ||
298 | struct ftrace_event_call *call = &event_funcgraph_exit; | ||
299 | struct ring_buffer_event *event; | ||
300 | struct ring_buffer *buffer = tr->buffer; | ||
301 | struct ftrace_graph_ret_entry *entry; | ||
302 | |||
303 | if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) | ||
304 | return; | ||
305 | |||
306 | event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, | ||
307 | sizeof(*entry), flags, pc); | ||
308 | if (!event) | ||
309 | return; | ||
310 | entry = ring_buffer_event_data(event); | ||
311 | entry->ret = *trace; | ||
312 | if (!filter_current_check_discard(buffer, call, entry, event)) | ||
313 | ring_buffer_unlock_commit(buffer, event); | ||
314 | } | ||
315 | |||
316 | void trace_graph_return(struct ftrace_graph_ret *trace) | ||
317 | { | ||
318 | struct trace_array *tr = graph_array; | ||
319 | struct trace_array_cpu *data; | ||
320 | unsigned long flags; | ||
321 | long disabled; | ||
322 | int cpu; | ||
323 | int pc; | ||
324 | |||
325 | local_irq_save(flags); | ||
326 | cpu = raw_smp_processor_id(); | ||
327 | data = tr->data[cpu]; | ||
328 | disabled = atomic_inc_return(&data->disabled); | ||
329 | if (likely(disabled == 1)) { | ||
330 | pc = preempt_count(); | ||
331 | __trace_graph_return(tr, trace, flags, pc); | ||
332 | } | ||
333 | atomic_dec(&data->disabled); | ||
334 | local_irq_restore(flags); | ||
335 | } | ||
336 | |||
337 | void set_graph_array(struct trace_array *tr) | ||
338 | { | ||
339 | graph_array = tr; | ||
340 | |||
341 | /* Make graph_array visible before we start tracing */ | ||
342 | |||
343 | smp_mb(); | ||
344 | } | ||
345 | |||
346 | void trace_graph_thresh_return(struct ftrace_graph_ret *trace) | ||
347 | { | ||
348 | if (tracing_thresh && | ||
349 | (trace->rettime - trace->calltime < tracing_thresh)) | ||
350 | return; | ||
351 | else | ||
352 | trace_graph_return(trace); | ||
353 | } | ||
354 | |||
169 | static int graph_trace_init(struct trace_array *tr) | 355 | static int graph_trace_init(struct trace_array *tr) |
170 | { | 356 | { |
171 | int ret = register_ftrace_graph(&trace_graph_return, | 357 | int ret; |
172 | &trace_graph_entry); | 358 | |
359 | set_graph_array(tr); | ||
360 | if (tracing_thresh) | ||
361 | ret = register_ftrace_graph(&trace_graph_thresh_return, | ||
362 | &trace_graph_thresh_entry); | ||
363 | else | ||
364 | ret = register_ftrace_graph(&trace_graph_return, | ||
365 | &trace_graph_entry); | ||
173 | if (ret) | 366 | if (ret) |
174 | return ret; | 367 | return ret; |
175 | tracing_start_cmdline_record(); | 368 | tracing_start_cmdline_record(); |
@@ -183,43 +376,19 @@ static void graph_trace_reset(struct trace_array *tr) | |||
183 | unregister_ftrace_graph(); | 376 | unregister_ftrace_graph(); |
184 | } | 377 | } |
185 | 378 | ||
186 | static inline int log10_cpu(int nb) | 379 | static int max_bytes_for_cpu; |
187 | { | ||
188 | if (nb / 100) | ||
189 | return 3; | ||
190 | if (nb / 10) | ||
191 | return 2; | ||
192 | return 1; | ||
193 | } | ||
194 | 380 | ||
195 | static enum print_line_t | 381 | static enum print_line_t |
196 | print_graph_cpu(struct trace_seq *s, int cpu) | 382 | print_graph_cpu(struct trace_seq *s, int cpu) |
197 | { | 383 | { |
198 | int i; | ||
199 | int ret; | 384 | int ret; |
200 | int log10_this = log10_cpu(cpu); | ||
201 | int log10_all = log10_cpu(cpumask_weight(cpu_online_mask)); | ||
202 | |||
203 | 385 | ||
204 | /* | 386 | /* |
205 | * Start with a space character - to make it stand out | 387 | * Start with a space character - to make it stand out |
206 | * to the right a bit when trace output is pasted into | 388 | * to the right a bit when trace output is pasted into |
207 | * email: | 389 | * email: |
208 | */ | 390 | */ |
209 | ret = trace_seq_printf(s, " "); | 391 | ret = trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu); |
210 | |||
211 | /* | ||
212 | * Tricky - we space the CPU field according to the max | ||
213 | * number of online CPUs. On a 2-cpu system it would take | ||
214 | * a maximum of 1 digit - on a 128 cpu system it would | ||
215 | * take up to 3 digits: | ||
216 | */ | ||
217 | for (i = 0; i < log10_all - log10_this; i++) { | ||
218 | ret = trace_seq_printf(s, " "); | ||
219 | if (!ret) | ||
220 | return TRACE_TYPE_PARTIAL_LINE; | ||
221 | } | ||
222 | ret = trace_seq_printf(s, "%d) ", cpu); | ||
223 | if (!ret) | 392 | if (!ret) |
224 | return TRACE_TYPE_PARTIAL_LINE; | 393 | return TRACE_TYPE_PARTIAL_LINE; |
225 | 394 | ||
@@ -270,6 +439,15 @@ print_graph_proc(struct trace_seq *s, pid_t pid) | |||
270 | } | 439 | } |
271 | 440 | ||
272 | 441 | ||
442 | static enum print_line_t | ||
443 | print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | ||
444 | { | ||
445 | if (!trace_seq_putc(s, ' ')) | ||
446 | return 0; | ||
447 | |||
448 | return trace_print_lat_fmt(s, entry); | ||
449 | } | ||
450 | |||
273 | /* If the pid changed since the last trace, output this event */ | 451 | /* If the pid changed since the last trace, output this event */ |
274 | static enum print_line_t | 452 | static enum print_line_t |
275 | verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) | 453 | verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) |
@@ -281,7 +459,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) | |||
281 | if (!data) | 459 | if (!data) |
282 | return TRACE_TYPE_HANDLED; | 460 | return TRACE_TYPE_HANDLED; |
283 | 461 | ||
284 | last_pid = &(per_cpu_ptr(data, cpu)->last_pid); | 462 | last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); |
285 | 463 | ||
286 | if (*last_pid == pid) | 464 | if (*last_pid == pid) |
287 | return TRACE_TYPE_HANDLED; | 465 | return TRACE_TYPE_HANDLED; |
@@ -332,27 +510,59 @@ static struct ftrace_graph_ret_entry * | |||
332 | get_return_for_leaf(struct trace_iterator *iter, | 510 | get_return_for_leaf(struct trace_iterator *iter, |
333 | struct ftrace_graph_ent_entry *curr) | 511 | struct ftrace_graph_ent_entry *curr) |
334 | { | 512 | { |
335 | struct ring_buffer_iter *ring_iter; | 513 | struct fgraph_data *data = iter->private; |
514 | struct ring_buffer_iter *ring_iter = NULL; | ||
336 | struct ring_buffer_event *event; | 515 | struct ring_buffer_event *event; |
337 | struct ftrace_graph_ret_entry *next; | 516 | struct ftrace_graph_ret_entry *next; |
338 | 517 | ||
339 | ring_iter = iter->buffer_iter[iter->cpu]; | 518 | /* |
519 | * If the previous output failed to write to the seq buffer, | ||
520 | * then we just reuse the data from before. | ||
521 | */ | ||
522 | if (data && data->failed) { | ||
523 | curr = &data->ent; | ||
524 | next = &data->ret; | ||
525 | } else { | ||
526 | |||
527 | ring_iter = iter->buffer_iter[iter->cpu]; | ||
528 | |||
529 | /* First peek to compare current entry and the next one */ | ||
530 | if (ring_iter) | ||
531 | event = ring_buffer_iter_peek(ring_iter, NULL); | ||
532 | else { | ||
533 | /* | ||
534 | * We need to consume the current entry to see | ||
535 | * the next one. | ||
536 | */ | ||
537 | ring_buffer_consume(iter->tr->buffer, iter->cpu, | ||
538 | NULL, NULL); | ||
539 | event = ring_buffer_peek(iter->tr->buffer, iter->cpu, | ||
540 | NULL, NULL); | ||
541 | } | ||
340 | 542 | ||
341 | /* First peek to compare current entry and the next one */ | 543 | if (!event) |
342 | if (ring_iter) | 544 | return NULL; |
343 | event = ring_buffer_iter_peek(ring_iter, NULL); | 545 | |
344 | else { | 546 | next = ring_buffer_event_data(event); |
345 | /* We need to consume the current entry to see the next one */ | 547 | |
346 | ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); | 548 | if (data) { |
347 | event = ring_buffer_peek(iter->tr->buffer, iter->cpu, | 549 | /* |
348 | NULL); | 550 | * Save current and next entries for later reference |
551 | * if the output fails. | ||
552 | */ | ||
553 | data->ent = *curr; | ||
554 | /* | ||
555 | * If the next event is not a return type, then | ||
556 | * we only care about what type it is. Otherwise we can | ||
557 | * safely copy the entire event. | ||
558 | */ | ||
559 | if (next->ent.type == TRACE_GRAPH_RET) | ||
560 | data->ret = *next; | ||
561 | else | ||
562 | data->ret.ent.type = next->ent.type; | ||
563 | } | ||
349 | } | 564 | } |
350 | 565 | ||
351 | if (!event) | ||
352 | return NULL; | ||
353 | |||
354 | next = ring_buffer_event_data(event); | ||
355 | |||
356 | if (next->ent.type != TRACE_GRAPH_RET) | 566 | if (next->ent.type != TRACE_GRAPH_RET) |
357 | return NULL; | 567 | return NULL; |
358 | 568 | ||
@@ -369,17 +579,18 @@ get_return_for_leaf(struct trace_iterator *iter, | |||
369 | 579 | ||
370 | /* Signal a overhead of time execution to the output */ | 580 | /* Signal a overhead of time execution to the output */ |
371 | static int | 581 | static int |
372 | print_graph_overhead(unsigned long long duration, struct trace_seq *s) | 582 | print_graph_overhead(unsigned long long duration, struct trace_seq *s, |
583 | u32 flags) | ||
373 | { | 584 | { |
374 | /* If duration disappear, we don't need anything */ | 585 | /* If duration disappear, we don't need anything */ |
375 | if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)) | 586 | if (!(flags & TRACE_GRAPH_PRINT_DURATION)) |
376 | return 1; | 587 | return 1; |
377 | 588 | ||
378 | /* Non nested entry or return */ | 589 | /* Non nested entry or return */ |
379 | if (duration == -1) | 590 | if (duration == -1) |
380 | return trace_seq_printf(s, " "); | 591 | return trace_seq_printf(s, " "); |
381 | 592 | ||
382 | if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { | 593 | if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { |
383 | /* Duration exceeded 100 msecs */ | 594 | /* Duration exceeded 100 msecs */ |
384 | if (duration > 100000ULL) | 595 | if (duration > 100000ULL) |
385 | return trace_seq_printf(s, "! "); | 596 | return trace_seq_printf(s, "! "); |
@@ -405,7 +616,7 @@ static int print_graph_abs_time(u64 t, struct trace_seq *s) | |||
405 | 616 | ||
406 | static enum print_line_t | 617 | static enum print_line_t |
407 | print_graph_irq(struct trace_iterator *iter, unsigned long addr, | 618 | print_graph_irq(struct trace_iterator *iter, unsigned long addr, |
408 | enum trace_type type, int cpu, pid_t pid) | 619 | enum trace_type type, int cpu, pid_t pid, u32 flags) |
409 | { | 620 | { |
410 | int ret; | 621 | int ret; |
411 | struct trace_seq *s = &iter->seq; | 622 | struct trace_seq *s = &iter->seq; |
@@ -415,20 +626,21 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, | |||
415 | return TRACE_TYPE_UNHANDLED; | 626 | return TRACE_TYPE_UNHANDLED; |
416 | 627 | ||
417 | /* Absolute time */ | 628 | /* Absolute time */ |
418 | if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { | 629 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { |
419 | ret = print_graph_abs_time(iter->ts, s); | 630 | ret = print_graph_abs_time(iter->ts, s); |
420 | if (!ret) | 631 | if (!ret) |
421 | return TRACE_TYPE_PARTIAL_LINE; | 632 | return TRACE_TYPE_PARTIAL_LINE; |
422 | } | 633 | } |
423 | 634 | ||
424 | /* Cpu */ | 635 | /* Cpu */ |
425 | if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { | 636 | if (flags & TRACE_GRAPH_PRINT_CPU) { |
426 | ret = print_graph_cpu(s, cpu); | 637 | ret = print_graph_cpu(s, cpu); |
427 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 638 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
428 | return TRACE_TYPE_PARTIAL_LINE; | 639 | return TRACE_TYPE_PARTIAL_LINE; |
429 | } | 640 | } |
641 | |||
430 | /* Proc */ | 642 | /* Proc */ |
431 | if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { | 643 | if (flags & TRACE_GRAPH_PRINT_PROC) { |
432 | ret = print_graph_proc(s, pid); | 644 | ret = print_graph_proc(s, pid); |
433 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 645 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
434 | return TRACE_TYPE_PARTIAL_LINE; | 646 | return TRACE_TYPE_PARTIAL_LINE; |
@@ -438,7 +650,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, | |||
438 | } | 650 | } |
439 | 651 | ||
440 | /* No overhead */ | 652 | /* No overhead */ |
441 | ret = print_graph_overhead(-1, s); | 653 | ret = print_graph_overhead(-1, s, flags); |
442 | if (!ret) | 654 | if (!ret) |
443 | return TRACE_TYPE_PARTIAL_LINE; | 655 | return TRACE_TYPE_PARTIAL_LINE; |
444 | 656 | ||
@@ -451,7 +663,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, | |||
451 | return TRACE_TYPE_PARTIAL_LINE; | 663 | return TRACE_TYPE_PARTIAL_LINE; |
452 | 664 | ||
453 | /* Don't close the duration column if haven't one */ | 665 | /* Don't close the duration column if haven't one */ |
454 | if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) | 666 | if (flags & TRACE_GRAPH_PRINT_DURATION) |
455 | trace_seq_printf(s, " |"); | 667 | trace_seq_printf(s, " |"); |
456 | ret = trace_seq_printf(s, "\n"); | 668 | ret = trace_seq_printf(s, "\n"); |
457 | 669 | ||
@@ -481,7 +693,9 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) | |||
481 | 693 | ||
482 | /* Print nsecs (we don't want to exceed 7 numbers) */ | 694 | /* Print nsecs (we don't want to exceed 7 numbers) */ |
483 | if (len < 7) { | 695 | if (len < 7) { |
484 | snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem); | 696 | size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len); |
697 | |||
698 | snprintf(nsecs_str, slen, "%03lu", nsecs_rem); | ||
485 | ret = trace_seq_printf(s, ".%s", nsecs_str); | 699 | ret = trace_seq_printf(s, ".%s", nsecs_str); |
486 | if (!ret) | 700 | if (!ret) |
487 | return TRACE_TYPE_PARTIAL_LINE; | 701 | return TRACE_TYPE_PARTIAL_LINE; |
@@ -521,7 +735,8 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s) | |||
521 | static enum print_line_t | 735 | static enum print_line_t |
522 | print_graph_entry_leaf(struct trace_iterator *iter, | 736 | print_graph_entry_leaf(struct trace_iterator *iter, |
523 | struct ftrace_graph_ent_entry *entry, | 737 | struct ftrace_graph_ent_entry *entry, |
524 | struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s) | 738 | struct ftrace_graph_ret_entry *ret_entry, |
739 | struct trace_seq *s, u32 flags) | ||
525 | { | 740 | { |
526 | struct fgraph_data *data = iter->private; | 741 | struct fgraph_data *data = iter->private; |
527 | struct ftrace_graph_ret *graph_ret; | 742 | struct ftrace_graph_ret *graph_ret; |
@@ -535,24 +750,30 @@ print_graph_entry_leaf(struct trace_iterator *iter, | |||
535 | duration = graph_ret->rettime - graph_ret->calltime; | 750 | duration = graph_ret->rettime - graph_ret->calltime; |
536 | 751 | ||
537 | if (data) { | 752 | if (data) { |
753 | struct fgraph_cpu_data *cpu_data; | ||
538 | int cpu = iter->cpu; | 754 | int cpu = iter->cpu; |
539 | int *depth = &(per_cpu_ptr(data, cpu)->depth); | 755 | |
756 | cpu_data = per_cpu_ptr(data->cpu_data, cpu); | ||
540 | 757 | ||
541 | /* | 758 | /* |
542 | * Comments display at + 1 to depth. Since | 759 | * Comments display at + 1 to depth. Since |
543 | * this is a leaf function, keep the comments | 760 | * this is a leaf function, keep the comments |
544 | * equal to this depth. | 761 | * equal to this depth. |
545 | */ | 762 | */ |
546 | *depth = call->depth - 1; | 763 | cpu_data->depth = call->depth - 1; |
764 | |||
765 | /* No need to keep this function around for this depth */ | ||
766 | if (call->depth < FTRACE_RETFUNC_DEPTH) | ||
767 | cpu_data->enter_funcs[call->depth] = 0; | ||
547 | } | 768 | } |
548 | 769 | ||
549 | /* Overhead */ | 770 | /* Overhead */ |
550 | ret = print_graph_overhead(duration, s); | 771 | ret = print_graph_overhead(duration, s, flags); |
551 | if (!ret) | 772 | if (!ret) |
552 | return TRACE_TYPE_PARTIAL_LINE; | 773 | return TRACE_TYPE_PARTIAL_LINE; |
553 | 774 | ||
554 | /* Duration */ | 775 | /* Duration */ |
555 | if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { | 776 | if (flags & TRACE_GRAPH_PRINT_DURATION) { |
556 | ret = print_graph_duration(duration, s); | 777 | ret = print_graph_duration(duration, s); |
557 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 778 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
558 | return TRACE_TYPE_PARTIAL_LINE; | 779 | return TRACE_TYPE_PARTIAL_LINE; |
@@ -565,11 +786,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, | |||
565 | return TRACE_TYPE_PARTIAL_LINE; | 786 | return TRACE_TYPE_PARTIAL_LINE; |
566 | } | 787 | } |
567 | 788 | ||
568 | ret = seq_print_ip_sym(s, call->func, 0); | 789 | ret = trace_seq_printf(s, "%ps();\n", (void *)call->func); |
569 | if (!ret) | ||
570 | return TRACE_TYPE_PARTIAL_LINE; | ||
571 | |||
572 | ret = trace_seq_printf(s, "();\n"); | ||
573 | if (!ret) | 790 | if (!ret) |
574 | return TRACE_TYPE_PARTIAL_LINE; | 791 | return TRACE_TYPE_PARTIAL_LINE; |
575 | 792 | ||
@@ -579,7 +796,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, | |||
579 | static enum print_line_t | 796 | static enum print_line_t |
580 | print_graph_entry_nested(struct trace_iterator *iter, | 797 | print_graph_entry_nested(struct trace_iterator *iter, |
581 | struct ftrace_graph_ent_entry *entry, | 798 | struct ftrace_graph_ent_entry *entry, |
582 | struct trace_seq *s, int cpu) | 799 | struct trace_seq *s, int cpu, u32 flags) |
583 | { | 800 | { |
584 | struct ftrace_graph_ent *call = &entry->graph_ent; | 801 | struct ftrace_graph_ent *call = &entry->graph_ent; |
585 | struct fgraph_data *data = iter->private; | 802 | struct fgraph_data *data = iter->private; |
@@ -587,19 +804,24 @@ print_graph_entry_nested(struct trace_iterator *iter, | |||
587 | int i; | 804 | int i; |
588 | 805 | ||
589 | if (data) { | 806 | if (data) { |
807 | struct fgraph_cpu_data *cpu_data; | ||
590 | int cpu = iter->cpu; | 808 | int cpu = iter->cpu; |
591 | int *depth = &(per_cpu_ptr(data, cpu)->depth); | ||
592 | 809 | ||
593 | *depth = call->depth; | 810 | cpu_data = per_cpu_ptr(data->cpu_data, cpu); |
811 | cpu_data->depth = call->depth; | ||
812 | |||
813 | /* Save this function pointer to see if the exit matches */ | ||
814 | if (call->depth < FTRACE_RETFUNC_DEPTH) | ||
815 | cpu_data->enter_funcs[call->depth] = call->func; | ||
594 | } | 816 | } |
595 | 817 | ||
596 | /* No overhead */ | 818 | /* No overhead */ |
597 | ret = print_graph_overhead(-1, s); | 819 | ret = print_graph_overhead(-1, s, flags); |
598 | if (!ret) | 820 | if (!ret) |
599 | return TRACE_TYPE_PARTIAL_LINE; | 821 | return TRACE_TYPE_PARTIAL_LINE; |
600 | 822 | ||
601 | /* No time */ | 823 | /* No time */ |
602 | if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { | 824 | if (flags & TRACE_GRAPH_PRINT_DURATION) { |
603 | ret = trace_seq_printf(s, " | "); | 825 | ret = trace_seq_printf(s, " | "); |
604 | if (!ret) | 826 | if (!ret) |
605 | return TRACE_TYPE_PARTIAL_LINE; | 827 | return TRACE_TYPE_PARTIAL_LINE; |
@@ -612,11 +834,7 @@ print_graph_entry_nested(struct trace_iterator *iter, | |||
612 | return TRACE_TYPE_PARTIAL_LINE; | 834 | return TRACE_TYPE_PARTIAL_LINE; |
613 | } | 835 | } |
614 | 836 | ||
615 | ret = seq_print_ip_sym(s, call->func, 0); | 837 | ret = trace_seq_printf(s, "%ps() {\n", (void *)call->func); |
616 | if (!ret) | ||
617 | return TRACE_TYPE_PARTIAL_LINE; | ||
618 | |||
619 | ret = trace_seq_printf(s, "() {\n"); | ||
620 | if (!ret) | 838 | if (!ret) |
621 | return TRACE_TYPE_PARTIAL_LINE; | 839 | return TRACE_TYPE_PARTIAL_LINE; |
622 | 840 | ||
@@ -629,7 +847,7 @@ print_graph_entry_nested(struct trace_iterator *iter, | |||
629 | 847 | ||
630 | static enum print_line_t | 848 | static enum print_line_t |
631 | print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, | 849 | print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, |
632 | int type, unsigned long addr) | 850 | int type, unsigned long addr, u32 flags) |
633 | { | 851 | { |
634 | struct fgraph_data *data = iter->private; | 852 | struct fgraph_data *data = iter->private; |
635 | struct trace_entry *ent = iter->ent; | 853 | struct trace_entry *ent = iter->ent; |
@@ -642,27 +860,27 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, | |||
642 | 860 | ||
643 | if (type) { | 861 | if (type) { |
644 | /* Interrupt */ | 862 | /* Interrupt */ |
645 | ret = print_graph_irq(iter, addr, type, cpu, ent->pid); | 863 | ret = print_graph_irq(iter, addr, type, cpu, ent->pid, flags); |
646 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 864 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
647 | return TRACE_TYPE_PARTIAL_LINE; | 865 | return TRACE_TYPE_PARTIAL_LINE; |
648 | } | 866 | } |
649 | 867 | ||
650 | /* Absolute time */ | 868 | /* Absolute time */ |
651 | if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { | 869 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { |
652 | ret = print_graph_abs_time(iter->ts, s); | 870 | ret = print_graph_abs_time(iter->ts, s); |
653 | if (!ret) | 871 | if (!ret) |
654 | return TRACE_TYPE_PARTIAL_LINE; | 872 | return TRACE_TYPE_PARTIAL_LINE; |
655 | } | 873 | } |
656 | 874 | ||
657 | /* Cpu */ | 875 | /* Cpu */ |
658 | if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { | 876 | if (flags & TRACE_GRAPH_PRINT_CPU) { |
659 | ret = print_graph_cpu(s, cpu); | 877 | ret = print_graph_cpu(s, cpu); |
660 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 878 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
661 | return TRACE_TYPE_PARTIAL_LINE; | 879 | return TRACE_TYPE_PARTIAL_LINE; |
662 | } | 880 | } |
663 | 881 | ||
664 | /* Proc */ | 882 | /* Proc */ |
665 | if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { | 883 | if (flags & TRACE_GRAPH_PRINT_PROC) { |
666 | ret = print_graph_proc(s, ent->pid); | 884 | ret = print_graph_proc(s, ent->pid); |
667 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 885 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
668 | return TRACE_TYPE_PARTIAL_LINE; | 886 | return TRACE_TYPE_PARTIAL_LINE; |
@@ -672,61 +890,201 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, | |||
672 | return TRACE_TYPE_PARTIAL_LINE; | 890 | return TRACE_TYPE_PARTIAL_LINE; |
673 | } | 891 | } |
674 | 892 | ||
893 | /* Latency format */ | ||
894 | if (trace_flags & TRACE_ITER_LATENCY_FMT) { | ||
895 | ret = print_graph_lat_fmt(s, ent); | ||
896 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
897 | return TRACE_TYPE_PARTIAL_LINE; | ||
898 | } | ||
899 | |||
675 | return 0; | 900 | return 0; |
676 | } | 901 | } |
677 | 902 | ||
903 | /* | ||
904 | * Entry check for irq code | ||
905 | * | ||
906 | * returns 1 if | ||
907 | * - we are inside irq code | ||
908 | * - we just extered irq code | ||
909 | * | ||
910 | * retunns 0 if | ||
911 | * - funcgraph-interrupts option is set | ||
912 | * - we are not inside irq code | ||
913 | */ | ||
914 | static int | ||
915 | check_irq_entry(struct trace_iterator *iter, u32 flags, | ||
916 | unsigned long addr, int depth) | ||
917 | { | ||
918 | int cpu = iter->cpu; | ||
919 | int *depth_irq; | ||
920 | struct fgraph_data *data = iter->private; | ||
921 | |||
922 | /* | ||
923 | * If we are either displaying irqs, or we got called as | ||
924 | * a graph event and private data does not exist, | ||
925 | * then we bypass the irq check. | ||
926 | */ | ||
927 | if ((flags & TRACE_GRAPH_PRINT_IRQS) || | ||
928 | (!data)) | ||
929 | return 0; | ||
930 | |||
931 | depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
932 | |||
933 | /* | ||
934 | * We are inside the irq code | ||
935 | */ | ||
936 | if (*depth_irq >= 0) | ||
937 | return 1; | ||
938 | |||
939 | if ((addr < (unsigned long)__irqentry_text_start) || | ||
940 | (addr >= (unsigned long)__irqentry_text_end)) | ||
941 | return 0; | ||
942 | |||
943 | /* | ||
944 | * We are entering irq code. | ||
945 | */ | ||
946 | *depth_irq = depth; | ||
947 | return 1; | ||
948 | } | ||
949 | |||
950 | /* | ||
951 | * Return check for irq code | ||
952 | * | ||
953 | * returns 1 if | ||
954 | * - we are inside irq code | ||
955 | * - we just left irq code | ||
956 | * | ||
957 | * returns 0 if | ||
958 | * - funcgraph-interrupts option is set | ||
959 | * - we are not inside irq code | ||
960 | */ | ||
961 | static int | ||
962 | check_irq_return(struct trace_iterator *iter, u32 flags, int depth) | ||
963 | { | ||
964 | int cpu = iter->cpu; | ||
965 | int *depth_irq; | ||
966 | struct fgraph_data *data = iter->private; | ||
967 | |||
968 | /* | ||
969 | * If we are either displaying irqs, or we got called as | ||
970 | * a graph event and private data does not exist, | ||
971 | * then we bypass the irq check. | ||
972 | */ | ||
973 | if ((flags & TRACE_GRAPH_PRINT_IRQS) || | ||
974 | (!data)) | ||
975 | return 0; | ||
976 | |||
977 | depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
978 | |||
979 | /* | ||
980 | * We are not inside the irq code. | ||
981 | */ | ||
982 | if (*depth_irq == -1) | ||
983 | return 0; | ||
984 | |||
985 | /* | ||
986 | * We are inside the irq code, and this is returning entry. | ||
987 | * Let's not trace it and clear the entry depth, since | ||
988 | * we are out of irq code. | ||
989 | * | ||
990 | * This condition ensures that we 'leave the irq code' once | ||
991 | * we are out of the entry depth. Thus protecting us from | ||
992 | * the RETURN entry loss. | ||
993 | */ | ||
994 | if (*depth_irq >= depth) { | ||
995 | *depth_irq = -1; | ||
996 | return 1; | ||
997 | } | ||
998 | |||
999 | /* | ||
1000 | * We are inside the irq code, and this is not the entry. | ||
1001 | */ | ||
1002 | return 1; | ||
1003 | } | ||
1004 | |||
678 | static enum print_line_t | 1005 | static enum print_line_t |
679 | print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, | 1006 | print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, |
680 | struct trace_iterator *iter) | 1007 | struct trace_iterator *iter, u32 flags) |
681 | { | 1008 | { |
682 | int cpu = iter->cpu; | 1009 | struct fgraph_data *data = iter->private; |
683 | struct ftrace_graph_ent *call = &field->graph_ent; | 1010 | struct ftrace_graph_ent *call = &field->graph_ent; |
684 | struct ftrace_graph_ret_entry *leaf_ret; | 1011 | struct ftrace_graph_ret_entry *leaf_ret; |
1012 | static enum print_line_t ret; | ||
1013 | int cpu = iter->cpu; | ||
1014 | |||
1015 | if (check_irq_entry(iter, flags, call->func, call->depth)) | ||
1016 | return TRACE_TYPE_HANDLED; | ||
685 | 1017 | ||
686 | if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) | 1018 | if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) |
687 | return TRACE_TYPE_PARTIAL_LINE; | 1019 | return TRACE_TYPE_PARTIAL_LINE; |
688 | 1020 | ||
689 | leaf_ret = get_return_for_leaf(iter, field); | 1021 | leaf_ret = get_return_for_leaf(iter, field); |
690 | if (leaf_ret) | 1022 | if (leaf_ret) |
691 | return print_graph_entry_leaf(iter, field, leaf_ret, s); | 1023 | ret = print_graph_entry_leaf(iter, field, leaf_ret, s, flags); |
692 | else | 1024 | else |
693 | return print_graph_entry_nested(iter, field, s, cpu); | 1025 | ret = print_graph_entry_nested(iter, field, s, cpu, flags); |
1026 | |||
1027 | if (data) { | ||
1028 | /* | ||
1029 | * If we failed to write our output, then we need to make | ||
1030 | * note of it. Because we already consumed our entry. | ||
1031 | */ | ||
1032 | if (s->full) { | ||
1033 | data->failed = 1; | ||
1034 | data->cpu = cpu; | ||
1035 | } else | ||
1036 | data->failed = 0; | ||
1037 | } | ||
694 | 1038 | ||
1039 | return ret; | ||
695 | } | 1040 | } |
696 | 1041 | ||
697 | static enum print_line_t | 1042 | static enum print_line_t |
698 | print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | 1043 | print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, |
699 | struct trace_entry *ent, struct trace_iterator *iter) | 1044 | struct trace_entry *ent, struct trace_iterator *iter, |
1045 | u32 flags) | ||
700 | { | 1046 | { |
701 | unsigned long long duration = trace->rettime - trace->calltime; | 1047 | unsigned long long duration = trace->rettime - trace->calltime; |
702 | struct fgraph_data *data = iter->private; | 1048 | struct fgraph_data *data = iter->private; |
703 | pid_t pid = ent->pid; | 1049 | pid_t pid = ent->pid; |
704 | int cpu = iter->cpu; | 1050 | int cpu = iter->cpu; |
1051 | int func_match = 1; | ||
705 | int ret; | 1052 | int ret; |
706 | int i; | 1053 | int i; |
707 | 1054 | ||
1055 | if (check_irq_return(iter, flags, trace->depth)) | ||
1056 | return TRACE_TYPE_HANDLED; | ||
1057 | |||
708 | if (data) { | 1058 | if (data) { |
1059 | struct fgraph_cpu_data *cpu_data; | ||
709 | int cpu = iter->cpu; | 1060 | int cpu = iter->cpu; |
710 | int *depth = &(per_cpu_ptr(data, cpu)->depth); | 1061 | |
1062 | cpu_data = per_cpu_ptr(data->cpu_data, cpu); | ||
711 | 1063 | ||
712 | /* | 1064 | /* |
713 | * Comments display at + 1 to depth. This is the | 1065 | * Comments display at + 1 to depth. This is the |
714 | * return from a function, we now want the comments | 1066 | * return from a function, we now want the comments |
715 | * to display at the same level of the bracket. | 1067 | * to display at the same level of the bracket. |
716 | */ | 1068 | */ |
717 | *depth = trace->depth - 1; | 1069 | cpu_data->depth = trace->depth - 1; |
1070 | |||
1071 | if (trace->depth < FTRACE_RETFUNC_DEPTH) { | ||
1072 | if (cpu_data->enter_funcs[trace->depth] != trace->func) | ||
1073 | func_match = 0; | ||
1074 | cpu_data->enter_funcs[trace->depth] = 0; | ||
1075 | } | ||
718 | } | 1076 | } |
719 | 1077 | ||
720 | if (print_graph_prologue(iter, s, 0, 0)) | 1078 | if (print_graph_prologue(iter, s, 0, 0, flags)) |
721 | return TRACE_TYPE_PARTIAL_LINE; | 1079 | return TRACE_TYPE_PARTIAL_LINE; |
722 | 1080 | ||
723 | /* Overhead */ | 1081 | /* Overhead */ |
724 | ret = print_graph_overhead(duration, s); | 1082 | ret = print_graph_overhead(duration, s, flags); |
725 | if (!ret) | 1083 | if (!ret) |
726 | return TRACE_TYPE_PARTIAL_LINE; | 1084 | return TRACE_TYPE_PARTIAL_LINE; |
727 | 1085 | ||
728 | /* Duration */ | 1086 | /* Duration */ |
729 | if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { | 1087 | if (flags & TRACE_GRAPH_PRINT_DURATION) { |
730 | ret = print_graph_duration(duration, s); | 1088 | ret = print_graph_duration(duration, s); |
731 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 1089 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
732 | return TRACE_TYPE_PARTIAL_LINE; | 1090 | return TRACE_TYPE_PARTIAL_LINE; |
@@ -739,19 +1097,32 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | |||
739 | return TRACE_TYPE_PARTIAL_LINE; | 1097 | return TRACE_TYPE_PARTIAL_LINE; |
740 | } | 1098 | } |
741 | 1099 | ||
742 | ret = trace_seq_printf(s, "}\n"); | 1100 | /* |
743 | if (!ret) | 1101 | * If the return function does not have a matching entry, |
744 | return TRACE_TYPE_PARTIAL_LINE; | 1102 | * then the entry was lost. Instead of just printing |
1103 | * the '}' and letting the user guess what function this | ||
1104 | * belongs to, write out the function name. | ||
1105 | */ | ||
1106 | if (func_match) { | ||
1107 | ret = trace_seq_printf(s, "}\n"); | ||
1108 | if (!ret) | ||
1109 | return TRACE_TYPE_PARTIAL_LINE; | ||
1110 | } else { | ||
1111 | ret = trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func); | ||
1112 | if (!ret) | ||
1113 | return TRACE_TYPE_PARTIAL_LINE; | ||
1114 | } | ||
745 | 1115 | ||
746 | /* Overrun */ | 1116 | /* Overrun */ |
747 | if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { | 1117 | if (flags & TRACE_GRAPH_PRINT_OVERRUN) { |
748 | ret = trace_seq_printf(s, " (Overruns: %lu)\n", | 1118 | ret = trace_seq_printf(s, " (Overruns: %lu)\n", |
749 | trace->overrun); | 1119 | trace->overrun); |
750 | if (!ret) | 1120 | if (!ret) |
751 | return TRACE_TYPE_PARTIAL_LINE; | 1121 | return TRACE_TYPE_PARTIAL_LINE; |
752 | } | 1122 | } |
753 | 1123 | ||
754 | ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, cpu, pid); | 1124 | ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, |
1125 | cpu, pid, flags); | ||
755 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 1126 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
756 | return TRACE_TYPE_PARTIAL_LINE; | 1127 | return TRACE_TYPE_PARTIAL_LINE; |
757 | 1128 | ||
@@ -759,8 +1130,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | |||
759 | } | 1130 | } |
760 | 1131 | ||
761 | static enum print_line_t | 1132 | static enum print_line_t |
762 | print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | 1133 | print_graph_comment(struct trace_seq *s, struct trace_entry *ent, |
763 | struct trace_iterator *iter) | 1134 | struct trace_iterator *iter, u32 flags) |
764 | { | 1135 | { |
765 | unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); | 1136 | unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); |
766 | struct fgraph_data *data = iter->private; | 1137 | struct fgraph_data *data = iter->private; |
@@ -770,18 +1141,18 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
770 | int i; | 1141 | int i; |
771 | 1142 | ||
772 | if (data) | 1143 | if (data) |
773 | depth = per_cpu_ptr(data, iter->cpu)->depth; | 1144 | depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth; |
774 | 1145 | ||
775 | if (print_graph_prologue(iter, s, 0, 0)) | 1146 | if (print_graph_prologue(iter, s, 0, 0, flags)) |
776 | return TRACE_TYPE_PARTIAL_LINE; | 1147 | return TRACE_TYPE_PARTIAL_LINE; |
777 | 1148 | ||
778 | /* No overhead */ | 1149 | /* No overhead */ |
779 | ret = print_graph_overhead(-1, s); | 1150 | ret = print_graph_overhead(-1, s, flags); |
780 | if (!ret) | 1151 | if (!ret) |
781 | return TRACE_TYPE_PARTIAL_LINE; | 1152 | return TRACE_TYPE_PARTIAL_LINE; |
782 | 1153 | ||
783 | /* No time */ | 1154 | /* No time */ |
784 | if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { | 1155 | if (flags & TRACE_GRAPH_PRINT_DURATION) { |
785 | ret = trace_seq_printf(s, " | "); | 1156 | ret = trace_seq_printf(s, " | "); |
786 | if (!ret) | 1157 | if (!ret) |
787 | return TRACE_TYPE_PARTIAL_LINE; | 1158 | return TRACE_TYPE_PARTIAL_LINE; |
@@ -816,7 +1187,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
816 | if (!event) | 1187 | if (!event) |
817 | return TRACE_TYPE_UNHANDLED; | 1188 | return TRACE_TYPE_UNHANDLED; |
818 | 1189 | ||
819 | ret = event->trace(iter, sym_flags); | 1190 | ret = event->funcs->trace(iter, sym_flags, event); |
820 | if (ret != TRACE_TYPE_HANDLED) | 1191 | if (ret != TRACE_TYPE_HANDLED) |
821 | return ret; | 1192 | return ret; |
822 | } | 1193 | } |
@@ -836,90 +1207,253 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
836 | 1207 | ||
837 | 1208 | ||
838 | enum print_line_t | 1209 | enum print_line_t |
839 | print_graph_function(struct trace_iterator *iter) | 1210 | __print_graph_function_flags(struct trace_iterator *iter, u32 flags) |
840 | { | 1211 | { |
1212 | struct ftrace_graph_ent_entry *field; | ||
1213 | struct fgraph_data *data = iter->private; | ||
841 | struct trace_entry *entry = iter->ent; | 1214 | struct trace_entry *entry = iter->ent; |
842 | struct trace_seq *s = &iter->seq; | 1215 | struct trace_seq *s = &iter->seq; |
1216 | int cpu = iter->cpu; | ||
1217 | int ret; | ||
1218 | |||
1219 | if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) { | ||
1220 | per_cpu_ptr(data->cpu_data, cpu)->ignore = 0; | ||
1221 | return TRACE_TYPE_HANDLED; | ||
1222 | } | ||
1223 | |||
1224 | /* | ||
1225 | * If the last output failed, there's a possibility we need | ||
1226 | * to print out the missing entry which would never go out. | ||
1227 | */ | ||
1228 | if (data && data->failed) { | ||
1229 | field = &data->ent; | ||
1230 | iter->cpu = data->cpu; | ||
1231 | ret = print_graph_entry(field, s, iter, flags); | ||
1232 | if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) { | ||
1233 | per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1; | ||
1234 | ret = TRACE_TYPE_NO_CONSUME; | ||
1235 | } | ||
1236 | iter->cpu = cpu; | ||
1237 | return ret; | ||
1238 | } | ||
843 | 1239 | ||
844 | switch (entry->type) { | 1240 | switch (entry->type) { |
845 | case TRACE_GRAPH_ENT: { | 1241 | case TRACE_GRAPH_ENT: { |
846 | struct ftrace_graph_ent_entry *field; | 1242 | /* |
1243 | * print_graph_entry() may consume the current event, | ||
1244 | * thus @field may become invalid, so we need to save it. | ||
1245 | * sizeof(struct ftrace_graph_ent_entry) is very small, | ||
1246 | * it can be safely saved at the stack. | ||
1247 | */ | ||
1248 | struct ftrace_graph_ent_entry saved; | ||
847 | trace_assign_type(field, entry); | 1249 | trace_assign_type(field, entry); |
848 | return print_graph_entry(field, s, iter); | 1250 | saved = *field; |
1251 | return print_graph_entry(&saved, s, iter, flags); | ||
849 | } | 1252 | } |
850 | case TRACE_GRAPH_RET: { | 1253 | case TRACE_GRAPH_RET: { |
851 | struct ftrace_graph_ret_entry *field; | 1254 | struct ftrace_graph_ret_entry *field; |
852 | trace_assign_type(field, entry); | 1255 | trace_assign_type(field, entry); |
853 | return print_graph_return(&field->ret, s, entry, iter); | 1256 | return print_graph_return(&field->ret, s, entry, iter, flags); |
854 | } | 1257 | } |
1258 | case TRACE_STACK: | ||
1259 | case TRACE_FN: | ||
1260 | /* dont trace stack and functions as comments */ | ||
1261 | return TRACE_TYPE_UNHANDLED; | ||
1262 | |||
855 | default: | 1263 | default: |
856 | return print_graph_comment(s, entry, iter); | 1264 | return print_graph_comment(s, entry, iter, flags); |
857 | } | 1265 | } |
858 | 1266 | ||
859 | return TRACE_TYPE_HANDLED; | 1267 | return TRACE_TYPE_HANDLED; |
860 | } | 1268 | } |
861 | 1269 | ||
862 | static void print_graph_headers(struct seq_file *s) | 1270 | static enum print_line_t |
1271 | print_graph_function(struct trace_iterator *iter) | ||
1272 | { | ||
1273 | return __print_graph_function_flags(iter, tracer_flags.val); | ||
1274 | } | ||
1275 | |||
1276 | enum print_line_t print_graph_function_flags(struct trace_iterator *iter, | ||
1277 | u32 flags) | ||
1278 | { | ||
1279 | if (trace_flags & TRACE_ITER_LATENCY_FMT) | ||
1280 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
1281 | else | ||
1282 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
1283 | |||
1284 | return __print_graph_function_flags(iter, flags); | ||
1285 | } | ||
1286 | |||
1287 | static enum print_line_t | ||
1288 | print_graph_function_event(struct trace_iterator *iter, int flags, | ||
1289 | struct trace_event *event) | ||
1290 | { | ||
1291 | return print_graph_function(iter); | ||
1292 | } | ||
1293 | |||
1294 | static void print_lat_header(struct seq_file *s, u32 flags) | ||
1295 | { | ||
1296 | static const char spaces[] = " " /* 16 spaces */ | ||
1297 | " " /* 4 spaces */ | ||
1298 | " "; /* 17 spaces */ | ||
1299 | int size = 0; | ||
1300 | |||
1301 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) | ||
1302 | size += 16; | ||
1303 | if (flags & TRACE_GRAPH_PRINT_CPU) | ||
1304 | size += 4; | ||
1305 | if (flags & TRACE_GRAPH_PRINT_PROC) | ||
1306 | size += 17; | ||
1307 | |||
1308 | seq_printf(s, "#%.*s _-----=> irqs-off \n", size, spaces); | ||
1309 | seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces); | ||
1310 | seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces); | ||
1311 | seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces); | ||
1312 | seq_printf(s, "#%.*s||| / _-=> lock-depth \n", size, spaces); | ||
1313 | seq_printf(s, "#%.*s|||| / \n", size, spaces); | ||
1314 | } | ||
1315 | |||
1316 | static void __print_graph_headers_flags(struct seq_file *s, u32 flags) | ||
863 | { | 1317 | { |
1318 | int lat = trace_flags & TRACE_ITER_LATENCY_FMT; | ||
1319 | |||
1320 | if (lat) | ||
1321 | print_lat_header(s, flags); | ||
1322 | |||
864 | /* 1st line */ | 1323 | /* 1st line */ |
865 | seq_printf(s, "# "); | 1324 | seq_printf(s, "#"); |
866 | if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) | 1325 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) |
867 | seq_printf(s, " TIME "); | 1326 | seq_printf(s, " TIME "); |
868 | if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) | 1327 | if (flags & TRACE_GRAPH_PRINT_CPU) |
869 | seq_printf(s, "CPU"); | 1328 | seq_printf(s, " CPU"); |
870 | if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) | 1329 | if (flags & TRACE_GRAPH_PRINT_PROC) |
871 | seq_printf(s, " TASK/PID "); | 1330 | seq_printf(s, " TASK/PID "); |
872 | if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) | 1331 | if (lat) |
1332 | seq_printf(s, "|||||"); | ||
1333 | if (flags & TRACE_GRAPH_PRINT_DURATION) | ||
873 | seq_printf(s, " DURATION "); | 1334 | seq_printf(s, " DURATION "); |
874 | seq_printf(s, " FUNCTION CALLS\n"); | 1335 | seq_printf(s, " FUNCTION CALLS\n"); |
875 | 1336 | ||
876 | /* 2nd line */ | 1337 | /* 2nd line */ |
877 | seq_printf(s, "# "); | 1338 | seq_printf(s, "#"); |
878 | if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) | 1339 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) |
879 | seq_printf(s, " | "); | 1340 | seq_printf(s, " | "); |
880 | if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) | 1341 | if (flags & TRACE_GRAPH_PRINT_CPU) |
881 | seq_printf(s, "| "); | 1342 | seq_printf(s, " | "); |
882 | if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) | 1343 | if (flags & TRACE_GRAPH_PRINT_PROC) |
883 | seq_printf(s, " | | "); | 1344 | seq_printf(s, " | | "); |
884 | if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) | 1345 | if (lat) |
1346 | seq_printf(s, "|||||"); | ||
1347 | if (flags & TRACE_GRAPH_PRINT_DURATION) | ||
885 | seq_printf(s, " | | "); | 1348 | seq_printf(s, " | | "); |
886 | seq_printf(s, " | | | |\n"); | 1349 | seq_printf(s, " | | | |\n"); |
887 | } | 1350 | } |
888 | 1351 | ||
889 | static void graph_trace_open(struct trace_iterator *iter) | 1352 | void print_graph_headers(struct seq_file *s) |
1353 | { | ||
1354 | print_graph_headers_flags(s, tracer_flags.val); | ||
1355 | } | ||
1356 | |||
1357 | void print_graph_headers_flags(struct seq_file *s, u32 flags) | ||
1358 | { | ||
1359 | struct trace_iterator *iter = s->private; | ||
1360 | |||
1361 | if (trace_flags & TRACE_ITER_LATENCY_FMT) { | ||
1362 | /* print nothing if the buffers are empty */ | ||
1363 | if (trace_empty(iter)) | ||
1364 | return; | ||
1365 | |||
1366 | print_trace_header(s, iter); | ||
1367 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
1368 | } else | ||
1369 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
1370 | |||
1371 | __print_graph_headers_flags(s, flags); | ||
1372 | } | ||
1373 | |||
1374 | void graph_trace_open(struct trace_iterator *iter) | ||
890 | { | 1375 | { |
891 | /* pid and depth on the last trace processed */ | 1376 | /* pid and depth on the last trace processed */ |
892 | struct fgraph_data *data = alloc_percpu(struct fgraph_data); | 1377 | struct fgraph_data *data; |
893 | int cpu; | 1378 | int cpu; |
894 | 1379 | ||
1380 | iter->private = NULL; | ||
1381 | |||
1382 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
895 | if (!data) | 1383 | if (!data) |
896 | pr_warning("function graph tracer: not enough memory\n"); | 1384 | goto out_err; |
897 | else | 1385 | |
898 | for_each_possible_cpu(cpu) { | 1386 | data->cpu_data = alloc_percpu(struct fgraph_cpu_data); |
899 | pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid); | 1387 | if (!data->cpu_data) |
900 | int *depth = &(per_cpu_ptr(data, cpu)->depth); | 1388 | goto out_err_free; |
901 | *pid = -1; | 1389 | |
902 | *depth = 0; | 1390 | for_each_possible_cpu(cpu) { |
903 | } | 1391 | pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); |
1392 | int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); | ||
1393 | int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore); | ||
1394 | int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
1395 | |||
1396 | *pid = -1; | ||
1397 | *depth = 0; | ||
1398 | *ignore = 0; | ||
1399 | *depth_irq = -1; | ||
1400 | } | ||
904 | 1401 | ||
905 | iter->private = data; | 1402 | iter->private = data; |
1403 | |||
1404 | return; | ||
1405 | |||
1406 | out_err_free: | ||
1407 | kfree(data); | ||
1408 | out_err: | ||
1409 | pr_warning("function graph tracer: not enough memory\n"); | ||
1410 | } | ||
1411 | |||
1412 | void graph_trace_close(struct trace_iterator *iter) | ||
1413 | { | ||
1414 | struct fgraph_data *data = iter->private; | ||
1415 | |||
1416 | if (data) { | ||
1417 | free_percpu(data->cpu_data); | ||
1418 | kfree(data); | ||
1419 | } | ||
906 | } | 1420 | } |
907 | 1421 | ||
908 | static void graph_trace_close(struct trace_iterator *iter) | 1422 | static int func_graph_set_flag(u32 old_flags, u32 bit, int set) |
909 | { | 1423 | { |
910 | free_percpu(iter->private); | 1424 | if (bit == TRACE_GRAPH_PRINT_IRQS) |
1425 | ftrace_graph_skip_irqs = !set; | ||
1426 | |||
1427 | return 0; | ||
911 | } | 1428 | } |
912 | 1429 | ||
1430 | static struct trace_event_functions graph_functions = { | ||
1431 | .trace = print_graph_function_event, | ||
1432 | }; | ||
1433 | |||
1434 | static struct trace_event graph_trace_entry_event = { | ||
1435 | .type = TRACE_GRAPH_ENT, | ||
1436 | .funcs = &graph_functions, | ||
1437 | }; | ||
1438 | |||
1439 | static struct trace_event graph_trace_ret_event = { | ||
1440 | .type = TRACE_GRAPH_RET, | ||
1441 | .funcs = &graph_functions | ||
1442 | }; | ||
1443 | |||
913 | static struct tracer graph_trace __read_mostly = { | 1444 | static struct tracer graph_trace __read_mostly = { |
914 | .name = "function_graph", | 1445 | .name = "function_graph", |
915 | .open = graph_trace_open, | 1446 | .open = graph_trace_open, |
1447 | .pipe_open = graph_trace_open, | ||
916 | .close = graph_trace_close, | 1448 | .close = graph_trace_close, |
1449 | .pipe_close = graph_trace_close, | ||
917 | .wait_pipe = poll_wait_pipe, | 1450 | .wait_pipe = poll_wait_pipe, |
918 | .init = graph_trace_init, | 1451 | .init = graph_trace_init, |
919 | .reset = graph_trace_reset, | 1452 | .reset = graph_trace_reset, |
920 | .print_line = print_graph_function, | 1453 | .print_line = print_graph_function, |
921 | .print_header = print_graph_headers, | 1454 | .print_header = print_graph_headers, |
922 | .flags = &tracer_flags, | 1455 | .flags = &tracer_flags, |
1456 | .set_flag = func_graph_set_flag, | ||
923 | #ifdef CONFIG_FTRACE_SELFTEST | 1457 | #ifdef CONFIG_FTRACE_SELFTEST |
924 | .selftest = trace_selftest_startup_function_graph, | 1458 | .selftest = trace_selftest_startup_function_graph, |
925 | #endif | 1459 | #endif |
@@ -927,6 +1461,18 @@ static struct tracer graph_trace __read_mostly = { | |||
927 | 1461 | ||
928 | static __init int init_graph_trace(void) | 1462 | static __init int init_graph_trace(void) |
929 | { | 1463 | { |
1464 | max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); | ||
1465 | |||
1466 | if (!register_ftrace_event(&graph_trace_entry_event)) { | ||
1467 | pr_warning("Warning: could not register graph trace events\n"); | ||
1468 | return 1; | ||
1469 | } | ||
1470 | |||
1471 | if (!register_ftrace_event(&graph_trace_ret_event)) { | ||
1472 | pr_warning("Warning: could not register graph trace events\n"); | ||
1473 | return 1; | ||
1474 | } | ||
1475 | |||
930 | return register_tracer(&graph_trace); | 1476 | return register_tracer(&graph_trace); |
931 | } | 1477 | } |
932 | 1478 | ||
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c deleted file mode 100644 index ca7d7c4d0c2a..000000000000 --- a/kernel/trace/trace_hw_branches.c +++ /dev/null | |||
@@ -1,309 +0,0 @@ | |||
1 | /* | ||
2 | * h/w branch tracer for x86 based on BTS | ||
3 | * | ||
4 | * Copyright (C) 2008-2009 Intel Corporation. | ||
5 | * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009 | ||
6 | */ | ||
7 | #include <linux/kallsyms.h> | ||
8 | #include <linux/debugfs.h> | ||
9 | #include <linux/ftrace.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/cpu.h> | ||
12 | #include <linux/smp.h> | ||
13 | #include <linux/fs.h> | ||
14 | |||
15 | #include <asm/ds.h> | ||
16 | |||
17 | #include "trace_output.h" | ||
18 | #include "trace.h" | ||
19 | |||
20 | |||
21 | #define BTS_BUFFER_SIZE (1 << 13) | ||
22 | |||
23 | static DEFINE_PER_CPU(struct bts_tracer *, tracer); | ||
24 | static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); | ||
25 | |||
26 | #define this_tracer per_cpu(tracer, smp_processor_id()) | ||
27 | |||
28 | static int trace_hw_branches_enabled __read_mostly; | ||
29 | static int trace_hw_branches_suspended __read_mostly; | ||
30 | static struct trace_array *hw_branch_trace __read_mostly; | ||
31 | |||
32 | |||
33 | static void bts_trace_init_cpu(int cpu) | ||
34 | { | ||
35 | per_cpu(tracer, cpu) = | ||
36 | ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE, | ||
37 | NULL, (size_t)-1, BTS_KERNEL); | ||
38 | |||
39 | if (IS_ERR(per_cpu(tracer, cpu))) | ||
40 | per_cpu(tracer, cpu) = NULL; | ||
41 | } | ||
42 | |||
43 | static int bts_trace_init(struct trace_array *tr) | ||
44 | { | ||
45 | int cpu; | ||
46 | |||
47 | hw_branch_trace = tr; | ||
48 | trace_hw_branches_enabled = 0; | ||
49 | |||
50 | get_online_cpus(); | ||
51 | for_each_online_cpu(cpu) { | ||
52 | bts_trace_init_cpu(cpu); | ||
53 | |||
54 | if (likely(per_cpu(tracer, cpu))) | ||
55 | trace_hw_branches_enabled = 1; | ||
56 | } | ||
57 | trace_hw_branches_suspended = 0; | ||
58 | put_online_cpus(); | ||
59 | |||
60 | /* If we could not enable tracing on a single cpu, we fail. */ | ||
61 | return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP; | ||
62 | } | ||
63 | |||
64 | static void bts_trace_reset(struct trace_array *tr) | ||
65 | { | ||
66 | int cpu; | ||
67 | |||
68 | get_online_cpus(); | ||
69 | for_each_online_cpu(cpu) { | ||
70 | if (likely(per_cpu(tracer, cpu))) { | ||
71 | ds_release_bts(per_cpu(tracer, cpu)); | ||
72 | per_cpu(tracer, cpu) = NULL; | ||
73 | } | ||
74 | } | ||
75 | trace_hw_branches_enabled = 0; | ||
76 | trace_hw_branches_suspended = 0; | ||
77 | put_online_cpus(); | ||
78 | } | ||
79 | |||
80 | static void bts_trace_start(struct trace_array *tr) | ||
81 | { | ||
82 | int cpu; | ||
83 | |||
84 | get_online_cpus(); | ||
85 | for_each_online_cpu(cpu) | ||
86 | if (likely(per_cpu(tracer, cpu))) | ||
87 | ds_resume_bts(per_cpu(tracer, cpu)); | ||
88 | trace_hw_branches_suspended = 0; | ||
89 | put_online_cpus(); | ||
90 | } | ||
91 | |||
92 | static void bts_trace_stop(struct trace_array *tr) | ||
93 | { | ||
94 | int cpu; | ||
95 | |||
96 | get_online_cpus(); | ||
97 | for_each_online_cpu(cpu) | ||
98 | if (likely(per_cpu(tracer, cpu))) | ||
99 | ds_suspend_bts(per_cpu(tracer, cpu)); | ||
100 | trace_hw_branches_suspended = 1; | ||
101 | put_online_cpus(); | ||
102 | } | ||
103 | |||
104 | static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, | ||
105 | unsigned long action, void *hcpu) | ||
106 | { | ||
107 | int cpu = (long)hcpu; | ||
108 | |||
109 | switch (action) { | ||
110 | case CPU_ONLINE: | ||
111 | case CPU_DOWN_FAILED: | ||
112 | /* The notification is sent with interrupts enabled. */ | ||
113 | if (trace_hw_branches_enabled) { | ||
114 | bts_trace_init_cpu(cpu); | ||
115 | |||
116 | if (trace_hw_branches_suspended && | ||
117 | likely(per_cpu(tracer, cpu))) | ||
118 | ds_suspend_bts(per_cpu(tracer, cpu)); | ||
119 | } | ||
120 | break; | ||
121 | |||
122 | case CPU_DOWN_PREPARE: | ||
123 | /* The notification is sent with interrupts enabled. */ | ||
124 | if (likely(per_cpu(tracer, cpu))) { | ||
125 | ds_release_bts(per_cpu(tracer, cpu)); | ||
126 | per_cpu(tracer, cpu) = NULL; | ||
127 | } | ||
128 | } | ||
129 | |||
130 | return NOTIFY_DONE; | ||
131 | } | ||
132 | |||
133 | static struct notifier_block bts_hotcpu_notifier __cpuinitdata = { | ||
134 | .notifier_call = bts_hotcpu_handler | ||
135 | }; | ||
136 | |||
137 | static void bts_trace_print_header(struct seq_file *m) | ||
138 | { | ||
139 | seq_puts(m, "# CPU# TO <- FROM\n"); | ||
140 | } | ||
141 | |||
142 | static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) | ||
143 | { | ||
144 | unsigned long symflags = TRACE_ITER_SYM_OFFSET; | ||
145 | struct trace_entry *entry = iter->ent; | ||
146 | struct trace_seq *seq = &iter->seq; | ||
147 | struct hw_branch_entry *it; | ||
148 | |||
149 | trace_assign_type(it, entry); | ||
150 | |||
151 | if (entry->type == TRACE_HW_BRANCHES) { | ||
152 | if (trace_seq_printf(seq, "%4d ", iter->cpu) && | ||
153 | seq_print_ip_sym(seq, it->to, symflags) && | ||
154 | trace_seq_printf(seq, "\t <- ") && | ||
155 | seq_print_ip_sym(seq, it->from, symflags) && | ||
156 | trace_seq_printf(seq, "\n")) | ||
157 | return TRACE_TYPE_HANDLED; | ||
158 | return TRACE_TYPE_PARTIAL_LINE;; | ||
159 | } | ||
160 | return TRACE_TYPE_UNHANDLED; | ||
161 | } | ||
162 | |||
163 | void trace_hw_branch(u64 from, u64 to) | ||
164 | { | ||
165 | struct ftrace_event_call *call = &event_hw_branch; | ||
166 | struct trace_array *tr = hw_branch_trace; | ||
167 | struct ring_buffer_event *event; | ||
168 | struct hw_branch_entry *entry; | ||
169 | unsigned long irq1; | ||
170 | int cpu; | ||
171 | |||
172 | if (unlikely(!tr)) | ||
173 | return; | ||
174 | |||
175 | if (unlikely(!trace_hw_branches_enabled)) | ||
176 | return; | ||
177 | |||
178 | local_irq_save(irq1); | ||
179 | cpu = raw_smp_processor_id(); | ||
180 | if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) | ||
181 | goto out; | ||
182 | |||
183 | event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES, | ||
184 | sizeof(*entry), 0, 0); | ||
185 | if (!event) | ||
186 | goto out; | ||
187 | entry = ring_buffer_event_data(event); | ||
188 | tracing_generic_entry_update(&entry->ent, 0, from); | ||
189 | entry->ent.type = TRACE_HW_BRANCHES; | ||
190 | entry->from = from; | ||
191 | entry->to = to; | ||
192 | if (!filter_check_discard(call, entry, tr->buffer, event)) | ||
193 | trace_buffer_unlock_commit(tr, event, 0, 0); | ||
194 | |||
195 | out: | ||
196 | atomic_dec(&tr->data[cpu]->disabled); | ||
197 | local_irq_restore(irq1); | ||
198 | } | ||
199 | |||
200 | static void trace_bts_at(const struct bts_trace *trace, void *at) | ||
201 | { | ||
202 | struct bts_struct bts; | ||
203 | int err = 0; | ||
204 | |||
205 | WARN_ON_ONCE(!trace->read); | ||
206 | if (!trace->read) | ||
207 | return; | ||
208 | |||
209 | err = trace->read(this_tracer, at, &bts); | ||
210 | if (err < 0) | ||
211 | return; | ||
212 | |||
213 | switch (bts.qualifier) { | ||
214 | case BTS_BRANCH: | ||
215 | trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to); | ||
216 | break; | ||
217 | } | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * Collect the trace on the current cpu and write it into the ftrace buffer. | ||
222 | * | ||
223 | * pre: tracing must be suspended on the current cpu | ||
224 | */ | ||
225 | static void trace_bts_cpu(void *arg) | ||
226 | { | ||
227 | struct trace_array *tr = (struct trace_array *)arg; | ||
228 | const struct bts_trace *trace; | ||
229 | unsigned char *at; | ||
230 | |||
231 | if (unlikely(!tr)) | ||
232 | return; | ||
233 | |||
234 | if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled))) | ||
235 | return; | ||
236 | |||
237 | if (unlikely(!this_tracer)) | ||
238 | return; | ||
239 | |||
240 | trace = ds_read_bts(this_tracer); | ||
241 | if (!trace) | ||
242 | return; | ||
243 | |||
244 | for (at = trace->ds.top; (void *)at < trace->ds.end; | ||
245 | at += trace->ds.size) | ||
246 | trace_bts_at(trace, at); | ||
247 | |||
248 | for (at = trace->ds.begin; (void *)at < trace->ds.top; | ||
249 | at += trace->ds.size) | ||
250 | trace_bts_at(trace, at); | ||
251 | } | ||
252 | |||
253 | static void trace_bts_prepare(struct trace_iterator *iter) | ||
254 | { | ||
255 | int cpu; | ||
256 | |||
257 | get_online_cpus(); | ||
258 | for_each_online_cpu(cpu) | ||
259 | if (likely(per_cpu(tracer, cpu))) | ||
260 | ds_suspend_bts(per_cpu(tracer, cpu)); | ||
261 | /* | ||
262 | * We need to collect the trace on the respective cpu since ftrace | ||
263 | * implicitly adds the record for the current cpu. | ||
264 | * Once that is more flexible, we could collect the data from any cpu. | ||
265 | */ | ||
266 | on_each_cpu(trace_bts_cpu, iter->tr, 1); | ||
267 | |||
268 | for_each_online_cpu(cpu) | ||
269 | if (likely(per_cpu(tracer, cpu))) | ||
270 | ds_resume_bts(per_cpu(tracer, cpu)); | ||
271 | put_online_cpus(); | ||
272 | } | ||
273 | |||
274 | static void trace_bts_close(struct trace_iterator *iter) | ||
275 | { | ||
276 | tracing_reset_online_cpus(iter->tr); | ||
277 | } | ||
278 | |||
279 | void trace_hw_branch_oops(void) | ||
280 | { | ||
281 | if (this_tracer) { | ||
282 | ds_suspend_bts_noirq(this_tracer); | ||
283 | trace_bts_cpu(hw_branch_trace); | ||
284 | ds_resume_bts_noirq(this_tracer); | ||
285 | } | ||
286 | } | ||
287 | |||
288 | struct tracer bts_tracer __read_mostly = | ||
289 | { | ||
290 | .name = "hw-branch-tracer", | ||
291 | .init = bts_trace_init, | ||
292 | .reset = bts_trace_reset, | ||
293 | .print_header = bts_trace_print_header, | ||
294 | .print_line = bts_trace_print_line, | ||
295 | .start = bts_trace_start, | ||
296 | .stop = bts_trace_stop, | ||
297 | .open = trace_bts_prepare, | ||
298 | .close = trace_bts_close, | ||
299 | #ifdef CONFIG_FTRACE_SELFTEST | ||
300 | .selftest = trace_selftest_startup_hw_branches, | ||
301 | #endif /* CONFIG_FTRACE_SELFTEST */ | ||
302 | }; | ||
303 | |||
304 | __init static int init_bts_trace(void) | ||
305 | { | ||
306 | register_hotcpu_notifier(&bts_hotcpu_notifier); | ||
307 | return register_tracer(&bts_tracer); | ||
308 | } | ||
309 | device_initcall(init_bts_trace); | ||
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index b923d13e2fad..5cf8c602b880 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
@@ -34,6 +34,9 @@ static int trace_type __read_mostly; | |||
34 | 34 | ||
35 | static int save_lat_flag; | 35 | static int save_lat_flag; |
36 | 36 | ||
37 | static void stop_irqsoff_tracer(struct trace_array *tr, int graph); | ||
38 | static int start_irqsoff_tracer(struct trace_array *tr, int graph); | ||
39 | |||
37 | #ifdef CONFIG_PREEMPT_TRACER | 40 | #ifdef CONFIG_PREEMPT_TRACER |
38 | static inline int | 41 | static inline int |
39 | preempt_trace(void) | 42 | preempt_trace(void) |
@@ -55,6 +58,23 @@ irq_trace(void) | |||
55 | # define irq_trace() (0) | 58 | # define irq_trace() (0) |
56 | #endif | 59 | #endif |
57 | 60 | ||
61 | #define TRACE_DISPLAY_GRAPH 1 | ||
62 | |||
63 | static struct tracer_opt trace_opts[] = { | ||
64 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
65 | /* display latency trace as call graph */ | ||
66 | { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) }, | ||
67 | #endif | ||
68 | { } /* Empty entry */ | ||
69 | }; | ||
70 | |||
71 | static struct tracer_flags tracer_flags = { | ||
72 | .val = 0, | ||
73 | .opts = trace_opts, | ||
74 | }; | ||
75 | |||
76 | #define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH) | ||
77 | |||
58 | /* | 78 | /* |
59 | * Sequence count - we record it when starting a measurement and | 79 | * Sequence count - we record it when starting a measurement and |
60 | * skip the latency if the sequence has changed - some other section | 80 | * skip the latency if the sequence has changed - some other section |
@@ -67,14 +87,22 @@ static __cacheline_aligned_in_smp unsigned long max_sequence; | |||
67 | 87 | ||
68 | #ifdef CONFIG_FUNCTION_TRACER | 88 | #ifdef CONFIG_FUNCTION_TRACER |
69 | /* | 89 | /* |
70 | * irqsoff uses its own tracer function to keep the overhead down: | 90 | * Prologue for the preempt and irqs off function tracers. |
91 | * | ||
92 | * Returns 1 if it is OK to continue, and data->disabled is | ||
93 | * incremented. | ||
94 | * 0 if the trace is to be ignored, and data->disabled | ||
95 | * is kept the same. | ||
96 | * | ||
97 | * Note, this function is also used outside this ifdef but | ||
98 | * inside the #ifdef of the function graph tracer below. | ||
99 | * This is OK, since the function graph tracer is | ||
100 | * dependent on the function tracer. | ||
71 | */ | 101 | */ |
72 | static void | 102 | static int func_prolog_dec(struct trace_array *tr, |
73 | irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) | 103 | struct trace_array_cpu **data, |
104 | unsigned long *flags) | ||
74 | { | 105 | { |
75 | struct trace_array *tr = irqsoff_trace; | ||
76 | struct trace_array_cpu *data; | ||
77 | unsigned long flags; | ||
78 | long disabled; | 106 | long disabled; |
79 | int cpu; | 107 | int cpu; |
80 | 108 | ||
@@ -86,18 +114,38 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) | |||
86 | */ | 114 | */ |
87 | cpu = raw_smp_processor_id(); | 115 | cpu = raw_smp_processor_id(); |
88 | if (likely(!per_cpu(tracing_cpu, cpu))) | 116 | if (likely(!per_cpu(tracing_cpu, cpu))) |
89 | return; | 117 | return 0; |
90 | 118 | ||
91 | local_save_flags(flags); | 119 | local_save_flags(*flags); |
92 | /* slight chance to get a false positive on tracing_cpu */ | 120 | /* slight chance to get a false positive on tracing_cpu */ |
93 | if (!irqs_disabled_flags(flags)) | 121 | if (!irqs_disabled_flags(*flags)) |
94 | return; | 122 | return 0; |
95 | 123 | ||
96 | data = tr->data[cpu]; | 124 | *data = tr->data[cpu]; |
97 | disabled = atomic_inc_return(&data->disabled); | 125 | disabled = atomic_inc_return(&(*data)->disabled); |
98 | 126 | ||
99 | if (likely(disabled == 1)) | 127 | if (likely(disabled == 1)) |
100 | trace_function(tr, ip, parent_ip, flags, preempt_count()); | 128 | return 1; |
129 | |||
130 | atomic_dec(&(*data)->disabled); | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * irqsoff uses its own tracer function to keep the overhead down: | ||
137 | */ | ||
138 | static void | ||
139 | irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) | ||
140 | { | ||
141 | struct trace_array *tr = irqsoff_trace; | ||
142 | struct trace_array_cpu *data; | ||
143 | unsigned long flags; | ||
144 | |||
145 | if (!func_prolog_dec(tr, &data, &flags)) | ||
146 | return; | ||
147 | |||
148 | trace_function(tr, ip, parent_ip, flags, preempt_count()); | ||
101 | 149 | ||
102 | atomic_dec(&data->disabled); | 150 | atomic_dec(&data->disabled); |
103 | } | 151 | } |
@@ -108,6 +156,132 @@ static struct ftrace_ops trace_ops __read_mostly = | |||
108 | }; | 156 | }; |
109 | #endif /* CONFIG_FUNCTION_TRACER */ | 157 | #endif /* CONFIG_FUNCTION_TRACER */ |
110 | 158 | ||
159 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
160 | static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) | ||
161 | { | ||
162 | int cpu; | ||
163 | |||
164 | if (!(bit & TRACE_DISPLAY_GRAPH)) | ||
165 | return -EINVAL; | ||
166 | |||
167 | if (!(is_graph() ^ set)) | ||
168 | return 0; | ||
169 | |||
170 | stop_irqsoff_tracer(irqsoff_trace, !set); | ||
171 | |||
172 | for_each_possible_cpu(cpu) | ||
173 | per_cpu(tracing_cpu, cpu) = 0; | ||
174 | |||
175 | tracing_max_latency = 0; | ||
176 | tracing_reset_online_cpus(irqsoff_trace); | ||
177 | |||
178 | return start_irqsoff_tracer(irqsoff_trace, set); | ||
179 | } | ||
180 | |||
181 | static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) | ||
182 | { | ||
183 | struct trace_array *tr = irqsoff_trace; | ||
184 | struct trace_array_cpu *data; | ||
185 | unsigned long flags; | ||
186 | int ret; | ||
187 | int pc; | ||
188 | |||
189 | if (!func_prolog_dec(tr, &data, &flags)) | ||
190 | return 0; | ||
191 | |||
192 | pc = preempt_count(); | ||
193 | ret = __trace_graph_entry(tr, trace, flags, pc); | ||
194 | atomic_dec(&data->disabled); | ||
195 | |||
196 | return ret; | ||
197 | } | ||
198 | |||
199 | static void irqsoff_graph_return(struct ftrace_graph_ret *trace) | ||
200 | { | ||
201 | struct trace_array *tr = irqsoff_trace; | ||
202 | struct trace_array_cpu *data; | ||
203 | unsigned long flags; | ||
204 | int pc; | ||
205 | |||
206 | if (!func_prolog_dec(tr, &data, &flags)) | ||
207 | return; | ||
208 | |||
209 | pc = preempt_count(); | ||
210 | __trace_graph_return(tr, trace, flags, pc); | ||
211 | atomic_dec(&data->disabled); | ||
212 | } | ||
213 | |||
214 | static void irqsoff_trace_open(struct trace_iterator *iter) | ||
215 | { | ||
216 | if (is_graph()) | ||
217 | graph_trace_open(iter); | ||
218 | |||
219 | } | ||
220 | |||
221 | static void irqsoff_trace_close(struct trace_iterator *iter) | ||
222 | { | ||
223 | if (iter->private) | ||
224 | graph_trace_close(iter); | ||
225 | } | ||
226 | |||
227 | #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \ | ||
228 | TRACE_GRAPH_PRINT_PROC) | ||
229 | |||
230 | static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) | ||
231 | { | ||
232 | /* | ||
233 | * In graph mode call the graph tracer output function, | ||
234 | * otherwise go with the TRACE_FN event handler | ||
235 | */ | ||
236 | if (is_graph()) | ||
237 | return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS); | ||
238 | |||
239 | return TRACE_TYPE_UNHANDLED; | ||
240 | } | ||
241 | |||
242 | static void irqsoff_print_header(struct seq_file *s) | ||
243 | { | ||
244 | if (is_graph()) | ||
245 | print_graph_headers_flags(s, GRAPH_TRACER_FLAGS); | ||
246 | else | ||
247 | trace_default_header(s); | ||
248 | } | ||
249 | |||
250 | static void | ||
251 | __trace_function(struct trace_array *tr, | ||
252 | unsigned long ip, unsigned long parent_ip, | ||
253 | unsigned long flags, int pc) | ||
254 | { | ||
255 | if (is_graph()) | ||
256 | trace_graph_function(tr, ip, parent_ip, flags, pc); | ||
257 | else | ||
258 | trace_function(tr, ip, parent_ip, flags, pc); | ||
259 | } | ||
260 | |||
261 | #else | ||
262 | #define __trace_function trace_function | ||
263 | |||
264 | static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) | ||
265 | { | ||
266 | return -EINVAL; | ||
267 | } | ||
268 | |||
269 | static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) | ||
270 | { | ||
271 | return -1; | ||
272 | } | ||
273 | |||
274 | static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) | ||
275 | { | ||
276 | return TRACE_TYPE_UNHANDLED; | ||
277 | } | ||
278 | |||
279 | static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { } | ||
280 | static void irqsoff_print_header(struct seq_file *s) { } | ||
281 | static void irqsoff_trace_open(struct trace_iterator *iter) { } | ||
282 | static void irqsoff_trace_close(struct trace_iterator *iter) { } | ||
283 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | ||
284 | |||
111 | /* | 285 | /* |
112 | * Should this new latency be reported/recorded? | 286 | * Should this new latency be reported/recorded? |
113 | */ | 287 | */ |
@@ -129,15 +303,10 @@ check_critical_timing(struct trace_array *tr, | |||
129 | unsigned long parent_ip, | 303 | unsigned long parent_ip, |
130 | int cpu) | 304 | int cpu) |
131 | { | 305 | { |
132 | unsigned long latency, t0, t1; | ||
133 | cycle_t T0, T1, delta; | 306 | cycle_t T0, T1, delta; |
134 | unsigned long flags; | 307 | unsigned long flags; |
135 | int pc; | 308 | int pc; |
136 | 309 | ||
137 | /* | ||
138 | * usecs conversion is slow so we try to delay the conversion | ||
139 | * as long as possible: | ||
140 | */ | ||
141 | T0 = data->preempt_timestamp; | 310 | T0 = data->preempt_timestamp; |
142 | T1 = ftrace_now(cpu); | 311 | T1 = ftrace_now(cpu); |
143 | delta = T1-T0; | 312 | delta = T1-T0; |
@@ -155,20 +324,19 @@ check_critical_timing(struct trace_array *tr, | |||
155 | if (!report_latency(delta)) | 324 | if (!report_latency(delta)) |
156 | goto out_unlock; | 325 | goto out_unlock; |
157 | 326 | ||
158 | trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); | 327 | __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); |
159 | 328 | /* Skip 5 functions to get to the irq/preempt enable function */ | |
160 | latency = nsecs_to_usecs(delta); | 329 | __trace_stack(tr, flags, 5, pc); |
161 | 330 | ||
162 | if (data->critical_sequence != max_sequence) | 331 | if (data->critical_sequence != max_sequence) |
163 | goto out_unlock; | 332 | goto out_unlock; |
164 | 333 | ||
165 | tracing_max_latency = delta; | ||
166 | t0 = nsecs_to_usecs(T0); | ||
167 | t1 = nsecs_to_usecs(T1); | ||
168 | |||
169 | data->critical_end = parent_ip; | 334 | data->critical_end = parent_ip; |
170 | 335 | ||
171 | update_max_tr_single(tr, current, cpu); | 336 | if (likely(!is_tracing_stopped())) { |
337 | tracing_max_latency = delta; | ||
338 | update_max_tr_single(tr, current, cpu); | ||
339 | } | ||
172 | 340 | ||
173 | max_sequence++; | 341 | max_sequence++; |
174 | 342 | ||
@@ -178,8 +346,7 @@ out_unlock: | |||
178 | out: | 346 | out: |
179 | data->critical_sequence = max_sequence; | 347 | data->critical_sequence = max_sequence; |
180 | data->preempt_timestamp = ftrace_now(cpu); | 348 | data->preempt_timestamp = ftrace_now(cpu); |
181 | tracing_reset(tr, cpu); | 349 | __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); |
182 | trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); | ||
183 | } | 350 | } |
184 | 351 | ||
185 | static inline void | 352 | static inline void |
@@ -208,11 +375,10 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) | |||
208 | data->critical_sequence = max_sequence; | 375 | data->critical_sequence = max_sequence; |
209 | data->preempt_timestamp = ftrace_now(cpu); | 376 | data->preempt_timestamp = ftrace_now(cpu); |
210 | data->critical_start = parent_ip ? : ip; | 377 | data->critical_start = parent_ip ? : ip; |
211 | tracing_reset(tr, cpu); | ||
212 | 378 | ||
213 | local_save_flags(flags); | 379 | local_save_flags(flags); |
214 | 380 | ||
215 | trace_function(tr, ip, parent_ip, flags, preempt_count()); | 381 | __trace_function(tr, ip, parent_ip, flags, preempt_count()); |
216 | 382 | ||
217 | per_cpu(tracing_cpu, cpu) = 1; | 383 | per_cpu(tracing_cpu, cpu) = 1; |
218 | 384 | ||
@@ -246,7 +412,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) | |||
246 | atomic_inc(&data->disabled); | 412 | atomic_inc(&data->disabled); |
247 | 413 | ||
248 | local_save_flags(flags); | 414 | local_save_flags(flags); |
249 | trace_function(tr, ip, parent_ip, flags, preempt_count()); | 415 | __trace_function(tr, ip, parent_ip, flags, preempt_count()); |
250 | check_critical_timing(tr, data, parent_ip ? : ip, cpu); | 416 | check_critical_timing(tr, data, parent_ip ? : ip, cpu); |
251 | data->critical_start = 0; | 417 | data->critical_start = 0; |
252 | atomic_dec(&data->disabled); | 418 | atomic_dec(&data->disabled); |
@@ -355,19 +521,32 @@ void trace_preempt_off(unsigned long a0, unsigned long a1) | |||
355 | } | 521 | } |
356 | #endif /* CONFIG_PREEMPT_TRACER */ | 522 | #endif /* CONFIG_PREEMPT_TRACER */ |
357 | 523 | ||
358 | static void start_irqsoff_tracer(struct trace_array *tr) | 524 | static int start_irqsoff_tracer(struct trace_array *tr, int graph) |
359 | { | 525 | { |
360 | register_ftrace_function(&trace_ops); | 526 | int ret = 0; |
361 | if (tracing_is_enabled()) | 527 | |
528 | if (!graph) | ||
529 | ret = register_ftrace_function(&trace_ops); | ||
530 | else | ||
531 | ret = register_ftrace_graph(&irqsoff_graph_return, | ||
532 | &irqsoff_graph_entry); | ||
533 | |||
534 | if (!ret && tracing_is_enabled()) | ||
362 | tracer_enabled = 1; | 535 | tracer_enabled = 1; |
363 | else | 536 | else |
364 | tracer_enabled = 0; | 537 | tracer_enabled = 0; |
538 | |||
539 | return ret; | ||
365 | } | 540 | } |
366 | 541 | ||
367 | static void stop_irqsoff_tracer(struct trace_array *tr) | 542 | static void stop_irqsoff_tracer(struct trace_array *tr, int graph) |
368 | { | 543 | { |
369 | tracer_enabled = 0; | 544 | tracer_enabled = 0; |
370 | unregister_ftrace_function(&trace_ops); | 545 | |
546 | if (!graph) | ||
547 | unregister_ftrace_function(&trace_ops); | ||
548 | else | ||
549 | unregister_ftrace_graph(); | ||
371 | } | 550 | } |
372 | 551 | ||
373 | static void __irqsoff_tracer_init(struct trace_array *tr) | 552 | static void __irqsoff_tracer_init(struct trace_array *tr) |
@@ -379,12 +558,15 @@ static void __irqsoff_tracer_init(struct trace_array *tr) | |||
379 | irqsoff_trace = tr; | 558 | irqsoff_trace = tr; |
380 | /* make sure that the tracer is visible */ | 559 | /* make sure that the tracer is visible */ |
381 | smp_wmb(); | 560 | smp_wmb(); |
382 | start_irqsoff_tracer(tr); | 561 | tracing_reset_online_cpus(tr); |
562 | |||
563 | if (start_irqsoff_tracer(tr, is_graph())) | ||
564 | printk(KERN_ERR "failed to start irqsoff tracer\n"); | ||
383 | } | 565 | } |
384 | 566 | ||
385 | static void irqsoff_tracer_reset(struct trace_array *tr) | 567 | static void irqsoff_tracer_reset(struct trace_array *tr) |
386 | { | 568 | { |
387 | stop_irqsoff_tracer(tr); | 569 | stop_irqsoff_tracer(tr, is_graph()); |
388 | 570 | ||
389 | if (!save_lat_flag) | 571 | if (!save_lat_flag) |
390 | trace_flags &= ~TRACE_ITER_LATENCY_FMT; | 572 | trace_flags &= ~TRACE_ITER_LATENCY_FMT; |
@@ -416,9 +598,16 @@ static struct tracer irqsoff_tracer __read_mostly = | |||
416 | .start = irqsoff_tracer_start, | 598 | .start = irqsoff_tracer_start, |
417 | .stop = irqsoff_tracer_stop, | 599 | .stop = irqsoff_tracer_stop, |
418 | .print_max = 1, | 600 | .print_max = 1, |
601 | .print_header = irqsoff_print_header, | ||
602 | .print_line = irqsoff_print_line, | ||
603 | .flags = &tracer_flags, | ||
604 | .set_flag = irqsoff_set_flag, | ||
419 | #ifdef CONFIG_FTRACE_SELFTEST | 605 | #ifdef CONFIG_FTRACE_SELFTEST |
420 | .selftest = trace_selftest_startup_irqsoff, | 606 | .selftest = trace_selftest_startup_irqsoff, |
421 | #endif | 607 | #endif |
608 | .open = irqsoff_trace_open, | ||
609 | .close = irqsoff_trace_close, | ||
610 | .use_max_tr = 1, | ||
422 | }; | 611 | }; |
423 | # define register_irqsoff(trace) register_tracer(&trace) | 612 | # define register_irqsoff(trace) register_tracer(&trace) |
424 | #else | 613 | #else |
@@ -442,9 +631,16 @@ static struct tracer preemptoff_tracer __read_mostly = | |||
442 | .start = irqsoff_tracer_start, | 631 | .start = irqsoff_tracer_start, |
443 | .stop = irqsoff_tracer_stop, | 632 | .stop = irqsoff_tracer_stop, |
444 | .print_max = 1, | 633 | .print_max = 1, |
634 | .print_header = irqsoff_print_header, | ||
635 | .print_line = irqsoff_print_line, | ||
636 | .flags = &tracer_flags, | ||
637 | .set_flag = irqsoff_set_flag, | ||
445 | #ifdef CONFIG_FTRACE_SELFTEST | 638 | #ifdef CONFIG_FTRACE_SELFTEST |
446 | .selftest = trace_selftest_startup_preemptoff, | 639 | .selftest = trace_selftest_startup_preemptoff, |
447 | #endif | 640 | #endif |
641 | .open = irqsoff_trace_open, | ||
642 | .close = irqsoff_trace_close, | ||
643 | .use_max_tr = 1, | ||
448 | }; | 644 | }; |
449 | # define register_preemptoff(trace) register_tracer(&trace) | 645 | # define register_preemptoff(trace) register_tracer(&trace) |
450 | #else | 646 | #else |
@@ -470,9 +666,16 @@ static struct tracer preemptirqsoff_tracer __read_mostly = | |||
470 | .start = irqsoff_tracer_start, | 666 | .start = irqsoff_tracer_start, |
471 | .stop = irqsoff_tracer_stop, | 667 | .stop = irqsoff_tracer_stop, |
472 | .print_max = 1, | 668 | .print_max = 1, |
669 | .print_header = irqsoff_print_header, | ||
670 | .print_line = irqsoff_print_line, | ||
671 | .flags = &tracer_flags, | ||
672 | .set_flag = irqsoff_set_flag, | ||
473 | #ifdef CONFIG_FTRACE_SELFTEST | 673 | #ifdef CONFIG_FTRACE_SELFTEST |
474 | .selftest = trace_selftest_startup_preemptirqsoff, | 674 | .selftest = trace_selftest_startup_preemptirqsoff, |
475 | #endif | 675 | #endif |
676 | .open = irqsoff_trace_open, | ||
677 | .close = irqsoff_trace_close, | ||
678 | .use_max_tr = 1, | ||
476 | }; | 679 | }; |
477 | 680 | ||
478 | # define register_preemptirqsoff(trace) register_tracer(&trace) | 681 | # define register_preemptirqsoff(trace) register_tracer(&trace) |
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c new file mode 100644 index 000000000000..3c5c5dfea0b3 --- /dev/null +++ b/kernel/trace/trace_kdb.c | |||
@@ -0,0 +1,135 @@ | |||
1 | /* | ||
2 | * kdb helper for dumping the ftrace buffer | ||
3 | * | ||
4 | * Copyright (C) 2010 Jason Wessel <jason.wessel@windriver.com> | ||
5 | * | ||
6 | * ftrace_dump_buf based on ftrace_dump: | ||
7 | * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> | ||
8 | * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> | ||
9 | * | ||
10 | */ | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/kgdb.h> | ||
13 | #include <linux/kdb.h> | ||
14 | #include <linux/ftrace.h> | ||
15 | |||
16 | #include "trace.h" | ||
17 | #include "trace_output.h" | ||
18 | |||
19 | static void ftrace_dump_buf(int skip_lines, long cpu_file) | ||
20 | { | ||
21 | /* use static because iter can be a bit big for the stack */ | ||
22 | static struct trace_iterator iter; | ||
23 | unsigned int old_userobj; | ||
24 | int cnt = 0, cpu; | ||
25 | |||
26 | trace_init_global_iter(&iter); | ||
27 | |||
28 | for_each_tracing_cpu(cpu) { | ||
29 | atomic_inc(&iter.tr->data[cpu]->disabled); | ||
30 | } | ||
31 | |||
32 | old_userobj = trace_flags; | ||
33 | |||
34 | /* don't look at user memory in panic mode */ | ||
35 | trace_flags &= ~TRACE_ITER_SYM_USEROBJ; | ||
36 | |||
37 | kdb_printf("Dumping ftrace buffer:\n"); | ||
38 | |||
39 | /* reset all but tr, trace, and overruns */ | ||
40 | memset(&iter.seq, 0, | ||
41 | sizeof(struct trace_iterator) - | ||
42 | offsetof(struct trace_iterator, seq)); | ||
43 | iter.iter_flags |= TRACE_FILE_LAT_FMT; | ||
44 | iter.pos = -1; | ||
45 | |||
46 | if (cpu_file == TRACE_PIPE_ALL_CPU) { | ||
47 | for_each_tracing_cpu(cpu) { | ||
48 | iter.buffer_iter[cpu] = | ||
49 | ring_buffer_read_prepare(iter.tr->buffer, cpu); | ||
50 | ring_buffer_read_start(iter.buffer_iter[cpu]); | ||
51 | tracing_iter_reset(&iter, cpu); | ||
52 | } | ||
53 | } else { | ||
54 | iter.cpu_file = cpu_file; | ||
55 | iter.buffer_iter[cpu_file] = | ||
56 | ring_buffer_read_prepare(iter.tr->buffer, cpu_file); | ||
57 | ring_buffer_read_start(iter.buffer_iter[cpu_file]); | ||
58 | tracing_iter_reset(&iter, cpu_file); | ||
59 | } | ||
60 | if (!trace_empty(&iter)) | ||
61 | trace_find_next_entry_inc(&iter); | ||
62 | while (!trace_empty(&iter)) { | ||
63 | if (!cnt) | ||
64 | kdb_printf("---------------------------------\n"); | ||
65 | cnt++; | ||
66 | |||
67 | if (trace_find_next_entry_inc(&iter) != NULL && !skip_lines) | ||
68 | print_trace_line(&iter); | ||
69 | if (!skip_lines) | ||
70 | trace_printk_seq(&iter.seq); | ||
71 | else | ||
72 | skip_lines--; | ||
73 | if (KDB_FLAG(CMD_INTERRUPT)) | ||
74 | goto out; | ||
75 | } | ||
76 | |||
77 | if (!cnt) | ||
78 | kdb_printf(" (ftrace buffer empty)\n"); | ||
79 | else | ||
80 | kdb_printf("---------------------------------\n"); | ||
81 | |||
82 | out: | ||
83 | trace_flags = old_userobj; | ||
84 | |||
85 | for_each_tracing_cpu(cpu) { | ||
86 | atomic_dec(&iter.tr->data[cpu]->disabled); | ||
87 | } | ||
88 | |||
89 | for_each_tracing_cpu(cpu) | ||
90 | if (iter.buffer_iter[cpu]) | ||
91 | ring_buffer_read_finish(iter.buffer_iter[cpu]); | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * kdb_ftdump - Dump the ftrace log buffer | ||
96 | */ | ||
97 | static int kdb_ftdump(int argc, const char **argv) | ||
98 | { | ||
99 | int skip_lines = 0; | ||
100 | long cpu_file; | ||
101 | char *cp; | ||
102 | |||
103 | if (argc > 2) | ||
104 | return KDB_ARGCOUNT; | ||
105 | |||
106 | if (argc) { | ||
107 | skip_lines = simple_strtol(argv[1], &cp, 0); | ||
108 | if (*cp) | ||
109 | skip_lines = 0; | ||
110 | } | ||
111 | |||
112 | if (argc == 2) { | ||
113 | cpu_file = simple_strtol(argv[2], &cp, 0); | ||
114 | if (*cp || cpu_file >= NR_CPUS || cpu_file < 0 || | ||
115 | !cpu_online(cpu_file)) | ||
116 | return KDB_BADINT; | ||
117 | } else { | ||
118 | cpu_file = TRACE_PIPE_ALL_CPU; | ||
119 | } | ||
120 | |||
121 | kdb_trap_printk++; | ||
122 | ftrace_dump_buf(skip_lines, cpu_file); | ||
123 | kdb_trap_printk--; | ||
124 | |||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | static __init int kdb_ftrace_register(void) | ||
129 | { | ||
130 | kdb_register_repeat("ftdump", kdb_ftdump, "[skip_#lines] [cpu]", | ||
131 | "Dump ftrace log", 0, KDB_REPEAT_NONE); | ||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | late_initcall(kdb_ftrace_register); | ||
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c new file mode 100644 index 000000000000..2dec9bcde8b4 --- /dev/null +++ b/kernel/trace/trace_kprobe.c | |||
@@ -0,0 +1,1847 @@ | |||
1 | /* | ||
2 | * Kprobes-based tracing events | ||
3 | * | ||
4 | * Created by Masami Hiramatsu <mhiramat@redhat.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | |||
20 | #include <linux/module.h> | ||
21 | #include <linux/uaccess.h> | ||
22 | #include <linux/kprobes.h> | ||
23 | #include <linux/seq_file.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/smp.h> | ||
26 | #include <linux/debugfs.h> | ||
27 | #include <linux/types.h> | ||
28 | #include <linux/string.h> | ||
29 | #include <linux/ctype.h> | ||
30 | #include <linux/ptrace.h> | ||
31 | #include <linux/perf_event.h> | ||
32 | #include <linux/stringify.h> | ||
33 | #include <linux/limits.h> | ||
34 | #include <asm/bitsperlong.h> | ||
35 | |||
36 | #include "trace.h" | ||
37 | #include "trace_output.h" | ||
38 | |||
39 | #define MAX_TRACE_ARGS 128 | ||
40 | #define MAX_ARGSTR_LEN 63 | ||
41 | #define MAX_EVENT_NAME_LEN 64 | ||
42 | #define MAX_STRING_SIZE PATH_MAX | ||
43 | #define KPROBE_EVENT_SYSTEM "kprobes" | ||
44 | |||
45 | /* Reserved field names */ | ||
46 | #define FIELD_STRING_IP "__probe_ip" | ||
47 | #define FIELD_STRING_RETIP "__probe_ret_ip" | ||
48 | #define FIELD_STRING_FUNC "__probe_func" | ||
49 | |||
50 | const char *reserved_field_names[] = { | ||
51 | "common_type", | ||
52 | "common_flags", | ||
53 | "common_preempt_count", | ||
54 | "common_pid", | ||
55 | "common_tgid", | ||
56 | "common_lock_depth", | ||
57 | FIELD_STRING_IP, | ||
58 | FIELD_STRING_RETIP, | ||
59 | FIELD_STRING_FUNC, | ||
60 | }; | ||
61 | |||
62 | /* Printing function type */ | ||
63 | typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, | ||
64 | void *); | ||
65 | #define PRINT_TYPE_FUNC_NAME(type) print_type_##type | ||
66 | #define PRINT_TYPE_FMT_NAME(type) print_type_format_##type | ||
67 | |||
68 | /* Printing in basic type function template */ | ||
69 | #define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \ | ||
70 | static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ | ||
71 | const char *name, \ | ||
72 | void *data, void *ent)\ | ||
73 | { \ | ||
74 | return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\ | ||
75 | } \ | ||
76 | static const char PRINT_TYPE_FMT_NAME(type)[] = fmt; | ||
77 | |||
78 | DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int) | ||
79 | DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int) | ||
80 | DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long) | ||
81 | DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long) | ||
82 | DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int) | ||
83 | DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int) | ||
84 | DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long) | ||
85 | DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long) | ||
86 | |||
87 | /* data_rloc: data relative location, compatible with u32 */ | ||
88 | #define make_data_rloc(len, roffs) \ | ||
89 | (((u32)(len) << 16) | ((u32)(roffs) & 0xffff)) | ||
90 | #define get_rloc_len(dl) ((u32)(dl) >> 16) | ||
91 | #define get_rloc_offs(dl) ((u32)(dl) & 0xffff) | ||
92 | |||
93 | static inline void *get_rloc_data(u32 *dl) | ||
94 | { | ||
95 | return (u8 *)dl + get_rloc_offs(*dl); | ||
96 | } | ||
97 | |||
98 | /* For data_loc conversion */ | ||
99 | static inline void *get_loc_data(u32 *dl, void *ent) | ||
100 | { | ||
101 | return (u8 *)ent + get_rloc_offs(*dl); | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * Convert data_rloc to data_loc: | ||
106 | * data_rloc stores the offset from data_rloc itself, but data_loc | ||
107 | * stores the offset from event entry. | ||
108 | */ | ||
109 | #define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs)) | ||
110 | |||
111 | /* For defining macros, define string/string_size types */ | ||
112 | typedef u32 string; | ||
113 | typedef u32 string_size; | ||
114 | |||
115 | /* Print type function for string type */ | ||
116 | static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, | ||
117 | const char *name, | ||
118 | void *data, void *ent) | ||
119 | { | ||
120 | int len = *(u32 *)data >> 16; | ||
121 | |||
122 | if (!len) | ||
123 | return trace_seq_printf(s, " %s=(fault)", name); | ||
124 | else | ||
125 | return trace_seq_printf(s, " %s=\"%s\"", name, | ||
126 | (const char *)get_loc_data(data, ent)); | ||
127 | } | ||
128 | static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; | ||
129 | |||
130 | /* Data fetch function type */ | ||
131 | typedef void (*fetch_func_t)(struct pt_regs *, void *, void *); | ||
132 | |||
133 | struct fetch_param { | ||
134 | fetch_func_t fn; | ||
135 | void *data; | ||
136 | }; | ||
137 | |||
138 | static __kprobes void call_fetch(struct fetch_param *fprm, | ||
139 | struct pt_regs *regs, void *dest) | ||
140 | { | ||
141 | return fprm->fn(regs, fprm->data, dest); | ||
142 | } | ||
143 | |||
144 | #define FETCH_FUNC_NAME(method, type) fetch_##method##_##type | ||
145 | /* | ||
146 | * Define macro for basic types - we don't need to define s* types, because | ||
147 | * we have to care only about bitwidth at recording time. | ||
148 | */ | ||
149 | #define DEFINE_BASIC_FETCH_FUNCS(method) \ | ||
150 | DEFINE_FETCH_##method(u8) \ | ||
151 | DEFINE_FETCH_##method(u16) \ | ||
152 | DEFINE_FETCH_##method(u32) \ | ||
153 | DEFINE_FETCH_##method(u64) | ||
154 | |||
155 | #define CHECK_FETCH_FUNCS(method, fn) \ | ||
156 | (((FETCH_FUNC_NAME(method, u8) == fn) || \ | ||
157 | (FETCH_FUNC_NAME(method, u16) == fn) || \ | ||
158 | (FETCH_FUNC_NAME(method, u32) == fn) || \ | ||
159 | (FETCH_FUNC_NAME(method, u64) == fn) || \ | ||
160 | (FETCH_FUNC_NAME(method, string) == fn) || \ | ||
161 | (FETCH_FUNC_NAME(method, string_size) == fn)) \ | ||
162 | && (fn != NULL)) | ||
163 | |||
164 | /* Data fetch function templates */ | ||
165 | #define DEFINE_FETCH_reg(type) \ | ||
166 | static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \ | ||
167 | void *offset, void *dest) \ | ||
168 | { \ | ||
169 | *(type *)dest = (type)regs_get_register(regs, \ | ||
170 | (unsigned int)((unsigned long)offset)); \ | ||
171 | } | ||
172 | DEFINE_BASIC_FETCH_FUNCS(reg) | ||
173 | /* No string on the register */ | ||
174 | #define fetch_reg_string NULL | ||
175 | #define fetch_reg_string_size NULL | ||
176 | |||
177 | #define DEFINE_FETCH_stack(type) \ | ||
178 | static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ | ||
179 | void *offset, void *dest) \ | ||
180 | { \ | ||
181 | *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \ | ||
182 | (unsigned int)((unsigned long)offset)); \ | ||
183 | } | ||
184 | DEFINE_BASIC_FETCH_FUNCS(stack) | ||
185 | /* No string on the stack entry */ | ||
186 | #define fetch_stack_string NULL | ||
187 | #define fetch_stack_string_size NULL | ||
188 | |||
189 | #define DEFINE_FETCH_retval(type) \ | ||
190 | static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\ | ||
191 | void *dummy, void *dest) \ | ||
192 | { \ | ||
193 | *(type *)dest = (type)regs_return_value(regs); \ | ||
194 | } | ||
195 | DEFINE_BASIC_FETCH_FUNCS(retval) | ||
196 | /* No string on the retval */ | ||
197 | #define fetch_retval_string NULL | ||
198 | #define fetch_retval_string_size NULL | ||
199 | |||
200 | #define DEFINE_FETCH_memory(type) \ | ||
201 | static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ | ||
202 | void *addr, void *dest) \ | ||
203 | { \ | ||
204 | type retval; \ | ||
205 | if (probe_kernel_address(addr, retval)) \ | ||
206 | *(type *)dest = 0; \ | ||
207 | else \ | ||
208 | *(type *)dest = retval; \ | ||
209 | } | ||
210 | DEFINE_BASIC_FETCH_FUNCS(memory) | ||
211 | /* | ||
212 | * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max | ||
213 | * length and relative data location. | ||
214 | */ | ||
215 | static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, | ||
216 | void *addr, void *dest) | ||
217 | { | ||
218 | long ret; | ||
219 | int maxlen = get_rloc_len(*(u32 *)dest); | ||
220 | u8 *dst = get_rloc_data(dest); | ||
221 | u8 *src = addr; | ||
222 | mm_segment_t old_fs = get_fs(); | ||
223 | if (!maxlen) | ||
224 | return; | ||
225 | /* | ||
226 | * Try to get string again, since the string can be changed while | ||
227 | * probing. | ||
228 | */ | ||
229 | set_fs(KERNEL_DS); | ||
230 | pagefault_disable(); | ||
231 | do | ||
232 | ret = __copy_from_user_inatomic(dst++, src++, 1); | ||
233 | while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen); | ||
234 | dst[-1] = '\0'; | ||
235 | pagefault_enable(); | ||
236 | set_fs(old_fs); | ||
237 | |||
238 | if (ret < 0) { /* Failed to fetch string */ | ||
239 | ((u8 *)get_rloc_data(dest))[0] = '\0'; | ||
240 | *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); | ||
241 | } else | ||
242 | *(u32 *)dest = make_data_rloc(src - (u8 *)addr, | ||
243 | get_rloc_offs(*(u32 *)dest)); | ||
244 | } | ||
245 | /* Return the length of string -- including null terminal byte */ | ||
246 | static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, | ||
247 | void *addr, void *dest) | ||
248 | { | ||
249 | int ret, len = 0; | ||
250 | u8 c; | ||
251 | mm_segment_t old_fs = get_fs(); | ||
252 | |||
253 | set_fs(KERNEL_DS); | ||
254 | pagefault_disable(); | ||
255 | do { | ||
256 | ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); | ||
257 | len++; | ||
258 | } while (c && ret == 0 && len < MAX_STRING_SIZE); | ||
259 | pagefault_enable(); | ||
260 | set_fs(old_fs); | ||
261 | |||
262 | if (ret < 0) /* Failed to check the length */ | ||
263 | *(u32 *)dest = 0; | ||
264 | else | ||
265 | *(u32 *)dest = len; | ||
266 | } | ||
267 | |||
268 | /* Memory fetching by symbol */ | ||
269 | struct symbol_cache { | ||
270 | char *symbol; | ||
271 | long offset; | ||
272 | unsigned long addr; | ||
273 | }; | ||
274 | |||
275 | static unsigned long update_symbol_cache(struct symbol_cache *sc) | ||
276 | { | ||
277 | sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); | ||
278 | if (sc->addr) | ||
279 | sc->addr += sc->offset; | ||
280 | return sc->addr; | ||
281 | } | ||
282 | |||
283 | static void free_symbol_cache(struct symbol_cache *sc) | ||
284 | { | ||
285 | kfree(sc->symbol); | ||
286 | kfree(sc); | ||
287 | } | ||
288 | |||
289 | static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) | ||
290 | { | ||
291 | struct symbol_cache *sc; | ||
292 | |||
293 | if (!sym || strlen(sym) == 0) | ||
294 | return NULL; | ||
295 | sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); | ||
296 | if (!sc) | ||
297 | return NULL; | ||
298 | |||
299 | sc->symbol = kstrdup(sym, GFP_KERNEL); | ||
300 | if (!sc->symbol) { | ||
301 | kfree(sc); | ||
302 | return NULL; | ||
303 | } | ||
304 | sc->offset = offset; | ||
305 | |||
306 | update_symbol_cache(sc); | ||
307 | return sc; | ||
308 | } | ||
309 | |||
310 | #define DEFINE_FETCH_symbol(type) \ | ||
311 | static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\ | ||
312 | void *data, void *dest) \ | ||
313 | { \ | ||
314 | struct symbol_cache *sc = data; \ | ||
315 | if (sc->addr) \ | ||
316 | fetch_memory_##type(regs, (void *)sc->addr, dest); \ | ||
317 | else \ | ||
318 | *(type *)dest = 0; \ | ||
319 | } | ||
320 | DEFINE_BASIC_FETCH_FUNCS(symbol) | ||
321 | DEFINE_FETCH_symbol(string) | ||
322 | DEFINE_FETCH_symbol(string_size) | ||
323 | |||
324 | /* Dereference memory access function */ | ||
325 | struct deref_fetch_param { | ||
326 | struct fetch_param orig; | ||
327 | long offset; | ||
328 | }; | ||
329 | |||
330 | #define DEFINE_FETCH_deref(type) \ | ||
331 | static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\ | ||
332 | void *data, void *dest) \ | ||
333 | { \ | ||
334 | struct deref_fetch_param *dprm = data; \ | ||
335 | unsigned long addr; \ | ||
336 | call_fetch(&dprm->orig, regs, &addr); \ | ||
337 | if (addr) { \ | ||
338 | addr += dprm->offset; \ | ||
339 | fetch_memory_##type(regs, (void *)addr, dest); \ | ||
340 | } else \ | ||
341 | *(type *)dest = 0; \ | ||
342 | } | ||
343 | DEFINE_BASIC_FETCH_FUNCS(deref) | ||
344 | DEFINE_FETCH_deref(string) | ||
345 | DEFINE_FETCH_deref(string_size) | ||
346 | |||
347 | static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) | ||
348 | { | ||
349 | if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) | ||
350 | free_deref_fetch_param(data->orig.data); | ||
351 | else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) | ||
352 | free_symbol_cache(data->orig.data); | ||
353 | kfree(data); | ||
354 | } | ||
355 | |||
356 | /* Default (unsigned long) fetch type */ | ||
357 | #define __DEFAULT_FETCH_TYPE(t) u##t | ||
358 | #define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) | ||
359 | #define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) | ||
360 | #define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) | ||
361 | |||
362 | /* Fetch types */ | ||
363 | enum { | ||
364 | FETCH_MTD_reg = 0, | ||
365 | FETCH_MTD_stack, | ||
366 | FETCH_MTD_retval, | ||
367 | FETCH_MTD_memory, | ||
368 | FETCH_MTD_symbol, | ||
369 | FETCH_MTD_deref, | ||
370 | FETCH_MTD_END, | ||
371 | }; | ||
372 | |||
/* Pick the fetch function of one method for a given fetch data type. */
#define ASSIGN_FETCH_FUNC(method, type) \
	[FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)

/*
 * Build one fetch_type table entry: name/size/signedness, the printing
 * hooks, and the full per-method fetch function vector for ftype.
 */
#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype)	\
	{.name = _name,				\
	 .size = _size,				\
	 .is_signed = sign,			\
	 .print = PRINT_TYPE_FUNC_NAME(ptype),	\
	 .fmt = PRINT_TYPE_FMT_NAME(ptype),	\
	 .fmttype = _fmttype,			\
	 .fetch = {				\
	ASSIGN_FETCH_FUNC(reg, ftype),		\
	ASSIGN_FETCH_FUNC(stack, ftype),	\
	ASSIGN_FETCH_FUNC(retval, ftype),	\
	ASSIGN_FETCH_FUNC(memory, ftype),	\
	ASSIGN_FETCH_FUNC(symbol, ftype),	\
	ASSIGN_FETCH_FUNC(deref, ftype),	\
	  }					\
	}

/* Shorthand for types whose format-file name equals the type name. */
#define ASSIGN_FETCH_TYPE(ptype, ftype, sign)			\
	__ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
395 | |||
#define FETCH_TYPE_STRING 0	/* index of the "string" entry below */
#define FETCH_TYPE_STRSIZE 1	/* index of the "string_size" entry below */

/* Fetch type information table */
static const struct fetch_type {
	const char	*name;		/* Name of type */
	size_t		size;		/* Byte size of type */
	int		is_signed;	/* Signed flag */
	print_type_func_t	print;	/* Print functions */
	const char	*fmt;		/* Format string */
	const char	*fmttype;	/* Name in format file */
	/* Fetch functions */
	fetch_func_t	fetch[FETCH_MTD_END];
} fetch_type_table[] = {
	/* Special types */
	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
					sizeof(u32), 1, "__data_loc char[]"),
	[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
					string_size, sizeof(u32), 0, "u32"),
	/* Basic types */
	ASSIGN_FETCH_TYPE(u8, u8, 0),
	ASSIGN_FETCH_TYPE(u16, u16, 0),
	ASSIGN_FETCH_TYPE(u32, u32, 0),
	ASSIGN_FETCH_TYPE(u64, u64, 0),
	ASSIGN_FETCH_TYPE(s8, u8, 1),
	ASSIGN_FETCH_TYPE(s16, u16, 1),
	ASSIGN_FETCH_TYPE(s32, u32, 1),
	ASSIGN_FETCH_TYPE(s64, u64, 1),
};
425 | |||
426 | static const struct fetch_type *find_fetch_type(const char *type) | ||
427 | { | ||
428 | int i; | ||
429 | |||
430 | if (!type) | ||
431 | type = DEFAULT_FETCH_TYPE_STR; | ||
432 | |||
433 | for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++) | ||
434 | if (strcmp(type, fetch_type_table[i].name) == 0) | ||
435 | return &fetch_type_table[i]; | ||
436 | return NULL; | ||
437 | } | ||
438 | |||
/* Special function : only accept unsigned long */
/* Fetch the stack pointer itself (not a value read from the stack). */
static __kprobes void fetch_stack_address(struct pt_regs *regs,
					void *dummy, void *dest)
{
	*(unsigned long *)dest = kernel_stack_pointer(regs);
}
445 | |||
446 | static fetch_func_t get_fetch_size_function(const struct fetch_type *type, | ||
447 | fetch_func_t orig_fn) | ||
448 | { | ||
449 | int i; | ||
450 | |||
451 | if (type != &fetch_type_table[FETCH_TYPE_STRING]) | ||
452 | return NULL; /* Only string type needs size function */ | ||
453 | for (i = 0; i < FETCH_MTD_END; i++) | ||
454 | if (type->fetch[i] == orig_fn) | ||
455 | return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i]; | ||
456 | |||
457 | WARN_ON(1); /* This should not happen */ | ||
458 | return NULL; | ||
459 | } | ||
460 | |||
461 | /** | ||
462 | * Kprobe event core functions | ||
463 | */ | ||
464 | |||
465 | struct probe_arg { | ||
466 | struct fetch_param fetch; | ||
467 | struct fetch_param fetch_size; | ||
468 | unsigned int offset; /* Offset from argument entry */ | ||
469 | const char *name; /* Name of this argument */ | ||
470 | const char *comm; /* Command of this argument */ | ||
471 | const struct fetch_type *type; /* Type of this argument */ | ||
472 | }; | ||
473 | |||
/* Flags for trace_probe */
#define TP_FLAG_TRACE	1	/* enabled for ftrace */
#define TP_FLAG_PROFILE	2	/* enabled for perf */

/* A registered kprobe/kretprobe trace event. */
struct trace_probe {
	struct list_head	list;	/* link in probe_list */
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	unsigned long		nhit;	/* hit counter */
	unsigned int		flags;	/* For TP_FLAG_* */
	const char		*symbol;	/* symbol name */
	struct ftrace_event_class	class;
	struct ftrace_event_call	call;
	ssize_t			size;	/* trace entry size */
	unsigned int		nr_args;	/* number of valid args[] entries */
	struct probe_arg	args[];	/* flexible array of arguments */
};

/* Allocation size of a trace_probe carrying n arguments. */
#define SIZEOF_TRACE_PROBE(n)			\
	(offsetof(struct trace_probe, args) +	\
	(sizeof(struct probe_arg) * (n)))
495 | |||
496 | static __kprobes int probe_is_return(struct trace_probe *tp) | ||
497 | { | ||
498 | return tp->rp.handler != NULL; | ||
499 | } | ||
500 | |||
501 | static __kprobes const char *probe_symbol(struct trace_probe *tp) | ||
502 | { | ||
503 | return tp->symbol ? tp->symbol : "unknown"; | ||
504 | } | ||
505 | |||
static int register_probe_event(struct trace_probe *tp);
static void unregister_probe_event(struct trace_probe *tp);

/* probe_lock protects probe_list and the registered probes on it. */
static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);
515 | |||
/*
 * Check the name is good for event/group/fields: it must follow
 * C-identifier rules, i.e. [A-Za-z_][A-Za-z0-9_]*.
 * Returns 1 if the name is usable, 0 otherwise (including "").
 */
static int is_good_name(const char *name)
{
	/* First character: letter or underscore only (no digits). */
	if (!isalpha(*name) && *name != '_')
		return 0;
	/* Remaining characters: isalnum() == isalpha() || isdigit(). */
	while (*++name != '\0') {
		if (!isalnum(*name) && *name != '_')
			return 0;
	}
	return 1;
}
527 | |||
528 | /* | ||
529 | * Allocate new trace_probe and initialize it (including kprobes). | ||
530 | */ | ||
531 | static struct trace_probe *alloc_trace_probe(const char *group, | ||
532 | const char *event, | ||
533 | void *addr, | ||
534 | const char *symbol, | ||
535 | unsigned long offs, | ||
536 | int nargs, int is_return) | ||
537 | { | ||
538 | struct trace_probe *tp; | ||
539 | int ret = -ENOMEM; | ||
540 | |||
541 | tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL); | ||
542 | if (!tp) | ||
543 | return ERR_PTR(ret); | ||
544 | |||
545 | if (symbol) { | ||
546 | tp->symbol = kstrdup(symbol, GFP_KERNEL); | ||
547 | if (!tp->symbol) | ||
548 | goto error; | ||
549 | tp->rp.kp.symbol_name = tp->symbol; | ||
550 | tp->rp.kp.offset = offs; | ||
551 | } else | ||
552 | tp->rp.kp.addr = addr; | ||
553 | |||
554 | if (is_return) | ||
555 | tp->rp.handler = kretprobe_dispatcher; | ||
556 | else | ||
557 | tp->rp.kp.pre_handler = kprobe_dispatcher; | ||
558 | |||
559 | if (!event || !is_good_name(event)) { | ||
560 | ret = -EINVAL; | ||
561 | goto error; | ||
562 | } | ||
563 | |||
564 | tp->call.class = &tp->class; | ||
565 | tp->call.name = kstrdup(event, GFP_KERNEL); | ||
566 | if (!tp->call.name) | ||
567 | goto error; | ||
568 | |||
569 | if (!group || !is_good_name(group)) { | ||
570 | ret = -EINVAL; | ||
571 | goto error; | ||
572 | } | ||
573 | |||
574 | tp->class.system = kstrdup(group, GFP_KERNEL); | ||
575 | if (!tp->class.system) | ||
576 | goto error; | ||
577 | |||
578 | INIT_LIST_HEAD(&tp->list); | ||
579 | return tp; | ||
580 | error: | ||
581 | kfree(tp->call.name); | ||
582 | kfree(tp->symbol); | ||
583 | kfree(tp); | ||
584 | return ERR_PTR(ret); | ||
585 | } | ||
586 | |||
587 | static void free_probe_arg(struct probe_arg *arg) | ||
588 | { | ||
589 | if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) | ||
590 | free_deref_fetch_param(arg->fetch.data); | ||
591 | else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) | ||
592 | free_symbol_cache(arg->fetch.data); | ||
593 | kfree(arg->name); | ||
594 | kfree(arg->comm); | ||
595 | } | ||
596 | |||
597 | static void free_trace_probe(struct trace_probe *tp) | ||
598 | { | ||
599 | int i; | ||
600 | |||
601 | for (i = 0; i < tp->nr_args; i++) | ||
602 | free_probe_arg(&tp->args[i]); | ||
603 | |||
604 | kfree(tp->call.class->system); | ||
605 | kfree(tp->call.name); | ||
606 | kfree(tp->symbol); | ||
607 | kfree(tp); | ||
608 | } | ||
609 | |||
610 | static struct trace_probe *find_probe_event(const char *event, | ||
611 | const char *group) | ||
612 | { | ||
613 | struct trace_probe *tp; | ||
614 | |||
615 | list_for_each_entry(tp, &probe_list, list) | ||
616 | if (strcmp(tp->call.name, event) == 0 && | ||
617 | strcmp(tp->call.class->system, group) == 0) | ||
618 | return tp; | ||
619 | return NULL; | ||
620 | } | ||
621 | |||
/* Unregister a trace_probe and probe_event: call with locking probe_lock */
static void unregister_trace_probe(struct trace_probe *tp)
{
	/* Disarm the kprobe first so no handler can fire afterwards. */
	if (probe_is_return(tp))
		unregister_kretprobe(&tp->rp);
	else
		unregister_kprobe(&tp->rp.kp);
	list_del(&tp->list);
	unregister_probe_event(tp);
}
632 | |||
/* Register a trace_probe and probe_event */
static int register_trace_probe(struct trace_probe *tp)
{
	struct trace_probe *old_tp;
	int ret;

	mutex_lock(&probe_lock);

	/* register as an event; an existing event of the same name is replaced */
	old_tp = find_probe_event(tp->call.name, tp->call.class->system);
	if (old_tp) {
		/* delete old event */
		unregister_trace_probe(old_tp);
		free_trace_probe(old_tp);
	}
	ret = register_probe_event(tp);
	if (ret) {
		pr_warning("Failed to register probe event(%d)\n", ret);
		goto end;
	}

	/* Start disarmed; the event enable interface arms it later. */
	tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
	if (probe_is_return(tp))
		ret = register_kretprobe(&tp->rp);
	else
		ret = register_kprobe(&tp->rp.kp);

	if (ret) {
		pr_warning("Could not insert probe(%d)\n", ret);
		if (ret == -EILSEQ) {
			/* -EILSEQ from the kprobe core means a bad address. */
			pr_warning("Probing address(0x%p) is not an "
				   "instruction boundary.\n",
				   tp->rp.kp.addr);
			ret = -EINVAL;
		}
		/* Roll back the event registration on kprobe failure. */
		unregister_probe_event(tp);
	} else
		list_add_tail(&tp->list, &probe_list);
end:
	mutex_unlock(&probe_lock);
	return ret;
}
675 | |||
676 | /* Split symbol and offset. */ | ||
677 | static int split_symbol_offset(char *symbol, unsigned long *offset) | ||
678 | { | ||
679 | char *tmp; | ||
680 | int ret; | ||
681 | |||
682 | if (!offset) | ||
683 | return -EINVAL; | ||
684 | |||
685 | tmp = strchr(symbol, '+'); | ||
686 | if (tmp) { | ||
687 | /* skip sign because strict_strtol doesn't accept '+' */ | ||
688 | ret = strict_strtoul(tmp + 1, 0, offset); | ||
689 | if (ret) | ||
690 | return ret; | ||
691 | *tmp = '\0'; | ||
692 | } else | ||
693 | *offset = 0; | ||
694 | return 0; | ||
695 | } | ||
696 | |||
#define PARAM_MAX_ARGS 16	/* not used here; max fetch args per probe */
#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))

/*
 * Parse a "$var" fetch argument (arg points past the '$'):
 *   retval - function return value (return probes only)
 *   stack  - stack address itself (default type only)
 *   stackN - Nth word on the stack (0 <= N <= PARAM_MAX_STACK)
 * On success the fetch function/data are stored in *f.
 * Returns 0 or -EINVAL.
 */
static int parse_probe_vars(char *arg, const struct fetch_type *t,
			    struct fetch_param *f, int is_return)
{
	int ret = 0;
	unsigned long param;

	if (strcmp(arg, "retval") == 0) {
		if (is_return)
			f->fn = t->fetch[FETCH_MTD_retval];
		else
			ret = -EINVAL;	/* $retval needs a kretprobe */
	} else if (strncmp(arg, "stack", 5) == 0) {
		if (arg[5] == '\0') {
			/* Bare "$stack" is an address, so only the
			 * default (unsigned long) type makes sense. */
			if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
				f->fn = fetch_stack_address;
			else
				ret = -EINVAL;
		} else if (isdigit(arg[5])) {
			ret = strict_strtoul(arg + 5, 10, &param);
			if (ret || param > PARAM_MAX_STACK)
				ret = -EINVAL;
			else {
				f->fn = t->fetch[FETCH_MTD_stack];
				f->data = (void *)param;	/* stack slot index */
			}
		} else
			ret = -EINVAL;
	} else
		ret = -EINVAL;
	return ret;
}
731 | |||
/* Recursive argument parser */
/*
 * Parse one fetch argument (in place, arg is modified) into *f:
 *   $...         - special variables (see parse_probe_vars)
 *   %REG         - a named register
 *   @ADDR / @SYM - memory at an absolute address or data symbol
 *   +|-offs(ARG) - memory at ARG's value +|- offs (recurses on ARG)
 * Returns 0 on success or a negative errno.
 */
static int __parse_probe_arg(char *arg, const struct fetch_type *t,
			     struct fetch_param *f, int is_return)
{
	int ret = 0;
	unsigned long param;
	long offset;
	char *tmp;

	switch (arg[0]) {
	case '$':
		ret = parse_probe_vars(arg + 1, t, f, is_return);
		break;
	case '%':	/* named register */
		ret = regs_query_register_offset(arg + 1);
		if (ret >= 0) {
			f->fn = t->fetch[FETCH_MTD_reg];
			f->data = (void *)(unsigned long)ret;	/* reg offset */
			ret = 0;
		}
		break;
	case '@':	/* memory or symbol */
		if (isdigit(arg[1])) {
			ret = strict_strtoul(arg + 1, 0, &param);
			if (ret)
				break;
			f->fn = t->fetch[FETCH_MTD_memory];
			f->data = (void *)param;
		} else {
			ret = split_symbol_offset(arg + 1, &offset);
			if (ret)
				break;
			f->data = alloc_symbol_cache(arg + 1, offset);
			/* On failure f->fn stays NULL; caught below. */
			if (f->data)
				f->fn = t->fetch[FETCH_MTD_symbol];
		}
		break;
	case '+':	/* deref memory */
	case '-':
		tmp = strchr(arg, '(');
		if (!tmp)
			break;	/* no '(': f->fn stays NULL -> -EINVAL below */
		*tmp = '\0';	/* terminate the offset part */
		ret = strict_strtol(arg + 1, 0, &offset);
		if (ret)
			break;
		if (arg[0] == '-')
			offset = -offset;
		arg = tmp + 1;	/* arg now points at the inner ARG */
		tmp = strrchr(arg, ')');
		if (tmp) {
			struct deref_fetch_param *dprm;
			/* The inner fetch always uses the default type. */
			const struct fetch_type *t2 = find_fetch_type(NULL);
			*tmp = '\0';	/* strip the closing ')' */
			dprm = kzalloc(sizeof(struct deref_fetch_param),
				       GFP_KERNEL);
			if (!dprm)
				return -ENOMEM;
			dprm->offset = offset;
			ret = __parse_probe_arg(arg, t2, &dprm->orig,
						is_return);
			if (ret)
				kfree(dprm);
			else {
				f->fn = t->fetch[FETCH_MTD_deref];
				f->data = (void *)dprm;
			}
		}
		break;
	}
	if (!ret && !f->fn) {	/* Parsed, but do not find fetch method */
		pr_info("%s type has no corresponding fetch method.\n",
			t->name);
		ret = -EINVAL;
	}
	return ret;
}
809 | |||
/* String length checking wrapper */
/*
 * Top-level parser for one "ARG[:TYPE]" fetch argument: keeps a verbatim
 * copy in parg->comm, splits off the optional ":TYPE" suffix, reserves
 * space in the trace entry, and resolves the fetch (and, for strings,
 * fetch_size) functions via __parse_probe_arg().
 */
static int parse_probe_arg(char *arg, struct trace_probe *tp,
			   struct probe_arg *parg, int is_return)
{
	const char *t;
	int ret;

	if (strlen(arg) > MAX_ARGSTR_LEN) {
		pr_info("Argument is too long.: %s\n", arg);
		return -ENOSPC;
	}
	/* Unmodified copy, echoed later by the probes listing file. */
	parg->comm = kstrdup(arg, GFP_KERNEL);
	if (!parg->comm) {
		pr_info("Failed to allocate memory for command '%s'.\n", arg);
		return -ENOMEM;
	}
	t = strchr(parg->comm, ':');
	if (t) {
		/* Cut ":TYPE" off arg at the same position; t points at
		 * TYPE inside the copy. */
		arg[t - parg->comm] = '\0';
		t++;
	}
	/* t == NULL selects the default (unsigned long) type. */
	parg->type = find_fetch_type(t);
	if (!parg->type) {
		pr_info("Unsupported type: %s\n", t);
		return -EINVAL;
	}
	/* Lay this argument out at the current end of the entry. */
	parg->offset = tp->size;
	tp->size += parg->type->size;
	ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
	if (ret >= 0) {
		/* String arguments additionally need their length fetched. */
		parg->fetch_size.fn = get_fetch_size_function(parg->type,
							      parg->fetch.fn);
		parg->fetch_size.data = parg->fetch.data;
	}
	return ret;
}
846 | |||
847 | /* Return 1 if name is reserved or already used by another argument */ | ||
848 | static int conflict_field_name(const char *name, | ||
849 | struct probe_arg *args, int narg) | ||
850 | { | ||
851 | int i; | ||
852 | for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++) | ||
853 | if (strcmp(reserved_field_names[i], name) == 0) | ||
854 | return 1; | ||
855 | for (i = 0; i < narg; i++) | ||
856 | if (strcmp(args[i].name, name) == 0) | ||
857 | return 1; | ||
858 | return 0; | ||
859 | } | ||
860 | |||
/* Parse and execute one probe command line.  Returns 0 or -errno. */
static int create_trace_probe(int argc, char **argv)
{
	/*
	 * Argument syntax:
	 *  - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
	 *  - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
	 * Fetch args:
	 *  $retval	: fetch return value
	 *  $stack	: fetch stack address
	 *  $stackN	: fetch Nth of stack (N:0-)
	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
	 *  %REG	: fetch register REG
	 * Dereferencing memory fetch:
	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
	 * Alias name of args:
	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
	 * Type of args:
	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
	 */
	struct trace_probe *tp;
	int i, ret = 0;
	int is_return = 0, is_delete = 0;
	char *symbol = NULL, *event = NULL, *group = NULL;
	char *arg;
	unsigned long offset = 0;
	void *addr = NULL;
	char buf[MAX_EVENT_NAME_LEN];

	/* argc must be >= 1 */
	if (argv[0][0] == 'p')
		is_return = 0;
	else if (argv[0][0] == 'r')
		is_return = 1;
	else if (argv[0][0] == '-')
		is_delete = 1;	/* "-:[GRP/]EVENT" deletes a probe */
	else {
		pr_info("Probe definition must be started with 'p', 'r' or"
			" '-'.\n");
		return -EINVAL;
	}

	/* Optional ":[GRP/]EVENT" follows the command letter. */
	if (argv[0][1] == ':') {
		event = &argv[0][2];
		if (strchr(event, '/')) {
			group = event;
			event = strchr(group, '/') + 1;
			event[-1] = '\0';	/* split GRP and EVENT in place */
			if (strlen(group) == 0) {
				pr_info("Group name is not specified\n");
				return -EINVAL;
			}
		}
		if (strlen(event) == 0) {
			pr_info("Event name is not specified\n");
			return -EINVAL;
		}
	}
	if (!group)
		group = KPROBE_EVENT_SYSTEM;

	if (is_delete) {
		if (!event) {
			pr_info("Delete command needs an event name.\n");
			return -EINVAL;
		}
		mutex_lock(&probe_lock);
		tp = find_probe_event(event, group);
		if (!tp) {
			mutex_unlock(&probe_lock);
			pr_info("Event %s/%s doesn't exist.\n", group, event);
			return -ENOENT;
		}
		/* delete an event */
		unregister_trace_probe(tp);
		free_trace_probe(tp);
		mutex_unlock(&probe_lock);
		return 0;
	}

	if (argc < 2) {
		pr_info("Probe point is not specified.\n");
		return -EINVAL;
	}
	if (isdigit(argv[1][0])) {
		if (is_return) {
			pr_info("Return probe point must be a symbol.\n");
			return -EINVAL;
		}
		/* an address specified */
		ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
		if (ret) {
			pr_info("Failed to parse address.\n");
			return ret;
		}
	} else {
		/* a symbol specified */
		symbol = argv[1];
		/* TODO: support .init module functions */
		ret = split_symbol_offset(symbol, &offset);
		if (ret) {
			pr_info("Failed to parse symbol.\n");
			return ret;
		}
		if (offset && is_return) {
			pr_info("Return probe must be used without offset.\n");
			return -EINVAL;
		}
	}
	argc -= 2; argv += 2;	/* the remaining argv[] are fetch args */

	/* setup a probe */
	if (!event) {
		/* Make a new event name */
		if (symbol)
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
				 is_return ? 'r' : 'p', symbol, offset);
		else
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
				 is_return ? 'r' : 'p', addr);
		event = buf;
	}
	tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
			       is_return);
	if (IS_ERR(tp)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tp));
		return PTR_ERR(tp);
	}

	/* parse arguments */
	ret = 0;
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		/* Increment count for freeing args in error case */
		tp->nr_args++;

		/* Parse argument name */
		arg = strchr(argv[i], '=');
		if (arg) {
			*arg++ = '\0';	/* NAME=FETCHARG: split at '=' */
			tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
		} else {
			arg = argv[i];
			/* If argument name is omitted, set "argN" */
			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
			tp->args[i].name = kstrdup(buf, GFP_KERNEL);
		}

		if (!tp->args[i].name) {
			pr_info("Failed to allocate argument[%d] name.\n", i);
			ret = -ENOMEM;
			goto error;
		}

		if (!is_good_name(tp->args[i].name)) {
			pr_info("Invalid argument[%d] name: %s\n",
				i, tp->args[i].name);
			ret = -EINVAL;
			goto error;
		}

		if (conflict_field_name(tp->args[i].name, tp->args, i)) {
			pr_info("Argument[%d] name '%s' conflicts with "
				"another field.\n", i, argv[i]);
			ret = -EINVAL;
			goto error;
		}

		/* Parse fetch argument */
		ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
		if (ret) {
			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
			goto error;
		}
	}

	ret = register_trace_probe(tp);
	if (ret)
		goto error;
	return 0;

error:
	/* free_trace_probe() releases the nr_args already-parsed args too */
	free_trace_probe(tp);
	return ret;
}
1046 | |||
1047 | static void cleanup_all_probes(void) | ||
1048 | { | ||
1049 | struct trace_probe *tp; | ||
1050 | |||
1051 | mutex_lock(&probe_lock); | ||
1052 | /* TODO: Use batch unregistration */ | ||
1053 | while (!list_empty(&probe_list)) { | ||
1054 | tp = list_entry(probe_list.next, struct trace_probe, list); | ||
1055 | unregister_trace_probe(tp); | ||
1056 | free_trace_probe(tp); | ||
1057 | } | ||
1058 | mutex_unlock(&probe_lock); | ||
1059 | } | ||
1060 | |||
1061 | |||
/* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	/* Hold probe_lock across the whole iteration (dropped in ..._stop). */
	mutex_lock(&probe_lock);
	return seq_list_start(&probe_list, *pos);
}
1068 | |||
/* Advance to the next registered probe, or NULL at the end of the list. */
static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &probe_list, pos);
}
1073 | |||
static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&probe_lock);	/* taken in probes_seq_start() */
}
1078 | |||
/* Print one probe definition in the same syntax probes_write() accepts. */
static int probes_seq_show(struct seq_file *m, void *v)
{
	struct trace_probe *tp = v;
	int i;

	seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
	seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);

	/* Probe point: raw address, symbol+offset, or bare symbol. */
	if (!tp->symbol)
		seq_printf(m, " 0x%p", tp->rp.kp.addr);
	else if (tp->rp.kp.offset)
		seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
	else
		seq_printf(m, " %s", probe_symbol(tp));

	/* Arguments are echoed verbatim from their saved command text. */
	for (i = 0; i < tp->nr_args; i++)
		seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
	seq_printf(m, "\n");

	return 0;
}
1100 | |||
/* seq_file operations for listing probe definitions. */
static const struct seq_operations probes_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_seq_show
};
1107 | |||
/*
 * Open handler for the events file.  Opening for write with O_TRUNC
 * clears all probes first, mirroring ">" vs. ">>" shell redirection.
 */
static int probes_open(struct inode *inode, struct file *file)
{
	if ((file->f_mode & FMODE_WRITE) &&
	    (file->f_flags & O_TRUNC))
		cleanup_all_probes();

	return seq_open(file, &probes_seq_op);
}
1116 | |||
1117 | static int command_trace_probe(const char *buf) | ||
1118 | { | ||
1119 | char **argv; | ||
1120 | int argc = 0, ret = 0; | ||
1121 | |||
1122 | argv = argv_split(GFP_KERNEL, buf, &argc); | ||
1123 | if (!argv) | ||
1124 | return -ENOMEM; | ||
1125 | |||
1126 | if (argc) | ||
1127 | ret = create_trace_probe(argc, argv); | ||
1128 | |||
1129 | argv_free(argv); | ||
1130 | return ret; | ||
1131 | } | ||
1132 | |||
#define WRITE_BUFSIZE 128	/* max accepted line length (incl. NUL) */

/*
 * Write handler for the events file.  The input is processed one line
 * at a time; '#' starts a comment; each remaining non-empty line is a
 * probe command.  A line longer than WRITE_BUFSIZE-1 is rejected.
 */
static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	char *kbuf, *tmp;
	int ret;
	size_t done;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	ret = done = 0;
	while (done < count) {
		size = count - done;
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;
		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		tmp = strchr(kbuf, '\n');
		if (tmp) {
			/* Consume exactly one line this iteration. */
			*tmp = '\0';
			size = tmp - kbuf + 1;
		} else if (done + size < count) {
			/* Filled the buffer without finding a newline. */
			pr_warning("Line length is too long: "
				   "Should be less than %d.", WRITE_BUFSIZE);
			ret = -EINVAL;
			goto out;
		}
		done += size;
		/* Remove comments */
		tmp = strchr(kbuf, '#');
		if (tmp)
			*tmp = '\0';

		ret = command_trace_probe(kbuf);
		if (ret)
			goto out;
	}
	ret = done;	/* success: report everything as consumed */
out:
	kfree(kbuf);
	return ret;
}
1182 | |||
/* File operations for the "kprobe_events" control file. */
static const struct file_operations kprobe_events_ops = {
	.owner          = THIS_MODULE,
	.open           = probes_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
	.write		= probes_write,
};
1191 | |||
/* Probes profiling interfaces */
/* One line per probe: event name, hit count, missed (kretprobe) count. */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_probe *tp = v;

	seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
		   tp->rp.kp.nmissed);

	return 0;
}
1202 | |||
/* Reuses the probes-list iteration, but shows hit statistics instead. */
static const struct seq_operations profile_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_profile_seq_show
};
1209 | |||
/* Open handler for the read-only profile (statistics) file. */
static int profile_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &profile_seq_op);
}
1214 | |||
/* File operations for the "kprobe_profile" statistics file. */
static const struct file_operations kprobe_profile_ops = {
	.owner          = THIS_MODULE,
	.open           = profile_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
};
1222 | |||
1223 | /* Sum up total data length for dynamic arraies (strings) */ | ||
1224 | static __kprobes int __get_data_size(struct trace_probe *tp, | ||
1225 | struct pt_regs *regs) | ||
1226 | { | ||
1227 | int i, ret = 0; | ||
1228 | u32 len; | ||
1229 | |||
1230 | for (i = 0; i < tp->nr_args; i++) | ||
1231 | if (unlikely(tp->args[i].fetch_size.fn)) { | ||
1232 | call_fetch(&tp->args[i].fetch_size, regs, &len); | ||
1233 | ret += len; | ||
1234 | } | ||
1235 | |||
1236 | return ret; | ||
1237 | } | ||
1238 | |||
/* Store the value of each argument */
/*
 * Fixed-size values are stored at their precomputed offsets; dynamic
 * (string) values are appended after all fixed args, with a data_loc
 * word at the arg's offset pointing at the appended bytes.
 */
static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
				       struct pt_regs *regs,
				       u8 *data, int maxlen)
{
	int i;
	u32 end = tp->size;	/* dynamic data starts after the fixed args */
	u32 *dl;	/* Data (relative) location */

	for (i = 0; i < tp->nr_args; i++) {
		if (unlikely(tp->args[i].fetch_size.fn)) {
			/*
			 * First, we set the relative location and
			 * maximum data length to *dl
			 */
			dl = (u32 *)(data + tp->args[i].offset);
			*dl = make_data_rloc(maxlen, end - tp->args[i].offset);
			/* Then try to fetch string or dynamic array data */
			call_fetch(&tp->args[i].fetch, regs, dl);
			/* Reduce maximum length */
			end += get_rloc_len(*dl);
			maxlen -= get_rloc_len(*dl);
			/* Trick here, convert data_rloc to data_loc */
			*dl = convert_rloc_to_loc(*dl,
				 ent_size + tp->args[i].offset);
		} else
			/* Just fetching data normally */
			call_fetch(&tp->args[i].fetch, regs,
				   data + tp->args[i].offset);
	}
}
1270 | |||
/* Kprobe handler */
/* Record one kprobe hit into the ftrace ring buffer. */
static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
	struct kprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, dsize, pc;
	unsigned long irq_flags;
	struct ftrace_event_call *call = &tp->call;

	tp->nhit++;

	local_save_flags(irq_flags);
	pc = preempt_count();

	/* Entry size = fixed header + fixed args + dynamic (string) data. */
	dsize = __get_data_size(tp, regs);
	size = sizeof(*entry) + tp->size + dsize;

	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
						  size, irq_flags, pc);
	if (!event)
		return;	/* ring buffer full or tracing disabled */

	entry = ring_buffer_event_data(event);
	entry->ip = (unsigned long)kp->addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	if (!filter_current_check_discard(buffer, call, entry, event))
		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
}
1302 | |||
/* Kretprobe handler */
/* Record one function-return hit (entry address + return address). */
static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
					   struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
	struct kretprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, pc, dsize;
	unsigned long irq_flags;
	struct ftrace_event_call *call = &tp->call;

	local_save_flags(irq_flags);
	pc = preempt_count();

	/* Entry size = fixed header + fixed args + dynamic (string) data. */
	dsize = __get_data_size(tp, regs);
	size = sizeof(*entry) + tp->size + dsize;

	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
						  size, irq_flags, pc);
	if (!event)
		return;	/* ring buffer full or tracing disabled */

	entry = ring_buffer_event_data(event);
	entry->func = (unsigned long)tp->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	if (!filter_current_check_discard(buffer, call, entry, event))
		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
}
1334 | |||
1335 | /* Event entry printers */ | ||
1336 | enum print_line_t | ||
1337 | print_kprobe_event(struct trace_iterator *iter, int flags, | ||
1338 | struct trace_event *event) | ||
1339 | { | ||
1340 | struct kprobe_trace_entry_head *field; | ||
1341 | struct trace_seq *s = &iter->seq; | ||
1342 | struct trace_probe *tp; | ||
1343 | u8 *data; | ||
1344 | int i; | ||
1345 | |||
1346 | field = (struct kprobe_trace_entry_head *)iter->ent; | ||
1347 | tp = container_of(event, struct trace_probe, call.event); | ||
1348 | |||
1349 | if (!trace_seq_printf(s, "%s: (", tp->call.name)) | ||
1350 | goto partial; | ||
1351 | |||
1352 | if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) | ||
1353 | goto partial; | ||
1354 | |||
1355 | if (!trace_seq_puts(s, ")")) | ||
1356 | goto partial; | ||
1357 | |||
1358 | data = (u8 *)&field[1]; | ||
1359 | for (i = 0; i < tp->nr_args; i++) | ||
1360 | if (!tp->args[i].type->print(s, tp->args[i].name, | ||
1361 | data + tp->args[i].offset, field)) | ||
1362 | goto partial; | ||
1363 | |||
1364 | if (!trace_seq_puts(s, "\n")) | ||
1365 | goto partial; | ||
1366 | |||
1367 | return TRACE_TYPE_HANDLED; | ||
1368 | partial: | ||
1369 | return TRACE_TYPE_PARTIAL_LINE; | ||
1370 | } | ||
1371 | |||
1372 | enum print_line_t | ||
1373 | print_kretprobe_event(struct trace_iterator *iter, int flags, | ||
1374 | struct trace_event *event) | ||
1375 | { | ||
1376 | struct kretprobe_trace_entry_head *field; | ||
1377 | struct trace_seq *s = &iter->seq; | ||
1378 | struct trace_probe *tp; | ||
1379 | u8 *data; | ||
1380 | int i; | ||
1381 | |||
1382 | field = (struct kretprobe_trace_entry_head *)iter->ent; | ||
1383 | tp = container_of(event, struct trace_probe, call.event); | ||
1384 | |||
1385 | if (!trace_seq_printf(s, "%s: (", tp->call.name)) | ||
1386 | goto partial; | ||
1387 | |||
1388 | if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) | ||
1389 | goto partial; | ||
1390 | |||
1391 | if (!trace_seq_puts(s, " <- ")) | ||
1392 | goto partial; | ||
1393 | |||
1394 | if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET)) | ||
1395 | goto partial; | ||
1396 | |||
1397 | if (!trace_seq_puts(s, ")")) | ||
1398 | goto partial; | ||
1399 | |||
1400 | data = (u8 *)&field[1]; | ||
1401 | for (i = 0; i < tp->nr_args; i++) | ||
1402 | if (!tp->args[i].type->print(s, tp->args[i].name, | ||
1403 | data + tp->args[i].offset, field)) | ||
1404 | goto partial; | ||
1405 | |||
1406 | if (!trace_seq_puts(s, "\n")) | ||
1407 | goto partial; | ||
1408 | |||
1409 | return TRACE_TYPE_HANDLED; | ||
1410 | partial: | ||
1411 | return TRACE_TYPE_PARTIAL_LINE; | ||
1412 | } | ||
1413 | |||
1414 | static int probe_event_enable(struct ftrace_event_call *call) | ||
1415 | { | ||
1416 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
1417 | |||
1418 | tp->flags |= TP_FLAG_TRACE; | ||
1419 | if (probe_is_return(tp)) | ||
1420 | return enable_kretprobe(&tp->rp); | ||
1421 | else | ||
1422 | return enable_kprobe(&tp->rp.kp); | ||
1423 | } | ||
1424 | |||
1425 | static void probe_event_disable(struct ftrace_event_call *call) | ||
1426 | { | ||
1427 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
1428 | |||
1429 | tp->flags &= ~TP_FLAG_TRACE; | ||
1430 | if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) { | ||
1431 | if (probe_is_return(tp)) | ||
1432 | disable_kretprobe(&tp->rp); | ||
1433 | else | ||
1434 | disable_kprobe(&tp->rp.kp); | ||
1435 | } | ||
1436 | } | ||
1437 | |||
#undef DEFINE_FIELD
/*
 * Register one fixed field of the entry head with the ftrace event
 * core.  Expects 'ret', 'event_call' and 'field' in the enclosing
 * scope and returns from the caller on error.
 */
#define DEFINE_FIELD(type, item, name, is_signed)			\
	do {								\
		ret = trace_define_field(event_call, #type, name,	\
					 offsetof(typeof(field), item),	\
					 sizeof(field.item), is_signed, \
					 FILTER_OTHER);			\
		if (ret)						\
			return ret;					\
	} while (0)
1448 | |||
/*
 * Describe the kprobe event record layout to the ftrace event core:
 * the fixed instruction-pointer field followed by one field per
 * fetched probe argument.  Returns 0 or a trace_define_field() error.
 */
static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
{
	int ret, i;
	struct kprobe_trace_entry_head field;
	struct trace_probe *tp = (struct trace_probe *)event_call->data;

	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tp->nr_args; i++) {
		/* Argument data starts right after the entry head. */
		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
					 tp->args[i].name,
					 sizeof(field) + tp->args[i].offset,
					 tp->args[i].type->size,
					 tp->args[i].type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}
1469 | |||
/*
 * Describe the kretprobe event record layout: probed function address
 * and return site, followed by one field per fetched argument.
 * Returns 0 or a trace_define_field() error.
 */
static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
{
	int ret, i;
	struct kretprobe_trace_entry_head field;
	struct trace_probe *tp = (struct trace_probe *)event_call->data;

	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tp->nr_args; i++) {
		/* Argument data starts right after the entry head. */
		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
					 tp->args[i].name,
					 sizeof(field) + tp->args[i].offset,
					 tp->args[i].type->size,
					 tp->args[i].type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}
1491 | |||
/*
 * Render the event's print_fmt string ("<fmt>", arg-list) into @buf.
 * Called twice: once with len == 0 to compute the needed size, then
 * with the allocated buffer.  Returns the number of characters the
 * full string needs (excluding the trailing NUL), like snprintf.
 */
static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
{
	int i;
	int pos = 0;

	const char *fmt, *arg;

	if (!probe_is_return(tp)) {
		/* kprobe: print the probed instruction pointer */
		fmt = "(%lx)";
		arg = "REC->" FIELD_STRING_IP;
	} else {
		/* kretprobe: print return site and probed function */
		fmt = "(%lx <- %lx)";
		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
	}

	/* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);

	/* One " name=%fmt" pair per fetched argument */
	for (i = 0; i < tp->nr_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
				tp->args[i].name, tp->args[i].type->fmt);
	}

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);

	/* Then the matching argument list for the format above */
	for (i = 0; i < tp->nr_args; i++) {
		if (strcmp(tp->args[i].type->name, "string") == 0)
			pos += snprintf(buf + pos, LEN_OR_ZERO,
					", __get_str(%s)",
					tp->args[i].name);
		else
			pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
					tp->args[i].name);
	}

#undef LEN_OR_ZERO

	/* return the length of print_fmt */
	return pos;
}
1534 | |||
1535 | static int set_print_fmt(struct trace_probe *tp) | ||
1536 | { | ||
1537 | int len; | ||
1538 | char *print_fmt; | ||
1539 | |||
1540 | /* First: called with 0 length to calculate the needed length */ | ||
1541 | len = __set_print_fmt(tp, NULL, 0); | ||
1542 | print_fmt = kmalloc(len + 1, GFP_KERNEL); | ||
1543 | if (!print_fmt) | ||
1544 | return -ENOMEM; | ||
1545 | |||
1546 | /* Second: actually write the @print_fmt */ | ||
1547 | __set_print_fmt(tp, print_fmt, len + 1); | ||
1548 | tp->call.print_fmt = print_fmt; | ||
1549 | |||
1550 | return 0; | ||
1551 | } | ||
1552 | |||
1553 | #ifdef CONFIG_PERF_EVENTS | ||
1554 | |||
/*
 * Kprobe profile handler: record one probe hit into the per-CPU perf
 * trace buffer instead of the ftrace ring buffer.
 */
static __kprobes void kprobe_perf_func(struct kprobe *kp,
					 struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
	struct ftrace_event_call *call = &tp->call;
	struct kprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	dsize = __get_data_size(tp, regs);
	__size = sizeof(*entry) + tp->size + dsize;
	/* Round so the u32-prefixed perf record stays u64-aligned */
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		     "profile buffer not large enough"))
		return;

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	entry->ip = (unsigned long)kp->addr;
	/* Pre-zero the argument area before the fetchers fill it in */
	memset(&entry[1], 0, dsize);
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	head = this_cpu_ptr(call->perf_events);
	perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
}
1585 | |||
/*
 * Kretprobe profile handler: record probed-function address and
 * return site into the per-CPU perf trace buffer.
 */
static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
					    struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
	struct ftrace_event_call *call = &tp->call;
	struct kretprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	dsize = __get_data_size(tp, regs);
	__size = sizeof(*entry) + tp->size + dsize;
	/* Round so the u32-prefixed perf record stays u64-aligned */
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		     "profile buffer not large enough"))
		return;

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	entry->func = (unsigned long)tp->rp.kp.addr;	/* probed function */
	entry->ret_ip = (unsigned long)ri->ret_addr;	/* return site */
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	head = this_cpu_ptr(call->perf_events);
	perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
}
1616 | |||
1617 | static int probe_perf_enable(struct ftrace_event_call *call) | ||
1618 | { | ||
1619 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
1620 | |||
1621 | tp->flags |= TP_FLAG_PROFILE; | ||
1622 | |||
1623 | if (probe_is_return(tp)) | ||
1624 | return enable_kretprobe(&tp->rp); | ||
1625 | else | ||
1626 | return enable_kprobe(&tp->rp.kp); | ||
1627 | } | ||
1628 | |||
1629 | static void probe_perf_disable(struct ftrace_event_call *call) | ||
1630 | { | ||
1631 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
1632 | |||
1633 | tp->flags &= ~TP_FLAG_PROFILE; | ||
1634 | |||
1635 | if (!(tp->flags & TP_FLAG_TRACE)) { | ||
1636 | if (probe_is_return(tp)) | ||
1637 | disable_kretprobe(&tp->rp); | ||
1638 | else | ||
1639 | disable_kprobe(&tp->rp.kp); | ||
1640 | } | ||
1641 | } | ||
1642 | #endif /* CONFIG_PERF_EVENTS */ | ||
1643 | |||
/*
 * Single registration callback handed to the ftrace/perf event cores:
 * dispatch enable/disable requests to the trace or perf paths.
 * Unknown request types are silently accepted (return 0).
 */
static __kprobes
int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
{
	switch (type) {
	case TRACE_REG_REGISTER:
		return probe_event_enable(event);
	case TRACE_REG_UNREGISTER:
		probe_event_disable(event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return probe_perf_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		probe_perf_disable(event);
		return 0;
#endif
	}
	return 0;
}
1664 | |||
/*
 * Common kprobe entry point: fan the hit out to whichever consumers
 * (ftrace tracing and/or perf profiling) are enabled on this probe.
 */
static __kprobes
int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);

	if (tp->flags & TP_FLAG_TRACE)
		kprobe_trace_func(kp, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kprobe_perf_func(kp, regs);
#endif
	return 0; /* We don't tweak the kernel, so just return 0 */
}
1678 | |||
/*
 * Common kretprobe entry point: fan the return hit out to whichever
 * consumers (ftrace tracing and/or perf profiling) are enabled.
 */
static __kprobes
int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);

	if (tp->flags & TP_FLAG_TRACE)
		kretprobe_trace_func(ri, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kretprobe_perf_func(ri, regs);
#endif
	return 0; /* We don't tweak the kernel, so just return 0 */
}
1692 | |||
/* Output callbacks handed to the ftrace event core for each flavour */
static struct trace_event_functions kretprobe_funcs = {
	.trace = print_kretprobe_event
};

static struct trace_event_functions kprobe_funcs = {
	.trace = print_kprobe_event
};
1700 | |||
/*
 * Register the probe's ftrace_event_call with the event subsystem so
 * it appears under events/<system>/<event>.  Returns 0 or -errno;
 * on failure, partially-registered state is rolled back.
 */
static int register_probe_event(struct trace_probe *tp)
{
	struct ftrace_event_call *call = &tp->call;
	int ret;

	/* Initialize ftrace_event_call */
	INIT_LIST_HEAD(&call->class->fields);
	if (probe_is_return(tp)) {
		call->event.funcs = &kretprobe_funcs;
		call->class->define_fields = kretprobe_event_define_fields;
	} else {
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}
	if (set_print_fmt(tp) < 0)
		return -ENOMEM;
	/* a zero return from register_ftrace_event() means failure */
	ret = register_ftrace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}
	call->flags = 0;
	call->class->reg = kprobe_register;
	call->data = tp;
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n", call->name);
		kfree(call->print_fmt);
		unregister_ftrace_event(&call->event);
	}
	return ret;
}
1733 | |||
/* Tear down the event registration and free the print format string */
static void unregister_probe_event(struct trace_probe *tp)
{
	/* tp->event is unregistered in trace_remove_event_call() */
	trace_remove_event_call(&tp->call);
	kfree(tp->call.print_fmt);
}
1740 | |||
/*
 * Make a debugfs interface for controlling probe points: the
 * 'kprobe_events' control file and the read-only 'kprobe_profile'
 * hit-count file.  Creation failures are only warned about; the
 * initcall itself always reports success.
 */
static __init int init_kprobe_trace(void)
{
	struct dentry *d_tracer;
	struct dentry *entry;

	d_tracer = tracing_init_dentry();
	if (!d_tracer)
		return 0;	/* no tracing debugfs root; nothing to do */

	entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
				    NULL, &kprobe_events_ops);

	/* Event list interface */
	if (!entry)
		pr_warning("Could not create debugfs "
			   "'kprobe_events' entry\n");

	/* Profile interface */
	entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
				    NULL, &kprobe_profile_ops);

	if (!entry)
		pr_warning("Could not create debugfs "
			   "'kprobe_profile' entry\n");
	return 0;
}
fs_initcall(init_kprobe_trace);
1769 | |||
1770 | |||
1771 | #ifdef CONFIG_FTRACE_STARTUP_TEST | ||
1772 | |||
/* Simple arithmetic target the self-test probes hook onto */
static int kprobe_trace_selftest_target(int a1, int a2, int a3,
					int a4, int a5, int a6)
{
	int sum = a1 + a2;

	sum += a3 + a4;
	sum += a5 + a6;

	return sum;
}
1778 | |||
1779 | static __init int kprobe_trace_self_tests_init(void) | ||
1780 | { | ||
1781 | int ret, warn = 0; | ||
1782 | int (*target)(int, int, int, int, int, int); | ||
1783 | struct trace_probe *tp; | ||
1784 | |||
1785 | target = kprobe_trace_selftest_target; | ||
1786 | |||
1787 | pr_info("Testing kprobe tracing: "); | ||
1788 | |||
1789 | ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " | ||
1790 | "$stack $stack0 +0($stack)"); | ||
1791 | if (WARN_ON_ONCE(ret)) { | ||
1792 | pr_warning("error on probing function entry.\n"); | ||
1793 | warn++; | ||
1794 | } else { | ||
1795 | /* Enable trace point */ | ||
1796 | tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM); | ||
1797 | if (WARN_ON_ONCE(tp == NULL)) { | ||
1798 | pr_warning("error on getting new probe.\n"); | ||
1799 | warn++; | ||
1800 | } else | ||
1801 | probe_event_enable(&tp->call); | ||
1802 | } | ||
1803 | |||
1804 | ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " | ||
1805 | "$retval"); | ||
1806 | if (WARN_ON_ONCE(ret)) { | ||
1807 | pr_warning("error on probing function return.\n"); | ||
1808 | warn++; | ||
1809 | } else { | ||
1810 | /* Enable trace point */ | ||
1811 | tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM); | ||
1812 | if (WARN_ON_ONCE(tp == NULL)) { | ||
1813 | pr_warning("error on getting new probe.\n"); | ||
1814 | warn++; | ||
1815 | } else | ||
1816 | probe_event_enable(&tp->call); | ||
1817 | } | ||
1818 | |||
1819 | if (warn) | ||
1820 | goto end; | ||
1821 | |||
1822 | ret = target(1, 2, 3, 4, 5, 6); | ||
1823 | |||
1824 | ret = command_trace_probe("-:testprobe"); | ||
1825 | if (WARN_ON_ONCE(ret)) { | ||
1826 | pr_warning("error on deleting a probe.\n"); | ||
1827 | warn++; | ||
1828 | } | ||
1829 | |||
1830 | ret = command_trace_probe("-:testprobe2"); | ||
1831 | if (WARN_ON_ONCE(ret)) { | ||
1832 | pr_warning("error on deleting a probe.\n"); | ||
1833 | warn++; | ||
1834 | } | ||
1835 | |||
1836 | end: | ||
1837 | cleanup_all_probes(); | ||
1838 | if (warn) | ||
1839 | pr_cont("NG: Some tests are failed. Please check them.\n"); | ||
1840 | else | ||
1841 | pr_cont("OK\n"); | ||
1842 | return 0; | ||
1843 | } | ||
1844 | |||
1845 | late_initcall(kprobe_trace_self_tests_init); | ||
1846 | |||
1847 | #endif | ||
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index d53b45ed0806..017fa376505d 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
10 | #include <linux/mmiotrace.h> | 10 | #include <linux/mmiotrace.h> |
11 | #include <linux/pci.h> | 11 | #include <linux/pci.h> |
12 | #include <linux/slab.h> | ||
12 | #include <linux/time.h> | 13 | #include <linux/time.h> |
13 | 14 | ||
14 | #include <asm/atomic.h> | 15 | #include <asm/atomic.h> |
@@ -307,11 +308,13 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, | |||
307 | struct trace_array_cpu *data, | 308 | struct trace_array_cpu *data, |
308 | struct mmiotrace_rw *rw) | 309 | struct mmiotrace_rw *rw) |
309 | { | 310 | { |
311 | struct ftrace_event_call *call = &event_mmiotrace_rw; | ||
312 | struct ring_buffer *buffer = tr->buffer; | ||
310 | struct ring_buffer_event *event; | 313 | struct ring_buffer_event *event; |
311 | struct trace_mmiotrace_rw *entry; | 314 | struct trace_mmiotrace_rw *entry; |
312 | int pc = preempt_count(); | 315 | int pc = preempt_count(); |
313 | 316 | ||
314 | event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW, | 317 | event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW, |
315 | sizeof(*entry), 0, pc); | 318 | sizeof(*entry), 0, pc); |
316 | if (!event) { | 319 | if (!event) { |
317 | atomic_inc(&dropped_count); | 320 | atomic_inc(&dropped_count); |
@@ -319,7 +322,9 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, | |||
319 | } | 322 | } |
320 | entry = ring_buffer_event_data(event); | 323 | entry = ring_buffer_event_data(event); |
321 | entry->rw = *rw; | 324 | entry->rw = *rw; |
322 | trace_buffer_unlock_commit(tr, event, 0, pc); | 325 | |
326 | if (!filter_check_discard(call, entry, buffer, event)) | ||
327 | trace_buffer_unlock_commit(buffer, event, 0, pc); | ||
323 | } | 328 | } |
324 | 329 | ||
325 | void mmio_trace_rw(struct mmiotrace_rw *rw) | 330 | void mmio_trace_rw(struct mmiotrace_rw *rw) |
@@ -333,11 +338,13 @@ static void __trace_mmiotrace_map(struct trace_array *tr, | |||
333 | struct trace_array_cpu *data, | 338 | struct trace_array_cpu *data, |
334 | struct mmiotrace_map *map) | 339 | struct mmiotrace_map *map) |
335 | { | 340 | { |
341 | struct ftrace_event_call *call = &event_mmiotrace_map; | ||
342 | struct ring_buffer *buffer = tr->buffer; | ||
336 | struct ring_buffer_event *event; | 343 | struct ring_buffer_event *event; |
337 | struct trace_mmiotrace_map *entry; | 344 | struct trace_mmiotrace_map *entry; |
338 | int pc = preempt_count(); | 345 | int pc = preempt_count(); |
339 | 346 | ||
340 | event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP, | 347 | event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_MAP, |
341 | sizeof(*entry), 0, pc); | 348 | sizeof(*entry), 0, pc); |
342 | if (!event) { | 349 | if (!event) { |
343 | atomic_inc(&dropped_count); | 350 | atomic_inc(&dropped_count); |
@@ -345,7 +352,9 @@ static void __trace_mmiotrace_map(struct trace_array *tr, | |||
345 | } | 352 | } |
346 | entry = ring_buffer_event_data(event); | 353 | entry = ring_buffer_event_data(event); |
347 | entry->map = *map; | 354 | entry->map = *map; |
348 | trace_buffer_unlock_commit(tr, event, 0, pc); | 355 | |
356 | if (!filter_check_discard(call, entry, buffer, event)) | ||
357 | trace_buffer_unlock_commit(buffer, event, 0, pc); | ||
349 | } | 358 | } |
350 | 359 | ||
351 | void mmio_trace_mapping(struct mmiotrace_map *map) | 360 | void mmio_trace_mapping(struct mmiotrace_map *map) |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 7938f3ae93e3..02272baa2206 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
@@ -16,21 +16,25 @@ | |||
16 | 16 | ||
17 | DECLARE_RWSEM(trace_event_mutex); | 17 | DECLARE_RWSEM(trace_event_mutex); |
18 | 18 | ||
19 | DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq); | ||
20 | EXPORT_PER_CPU_SYMBOL(ftrace_event_seq); | ||
21 | |||
22 | static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; | 19 | static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; |
23 | 20 | ||
24 | static int next_event_type = __TRACE_LAST_TYPE + 1; | 21 | static int next_event_type = __TRACE_LAST_TYPE + 1; |
25 | 22 | ||
26 | void trace_print_seq(struct seq_file *m, struct trace_seq *s) | 23 | int trace_print_seq(struct seq_file *m, struct trace_seq *s) |
27 | { | 24 | { |
28 | int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; | 25 | int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; |
26 | int ret; | ||
29 | 27 | ||
30 | s->buffer[len] = 0; | 28 | ret = seq_write(m, s->buffer, len); |
31 | seq_puts(m, s->buffer); | ||
32 | 29 | ||
33 | trace_seq_init(s); | 30 | /* |
31 | * Only reset this buffer if we successfully wrote to the | ||
32 | * seq_file buffer. | ||
33 | */ | ||
34 | if (!ret) | ||
35 | trace_seq_init(s); | ||
36 | |||
37 | return ret; | ||
34 | } | 38 | } |
35 | 39 | ||
36 | enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) | 40 | enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) |
@@ -70,6 +74,9 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter) | |||
70 | * @s: trace sequence descriptor | 74 | * @s: trace sequence descriptor |
71 | * @fmt: printf format string | 75 | * @fmt: printf format string |
72 | * | 76 | * |
77 | * It returns 0 if the trace oversizes the buffer's free | ||
78 | * space, 1 otherwise. | ||
79 | * | ||
73 | * The tracer may use either sequence operations or its own | 80 | * The tracer may use either sequence operations or its own |
74 | * copy to user routines. To simplify formating of a trace | 81 | * copy to user routines. To simplify formating of a trace |
75 | * trace_seq_printf is used to store strings into a special | 82 | * trace_seq_printf is used to store strings into a special |
@@ -83,7 +90,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) | |||
83 | va_list ap; | 90 | va_list ap; |
84 | int ret; | 91 | int ret; |
85 | 92 | ||
86 | if (!len) | 93 | if (s->full || !len) |
87 | return 0; | 94 | return 0; |
88 | 95 | ||
89 | va_start(ap, fmt); | 96 | va_start(ap, fmt); |
@@ -91,12 +98,14 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) | |||
91 | va_end(ap); | 98 | va_end(ap); |
92 | 99 | ||
93 | /* If we can't write it all, don't bother writing anything */ | 100 | /* If we can't write it all, don't bother writing anything */ |
94 | if (ret >= len) | 101 | if (ret >= len) { |
102 | s->full = 1; | ||
95 | return 0; | 103 | return 0; |
104 | } | ||
96 | 105 | ||
97 | s->len += ret; | 106 | s->len += ret; |
98 | 107 | ||
99 | return len; | 108 | return 1; |
100 | } | 109 | } |
101 | EXPORT_SYMBOL_GPL(trace_seq_printf); | 110 | EXPORT_SYMBOL_GPL(trace_seq_printf); |
102 | 111 | ||
@@ -117,14 +126,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) | |||
117 | int len = (PAGE_SIZE - 1) - s->len; | 126 | int len = (PAGE_SIZE - 1) - s->len; |
118 | int ret; | 127 | int ret; |
119 | 128 | ||
120 | if (!len) | 129 | if (s->full || !len) |
121 | return 0; | 130 | return 0; |
122 | 131 | ||
123 | ret = vsnprintf(s->buffer + s->len, len, fmt, args); | 132 | ret = vsnprintf(s->buffer + s->len, len, fmt, args); |
124 | 133 | ||
125 | /* If we can't write it all, don't bother writing anything */ | 134 | /* If we can't write it all, don't bother writing anything */ |
126 | if (ret >= len) | 135 | if (ret >= len) { |
136 | s->full = 1; | ||
127 | return 0; | 137 | return 0; |
138 | } | ||
128 | 139 | ||
129 | s->len += ret; | 140 | s->len += ret; |
130 | 141 | ||
@@ -137,14 +148,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) | |||
137 | int len = (PAGE_SIZE - 1) - s->len; | 148 | int len = (PAGE_SIZE - 1) - s->len; |
138 | int ret; | 149 | int ret; |
139 | 150 | ||
140 | if (!len) | 151 | if (s->full || !len) |
141 | return 0; | 152 | return 0; |
142 | 153 | ||
143 | ret = bstr_printf(s->buffer + s->len, len, fmt, binary); | 154 | ret = bstr_printf(s->buffer + s->len, len, fmt, binary); |
144 | 155 | ||
145 | /* If we can't write it all, don't bother writing anything */ | 156 | /* If we can't write it all, don't bother writing anything */ |
146 | if (ret >= len) | 157 | if (ret >= len) { |
158 | s->full = 1; | ||
147 | return 0; | 159 | return 0; |
160 | } | ||
148 | 161 | ||
149 | s->len += ret; | 162 | s->len += ret; |
150 | 163 | ||
@@ -165,9 +178,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str) | |||
165 | { | 178 | { |
166 | int len = strlen(str); | 179 | int len = strlen(str); |
167 | 180 | ||
168 | if (len > ((PAGE_SIZE - 1) - s->len)) | 181 | if (s->full) |
169 | return 0; | 182 | return 0; |
170 | 183 | ||
184 | if (len > ((PAGE_SIZE - 1) - s->len)) { | ||
185 | s->full = 1; | ||
186 | return 0; | ||
187 | } | ||
188 | |||
171 | memcpy(s->buffer + s->len, str, len); | 189 | memcpy(s->buffer + s->len, str, len); |
172 | s->len += len; | 190 | s->len += len; |
173 | 191 | ||
@@ -176,19 +194,30 @@ int trace_seq_puts(struct trace_seq *s, const char *str) | |||
176 | 194 | ||
177 | int trace_seq_putc(struct trace_seq *s, unsigned char c) | 195 | int trace_seq_putc(struct trace_seq *s, unsigned char c) |
178 | { | 196 | { |
179 | if (s->len >= (PAGE_SIZE - 1)) | 197 | if (s->full) |
198 | return 0; | ||
199 | |||
200 | if (s->len >= (PAGE_SIZE - 1)) { | ||
201 | s->full = 1; | ||
180 | return 0; | 202 | return 0; |
203 | } | ||
181 | 204 | ||
182 | s->buffer[s->len++] = c; | 205 | s->buffer[s->len++] = c; |
183 | 206 | ||
184 | return 1; | 207 | return 1; |
185 | } | 208 | } |
209 | EXPORT_SYMBOL(trace_seq_putc); | ||
186 | 210 | ||
187 | int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) | 211 | int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) |
188 | { | 212 | { |
189 | if (len > ((PAGE_SIZE - 1) - s->len)) | 213 | if (s->full) |
190 | return 0; | 214 | return 0; |
191 | 215 | ||
216 | if (len > ((PAGE_SIZE - 1) - s->len)) { | ||
217 | s->full = 1; | ||
218 | return 0; | ||
219 | } | ||
220 | |||
192 | memcpy(s->buffer + s->len, mem, len); | 221 | memcpy(s->buffer + s->len, mem, len); |
193 | s->len += len; | 222 | s->len += len; |
194 | 223 | ||
@@ -201,6 +230,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len) | |||
201 | const unsigned char *data = mem; | 230 | const unsigned char *data = mem; |
202 | int i, j; | 231 | int i, j; |
203 | 232 | ||
233 | if (s->full) | ||
234 | return 0; | ||
235 | |||
204 | #ifdef __BIG_ENDIAN | 236 | #ifdef __BIG_ENDIAN |
205 | for (i = 0, j = 0; i < len; i++) { | 237 | for (i = 0, j = 0; i < len; i++) { |
206 | #else | 238 | #else |
@@ -218,9 +250,14 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len) | |||
218 | { | 250 | { |
219 | void *ret; | 251 | void *ret; |
220 | 252 | ||
221 | if (len > ((PAGE_SIZE - 1) - s->len)) | 253 | if (s->full) |
222 | return NULL; | 254 | return NULL; |
223 | 255 | ||
256 | if (len > ((PAGE_SIZE - 1) - s->len)) { | ||
257 | s->full = 1; | ||
258 | return NULL; | ||
259 | } | ||
260 | |||
224 | ret = s->buffer + s->len; | 261 | ret = s->buffer + s->len; |
225 | s->len += len; | 262 | s->len += len; |
226 | 263 | ||
@@ -231,8 +268,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path) | |||
231 | { | 268 | { |
232 | unsigned char *p; | 269 | unsigned char *p; |
233 | 270 | ||
234 | if (s->len >= (PAGE_SIZE - 1)) | 271 | if (s->full) |
272 | return 0; | ||
273 | |||
274 | if (s->len >= (PAGE_SIZE - 1)) { | ||
275 | s->full = 1; | ||
235 | return 0; | 276 | return 0; |
277 | } | ||
278 | |||
236 | p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); | 279 | p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); |
237 | if (!IS_ERR(p)) { | 280 | if (!IS_ERR(p)) { |
238 | p = mangle_path(s->buffer + s->len, p, "\n"); | 281 | p = mangle_path(s->buffer + s->len, p, "\n"); |
@@ -245,6 +288,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path) | |||
245 | return 1; | 288 | return 1; |
246 | } | 289 | } |
247 | 290 | ||
291 | s->full = 1; | ||
248 | return 0; | 292 | return 0; |
249 | } | 293 | } |
250 | 294 | ||
@@ -309,6 +353,21 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, | |||
309 | } | 353 | } |
310 | EXPORT_SYMBOL(ftrace_print_symbols_seq); | 354 | EXPORT_SYMBOL(ftrace_print_symbols_seq); |
311 | 355 | ||
356 | const char * | ||
357 | ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) | ||
358 | { | ||
359 | int i; | ||
360 | const char *ret = p->buffer + p->len; | ||
361 | |||
362 | for (i = 0; i < buf_len; i++) | ||
363 | trace_seq_printf(p, "%s%2.2x", i == 0 ? "" : " ", buf[i]); | ||
364 | |||
365 | trace_seq_putc(p, 0); | ||
366 | |||
367 | return ret; | ||
368 | } | ||
369 | EXPORT_SYMBOL(ftrace_print_hex_seq); | ||
370 | |||
312 | #ifdef CONFIG_KRETPROBES | 371 | #ifdef CONFIG_KRETPROBES |
313 | static inline const char *kretprobed(const char *name) | 372 | static inline const char *kretprobed(const char *name) |
314 | { | 373 | { |
@@ -371,6 +430,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, | |||
371 | unsigned long vmstart = 0; | 430 | unsigned long vmstart = 0; |
372 | int ret = 1; | 431 | int ret = 1; |
373 | 432 | ||
433 | if (s->full) | ||
434 | return 0; | ||
435 | |||
374 | if (mm) { | 436 | if (mm) { |
375 | const struct vm_area_struct *vma; | 437 | const struct vm_area_struct *vma; |
376 | 438 | ||
@@ -408,7 +470,7 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, | |||
408 | * since individual threads might have already quit! | 470 | * since individual threads might have already quit! |
409 | */ | 471 | */ |
410 | rcu_read_lock(); | 472 | rcu_read_lock(); |
411 | task = find_task_by_vpid(entry->ent.tgid); | 473 | task = find_task_by_vpid(entry->tgid); |
412 | if (task) | 474 | if (task) |
413 | mm = get_task_mm(task); | 475 | mm = get_task_mm(task); |
414 | rcu_read_unlock(); | 476 | rcu_read_unlock(); |
@@ -461,18 +523,23 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) | |||
461 | return ret; | 523 | return ret; |
462 | } | 524 | } |
463 | 525 | ||
464 | static int | 526 | /** |
465 | lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) | 527 | * trace_print_lat_fmt - print the irq, preempt and lockdep fields |
528 | * @s: trace seq struct to write to | ||
529 | * @entry: The trace entry field from the ring buffer | ||
530 | * | ||
531 | * Prints the generic fields of irqs off, in hard or softirq, preempt | ||
532 | * count and lock depth. | ||
533 | */ | ||
534 | int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | ||
466 | { | 535 | { |
467 | int hardirq, softirq; | 536 | int hardirq, softirq; |
468 | char comm[TASK_COMM_LEN]; | 537 | int ret; |
469 | 538 | ||
470 | trace_find_cmdline(entry->pid, comm); | ||
471 | hardirq = entry->flags & TRACE_FLAG_HARDIRQ; | 539 | hardirq = entry->flags & TRACE_FLAG_HARDIRQ; |
472 | softirq = entry->flags & TRACE_FLAG_SOFTIRQ; | 540 | softirq = entry->flags & TRACE_FLAG_SOFTIRQ; |
473 | 541 | ||
474 | if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c", | 542 | if (!trace_seq_printf(s, "%c%c%c", |
475 | comm, entry->pid, cpu, | ||
476 | (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : | 543 | (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : |
477 | (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? | 544 | (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? |
478 | 'X' : '.', | 545 | 'X' : '.', |
@@ -483,8 +550,31 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) | |||
483 | return 0; | 550 | return 0; |
484 | 551 | ||
485 | if (entry->preempt_count) | 552 | if (entry->preempt_count) |
486 | return trace_seq_printf(s, "%x", entry->preempt_count); | 553 | ret = trace_seq_printf(s, "%x", entry->preempt_count); |
487 | return trace_seq_puts(s, "."); | 554 | else |
555 | ret = trace_seq_putc(s, '.'); | ||
556 | |||
557 | if (!ret) | ||
558 | return 0; | ||
559 | |||
560 | if (entry->lock_depth < 0) | ||
561 | return trace_seq_putc(s, '.'); | ||
562 | |||
563 | return trace_seq_printf(s, "%d", entry->lock_depth); | ||
564 | } | ||
565 | |||
566 | static int | ||
567 | lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) | ||
568 | { | ||
569 | char comm[TASK_COMM_LEN]; | ||
570 | |||
571 | trace_find_cmdline(entry->pid, comm); | ||
572 | |||
573 | if (!trace_seq_printf(s, "%8.8s-%-5d %3d", | ||
574 | comm, entry->pid, cpu)) | ||
575 | return 0; | ||
576 | |||
577 | return trace_print_lat_fmt(s, entry); | ||
488 | } | 578 | } |
489 | 579 | ||
490 | static unsigned long preempt_mark_thresh = 100; | 580 | static unsigned long preempt_mark_thresh = 100; |
@@ -649,6 +739,9 @@ int register_ftrace_event(struct trace_event *event) | |||
649 | if (WARN_ON(!event)) | 739 | if (WARN_ON(!event)) |
650 | goto out; | 740 | goto out; |
651 | 741 | ||
742 | if (WARN_ON(!event->funcs)) | ||
743 | goto out; | ||
744 | |||
652 | INIT_LIST_HEAD(&event->list); | 745 | INIT_LIST_HEAD(&event->list); |
653 | 746 | ||
654 | if (!event->type) { | 747 | if (!event->type) { |
@@ -681,14 +774,14 @@ int register_ftrace_event(struct trace_event *event) | |||
681 | goto out; | 774 | goto out; |
682 | } | 775 | } |
683 | 776 | ||
684 | if (event->trace == NULL) | 777 | if (event->funcs->trace == NULL) |
685 | event->trace = trace_nop_print; | 778 | event->funcs->trace = trace_nop_print; |
686 | if (event->raw == NULL) | 779 | if (event->funcs->raw == NULL) |
687 | event->raw = trace_nop_print; | 780 | event->funcs->raw = trace_nop_print; |
688 | if (event->hex == NULL) | 781 | if (event->funcs->hex == NULL) |
689 | event->hex = trace_nop_print; | 782 | event->funcs->hex = trace_nop_print; |
690 | if (event->binary == NULL) | 783 | if (event->funcs->binary == NULL) |
691 | event->binary = trace_nop_print; | 784 | event->funcs->binary = trace_nop_print; |
692 | 785 | ||
693 | key = event->type & (EVENT_HASHSIZE - 1); | 786 | key = event->type & (EVENT_HASHSIZE - 1); |
694 | 787 | ||
@@ -730,13 +823,15 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_event); | |||
730 | * Standard events | 823 | * Standard events |
731 | */ | 824 | */ |
732 | 825 | ||
733 | enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags) | 826 | enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags, |
827 | struct trace_event *event) | ||
734 | { | 828 | { |
735 | return TRACE_TYPE_HANDLED; | 829 | return TRACE_TYPE_HANDLED; |
736 | } | 830 | } |
737 | 831 | ||
738 | /* TRACE_FN */ | 832 | /* TRACE_FN */ |
739 | static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags) | 833 | static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags, |
834 | struct trace_event *event) | ||
740 | { | 835 | { |
741 | struct ftrace_entry *field; | 836 | struct ftrace_entry *field; |
742 | struct trace_seq *s = &iter->seq; | 837 | struct trace_seq *s = &iter->seq; |
@@ -763,7 +858,8 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags) | |||
763 | return TRACE_TYPE_PARTIAL_LINE; | 858 | return TRACE_TYPE_PARTIAL_LINE; |
764 | } | 859 | } |
765 | 860 | ||
766 | static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags) | 861 | static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags, |
862 | struct trace_event *event) | ||
767 | { | 863 | { |
768 | struct ftrace_entry *field; | 864 | struct ftrace_entry *field; |
769 | 865 | ||
@@ -777,7 +873,8 @@ static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags) | |||
777 | return TRACE_TYPE_HANDLED; | 873 | return TRACE_TYPE_HANDLED; |
778 | } | 874 | } |
779 | 875 | ||
780 | static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags) | 876 | static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags, |
877 | struct trace_event *event) | ||
781 | { | 878 | { |
782 | struct ftrace_entry *field; | 879 | struct ftrace_entry *field; |
783 | struct trace_seq *s = &iter->seq; | 880 | struct trace_seq *s = &iter->seq; |
@@ -790,7 +887,8 @@ static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags) | |||
790 | return TRACE_TYPE_HANDLED; | 887 | return TRACE_TYPE_HANDLED; |
791 | } | 888 | } |
792 | 889 | ||
793 | static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags) | 890 | static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags, |
891 | struct trace_event *event) | ||
794 | { | 892 | { |
795 | struct ftrace_entry *field; | 893 | struct ftrace_entry *field; |
796 | struct trace_seq *s = &iter->seq; | 894 | struct trace_seq *s = &iter->seq; |
@@ -803,14 +901,18 @@ static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags) | |||
803 | return TRACE_TYPE_HANDLED; | 901 | return TRACE_TYPE_HANDLED; |
804 | } | 902 | } |
805 | 903 | ||
806 | static struct trace_event trace_fn_event = { | 904 | static struct trace_event_functions trace_fn_funcs = { |
807 | .type = TRACE_FN, | ||
808 | .trace = trace_fn_trace, | 905 | .trace = trace_fn_trace, |
809 | .raw = trace_fn_raw, | 906 | .raw = trace_fn_raw, |
810 | .hex = trace_fn_hex, | 907 | .hex = trace_fn_hex, |
811 | .binary = trace_fn_bin, | 908 | .binary = trace_fn_bin, |
812 | }; | 909 | }; |
813 | 910 | ||
911 | static struct trace_event trace_fn_event = { | ||
912 | .type = TRACE_FN, | ||
913 | .funcs = &trace_fn_funcs, | ||
914 | }; | ||
915 | |||
814 | /* TRACE_CTX an TRACE_WAKE */ | 916 | /* TRACE_CTX an TRACE_WAKE */ |
815 | static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, | 917 | static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, |
816 | char *delim) | 918 | char *delim) |
@@ -839,13 +941,14 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, | |||
839 | return TRACE_TYPE_HANDLED; | 941 | return TRACE_TYPE_HANDLED; |
840 | } | 942 | } |
841 | 943 | ||
842 | static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags) | 944 | static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags, |
945 | struct trace_event *event) | ||
843 | { | 946 | { |
844 | return trace_ctxwake_print(iter, "==>"); | 947 | return trace_ctxwake_print(iter, "==>"); |
845 | } | 948 | } |
846 | 949 | ||
847 | static enum print_line_t trace_wake_print(struct trace_iterator *iter, | 950 | static enum print_line_t trace_wake_print(struct trace_iterator *iter, |
848 | int flags) | 951 | int flags, struct trace_event *event) |
849 | { | 952 | { |
850 | return trace_ctxwake_print(iter, " +"); | 953 | return trace_ctxwake_print(iter, " +"); |
851 | } | 954 | } |
@@ -858,7 +961,7 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S) | |||
858 | trace_assign_type(field, iter->ent); | 961 | trace_assign_type(field, iter->ent); |
859 | 962 | ||
860 | if (!S) | 963 | if (!S) |
861 | task_state_char(field->prev_state); | 964 | S = task_state_char(field->prev_state); |
862 | T = task_state_char(field->next_state); | 965 | T = task_state_char(field->next_state); |
863 | if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", | 966 | if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", |
864 | field->prev_pid, | 967 | field->prev_pid, |
@@ -873,12 +976,14 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S) | |||
873 | return TRACE_TYPE_HANDLED; | 976 | return TRACE_TYPE_HANDLED; |
874 | } | 977 | } |
875 | 978 | ||
876 | static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags) | 979 | static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags, |
980 | struct trace_event *event) | ||
877 | { | 981 | { |
878 | return trace_ctxwake_raw(iter, 0); | 982 | return trace_ctxwake_raw(iter, 0); |
879 | } | 983 | } |
880 | 984 | ||
881 | static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags) | 985 | static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags, |
986 | struct trace_event *event) | ||
882 | { | 987 | { |
883 | return trace_ctxwake_raw(iter, '+'); | 988 | return trace_ctxwake_raw(iter, '+'); |
884 | } | 989 | } |
@@ -893,7 +998,7 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S) | |||
893 | trace_assign_type(field, iter->ent); | 998 | trace_assign_type(field, iter->ent); |
894 | 999 | ||
895 | if (!S) | 1000 | if (!S) |
896 | task_state_char(field->prev_state); | 1001 | S = task_state_char(field->prev_state); |
897 | T = task_state_char(field->next_state); | 1002 | T = task_state_char(field->next_state); |
898 | 1003 | ||
899 | SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); | 1004 | SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); |
@@ -907,18 +1012,20 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S) | |||
907 | return TRACE_TYPE_HANDLED; | 1012 | return TRACE_TYPE_HANDLED; |
908 | } | 1013 | } |
909 | 1014 | ||
910 | static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags) | 1015 | static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags, |
1016 | struct trace_event *event) | ||
911 | { | 1017 | { |
912 | return trace_ctxwake_hex(iter, 0); | 1018 | return trace_ctxwake_hex(iter, 0); |
913 | } | 1019 | } |
914 | 1020 | ||
915 | static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags) | 1021 | static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags, |
1022 | struct trace_event *event) | ||
916 | { | 1023 | { |
917 | return trace_ctxwake_hex(iter, '+'); | 1024 | return trace_ctxwake_hex(iter, '+'); |
918 | } | 1025 | } |
919 | 1026 | ||
920 | static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, | 1027 | static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, |
921 | int flags) | 1028 | int flags, struct trace_event *event) |
922 | { | 1029 | { |
923 | struct ctx_switch_entry *field; | 1030 | struct ctx_switch_entry *field; |
924 | struct trace_seq *s = &iter->seq; | 1031 | struct trace_seq *s = &iter->seq; |
@@ -935,81 +1042,34 @@ static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, | |||
935 | return TRACE_TYPE_HANDLED; | 1042 | return TRACE_TYPE_HANDLED; |
936 | } | 1043 | } |
937 | 1044 | ||
938 | static struct trace_event trace_ctx_event = { | 1045 | static struct trace_event_functions trace_ctx_funcs = { |
939 | .type = TRACE_CTX, | ||
940 | .trace = trace_ctx_print, | 1046 | .trace = trace_ctx_print, |
941 | .raw = trace_ctx_raw, | 1047 | .raw = trace_ctx_raw, |
942 | .hex = trace_ctx_hex, | 1048 | .hex = trace_ctx_hex, |
943 | .binary = trace_ctxwake_bin, | 1049 | .binary = trace_ctxwake_bin, |
944 | }; | 1050 | }; |
945 | 1051 | ||
946 | static struct trace_event trace_wake_event = { | 1052 | static struct trace_event trace_ctx_event = { |
947 | .type = TRACE_WAKE, | 1053 | .type = TRACE_CTX, |
1054 | .funcs = &trace_ctx_funcs, | ||
1055 | }; | ||
1056 | |||
1057 | static struct trace_event_functions trace_wake_funcs = { | ||
948 | .trace = trace_wake_print, | 1058 | .trace = trace_wake_print, |
949 | .raw = trace_wake_raw, | 1059 | .raw = trace_wake_raw, |
950 | .hex = trace_wake_hex, | 1060 | .hex = trace_wake_hex, |
951 | .binary = trace_ctxwake_bin, | 1061 | .binary = trace_ctxwake_bin, |
952 | }; | 1062 | }; |
953 | 1063 | ||
954 | /* TRACE_SPECIAL */ | 1064 | static struct trace_event trace_wake_event = { |
955 | static enum print_line_t trace_special_print(struct trace_iterator *iter, | 1065 | .type = TRACE_WAKE, |
956 | int flags) | 1066 | .funcs = &trace_wake_funcs, |
957 | { | ||
958 | struct special_entry *field; | ||
959 | |||
960 | trace_assign_type(field, iter->ent); | ||
961 | |||
962 | if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n", | ||
963 | field->arg1, | ||
964 | field->arg2, | ||
965 | field->arg3)) | ||
966 | return TRACE_TYPE_PARTIAL_LINE; | ||
967 | |||
968 | return TRACE_TYPE_HANDLED; | ||
969 | } | ||
970 | |||
971 | static enum print_line_t trace_special_hex(struct trace_iterator *iter, | ||
972 | int flags) | ||
973 | { | ||
974 | struct special_entry *field; | ||
975 | struct trace_seq *s = &iter->seq; | ||
976 | |||
977 | trace_assign_type(field, iter->ent); | ||
978 | |||
979 | SEQ_PUT_HEX_FIELD_RET(s, field->arg1); | ||
980 | SEQ_PUT_HEX_FIELD_RET(s, field->arg2); | ||
981 | SEQ_PUT_HEX_FIELD_RET(s, field->arg3); | ||
982 | |||
983 | return TRACE_TYPE_HANDLED; | ||
984 | } | ||
985 | |||
986 | static enum print_line_t trace_special_bin(struct trace_iterator *iter, | ||
987 | int flags) | ||
988 | { | ||
989 | struct special_entry *field; | ||
990 | struct trace_seq *s = &iter->seq; | ||
991 | |||
992 | trace_assign_type(field, iter->ent); | ||
993 | |||
994 | SEQ_PUT_FIELD_RET(s, field->arg1); | ||
995 | SEQ_PUT_FIELD_RET(s, field->arg2); | ||
996 | SEQ_PUT_FIELD_RET(s, field->arg3); | ||
997 | |||
998 | return TRACE_TYPE_HANDLED; | ||
999 | } | ||
1000 | |||
1001 | static struct trace_event trace_special_event = { | ||
1002 | .type = TRACE_SPECIAL, | ||
1003 | .trace = trace_special_print, | ||
1004 | .raw = trace_special_print, | ||
1005 | .hex = trace_special_hex, | ||
1006 | .binary = trace_special_bin, | ||
1007 | }; | 1067 | }; |
1008 | 1068 | ||
1009 | /* TRACE_STACK */ | 1069 | /* TRACE_STACK */ |
1010 | 1070 | ||
1011 | static enum print_line_t trace_stack_print(struct trace_iterator *iter, | 1071 | static enum print_line_t trace_stack_print(struct trace_iterator *iter, |
1012 | int flags) | 1072 | int flags, struct trace_event *event) |
1013 | { | 1073 | { |
1014 | struct stack_entry *field; | 1074 | struct stack_entry *field; |
1015 | struct trace_seq *s = &iter->seq; | 1075 | struct trace_seq *s = &iter->seq; |
@@ -1037,17 +1097,18 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, | |||
1037 | return TRACE_TYPE_PARTIAL_LINE; | 1097 | return TRACE_TYPE_PARTIAL_LINE; |
1038 | } | 1098 | } |
1039 | 1099 | ||
1100 | static struct trace_event_functions trace_stack_funcs = { | ||
1101 | .trace = trace_stack_print, | ||
1102 | }; | ||
1103 | |||
1040 | static struct trace_event trace_stack_event = { | 1104 | static struct trace_event trace_stack_event = { |
1041 | .type = TRACE_STACK, | 1105 | .type = TRACE_STACK, |
1042 | .trace = trace_stack_print, | 1106 | .funcs = &trace_stack_funcs, |
1043 | .raw = trace_special_print, | ||
1044 | .hex = trace_special_hex, | ||
1045 | .binary = trace_special_bin, | ||
1046 | }; | 1107 | }; |
1047 | 1108 | ||
1048 | /* TRACE_USER_STACK */ | 1109 | /* TRACE_USER_STACK */ |
1049 | static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, | 1110 | static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, |
1050 | int flags) | 1111 | int flags, struct trace_event *event) |
1051 | { | 1112 | { |
1052 | struct userstack_entry *field; | 1113 | struct userstack_entry *field; |
1053 | struct trace_seq *s = &iter->seq; | 1114 | struct trace_seq *s = &iter->seq; |
@@ -1066,17 +1127,19 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, | |||
1066 | return TRACE_TYPE_PARTIAL_LINE; | 1127 | return TRACE_TYPE_PARTIAL_LINE; |
1067 | } | 1128 | } |
1068 | 1129 | ||
1130 | static struct trace_event_functions trace_user_stack_funcs = { | ||
1131 | .trace = trace_user_stack_print, | ||
1132 | }; | ||
1133 | |||
1069 | static struct trace_event trace_user_stack_event = { | 1134 | static struct trace_event trace_user_stack_event = { |
1070 | .type = TRACE_USER_STACK, | 1135 | .type = TRACE_USER_STACK, |
1071 | .trace = trace_user_stack_print, | 1136 | .funcs = &trace_user_stack_funcs, |
1072 | .raw = trace_special_print, | ||
1073 | .hex = trace_special_hex, | ||
1074 | .binary = trace_special_bin, | ||
1075 | }; | 1137 | }; |
1076 | 1138 | ||
1077 | /* TRACE_BPRINT */ | 1139 | /* TRACE_BPRINT */ |
1078 | static enum print_line_t | 1140 | static enum print_line_t |
1079 | trace_bprint_print(struct trace_iterator *iter, int flags) | 1141 | trace_bprint_print(struct trace_iterator *iter, int flags, |
1142 | struct trace_event *event) | ||
1080 | { | 1143 | { |
1081 | struct trace_entry *entry = iter->ent; | 1144 | struct trace_entry *entry = iter->ent; |
1082 | struct trace_seq *s = &iter->seq; | 1145 | struct trace_seq *s = &iter->seq; |
@@ -1101,7 +1164,8 @@ trace_bprint_print(struct trace_iterator *iter, int flags) | |||
1101 | 1164 | ||
1102 | 1165 | ||
1103 | static enum print_line_t | 1166 | static enum print_line_t |
1104 | trace_bprint_raw(struct trace_iterator *iter, int flags) | 1167 | trace_bprint_raw(struct trace_iterator *iter, int flags, |
1168 | struct trace_event *event) | ||
1105 | { | 1169 | { |
1106 | struct bprint_entry *field; | 1170 | struct bprint_entry *field; |
1107 | struct trace_seq *s = &iter->seq; | 1171 | struct trace_seq *s = &iter->seq; |
@@ -1120,16 +1184,19 @@ trace_bprint_raw(struct trace_iterator *iter, int flags) | |||
1120 | return TRACE_TYPE_PARTIAL_LINE; | 1184 | return TRACE_TYPE_PARTIAL_LINE; |
1121 | } | 1185 | } |
1122 | 1186 | ||
1187 | static struct trace_event_functions trace_bprint_funcs = { | ||
1188 | .trace = trace_bprint_print, | ||
1189 | .raw = trace_bprint_raw, | ||
1190 | }; | ||
1123 | 1191 | ||
1124 | static struct trace_event trace_bprint_event = { | 1192 | static struct trace_event trace_bprint_event = { |
1125 | .type = TRACE_BPRINT, | 1193 | .type = TRACE_BPRINT, |
1126 | .trace = trace_bprint_print, | 1194 | .funcs = &trace_bprint_funcs, |
1127 | .raw = trace_bprint_raw, | ||
1128 | }; | 1195 | }; |
1129 | 1196 | ||
1130 | /* TRACE_PRINT */ | 1197 | /* TRACE_PRINT */ |
1131 | static enum print_line_t trace_print_print(struct trace_iterator *iter, | 1198 | static enum print_line_t trace_print_print(struct trace_iterator *iter, |
1132 | int flags) | 1199 | int flags, struct trace_event *event) |
1133 | { | 1200 | { |
1134 | struct print_entry *field; | 1201 | struct print_entry *field; |
1135 | struct trace_seq *s = &iter->seq; | 1202 | struct trace_seq *s = &iter->seq; |
@@ -1148,7 +1215,8 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter, | |||
1148 | return TRACE_TYPE_PARTIAL_LINE; | 1215 | return TRACE_TYPE_PARTIAL_LINE; |
1149 | } | 1216 | } |
1150 | 1217 | ||
1151 | static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) | 1218 | static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags, |
1219 | struct trace_event *event) | ||
1152 | { | 1220 | { |
1153 | struct print_entry *field; | 1221 | struct print_entry *field; |
1154 | 1222 | ||
@@ -1163,18 +1231,21 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) | |||
1163 | return TRACE_TYPE_PARTIAL_LINE; | 1231 | return TRACE_TYPE_PARTIAL_LINE; |
1164 | } | 1232 | } |
1165 | 1233 | ||
1166 | static struct trace_event trace_print_event = { | 1234 | static struct trace_event_functions trace_print_funcs = { |
1167 | .type = TRACE_PRINT, | ||
1168 | .trace = trace_print_print, | 1235 | .trace = trace_print_print, |
1169 | .raw = trace_print_raw, | 1236 | .raw = trace_print_raw, |
1170 | }; | 1237 | }; |
1171 | 1238 | ||
1239 | static struct trace_event trace_print_event = { | ||
1240 | .type = TRACE_PRINT, | ||
1241 | .funcs = &trace_print_funcs, | ||
1242 | }; | ||
1243 | |||
1172 | 1244 | ||
1173 | static struct trace_event *events[] __initdata = { | 1245 | static struct trace_event *events[] __initdata = { |
1174 | &trace_fn_event, | 1246 | &trace_fn_event, |
1175 | &trace_ctx_event, | 1247 | &trace_ctx_event, |
1176 | &trace_wake_event, | 1248 | &trace_wake_event, |
1177 | &trace_special_event, | ||
1178 | &trace_stack_event, | 1249 | &trace_stack_event, |
1179 | &trace_user_stack_event, | 1250 | &trace_user_stack_event, |
1180 | &trace_bprint_event, | 1251 | &trace_bprint_event, |
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index d38bec4a9c30..c038eba0492b 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h | |||
@@ -25,7 +25,9 @@ extern void trace_event_read_unlock(void); | |||
25 | extern struct trace_event *ftrace_find_event(int type); | 25 | extern struct trace_event *ftrace_find_event(int type); |
26 | 26 | ||
27 | extern enum print_line_t trace_nop_print(struct trace_iterator *iter, | 27 | extern enum print_line_t trace_nop_print(struct trace_iterator *iter, |
28 | int flags); | 28 | int flags, struct trace_event *event); |
29 | extern int | ||
30 | trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); | ||
29 | 31 | ||
30 | /* used by module unregistering */ | 32 | /* used by module unregistering */ |
31 | extern int __unregister_ftrace_event(struct trace_event *event); | 33 | extern int __unregister_ftrace_event(struct trace_event *event); |
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c deleted file mode 100644 index 8a30d9874cd4..000000000000 --- a/kernel/trace/trace_power.c +++ /dev/null | |||
@@ -1,214 +0,0 @@ | |||
1 | /* | ||
2 | * ring buffer based C-state tracer | ||
3 | * | ||
4 | * Arjan van de Ven <arjan@linux.intel.com> | ||
5 | * Copyright (C) 2008 Intel Corporation | ||
6 | * | ||
7 | * Much is borrowed from trace_boot.c which is | ||
8 | * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | #include <linux/init.h> | ||
13 | #include <linux/debugfs.h> | ||
14 | #include <trace/power.h> | ||
15 | #include <linux/kallsyms.h> | ||
16 | #include <linux/module.h> | ||
17 | |||
18 | #include "trace.h" | ||
19 | #include "trace_output.h" | ||
20 | |||
21 | static struct trace_array *power_trace; | ||
22 | static int __read_mostly trace_power_enabled; | ||
23 | |||
24 | static void probe_power_start(struct power_trace *it, unsigned int type, | ||
25 | unsigned int level) | ||
26 | { | ||
27 | if (!trace_power_enabled) | ||
28 | return; | ||
29 | |||
30 | memset(it, 0, sizeof(struct power_trace)); | ||
31 | it->state = level; | ||
32 | it->type = type; | ||
33 | it->stamp = ktime_get(); | ||
34 | } | ||
35 | |||
36 | |||
37 | static void probe_power_end(struct power_trace *it) | ||
38 | { | ||
39 | struct ftrace_event_call *call = &event_power; | ||
40 | struct ring_buffer_event *event; | ||
41 | struct trace_power *entry; | ||
42 | struct trace_array_cpu *data; | ||
43 | struct trace_array *tr = power_trace; | ||
44 | |||
45 | if (!trace_power_enabled) | ||
46 | return; | ||
47 | |||
48 | preempt_disable(); | ||
49 | it->end = ktime_get(); | ||
50 | data = tr->data[smp_processor_id()]; | ||
51 | |||
52 | event = trace_buffer_lock_reserve(tr, TRACE_POWER, | ||
53 | sizeof(*entry), 0, 0); | ||
54 | if (!event) | ||
55 | goto out; | ||
56 | entry = ring_buffer_event_data(event); | ||
57 | entry->state_data = *it; | ||
58 | if (!filter_check_discard(call, entry, tr->buffer, event)) | ||
59 | trace_buffer_unlock_commit(tr, event, 0, 0); | ||
60 | out: | ||
61 | preempt_enable(); | ||
62 | } | ||
63 | |||
64 | static void probe_power_mark(struct power_trace *it, unsigned int type, | ||
65 | unsigned int level) | ||
66 | { | ||
67 | struct ftrace_event_call *call = &event_power; | ||
68 | struct ring_buffer_event *event; | ||
69 | struct trace_power *entry; | ||
70 | struct trace_array_cpu *data; | ||
71 | struct trace_array *tr = power_trace; | ||
72 | |||
73 | if (!trace_power_enabled) | ||
74 | return; | ||
75 | |||
76 | memset(it, 0, sizeof(struct power_trace)); | ||
77 | it->state = level; | ||
78 | it->type = type; | ||
79 | it->stamp = ktime_get(); | ||
80 | preempt_disable(); | ||
81 | it->end = it->stamp; | ||
82 | data = tr->data[smp_processor_id()]; | ||
83 | |||
84 | event = trace_buffer_lock_reserve(tr, TRACE_POWER, | ||
85 | sizeof(*entry), 0, 0); | ||
86 | if (!event) | ||
87 | goto out; | ||
88 | entry = ring_buffer_event_data(event); | ||
89 | entry->state_data = *it; | ||
90 | if (!filter_check_discard(call, entry, tr->buffer, event)) | ||
91 | trace_buffer_unlock_commit(tr, event, 0, 0); | ||
92 | out: | ||
93 | preempt_enable(); | ||
94 | } | ||
95 | |||
96 | static int tracing_power_register(void) | ||
97 | { | ||
98 | int ret; | ||
99 | |||
100 | ret = register_trace_power_start(probe_power_start); | ||
101 | if (ret) { | ||
102 | pr_info("power trace: Couldn't activate tracepoint" | ||
103 | " probe to trace_power_start\n"); | ||
104 | return ret; | ||
105 | } | ||
106 | ret = register_trace_power_end(probe_power_end); | ||
107 | if (ret) { | ||
108 | pr_info("power trace: Couldn't activate tracepoint" | ||
109 | " probe to trace_power_end\n"); | ||
110 | goto fail_start; | ||
111 | } | ||
112 | ret = register_trace_power_mark(probe_power_mark); | ||
113 | if (ret) { | ||
114 | pr_info("power trace: Couldn't activate tracepoint" | ||
115 | " probe to trace_power_mark\n"); | ||
116 | goto fail_end; | ||
117 | } | ||
118 | return ret; | ||
119 | fail_end: | ||
120 | unregister_trace_power_end(probe_power_end); | ||
121 | fail_start: | ||
122 | unregister_trace_power_start(probe_power_start); | ||
123 | return ret; | ||
124 | } | ||
125 | |||
126 | static void start_power_trace(struct trace_array *tr) | ||
127 | { | ||
128 | trace_power_enabled = 1; | ||
129 | } | ||
130 | |||
131 | static void stop_power_trace(struct trace_array *tr) | ||
132 | { | ||
133 | trace_power_enabled = 0; | ||
134 | } | ||
135 | |||
136 | static void power_trace_reset(struct trace_array *tr) | ||
137 | { | ||
138 | trace_power_enabled = 0; | ||
139 | unregister_trace_power_start(probe_power_start); | ||
140 | unregister_trace_power_end(probe_power_end); | ||
141 | unregister_trace_power_mark(probe_power_mark); | ||
142 | } | ||
143 | |||
144 | |||
145 | static int power_trace_init(struct trace_array *tr) | ||
146 | { | ||
147 | int cpu; | ||
148 | power_trace = tr; | ||
149 | |||
150 | trace_power_enabled = 1; | ||
151 | tracing_power_register(); | ||
152 | |||
153 | for_each_cpu(cpu, cpu_possible_mask) | ||
154 | tracing_reset(tr, cpu); | ||
155 | return 0; | ||
156 | } | ||
157 | |||
158 | static enum print_line_t power_print_line(struct trace_iterator *iter) | ||
159 | { | ||
160 | int ret = 0; | ||
161 | struct trace_entry *entry = iter->ent; | ||
162 | struct trace_power *field ; | ||
163 | struct power_trace *it; | ||
164 | struct trace_seq *s = &iter->seq; | ||
165 | struct timespec stamp; | ||
166 | struct timespec duration; | ||
167 | |||
168 | trace_assign_type(field, entry); | ||
169 | it = &field->state_data; | ||
170 | stamp = ktime_to_timespec(it->stamp); | ||
171 | duration = ktime_to_timespec(ktime_sub(it->end, it->stamp)); | ||
172 | |||
173 | if (entry->type == TRACE_POWER) { | ||
174 | if (it->type == POWER_CSTATE) | ||
175 | ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n", | ||
176 | stamp.tv_sec, | ||
177 | stamp.tv_nsec, | ||
178 | it->state, iter->cpu, | ||
179 | duration.tv_sec, | ||
180 | duration.tv_nsec); | ||
181 | if (it->type == POWER_PSTATE) | ||
182 | ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n", | ||
183 | stamp.tv_sec, | ||
184 | stamp.tv_nsec, | ||
185 | it->state, iter->cpu); | ||
186 | if (!ret) | ||
187 | return TRACE_TYPE_PARTIAL_LINE; | ||
188 | return TRACE_TYPE_HANDLED; | ||
189 | } | ||
190 | return TRACE_TYPE_UNHANDLED; | ||
191 | } | ||
192 | |||
193 | static void power_print_header(struct seq_file *s) | ||
194 | { | ||
195 | seq_puts(s, "# TIMESTAMP STATE EVENT\n"); | ||
196 | seq_puts(s, "# | | |\n"); | ||
197 | } | ||
198 | |||
199 | static struct tracer power_tracer __read_mostly = | ||
200 | { | ||
201 | .name = "power", | ||
202 | .init = power_trace_init, | ||
203 | .start = start_power_trace, | ||
204 | .stop = stop_power_trace, | ||
205 | .reset = power_trace_reset, | ||
206 | .print_line = power_print_line, | ||
207 | .print_header = power_print_header, | ||
208 | }; | ||
209 | |||
210 | static int init_power_trace(void) | ||
211 | { | ||
212 | return register_tracer(&power_tracer); | ||
213 | } | ||
214 | device_initcall(init_power_trace); | ||
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 9bece9687b62..2547d8813cf0 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <linux/ftrace.h> | 11 | #include <linux/ftrace.h> |
12 | #include <linux/string.h> | 12 | #include <linux/string.h> |
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/marker.h> | ||
15 | #include <linux/mutex.h> | 14 | #include <linux/mutex.h> |
16 | #include <linux/ctype.h> | 15 | #include <linux/ctype.h> |
17 | #include <linux/list.h> | 16 | #include <linux/list.h> |
@@ -155,25 +154,19 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) | |||
155 | EXPORT_SYMBOL_GPL(__ftrace_vprintk); | 154 | EXPORT_SYMBOL_GPL(__ftrace_vprintk); |
156 | 155 | ||
157 | static void * | 156 | static void * |
158 | t_next(struct seq_file *m, void *v, loff_t *pos) | 157 | t_start(struct seq_file *m, loff_t *pos) |
159 | { | 158 | { |
160 | const char **fmt = m->private; | 159 | const char **fmt = __start___trace_bprintk_fmt + *pos; |
161 | const char **next = fmt; | ||
162 | |||
163 | (*pos)++; | ||
164 | 160 | ||
165 | if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt) | 161 | if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt) |
166 | return NULL; | 162 | return NULL; |
167 | |||
168 | next = fmt; | ||
169 | m->private = ++next; | ||
170 | |||
171 | return fmt; | 163 | return fmt; |
172 | } | 164 | } |
173 | 165 | ||
174 | static void *t_start(struct seq_file *m, loff_t *pos) | 166 | static void *t_next(struct seq_file *m, void * v, loff_t *pos) |
175 | { | 167 | { |
176 | return t_next(m, NULL, pos); | 168 | (*pos)++; |
169 | return t_start(m, pos); | ||
177 | } | 170 | } |
178 | 171 | ||
179 | static int t_show(struct seq_file *m, void *v) | 172 | static int t_show(struct seq_file *m, void *v) |
@@ -182,7 +175,7 @@ static int t_show(struct seq_file *m, void *v) | |||
182 | const char *str = *fmt; | 175 | const char *str = *fmt; |
183 | int i; | 176 | int i; |
184 | 177 | ||
185 | seq_printf(m, "0x%lx : \"", (unsigned long)fmt); | 178 | seq_printf(m, "0x%lx : \"", *(unsigned long *)fmt); |
186 | 179 | ||
187 | /* | 180 | /* |
188 | * Tabs and new lines need to be converted. | 181 | * Tabs and new lines need to be converted. |
@@ -224,15 +217,7 @@ static const struct seq_operations show_format_seq_ops = { | |||
224 | static int | 217 | static int |
225 | ftrace_formats_open(struct inode *inode, struct file *file) | 218 | ftrace_formats_open(struct inode *inode, struct file *file) |
226 | { | 219 | { |
227 | int ret; | 220 | return seq_open(file, &show_format_seq_ops); |
228 | |||
229 | ret = seq_open(file, &show_format_seq_ops); | ||
230 | if (!ret) { | ||
231 | struct seq_file *m = file->private_data; | ||
232 | |||
233 | m->private = __start___trace_bprintk_fmt; | ||
234 | } | ||
235 | return ret; | ||
236 | } | 221 | } |
237 | 222 | ||
238 | static const struct file_operations ftrace_formats_fops = { | 223 | static const struct file_operations ftrace_formats_fops = { |
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index a98106dd979c..8f758d070c43 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c | |||
@@ -20,9 +20,37 @@ static int sched_ref; | |||
20 | static DEFINE_MUTEX(sched_register_mutex); | 20 | static DEFINE_MUTEX(sched_register_mutex); |
21 | static int sched_stopped; | 21 | static int sched_stopped; |
22 | 22 | ||
23 | |||
24 | void | ||
25 | tracing_sched_switch_trace(struct trace_array *tr, | ||
26 | struct task_struct *prev, | ||
27 | struct task_struct *next, | ||
28 | unsigned long flags, int pc) | ||
29 | { | ||
30 | struct ftrace_event_call *call = &event_context_switch; | ||
31 | struct ring_buffer *buffer = tr->buffer; | ||
32 | struct ring_buffer_event *event; | ||
33 | struct ctx_switch_entry *entry; | ||
34 | |||
35 | event = trace_buffer_lock_reserve(buffer, TRACE_CTX, | ||
36 | sizeof(*entry), flags, pc); | ||
37 | if (!event) | ||
38 | return; | ||
39 | entry = ring_buffer_event_data(event); | ||
40 | entry->prev_pid = prev->pid; | ||
41 | entry->prev_prio = prev->prio; | ||
42 | entry->prev_state = prev->state; | ||
43 | entry->next_pid = next->pid; | ||
44 | entry->next_prio = next->prio; | ||
45 | entry->next_state = next->state; | ||
46 | entry->next_cpu = task_cpu(next); | ||
47 | |||
48 | if (!filter_check_discard(call, entry, buffer, event)) | ||
49 | trace_buffer_unlock_commit(buffer, event, flags, pc); | ||
50 | } | ||
51 | |||
23 | static void | 52 | static void |
24 | probe_sched_switch(struct rq *__rq, struct task_struct *prev, | 53 | probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next) |
25 | struct task_struct *next) | ||
26 | { | 54 | { |
27 | struct trace_array_cpu *data; | 55 | struct trace_array_cpu *data; |
28 | unsigned long flags; | 56 | unsigned long flags; |
@@ -49,8 +77,38 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev, | |||
49 | local_irq_restore(flags); | 77 | local_irq_restore(flags); |
50 | } | 78 | } |
51 | 79 | ||
80 | void | ||
81 | tracing_sched_wakeup_trace(struct trace_array *tr, | ||
82 | struct task_struct *wakee, | ||
83 | struct task_struct *curr, | ||
84 | unsigned long flags, int pc) | ||
85 | { | ||
86 | struct ftrace_event_call *call = &event_wakeup; | ||
87 | struct ring_buffer_event *event; | ||
88 | struct ctx_switch_entry *entry; | ||
89 | struct ring_buffer *buffer = tr->buffer; | ||
90 | |||
91 | event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, | ||
92 | sizeof(*entry), flags, pc); | ||
93 | if (!event) | ||
94 | return; | ||
95 | entry = ring_buffer_event_data(event); | ||
96 | entry->prev_pid = curr->pid; | ||
97 | entry->prev_prio = curr->prio; | ||
98 | entry->prev_state = curr->state; | ||
99 | entry->next_pid = wakee->pid; | ||
100 | entry->next_prio = wakee->prio; | ||
101 | entry->next_state = wakee->state; | ||
102 | entry->next_cpu = task_cpu(wakee); | ||
103 | |||
104 | if (!filter_check_discard(call, entry, buffer, event)) | ||
105 | ring_buffer_unlock_commit(buffer, event); | ||
106 | ftrace_trace_stack(tr->buffer, flags, 6, pc); | ||
107 | ftrace_trace_userstack(tr->buffer, flags, pc); | ||
108 | } | ||
109 | |||
52 | static void | 110 | static void |
53 | probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) | 111 | probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success) |
54 | { | 112 | { |
55 | struct trace_array_cpu *data; | 113 | struct trace_array_cpu *data; |
56 | unsigned long flags; | 114 | unsigned long flags; |
@@ -80,21 +138,21 @@ static int tracing_sched_register(void) | |||
80 | { | 138 | { |
81 | int ret; | 139 | int ret; |
82 | 140 | ||
83 | ret = register_trace_sched_wakeup(probe_sched_wakeup); | 141 | ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL); |
84 | if (ret) { | 142 | if (ret) { |
85 | pr_info("wakeup trace: Couldn't activate tracepoint" | 143 | pr_info("wakeup trace: Couldn't activate tracepoint" |
86 | " probe to kernel_sched_wakeup\n"); | 144 | " probe to kernel_sched_wakeup\n"); |
87 | return ret; | 145 | return ret; |
88 | } | 146 | } |
89 | 147 | ||
90 | ret = register_trace_sched_wakeup_new(probe_sched_wakeup); | 148 | ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL); |
91 | if (ret) { | 149 | if (ret) { |
92 | pr_info("wakeup trace: Couldn't activate tracepoint" | 150 | pr_info("wakeup trace: Couldn't activate tracepoint" |
93 | " probe to kernel_sched_wakeup_new\n"); | 151 | " probe to kernel_sched_wakeup_new\n"); |
94 | goto fail_deprobe; | 152 | goto fail_deprobe; |
95 | } | 153 | } |
96 | 154 | ||
97 | ret = register_trace_sched_switch(probe_sched_switch); | 155 | ret = register_trace_sched_switch(probe_sched_switch, NULL); |
98 | if (ret) { | 156 | if (ret) { |
99 | pr_info("sched trace: Couldn't activate tracepoint" | 157 | pr_info("sched trace: Couldn't activate tracepoint" |
100 | " probe to kernel_sched_switch\n"); | 158 | " probe to kernel_sched_switch\n"); |
@@ -103,17 +161,17 @@ static int tracing_sched_register(void) | |||
103 | 161 | ||
104 | return ret; | 162 | return ret; |
105 | fail_deprobe_wake_new: | 163 | fail_deprobe_wake_new: |
106 | unregister_trace_sched_wakeup_new(probe_sched_wakeup); | 164 | unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL); |
107 | fail_deprobe: | 165 | fail_deprobe: |
108 | unregister_trace_sched_wakeup(probe_sched_wakeup); | 166 | unregister_trace_sched_wakeup(probe_sched_wakeup, NULL); |
109 | return ret; | 167 | return ret; |
110 | } | 168 | } |
111 | 169 | ||
112 | static void tracing_sched_unregister(void) | 170 | static void tracing_sched_unregister(void) |
113 | { | 171 | { |
114 | unregister_trace_sched_switch(probe_sched_switch); | 172 | unregister_trace_sched_switch(probe_sched_switch, NULL); |
115 | unregister_trace_sched_wakeup_new(probe_sched_wakeup); | 173 | unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL); |
116 | unregister_trace_sched_wakeup(probe_sched_wakeup); | 174 | unregister_trace_sched_wakeup(probe_sched_wakeup, NULL); |
117 | } | 175 | } |
118 | 176 | ||
119 | static void tracing_start_sched_switch(void) | 177 | static void tracing_start_sched_switch(void) |
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index eacb27225173..7319559ed59f 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c | |||
@@ -24,66 +24,106 @@ static int __read_mostly tracer_enabled; | |||
24 | 24 | ||
25 | static struct task_struct *wakeup_task; | 25 | static struct task_struct *wakeup_task; |
26 | static int wakeup_cpu; | 26 | static int wakeup_cpu; |
27 | static int wakeup_current_cpu; | ||
27 | static unsigned wakeup_prio = -1; | 28 | static unsigned wakeup_prio = -1; |
28 | static int wakeup_rt; | 29 | static int wakeup_rt; |
29 | 30 | ||
30 | static raw_spinlock_t wakeup_lock = | 31 | static arch_spinlock_t wakeup_lock = |
31 | (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 32 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
32 | 33 | ||
34 | static void wakeup_reset(struct trace_array *tr); | ||
33 | static void __wakeup_reset(struct trace_array *tr); | 35 | static void __wakeup_reset(struct trace_array *tr); |
36 | static int wakeup_graph_entry(struct ftrace_graph_ent *trace); | ||
37 | static void wakeup_graph_return(struct ftrace_graph_ret *trace); | ||
34 | 38 | ||
35 | static int save_lat_flag; | 39 | static int save_lat_flag; |
36 | 40 | ||
41 | #define TRACE_DISPLAY_GRAPH 1 | ||
42 | |||
43 | static struct tracer_opt trace_opts[] = { | ||
44 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
45 | /* display latency trace as call graph */ | ||
46 | { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) }, | ||
47 | #endif | ||
48 | { } /* Empty entry */ | ||
49 | }; | ||
50 | |||
51 | static struct tracer_flags tracer_flags = { | ||
52 | .val = 0, | ||
53 | .opts = trace_opts, | ||
54 | }; | ||
55 | |||
56 | #define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH) | ||
57 | |||
37 | #ifdef CONFIG_FUNCTION_TRACER | 58 | #ifdef CONFIG_FUNCTION_TRACER |
59 | |||
38 | /* | 60 | /* |
39 | * irqsoff uses its own tracer function to keep the overhead down: | 61 | * Prologue for the wakeup function tracers. |
62 | * | ||
63 | * Returns 1 if it is OK to continue, and preemption | ||
64 | * is disabled and data->disabled is incremented. | ||
65 | * 0 if the trace is to be ignored, and preemption | ||
66 | * is not disabled and data->disabled is | ||
67 | * kept the same. | ||
68 | * | ||
69 | * Note, this function is also used outside this ifdef but | ||
70 | * inside the #ifdef of the function graph tracer below. | ||
71 | * This is OK, since the function graph tracer is | ||
72 | * dependent on the function tracer. | ||
40 | */ | 73 | */ |
41 | static void | 74 | static int |
42 | wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) | 75 | func_prolog_preempt_disable(struct trace_array *tr, |
76 | struct trace_array_cpu **data, | ||
77 | int *pc) | ||
43 | { | 78 | { |
44 | struct trace_array *tr = wakeup_trace; | ||
45 | struct trace_array_cpu *data; | ||
46 | unsigned long flags; | ||
47 | long disabled; | 79 | long disabled; |
48 | int resched; | ||
49 | int cpu; | 80 | int cpu; |
50 | int pc; | ||
51 | 81 | ||
52 | if (likely(!wakeup_task)) | 82 | if (likely(!wakeup_task)) |
53 | return; | 83 | return 0; |
54 | 84 | ||
55 | pc = preempt_count(); | 85 | *pc = preempt_count(); |
56 | resched = ftrace_preempt_disable(); | 86 | preempt_disable_notrace(); |
57 | 87 | ||
58 | cpu = raw_smp_processor_id(); | 88 | cpu = raw_smp_processor_id(); |
59 | data = tr->data[cpu]; | 89 | if (cpu != wakeup_current_cpu) |
60 | disabled = atomic_inc_return(&data->disabled); | 90 | goto out_enable; |
91 | |||
92 | *data = tr->data[cpu]; | ||
93 | disabled = atomic_inc_return(&(*data)->disabled); | ||
61 | if (unlikely(disabled != 1)) | 94 | if (unlikely(disabled != 1)) |
62 | goto out; | 95 | goto out; |
63 | 96 | ||
64 | local_irq_save(flags); | 97 | return 1; |
65 | __raw_spin_lock(&wakeup_lock); | ||
66 | 98 | ||
67 | if (unlikely(!wakeup_task)) | 99 | out: |
68 | goto unlock; | 100 | atomic_dec(&(*data)->disabled); |
69 | 101 | ||
70 | /* | 102 | out_enable: |
71 | * The task can't disappear because it needs to | 103 | preempt_enable_notrace(); |
72 | * wake up first, and we have the wakeup_lock. | 104 | return 0; |
73 | */ | 105 | } |
74 | if (task_cpu(wakeup_task) != cpu) | ||
75 | goto unlock; | ||
76 | 106 | ||
77 | trace_function(tr, ip, parent_ip, flags, pc); | 107 | /* |
108 | * wakeup uses its own tracer function to keep the overhead down: | ||
109 | */ | ||
110 | static void | ||
111 | wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) | ||
112 | { | ||
113 | struct trace_array *tr = wakeup_trace; | ||
114 | struct trace_array_cpu *data; | ||
115 | unsigned long flags; | ||
116 | int pc; | ||
117 | |||
118 | if (!func_prolog_preempt_disable(tr, &data, &pc)) | ||
119 | return; | ||
78 | 120 | ||
79 | unlock: | 121 | local_irq_save(flags); |
80 | __raw_spin_unlock(&wakeup_lock); | 122 | trace_function(tr, ip, parent_ip, flags, pc); |
81 | local_irq_restore(flags); | 123 | local_irq_restore(flags); |
82 | 124 | ||
83 | out: | ||
84 | atomic_dec(&data->disabled); | 125 | atomic_dec(&data->disabled); |
85 | 126 | preempt_enable_notrace(); | |
86 | ftrace_preempt_enable(resched); | ||
87 | } | 127 | } |
88 | 128 | ||
89 | static struct ftrace_ops trace_ops __read_mostly = | 129 | static struct ftrace_ops trace_ops __read_mostly = |
@@ -92,6 +132,156 @@ static struct ftrace_ops trace_ops __read_mostly = | |||
92 | }; | 132 | }; |
93 | #endif /* CONFIG_FUNCTION_TRACER */ | 133 | #endif /* CONFIG_FUNCTION_TRACER */ |
94 | 134 | ||
135 | static int start_func_tracer(int graph) | ||
136 | { | ||
137 | int ret; | ||
138 | |||
139 | if (!graph) | ||
140 | ret = register_ftrace_function(&trace_ops); | ||
141 | else | ||
142 | ret = register_ftrace_graph(&wakeup_graph_return, | ||
143 | &wakeup_graph_entry); | ||
144 | |||
145 | if (!ret && tracing_is_enabled()) | ||
146 | tracer_enabled = 1; | ||
147 | else | ||
148 | tracer_enabled = 0; | ||
149 | |||
150 | return ret; | ||
151 | } | ||
152 | |||
153 | static void stop_func_tracer(int graph) | ||
154 | { | ||
155 | tracer_enabled = 0; | ||
156 | |||
157 | if (!graph) | ||
158 | unregister_ftrace_function(&trace_ops); | ||
159 | else | ||
160 | unregister_ftrace_graph(); | ||
161 | } | ||
162 | |||
163 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
164 | static int wakeup_set_flag(u32 old_flags, u32 bit, int set) | ||
165 | { | ||
166 | |||
167 | if (!(bit & TRACE_DISPLAY_GRAPH)) | ||
168 | return -EINVAL; | ||
169 | |||
170 | if (!(is_graph() ^ set)) | ||
171 | return 0; | ||
172 | |||
173 | stop_func_tracer(!set); | ||
174 | |||
175 | wakeup_reset(wakeup_trace); | ||
176 | tracing_max_latency = 0; | ||
177 | |||
178 | return start_func_tracer(set); | ||
179 | } | ||
180 | |||
181 | static int wakeup_graph_entry(struct ftrace_graph_ent *trace) | ||
182 | { | ||
183 | struct trace_array *tr = wakeup_trace; | ||
184 | struct trace_array_cpu *data; | ||
185 | unsigned long flags; | ||
186 | int pc, ret = 0; | ||
187 | |||
188 | if (!func_prolog_preempt_disable(tr, &data, &pc)) | ||
189 | return 0; | ||
190 | |||
191 | local_save_flags(flags); | ||
192 | ret = __trace_graph_entry(tr, trace, flags, pc); | ||
193 | atomic_dec(&data->disabled); | ||
194 | preempt_enable_notrace(); | ||
195 | |||
196 | return ret; | ||
197 | } | ||
198 | |||
199 | static void wakeup_graph_return(struct ftrace_graph_ret *trace) | ||
200 | { | ||
201 | struct trace_array *tr = wakeup_trace; | ||
202 | struct trace_array_cpu *data; | ||
203 | unsigned long flags; | ||
204 | int pc; | ||
205 | |||
206 | if (!func_prolog_preempt_disable(tr, &data, &pc)) | ||
207 | return; | ||
208 | |||
209 | local_save_flags(flags); | ||
210 | __trace_graph_return(tr, trace, flags, pc); | ||
211 | atomic_dec(&data->disabled); | ||
212 | |||
213 | preempt_enable_notrace(); | ||
214 | return; | ||
215 | } | ||
216 | |||
217 | static void wakeup_trace_open(struct trace_iterator *iter) | ||
218 | { | ||
219 | if (is_graph()) | ||
220 | graph_trace_open(iter); | ||
221 | } | ||
222 | |||
223 | static void wakeup_trace_close(struct trace_iterator *iter) | ||
224 | { | ||
225 | if (iter->private) | ||
226 | graph_trace_close(iter); | ||
227 | } | ||
228 | |||
229 | #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC) | ||
230 | |||
231 | static enum print_line_t wakeup_print_line(struct trace_iterator *iter) | ||
232 | { | ||
233 | /* | ||
234 | * In graph mode call the graph tracer output function, | ||
235 | * otherwise go with the TRACE_FN event handler | ||
236 | */ | ||
237 | if (is_graph()) | ||
238 | return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS); | ||
239 | |||
240 | return TRACE_TYPE_UNHANDLED; | ||
241 | } | ||
242 | |||
243 | static void wakeup_print_header(struct seq_file *s) | ||
244 | { | ||
245 | if (is_graph()) | ||
246 | print_graph_headers_flags(s, GRAPH_TRACER_FLAGS); | ||
247 | else | ||
248 | trace_default_header(s); | ||
249 | } | ||
250 | |||
251 | static void | ||
252 | __trace_function(struct trace_array *tr, | ||
253 | unsigned long ip, unsigned long parent_ip, | ||
254 | unsigned long flags, int pc) | ||
255 | { | ||
256 | if (is_graph()) | ||
257 | trace_graph_function(tr, ip, parent_ip, flags, pc); | ||
258 | else | ||
259 | trace_function(tr, ip, parent_ip, flags, pc); | ||
260 | } | ||
261 | #else | ||
262 | #define __trace_function trace_function | ||
263 | |||
264 | static int wakeup_set_flag(u32 old_flags, u32 bit, int set) | ||
265 | { | ||
266 | return -EINVAL; | ||
267 | } | ||
268 | |||
269 | static int wakeup_graph_entry(struct ftrace_graph_ent *trace) | ||
270 | { | ||
271 | return -1; | ||
272 | } | ||
273 | |||
274 | static enum print_line_t wakeup_print_line(struct trace_iterator *iter) | ||
275 | { | ||
276 | return TRACE_TYPE_UNHANDLED; | ||
277 | } | ||
278 | |||
279 | static void wakeup_graph_return(struct ftrace_graph_ret *trace) { } | ||
280 | static void wakeup_print_header(struct seq_file *s) { } | ||
281 | static void wakeup_trace_open(struct trace_iterator *iter) { } | ||
282 | static void wakeup_trace_close(struct trace_iterator *iter) { } | ||
283 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | ||
284 | |||
95 | /* | 285 | /* |
96 | * Should this new latency be reported/recorded? | 286 | * Should this new latency be reported/recorded? |
97 | */ | 287 | */ |
@@ -107,11 +297,19 @@ static int report_latency(cycle_t delta) | |||
107 | return 1; | 297 | return 1; |
108 | } | 298 | } |
109 | 299 | ||
300 | static void | ||
301 | probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu) | ||
302 | { | ||
303 | if (task != wakeup_task) | ||
304 | return; | ||
305 | |||
306 | wakeup_current_cpu = cpu; | ||
307 | } | ||
308 | |||
110 | static void notrace | 309 | static void notrace |
111 | probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, | 310 | probe_wakeup_sched_switch(void *ignore, |
112 | struct task_struct *next) | 311 | struct task_struct *prev, struct task_struct *next) |
113 | { | 312 | { |
114 | unsigned long latency = 0, t0 = 0, t1 = 0; | ||
115 | struct trace_array_cpu *data; | 313 | struct trace_array_cpu *data; |
116 | cycle_t T0, T1, delta; | 314 | cycle_t T0, T1, delta; |
117 | unsigned long flags; | 315 | unsigned long flags; |
@@ -145,7 +343,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, | |||
145 | goto out; | 343 | goto out; |
146 | 344 | ||
147 | local_irq_save(flags); | 345 | local_irq_save(flags); |
148 | __raw_spin_lock(&wakeup_lock); | 346 | arch_spin_lock(&wakeup_lock); |
149 | 347 | ||
150 | /* We could race with grabbing wakeup_lock */ | 348 | /* We could race with grabbing wakeup_lock */ |
151 | if (unlikely(!tracer_enabled || next != wakeup_task)) | 349 | if (unlikely(!tracer_enabled || next != wakeup_task)) |
@@ -154,13 +352,9 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, | |||
154 | /* The task we are waiting for is waking up */ | 352 | /* The task we are waiting for is waking up */ |
155 | data = wakeup_trace->data[wakeup_cpu]; | 353 | data = wakeup_trace->data[wakeup_cpu]; |
156 | 354 | ||
157 | trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); | 355 | __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); |
158 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); | 356 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); |
159 | 357 | ||
160 | /* | ||
161 | * usecs conversion is slow so we try to delay the conversion | ||
162 | * as long as possible: | ||
163 | */ | ||
164 | T0 = data->preempt_timestamp; | 358 | T0 = data->preempt_timestamp; |
165 | T1 = ftrace_now(cpu); | 359 | T1 = ftrace_now(cpu); |
166 | delta = T1-T0; | 360 | delta = T1-T0; |
@@ -168,17 +362,14 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, | |||
168 | if (!report_latency(delta)) | 362 | if (!report_latency(delta)) |
169 | goto out_unlock; | 363 | goto out_unlock; |
170 | 364 | ||
171 | latency = nsecs_to_usecs(delta); | 365 | if (likely(!is_tracing_stopped())) { |
172 | 366 | tracing_max_latency = delta; | |
173 | tracing_max_latency = delta; | 367 | update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu); |
174 | t0 = nsecs_to_usecs(T0); | 368 | } |
175 | t1 = nsecs_to_usecs(T1); | ||
176 | |||
177 | update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu); | ||
178 | 369 | ||
179 | out_unlock: | 370 | out_unlock: |
180 | __wakeup_reset(wakeup_trace); | 371 | __wakeup_reset(wakeup_trace); |
181 | __raw_spin_unlock(&wakeup_lock); | 372 | arch_spin_unlock(&wakeup_lock); |
182 | local_irq_restore(flags); | 373 | local_irq_restore(flags); |
183 | out: | 374 | out: |
184 | atomic_dec(&wakeup_trace->data[cpu]->disabled); | 375 | atomic_dec(&wakeup_trace->data[cpu]->disabled); |
@@ -186,11 +377,6 @@ out: | |||
186 | 377 | ||
187 | static void __wakeup_reset(struct trace_array *tr) | 378 | static void __wakeup_reset(struct trace_array *tr) |
188 | { | 379 | { |
189 | int cpu; | ||
190 | |||
191 | for_each_possible_cpu(cpu) | ||
192 | tracing_reset(tr, cpu); | ||
193 | |||
194 | wakeup_cpu = -1; | 380 | wakeup_cpu = -1; |
195 | wakeup_prio = -1; | 381 | wakeup_prio = -1; |
196 | 382 | ||
@@ -204,15 +390,17 @@ static void wakeup_reset(struct trace_array *tr) | |||
204 | { | 390 | { |
205 | unsigned long flags; | 391 | unsigned long flags; |
206 | 392 | ||
393 | tracing_reset_online_cpus(tr); | ||
394 | |||
207 | local_irq_save(flags); | 395 | local_irq_save(flags); |
208 | __raw_spin_lock(&wakeup_lock); | 396 | arch_spin_lock(&wakeup_lock); |
209 | __wakeup_reset(tr); | 397 | __wakeup_reset(tr); |
210 | __raw_spin_unlock(&wakeup_lock); | 398 | arch_spin_unlock(&wakeup_lock); |
211 | local_irq_restore(flags); | 399 | local_irq_restore(flags); |
212 | } | 400 | } |
213 | 401 | ||
214 | static void | 402 | static void |
215 | probe_wakeup(struct rq *rq, struct task_struct *p, int success) | 403 | probe_wakeup(void *ignore, struct task_struct *p, int success) |
216 | { | 404 | { |
217 | struct trace_array_cpu *data; | 405 | struct trace_array_cpu *data; |
218 | int cpu = smp_processor_id(); | 406 | int cpu = smp_processor_id(); |
@@ -237,7 +425,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) | |||
237 | goto out; | 425 | goto out; |
238 | 426 | ||
239 | /* interrupts should be off from try_to_wake_up */ | 427 | /* interrupts should be off from try_to_wake_up */ |
240 | __raw_spin_lock(&wakeup_lock); | 428 | arch_spin_lock(&wakeup_lock); |
241 | 429 | ||
242 | /* check for races. */ | 430 | /* check for races. */ |
243 | if (!tracer_enabled || p->prio >= wakeup_prio) | 431 | if (!tracer_enabled || p->prio >= wakeup_prio) |
@@ -247,6 +435,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) | |||
247 | __wakeup_reset(wakeup_trace); | 435 | __wakeup_reset(wakeup_trace); |
248 | 436 | ||
249 | wakeup_cpu = task_cpu(p); | 437 | wakeup_cpu = task_cpu(p); |
438 | wakeup_current_cpu = wakeup_cpu; | ||
250 | wakeup_prio = p->prio; | 439 | wakeup_prio = p->prio; |
251 | 440 | ||
252 | wakeup_task = p; | 441 | wakeup_task = p; |
@@ -263,10 +452,10 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) | |||
263 | * is not called by an assembly function (where as schedule is) | 452 | * is not called by an assembly function (where as schedule is) |
264 | * it should be safe to use it here. | 453 | * it should be safe to use it here. |
265 | */ | 454 | */ |
266 | trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); | 455 | __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); |
267 | 456 | ||
268 | out_locked: | 457 | out_locked: |
269 | __raw_spin_unlock(&wakeup_lock); | 458 | arch_spin_unlock(&wakeup_lock); |
270 | out: | 459 | out: |
271 | atomic_dec(&wakeup_trace->data[cpu]->disabled); | 460 | atomic_dec(&wakeup_trace->data[cpu]->disabled); |
272 | } | 461 | } |
@@ -275,27 +464,34 @@ static void start_wakeup_tracer(struct trace_array *tr) | |||
275 | { | 464 | { |
276 | int ret; | 465 | int ret; |
277 | 466 | ||
278 | ret = register_trace_sched_wakeup(probe_wakeup); | 467 | ret = register_trace_sched_wakeup(probe_wakeup, NULL); |
279 | if (ret) { | 468 | if (ret) { |
280 | pr_info("wakeup trace: Couldn't activate tracepoint" | 469 | pr_info("wakeup trace: Couldn't activate tracepoint" |
281 | " probe to kernel_sched_wakeup\n"); | 470 | " probe to kernel_sched_wakeup\n"); |
282 | return; | 471 | return; |
283 | } | 472 | } |
284 | 473 | ||
285 | ret = register_trace_sched_wakeup_new(probe_wakeup); | 474 | ret = register_trace_sched_wakeup_new(probe_wakeup, NULL); |
286 | if (ret) { | 475 | if (ret) { |
287 | pr_info("wakeup trace: Couldn't activate tracepoint" | 476 | pr_info("wakeup trace: Couldn't activate tracepoint" |
288 | " probe to kernel_sched_wakeup_new\n"); | 477 | " probe to kernel_sched_wakeup_new\n"); |
289 | goto fail_deprobe; | 478 | goto fail_deprobe; |
290 | } | 479 | } |
291 | 480 | ||
292 | ret = register_trace_sched_switch(probe_wakeup_sched_switch); | 481 | ret = register_trace_sched_switch(probe_wakeup_sched_switch, NULL); |
293 | if (ret) { | 482 | if (ret) { |
294 | pr_info("sched trace: Couldn't activate tracepoint" | 483 | pr_info("sched trace: Couldn't activate tracepoint" |
295 | " probe to kernel_sched_switch\n"); | 484 | " probe to kernel_sched_switch\n"); |
296 | goto fail_deprobe_wake_new; | 485 | goto fail_deprobe_wake_new; |
297 | } | 486 | } |
298 | 487 | ||
488 | ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL); | ||
489 | if (ret) { | ||
490 | pr_info("wakeup trace: Couldn't activate tracepoint" | ||
491 | " probe to kernel_sched_migrate_task\n"); | ||
492 | return; | ||
493 | } | ||
494 | |||
299 | wakeup_reset(tr); | 495 | wakeup_reset(tr); |
300 | 496 | ||
301 | /* | 497 | /* |
@@ -307,27 +503,24 @@ static void start_wakeup_tracer(struct trace_array *tr) | |||
307 | */ | 503 | */ |
308 | smp_wmb(); | 504 | smp_wmb(); |
309 | 505 | ||
310 | register_ftrace_function(&trace_ops); | 506 | if (start_func_tracer(is_graph())) |
311 | 507 | printk(KERN_ERR "failed to start wakeup tracer\n"); | |
312 | if (tracing_is_enabled()) | ||
313 | tracer_enabled = 1; | ||
314 | else | ||
315 | tracer_enabled = 0; | ||
316 | 508 | ||
317 | return; | 509 | return; |
318 | fail_deprobe_wake_new: | 510 | fail_deprobe_wake_new: |
319 | unregister_trace_sched_wakeup_new(probe_wakeup); | 511 | unregister_trace_sched_wakeup_new(probe_wakeup, NULL); |
320 | fail_deprobe: | 512 | fail_deprobe: |
321 | unregister_trace_sched_wakeup(probe_wakeup); | 513 | unregister_trace_sched_wakeup(probe_wakeup, NULL); |
322 | } | 514 | } |
323 | 515 | ||
324 | static void stop_wakeup_tracer(struct trace_array *tr) | 516 | static void stop_wakeup_tracer(struct trace_array *tr) |
325 | { | 517 | { |
326 | tracer_enabled = 0; | 518 | tracer_enabled = 0; |
327 | unregister_ftrace_function(&trace_ops); | 519 | stop_func_tracer(is_graph()); |
328 | unregister_trace_sched_switch(probe_wakeup_sched_switch); | 520 | unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); |
329 | unregister_trace_sched_wakeup_new(probe_wakeup); | 521 | unregister_trace_sched_wakeup_new(probe_wakeup, NULL); |
330 | unregister_trace_sched_wakeup(probe_wakeup); | 522 | unregister_trace_sched_wakeup(probe_wakeup, NULL); |
523 | unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL); | ||
331 | } | 524 | } |
332 | 525 | ||
333 | static int __wakeup_tracer_init(struct trace_array *tr) | 526 | static int __wakeup_tracer_init(struct trace_array *tr) |
@@ -382,9 +575,16 @@ static struct tracer wakeup_tracer __read_mostly = | |||
382 | .start = wakeup_tracer_start, | 575 | .start = wakeup_tracer_start, |
383 | .stop = wakeup_tracer_stop, | 576 | .stop = wakeup_tracer_stop, |
384 | .print_max = 1, | 577 | .print_max = 1, |
578 | .print_header = wakeup_print_header, | ||
579 | .print_line = wakeup_print_line, | ||
580 | .flags = &tracer_flags, | ||
581 | .set_flag = wakeup_set_flag, | ||
385 | #ifdef CONFIG_FTRACE_SELFTEST | 582 | #ifdef CONFIG_FTRACE_SELFTEST |
386 | .selftest = trace_selftest_startup_wakeup, | 583 | .selftest = trace_selftest_startup_wakeup, |
387 | #endif | 584 | #endif |
585 | .open = wakeup_trace_open, | ||
586 | .close = wakeup_trace_close, | ||
587 | .use_max_tr = 1, | ||
388 | }; | 588 | }; |
389 | 589 | ||
390 | static struct tracer wakeup_rt_tracer __read_mostly = | 590 | static struct tracer wakeup_rt_tracer __read_mostly = |
@@ -396,9 +596,16 @@ static struct tracer wakeup_rt_tracer __read_mostly = | |||
396 | .stop = wakeup_tracer_stop, | 596 | .stop = wakeup_tracer_stop, |
397 | .wait_pipe = poll_wait_pipe, | 597 | .wait_pipe = poll_wait_pipe, |
398 | .print_max = 1, | 598 | .print_max = 1, |
599 | .print_header = wakeup_print_header, | ||
600 | .print_line = wakeup_print_line, | ||
601 | .flags = &tracer_flags, | ||
602 | .set_flag = wakeup_set_flag, | ||
399 | #ifdef CONFIG_FTRACE_SELFTEST | 603 | #ifdef CONFIG_FTRACE_SELFTEST |
400 | .selftest = trace_selftest_startup_wakeup, | 604 | .selftest = trace_selftest_startup_wakeup, |
401 | #endif | 605 | #endif |
606 | .open = wakeup_trace_open, | ||
607 | .close = wakeup_trace_close, | ||
608 | .use_max_tr = 1, | ||
402 | }; | 609 | }; |
403 | 610 | ||
404 | __init static int init_wakeup_tracer(void) | 611 | __init static int init_wakeup_tracer(void) |
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 00dd6485bdd7..659732eba07c 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/stringify.h> | 3 | #include <linux/stringify.h> |
4 | #include <linux/kthread.h> | 4 | #include <linux/kthread.h> |
5 | #include <linux/delay.h> | 5 | #include <linux/delay.h> |
6 | #include <linux/slab.h> | ||
6 | 7 | ||
7 | static inline int trace_valid_entry(struct trace_entry *entry) | 8 | static inline int trace_valid_entry(struct trace_entry *entry) |
8 | { | 9 | { |
@@ -12,11 +13,9 @@ static inline int trace_valid_entry(struct trace_entry *entry) | |||
12 | case TRACE_WAKE: | 13 | case TRACE_WAKE: |
13 | case TRACE_STACK: | 14 | case TRACE_STACK: |
14 | case TRACE_PRINT: | 15 | case TRACE_PRINT: |
15 | case TRACE_SPECIAL: | ||
16 | case TRACE_BRANCH: | 16 | case TRACE_BRANCH: |
17 | case TRACE_GRAPH_ENT: | 17 | case TRACE_GRAPH_ENT: |
18 | case TRACE_GRAPH_RET: | 18 | case TRACE_GRAPH_RET: |
19 | case TRACE_HW_BRANCHES: | ||
20 | return 1; | 19 | return 1; |
21 | } | 20 | } |
22 | return 0; | 21 | return 0; |
@@ -28,7 +27,7 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu) | |||
28 | struct trace_entry *entry; | 27 | struct trace_entry *entry; |
29 | unsigned int loops = 0; | 28 | unsigned int loops = 0; |
30 | 29 | ||
31 | while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) { | 30 | while ((event = ring_buffer_consume(tr->buffer, cpu, NULL, NULL))) { |
32 | entry = ring_buffer_event_data(event); | 31 | entry = ring_buffer_event_data(event); |
33 | 32 | ||
34 | /* | 33 | /* |
@@ -66,7 +65,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) | |||
66 | 65 | ||
67 | /* Don't allow flipping of max traces now */ | 66 | /* Don't allow flipping of max traces now */ |
68 | local_irq_save(flags); | 67 | local_irq_save(flags); |
69 | __raw_spin_lock(&ftrace_max_lock); | 68 | arch_spin_lock(&ftrace_max_lock); |
70 | 69 | ||
71 | cnt = ring_buffer_entries(tr->buffer); | 70 | cnt = ring_buffer_entries(tr->buffer); |
72 | 71 | ||
@@ -84,7 +83,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) | |||
84 | break; | 83 | break; |
85 | } | 84 | } |
86 | tracing_on(); | 85 | tracing_on(); |
87 | __raw_spin_unlock(&ftrace_max_lock); | 86 | arch_spin_unlock(&ftrace_max_lock); |
88 | local_irq_restore(flags); | 87 | local_irq_restore(flags); |
89 | 88 | ||
90 | if (count) | 89 | if (count) |
@@ -254,7 +253,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) | |||
254 | /* Maximum number of functions to trace before diagnosing a hang */ | 253 | /* Maximum number of functions to trace before diagnosing a hang */ |
255 | #define GRAPH_MAX_FUNC_TEST 100000000 | 254 | #define GRAPH_MAX_FUNC_TEST 100000000 |
256 | 255 | ||
257 | static void __ftrace_dump(bool disable_tracing); | 256 | static void |
257 | __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode); | ||
258 | static unsigned int graph_hang_thresh; | 258 | static unsigned int graph_hang_thresh; |
259 | 259 | ||
260 | /* Wrap the real function entry probe to avoid possible hanging */ | 260 | /* Wrap the real function entry probe to avoid possible hanging */ |
@@ -265,7 +265,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) | |||
265 | ftrace_graph_stop(); | 265 | ftrace_graph_stop(); |
266 | printk(KERN_WARNING "BUG: Function graph tracer hang!\n"); | 266 | printk(KERN_WARNING "BUG: Function graph tracer hang!\n"); |
267 | if (ftrace_dump_on_oops) | 267 | if (ftrace_dump_on_oops) |
268 | __ftrace_dump(false); | 268 | __ftrace_dump(false, DUMP_ALL); |
269 | return 0; | 269 | return 0; |
270 | } | 270 | } |
271 | 271 | ||
@@ -288,6 +288,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, | |||
288 | * to detect and recover from possible hangs | 288 | * to detect and recover from possible hangs |
289 | */ | 289 | */ |
290 | tracing_reset_online_cpus(tr); | 290 | tracing_reset_online_cpus(tr); |
291 | set_graph_array(tr); | ||
291 | ret = register_ftrace_graph(&trace_graph_return, | 292 | ret = register_ftrace_graph(&trace_graph_return, |
292 | &trace_graph_entry_watchdog); | 293 | &trace_graph_entry_watchdog); |
293 | if (ret) { | 294 | if (ret) { |
@@ -557,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr) | |||
557 | static int trace_wakeup_test_thread(void *data) | 558 | static int trace_wakeup_test_thread(void *data) |
558 | { | 559 | { |
559 | /* Make this a RT thread, doesn't need to be too high */ | 560 | /* Make this a RT thread, doesn't need to be too high */ |
560 | struct sched_param param = { .sched_priority = 5 }; | 561 | static const struct sched_param param = { .sched_priority = 5 }; |
561 | struct completion *x = data; | 562 | struct completion *x = data; |
562 | 563 | ||
563 | sched_setscheduler(current, SCHED_FIFO, ¶m); | 564 | sched_setscheduler(current, SCHED_FIFO, ¶m); |
@@ -688,38 +689,6 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr | |||
688 | } | 689 | } |
689 | #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ | 690 | #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ |
690 | 691 | ||
691 | #ifdef CONFIG_SYSPROF_TRACER | ||
692 | int | ||
693 | trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) | ||
694 | { | ||
695 | unsigned long count; | ||
696 | int ret; | ||
697 | |||
698 | /* start the tracing */ | ||
699 | ret = tracer_init(trace, tr); | ||
700 | if (ret) { | ||
701 | warn_failed_init_tracer(trace, ret); | ||
702 | return ret; | ||
703 | } | ||
704 | |||
705 | /* Sleep for a 1/10 of a second */ | ||
706 | msleep(100); | ||
707 | /* stop the tracing. */ | ||
708 | tracing_stop(); | ||
709 | /* check the trace buffer */ | ||
710 | ret = trace_test_buffer(tr, &count); | ||
711 | trace->reset(tr); | ||
712 | tracing_start(); | ||
713 | |||
714 | if (!ret && !count) { | ||
715 | printk(KERN_CONT ".. no entries found .."); | ||
716 | ret = -1; | ||
717 | } | ||
718 | |||
719 | return ret; | ||
720 | } | ||
721 | #endif /* CONFIG_SYSPROF_TRACER */ | ||
722 | |||
723 | #ifdef CONFIG_BRANCH_TRACER | 692 | #ifdef CONFIG_BRANCH_TRACER |
724 | int | 693 | int |
725 | trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) | 694 | trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) |
@@ -752,58 +721,3 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) | |||
752 | } | 721 | } |
753 | #endif /* CONFIG_BRANCH_TRACER */ | 722 | #endif /* CONFIG_BRANCH_TRACER */ |
754 | 723 | ||
755 | #ifdef CONFIG_HW_BRANCH_TRACER | ||
756 | int | ||
757 | trace_selftest_startup_hw_branches(struct tracer *trace, | ||
758 | struct trace_array *tr) | ||
759 | { | ||
760 | struct trace_iterator *iter; | ||
761 | struct tracer tracer; | ||
762 | unsigned long count; | ||
763 | int ret; | ||
764 | |||
765 | if (!trace->open) { | ||
766 | printk(KERN_CONT "missing open function..."); | ||
767 | return -1; | ||
768 | } | ||
769 | |||
770 | ret = tracer_init(trace, tr); | ||
771 | if (ret) { | ||
772 | warn_failed_init_tracer(trace, ret); | ||
773 | return ret; | ||
774 | } | ||
775 | |||
776 | /* | ||
777 | * The hw-branch tracer needs to collect the trace from the various | ||
778 | * cpu trace buffers - before tracing is stopped. | ||
779 | */ | ||
780 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); | ||
781 | if (!iter) | ||
782 | return -ENOMEM; | ||
783 | |||
784 | memcpy(&tracer, trace, sizeof(tracer)); | ||
785 | |||
786 | iter->trace = &tracer; | ||
787 | iter->tr = tr; | ||
788 | iter->pos = -1; | ||
789 | mutex_init(&iter->mutex); | ||
790 | |||
791 | trace->open(iter); | ||
792 | |||
793 | mutex_destroy(&iter->mutex); | ||
794 | kfree(iter); | ||
795 | |||
796 | tracing_stop(); | ||
797 | |||
798 | ret = trace_test_buffer(tr, &count); | ||
799 | trace->reset(tr); | ||
800 | tracing_start(); | ||
801 | |||
802 | if (!ret && !count) { | ||
803 | printk(KERN_CONT "no entries found.."); | ||
804 | ret = -1; | ||
805 | } | ||
806 | |||
807 | return ret; | ||
808 | } | ||
809 | #endif /* CONFIG_HW_BRANCH_TRACER */ | ||
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 2d7aebd71dbd..4c5dead0c239 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
@@ -27,8 +27,8 @@ static struct stack_trace max_stack_trace = { | |||
27 | }; | 27 | }; |
28 | 28 | ||
29 | static unsigned long max_stack_size; | 29 | static unsigned long max_stack_size; |
30 | static raw_spinlock_t max_stack_lock = | 30 | static arch_spinlock_t max_stack_lock = |
31 | (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 31 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
32 | 32 | ||
33 | static int stack_trace_disabled __read_mostly; | 33 | static int stack_trace_disabled __read_mostly; |
34 | static DEFINE_PER_CPU(int, trace_active); | 34 | static DEFINE_PER_CPU(int, trace_active); |
@@ -54,7 +54,7 @@ static inline void check_stack(void) | |||
54 | return; | 54 | return; |
55 | 55 | ||
56 | local_irq_save(flags); | 56 | local_irq_save(flags); |
57 | __raw_spin_lock(&max_stack_lock); | 57 | arch_spin_lock(&max_stack_lock); |
58 | 58 | ||
59 | /* a race could have already updated it */ | 59 | /* a race could have already updated it */ |
60 | if (this_size <= max_stack_size) | 60 | if (this_size <= max_stack_size) |
@@ -103,19 +103,19 @@ static inline void check_stack(void) | |||
103 | } | 103 | } |
104 | 104 | ||
105 | out: | 105 | out: |
106 | __raw_spin_unlock(&max_stack_lock); | 106 | arch_spin_unlock(&max_stack_lock); |
107 | local_irq_restore(flags); | 107 | local_irq_restore(flags); |
108 | } | 108 | } |
109 | 109 | ||
110 | static void | 110 | static void |
111 | stack_trace_call(unsigned long ip, unsigned long parent_ip) | 111 | stack_trace_call(unsigned long ip, unsigned long parent_ip) |
112 | { | 112 | { |
113 | int cpu, resched; | 113 | int cpu; |
114 | 114 | ||
115 | if (unlikely(!ftrace_enabled || stack_trace_disabled)) | 115 | if (unlikely(!ftrace_enabled || stack_trace_disabled)) |
116 | return; | 116 | return; |
117 | 117 | ||
118 | resched = ftrace_preempt_disable(); | 118 | preempt_disable_notrace(); |
119 | 119 | ||
120 | cpu = raw_smp_processor_id(); | 120 | cpu = raw_smp_processor_id(); |
121 | /* no atomic needed, we only modify this variable by this cpu */ | 121 | /* no atomic needed, we only modify this variable by this cpu */ |
@@ -127,7 +127,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) | |||
127 | out: | 127 | out: |
128 | per_cpu(trace_active, cpu)--; | 128 | per_cpu(trace_active, cpu)--; |
129 | /* prevent recursion in schedule */ | 129 | /* prevent recursion in schedule */ |
130 | ftrace_preempt_enable(resched); | 130 | preempt_enable_notrace(); |
131 | } | 131 | } |
132 | 132 | ||
133 | static struct ftrace_ops trace_ops __read_mostly = | 133 | static struct ftrace_ops trace_ops __read_mostly = |
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, | |||
157 | unsigned long val, flags; | 157 | unsigned long val, flags; |
158 | char buf[64]; | 158 | char buf[64]; |
159 | int ret; | 159 | int ret; |
160 | int cpu; | ||
160 | 161 | ||
161 | if (count >= sizeof(buf)) | 162 | if (count >= sizeof(buf)) |
162 | return -EINVAL; | 163 | return -EINVAL; |
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, | |||
171 | return ret; | 172 | return ret; |
172 | 173 | ||
173 | local_irq_save(flags); | 174 | local_irq_save(flags); |
174 | __raw_spin_lock(&max_stack_lock); | 175 | |
176 | /* | ||
177 | * In case we trace inside arch_spin_lock() or after (NMI), | ||
178 | * we will cause circular lock, so we also need to increase | ||
179 | * the percpu trace_active here. | ||
180 | */ | ||
181 | cpu = smp_processor_id(); | ||
182 | per_cpu(trace_active, cpu)++; | ||
183 | |||
184 | arch_spin_lock(&max_stack_lock); | ||
175 | *ptr = val; | 185 | *ptr = val; |
176 | __raw_spin_unlock(&max_stack_lock); | 186 | arch_spin_unlock(&max_stack_lock); |
187 | |||
188 | per_cpu(trace_active, cpu)--; | ||
177 | local_irq_restore(flags); | 189 | local_irq_restore(flags); |
178 | 190 | ||
179 | return count; | 191 | return count; |
@@ -183,66 +195,62 @@ static const struct file_operations stack_max_size_fops = { | |||
183 | .open = tracing_open_generic, | 195 | .open = tracing_open_generic, |
184 | .read = stack_max_size_read, | 196 | .read = stack_max_size_read, |
185 | .write = stack_max_size_write, | 197 | .write = stack_max_size_write, |
198 | .llseek = default_llseek, | ||
186 | }; | 199 | }; |
187 | 200 | ||
188 | static void * | 201 | static void * |
189 | t_next(struct seq_file *m, void *v, loff_t *pos) | 202 | __next(struct seq_file *m, loff_t *pos) |
190 | { | 203 | { |
191 | long i; | 204 | long n = *pos - 1; |
192 | 205 | ||
193 | (*pos)++; | 206 | if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX) |
194 | |||
195 | if (v == SEQ_START_TOKEN) | ||
196 | i = 0; | ||
197 | else { | ||
198 | i = *(long *)v; | ||
199 | i++; | ||
200 | } | ||
201 | |||
202 | if (i >= max_stack_trace.nr_entries || | ||
203 | stack_dump_trace[i] == ULONG_MAX) | ||
204 | return NULL; | 207 | return NULL; |
205 | 208 | ||
206 | m->private = (void *)i; | 209 | m->private = (void *)n; |
207 | |||
208 | return &m->private; | 210 | return &m->private; |
209 | } | 211 | } |
210 | 212 | ||
213 | static void * | ||
214 | t_next(struct seq_file *m, void *v, loff_t *pos) | ||
215 | { | ||
216 | (*pos)++; | ||
217 | return __next(m, pos); | ||
218 | } | ||
219 | |||
211 | static void *t_start(struct seq_file *m, loff_t *pos) | 220 | static void *t_start(struct seq_file *m, loff_t *pos) |
212 | { | 221 | { |
213 | void *t = SEQ_START_TOKEN; | 222 | int cpu; |
214 | loff_t l = 0; | ||
215 | 223 | ||
216 | local_irq_disable(); | 224 | local_irq_disable(); |
217 | __raw_spin_lock(&max_stack_lock); | 225 | |
226 | cpu = smp_processor_id(); | ||
227 | per_cpu(trace_active, cpu)++; | ||
228 | |||
229 | arch_spin_lock(&max_stack_lock); | ||
218 | 230 | ||
219 | if (*pos == 0) | 231 | if (*pos == 0) |
220 | return SEQ_START_TOKEN; | 232 | return SEQ_START_TOKEN; |
221 | 233 | ||
222 | for (; t && l < *pos; t = t_next(m, t, &l)) | 234 | return __next(m, pos); |
223 | ; | ||
224 | |||
225 | return t; | ||
226 | } | 235 | } |
227 | 236 | ||
228 | static void t_stop(struct seq_file *m, void *p) | 237 | static void t_stop(struct seq_file *m, void *p) |
229 | { | 238 | { |
230 | __raw_spin_unlock(&max_stack_lock); | 239 | int cpu; |
240 | |||
241 | arch_spin_unlock(&max_stack_lock); | ||
242 | |||
243 | cpu = smp_processor_id(); | ||
244 | per_cpu(trace_active, cpu)--; | ||
245 | |||
231 | local_irq_enable(); | 246 | local_irq_enable(); |
232 | } | 247 | } |
233 | 248 | ||
234 | static int trace_lookup_stack(struct seq_file *m, long i) | 249 | static int trace_lookup_stack(struct seq_file *m, long i) |
235 | { | 250 | { |
236 | unsigned long addr = stack_dump_trace[i]; | 251 | unsigned long addr = stack_dump_trace[i]; |
237 | #ifdef CONFIG_KALLSYMS | ||
238 | char str[KSYM_SYMBOL_LEN]; | ||
239 | |||
240 | sprint_symbol(str, addr); | ||
241 | 252 | ||
242 | return seq_printf(m, "%s\n", str); | 253 | return seq_printf(m, "%pS\n", (void *)addr); |
243 | #else | ||
244 | return seq_printf(m, "%p\n", (void*)addr); | ||
245 | #endif | ||
246 | } | 254 | } |
247 | 255 | ||
248 | static void print_disabled(struct seq_file *m) | 256 | static void print_disabled(struct seq_file *m) |
@@ -301,35 +309,32 @@ static const struct seq_operations stack_trace_seq_ops = { | |||
301 | 309 | ||
302 | static int stack_trace_open(struct inode *inode, struct file *file) | 310 | static int stack_trace_open(struct inode *inode, struct file *file) |
303 | { | 311 | { |
304 | int ret; | 312 | return seq_open(file, &stack_trace_seq_ops); |
305 | |||
306 | ret = seq_open(file, &stack_trace_seq_ops); | ||
307 | |||
308 | return ret; | ||
309 | } | 313 | } |
310 | 314 | ||
311 | static const struct file_operations stack_trace_fops = { | 315 | static const struct file_operations stack_trace_fops = { |
312 | .open = stack_trace_open, | 316 | .open = stack_trace_open, |
313 | .read = seq_read, | 317 | .read = seq_read, |
314 | .llseek = seq_lseek, | 318 | .llseek = seq_lseek, |
319 | .release = seq_release, | ||
315 | }; | 320 | }; |
316 | 321 | ||
317 | int | 322 | int |
318 | stack_trace_sysctl(struct ctl_table *table, int write, | 323 | stack_trace_sysctl(struct ctl_table *table, int write, |
319 | struct file *file, void __user *buffer, size_t *lenp, | 324 | void __user *buffer, size_t *lenp, |
320 | loff_t *ppos) | 325 | loff_t *ppos) |
321 | { | 326 | { |
322 | int ret; | 327 | int ret; |
323 | 328 | ||
324 | mutex_lock(&stack_sysctl_mutex); | 329 | mutex_lock(&stack_sysctl_mutex); |
325 | 330 | ||
326 | ret = proc_dointvec(table, write, file, buffer, lenp, ppos); | 331 | ret = proc_dointvec(table, write, buffer, lenp, ppos); |
327 | 332 | ||
328 | if (ret || !write || | 333 | if (ret || !write || |
329 | (last_stack_tracer_enabled == stack_tracer_enabled)) | 334 | (last_stack_tracer_enabled == !!stack_tracer_enabled)) |
330 | goto out; | 335 | goto out; |
331 | 336 | ||
332 | last_stack_tracer_enabled = stack_tracer_enabled; | 337 | last_stack_tracer_enabled = !!stack_tracer_enabled; |
333 | 338 | ||
334 | if (stack_tracer_enabled) | 339 | if (stack_tracer_enabled) |
335 | register_ftrace_function(&trace_ops); | 340 | register_ftrace_function(&trace_ops); |
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index c00643733f4c..96cffb269e73 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c | |||
@@ -10,6 +10,7 @@ | |||
10 | 10 | ||
11 | 11 | ||
12 | #include <linux/list.h> | 12 | #include <linux/list.h> |
13 | #include <linux/slab.h> | ||
13 | #include <linux/rbtree.h> | 14 | #include <linux/rbtree.h> |
14 | #include <linux/debugfs.h> | 15 | #include <linux/debugfs.h> |
15 | #include "trace_stat.h" | 16 | #include "trace_stat.h" |
@@ -49,7 +50,8 @@ static struct dentry *stat_dir; | |||
49 | * but it will at least advance closer to the next one | 50 | * but it will at least advance closer to the next one |
50 | * to be released. | 51 | * to be released. |
51 | */ | 52 | */ |
52 | static struct rb_node *release_next(struct rb_node *node) | 53 | static struct rb_node *release_next(struct tracer_stat *ts, |
54 | struct rb_node *node) | ||
53 | { | 55 | { |
54 | struct stat_node *snode; | 56 | struct stat_node *snode; |
55 | struct rb_node *parent = rb_parent(node); | 57 | struct rb_node *parent = rb_parent(node); |
@@ -67,26 +69,35 @@ static struct rb_node *release_next(struct rb_node *node) | |||
67 | parent->rb_right = NULL; | 69 | parent->rb_right = NULL; |
68 | 70 | ||
69 | snode = container_of(node, struct stat_node, node); | 71 | snode = container_of(node, struct stat_node, node); |
72 | if (ts->stat_release) | ||
73 | ts->stat_release(snode->stat); | ||
70 | kfree(snode); | 74 | kfree(snode); |
71 | 75 | ||
72 | return parent; | 76 | return parent; |
73 | } | 77 | } |
74 | } | 78 | } |
75 | 79 | ||
76 | static void reset_stat_session(struct stat_session *session) | 80 | static void __reset_stat_session(struct stat_session *session) |
77 | { | 81 | { |
78 | struct rb_node *node = session->stat_root.rb_node; | 82 | struct rb_node *node = session->stat_root.rb_node; |
79 | 83 | ||
80 | while (node) | 84 | while (node) |
81 | node = release_next(node); | 85 | node = release_next(session->ts, node); |
82 | 86 | ||
83 | session->stat_root = RB_ROOT; | 87 | session->stat_root = RB_ROOT; |
84 | } | 88 | } |
85 | 89 | ||
90 | static void reset_stat_session(struct stat_session *session) | ||
91 | { | ||
92 | mutex_lock(&session->stat_mutex); | ||
93 | __reset_stat_session(session); | ||
94 | mutex_unlock(&session->stat_mutex); | ||
95 | } | ||
96 | |||
86 | static void destroy_session(struct stat_session *session) | 97 | static void destroy_session(struct stat_session *session) |
87 | { | 98 | { |
88 | debugfs_remove(session->file); | 99 | debugfs_remove(session->file); |
89 | reset_stat_session(session); | 100 | __reset_stat_session(session); |
90 | mutex_destroy(&session->stat_mutex); | 101 | mutex_destroy(&session->stat_mutex); |
91 | kfree(session); | 102 | kfree(session); |
92 | } | 103 | } |
@@ -150,7 +161,7 @@ static int stat_seq_init(struct stat_session *session) | |||
150 | int i; | 161 | int i; |
151 | 162 | ||
152 | mutex_lock(&session->stat_mutex); | 163 | mutex_lock(&session->stat_mutex); |
153 | reset_stat_session(session); | 164 | __reset_stat_session(session); |
154 | 165 | ||
155 | if (!ts->stat_cmp) | 166 | if (!ts->stat_cmp) |
156 | ts->stat_cmp = dummy_cmp; | 167 | ts->stat_cmp = dummy_cmp; |
@@ -183,7 +194,7 @@ exit: | |||
183 | return ret; | 194 | return ret; |
184 | 195 | ||
185 | exit_free_rbtree: | 196 | exit_free_rbtree: |
186 | reset_stat_session(session); | 197 | __reset_stat_session(session); |
187 | mutex_unlock(&session->stat_mutex); | 198 | mutex_unlock(&session->stat_mutex); |
188 | return ret; | 199 | return ret; |
189 | } | 200 | } |
@@ -193,23 +204,23 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos) | |||
193 | { | 204 | { |
194 | struct stat_session *session = s->private; | 205 | struct stat_session *session = s->private; |
195 | struct rb_node *node; | 206 | struct rb_node *node; |
207 | int n = *pos; | ||
196 | int i; | 208 | int i; |
197 | 209 | ||
198 | /* Prevent from tracer switch or rbtree modification */ | 210 | /* Prevent from tracer switch or rbtree modification */ |
199 | mutex_lock(&session->stat_mutex); | 211 | mutex_lock(&session->stat_mutex); |
200 | 212 | ||
201 | /* If we are in the beginning of the file, print the headers */ | 213 | /* If we are in the beginning of the file, print the headers */ |
202 | if (!*pos && session->ts->stat_headers) { | 214 | if (session->ts->stat_headers) { |
203 | (*pos)++; | 215 | if (n == 0) |
204 | return SEQ_START_TOKEN; | 216 | return SEQ_START_TOKEN; |
217 | n--; | ||
205 | } | 218 | } |
206 | 219 | ||
207 | node = rb_first(&session->stat_root); | 220 | node = rb_first(&session->stat_root); |
208 | for (i = 0; node && i < *pos; i++) | 221 | for (i = 0; node && i < n; i++) |
209 | node = rb_next(node); | 222 | node = rb_next(node); |
210 | 223 | ||
211 | (*pos)++; | ||
212 | |||
213 | return node; | 224 | return node; |
214 | } | 225 | } |
215 | 226 | ||
@@ -254,16 +265,21 @@ static const struct seq_operations trace_stat_seq_ops = { | |||
254 | static int tracing_stat_open(struct inode *inode, struct file *file) | 265 | static int tracing_stat_open(struct inode *inode, struct file *file) |
255 | { | 266 | { |
256 | int ret; | 267 | int ret; |
257 | 268 | struct seq_file *m; | |
258 | struct stat_session *session = inode->i_private; | 269 | struct stat_session *session = inode->i_private; |
259 | 270 | ||
271 | ret = stat_seq_init(session); | ||
272 | if (ret) | ||
273 | return ret; | ||
274 | |||
260 | ret = seq_open(file, &trace_stat_seq_ops); | 275 | ret = seq_open(file, &trace_stat_seq_ops); |
261 | if (!ret) { | 276 | if (ret) { |
262 | struct seq_file *m = file->private_data; | 277 | reset_stat_session(session); |
263 | m->private = session; | 278 | return ret; |
264 | ret = stat_seq_init(session); | ||
265 | } | 279 | } |
266 | 280 | ||
281 | m = file->private_data; | ||
282 | m->private = session; | ||
267 | return ret; | 283 | return ret; |
268 | } | 284 | } |
269 | 285 | ||
@@ -274,11 +290,9 @@ static int tracing_stat_release(struct inode *i, struct file *f) | |||
274 | { | 290 | { |
275 | struct stat_session *session = i->i_private; | 291 | struct stat_session *session = i->i_private; |
276 | 292 | ||
277 | mutex_lock(&session->stat_mutex); | ||
278 | reset_stat_session(session); | 293 | reset_stat_session(session); |
279 | mutex_unlock(&session->stat_mutex); | ||
280 | 294 | ||
281 | return 0; | 295 | return seq_release(i, f); |
282 | } | 296 | } |
283 | 297 | ||
284 | static const struct file_operations tracing_stat_fops = { | 298 | static const struct file_operations tracing_stat_fops = { |
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h index f3546a2cd826..8f03914b9a6a 100644 --- a/kernel/trace/trace_stat.h +++ b/kernel/trace/trace_stat.h | |||
@@ -18,6 +18,8 @@ struct tracer_stat { | |||
18 | int (*stat_cmp)(void *p1, void *p2); | 18 | int (*stat_cmp)(void *p1, void *p2); |
19 | /* Print a stat entry */ | 19 | /* Print a stat entry */ |
20 | int (*stat_show)(struct seq_file *s, void *p); | 20 | int (*stat_show)(struct seq_file *s, void *p); |
21 | /* Release an entry */ | ||
22 | void (*stat_release)(void *stat); | ||
21 | /* Print the headers of your stat entries */ | 23 | /* Print the headers of your stat entries */ |
22 | int (*stat_headers)(struct seq_file *s); | 24 | int (*stat_headers)(struct seq_file *s); |
23 | }; | 25 | }; |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 5e579645ac86..bac752f0cfb5 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -1,33 +1,109 @@ | |||
1 | #include <trace/syscall.h> | 1 | #include <trace/syscall.h> |
2 | #include <trace/events/syscalls.h> | ||
3 | #include <linux/slab.h> | ||
2 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/ftrace.h> | ||
6 | #include <linux/perf_event.h> | ||
3 | #include <asm/syscall.h> | 7 | #include <asm/syscall.h> |
4 | 8 | ||
5 | #include "trace_output.h" | 9 | #include "trace_output.h" |
6 | #include "trace.h" | 10 | #include "trace.h" |
7 | 11 | ||
8 | /* Keep a counter of the syscall tracing users */ | ||
9 | static int refcount; | ||
10 | |||
11 | /* Prevent from races on thread flags toggling */ | ||
12 | static DEFINE_MUTEX(syscall_trace_lock); | 12 | static DEFINE_MUTEX(syscall_trace_lock); |
13 | static int sys_refcount_enter; | ||
14 | static int sys_refcount_exit; | ||
15 | static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); | ||
16 | static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); | ||
17 | |||
18 | static int syscall_enter_register(struct ftrace_event_call *event, | ||
19 | enum trace_reg type); | ||
20 | static int syscall_exit_register(struct ftrace_event_call *event, | ||
21 | enum trace_reg type); | ||
22 | |||
23 | static int syscall_enter_define_fields(struct ftrace_event_call *call); | ||
24 | static int syscall_exit_define_fields(struct ftrace_event_call *call); | ||
25 | |||
26 | /* All syscall exit events have the same fields */ | ||
27 | static LIST_HEAD(syscall_exit_fields); | ||
28 | |||
29 | static struct list_head * | ||
30 | syscall_get_enter_fields(struct ftrace_event_call *call) | ||
31 | { | ||
32 | struct syscall_metadata *entry = call->data; | ||
33 | |||
34 | return &entry->enter_fields; | ||
35 | } | ||
36 | |||
37 | static struct list_head * | ||
38 | syscall_get_exit_fields(struct ftrace_event_call *call) | ||
39 | { | ||
40 | return &syscall_exit_fields; | ||
41 | } | ||
42 | |||
43 | struct trace_event_functions enter_syscall_print_funcs = { | ||
44 | .trace = print_syscall_enter, | ||
45 | }; | ||
13 | 46 | ||
14 | /* Option to display the parameters types */ | 47 | struct trace_event_functions exit_syscall_print_funcs = { |
15 | enum { | 48 | .trace = print_syscall_exit, |
16 | TRACE_SYSCALLS_OPT_TYPES = 0x1, | ||
17 | }; | 49 | }; |
18 | 50 | ||
19 | static struct tracer_opt syscalls_opts[] = { | 51 | struct ftrace_event_class event_class_syscall_enter = { |
20 | { TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) }, | 52 | .system = "syscalls", |
21 | { } | 53 | .reg = syscall_enter_register, |
54 | .define_fields = syscall_enter_define_fields, | ||
55 | .get_fields = syscall_get_enter_fields, | ||
56 | .raw_init = init_syscall_trace, | ||
22 | }; | 57 | }; |
23 | 58 | ||
24 | static struct tracer_flags syscalls_flags = { | 59 | struct ftrace_event_class event_class_syscall_exit = { |
25 | .val = 0, /* By default: no parameters types */ | 60 | .system = "syscalls", |
26 | .opts = syscalls_opts | 61 | .reg = syscall_exit_register, |
62 | .define_fields = syscall_exit_define_fields, | ||
63 | .get_fields = syscall_get_exit_fields, | ||
64 | .raw_init = init_syscall_trace, | ||
27 | }; | 65 | }; |
28 | 66 | ||
67 | extern unsigned long __start_syscalls_metadata[]; | ||
68 | extern unsigned long __stop_syscalls_metadata[]; | ||
69 | |||
70 | static struct syscall_metadata **syscalls_metadata; | ||
71 | |||
72 | static struct syscall_metadata *find_syscall_meta(unsigned long syscall) | ||
73 | { | ||
74 | struct syscall_metadata *start; | ||
75 | struct syscall_metadata *stop; | ||
76 | char str[KSYM_SYMBOL_LEN]; | ||
77 | |||
78 | |||
79 | start = (struct syscall_metadata *)__start_syscalls_metadata; | ||
80 | stop = (struct syscall_metadata *)__stop_syscalls_metadata; | ||
81 | kallsyms_lookup(syscall, NULL, NULL, NULL, str); | ||
82 | |||
83 | for ( ; start < stop; start++) { | ||
84 | /* | ||
85 | * Only compare after the "sys" prefix. Archs that use | ||
86 | * syscall wrappers may have syscalls symbols aliases prefixed | ||
87 | * with "SyS" instead of "sys", leading to an unwanted | ||
88 | * mismatch. | ||
89 | */ | ||
90 | if (start->name && !strcmp(start->name + 3, str + 3)) | ||
91 | return start; | ||
92 | } | ||
93 | return NULL; | ||
94 | } | ||
95 | |||
96 | static struct syscall_metadata *syscall_nr_to_meta(int nr) | ||
97 | { | ||
98 | if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) | ||
99 | return NULL; | ||
100 | |||
101 | return syscalls_metadata[nr]; | ||
102 | } | ||
103 | |||
29 | enum print_line_t | 104 | enum print_line_t |
30 | print_syscall_enter(struct trace_iterator *iter, int flags) | 105 | print_syscall_enter(struct trace_iterator *iter, int flags, |
106 | struct trace_event *event) | ||
31 | { | 107 | { |
32 | struct trace_seq *s = &iter->seq; | 108 | struct trace_seq *s = &iter->seq; |
33 | struct trace_entry *ent = iter->ent; | 109 | struct trace_entry *ent = iter->ent; |
@@ -35,40 +111,52 @@ print_syscall_enter(struct trace_iterator *iter, int flags) | |||
35 | struct syscall_metadata *entry; | 111 | struct syscall_metadata *entry; |
36 | int i, ret, syscall; | 112 | int i, ret, syscall; |
37 | 113 | ||
38 | trace_assign_type(trace, ent); | 114 | trace = (typeof(trace))ent; |
39 | |||
40 | syscall = trace->nr; | 115 | syscall = trace->nr; |
41 | |||
42 | entry = syscall_nr_to_meta(syscall); | 116 | entry = syscall_nr_to_meta(syscall); |
117 | |||
43 | if (!entry) | 118 | if (!entry) |
44 | goto end; | 119 | goto end; |
45 | 120 | ||
121 | if (entry->enter_event->event.type != ent->type) { | ||
122 | WARN_ON_ONCE(1); | ||
123 | goto end; | ||
124 | } | ||
125 | |||
46 | ret = trace_seq_printf(s, "%s(", entry->name); | 126 | ret = trace_seq_printf(s, "%s(", entry->name); |
47 | if (!ret) | 127 | if (!ret) |
48 | return TRACE_TYPE_PARTIAL_LINE; | 128 | return TRACE_TYPE_PARTIAL_LINE; |
49 | 129 | ||
50 | for (i = 0; i < entry->nb_args; i++) { | 130 | for (i = 0; i < entry->nb_args; i++) { |
51 | /* parameter types */ | 131 | /* parameter types */ |
52 | if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) { | 132 | if (trace_flags & TRACE_ITER_VERBOSE) { |
53 | ret = trace_seq_printf(s, "%s ", entry->types[i]); | 133 | ret = trace_seq_printf(s, "%s ", entry->types[i]); |
54 | if (!ret) | 134 | if (!ret) |
55 | return TRACE_TYPE_PARTIAL_LINE; | 135 | return TRACE_TYPE_PARTIAL_LINE; |
56 | } | 136 | } |
57 | /* parameter values */ | 137 | /* parameter values */ |
58 | ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i], | 138 | ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i], |
59 | trace->args[i], | 139 | trace->args[i], |
60 | i == entry->nb_args - 1 ? ")" : ","); | 140 | i == entry->nb_args - 1 ? "" : ", "); |
61 | if (!ret) | 141 | if (!ret) |
62 | return TRACE_TYPE_PARTIAL_LINE; | 142 | return TRACE_TYPE_PARTIAL_LINE; |
63 | } | 143 | } |
64 | 144 | ||
145 | ret = trace_seq_putc(s, ')'); | ||
146 | if (!ret) | ||
147 | return TRACE_TYPE_PARTIAL_LINE; | ||
148 | |||
65 | end: | 149 | end: |
66 | trace_seq_printf(s, "\n"); | 150 | ret = trace_seq_putc(s, '\n'); |
151 | if (!ret) | ||
152 | return TRACE_TYPE_PARTIAL_LINE; | ||
153 | |||
67 | return TRACE_TYPE_HANDLED; | 154 | return TRACE_TYPE_HANDLED; |
68 | } | 155 | } |
69 | 156 | ||
70 | enum print_line_t | 157 | enum print_line_t |
71 | print_syscall_exit(struct trace_iterator *iter, int flags) | 158 | print_syscall_exit(struct trace_iterator *iter, int flags, |
159 | struct trace_event *event) | ||
72 | { | 160 | { |
73 | struct trace_seq *s = &iter->seq; | 161 | struct trace_seq *s = &iter->seq; |
74 | struct trace_entry *ent = iter->ent; | 162 | struct trace_entry *ent = iter->ent; |
@@ -77,16 +165,20 @@ print_syscall_exit(struct trace_iterator *iter, int flags) | |||
77 | struct syscall_metadata *entry; | 165 | struct syscall_metadata *entry; |
78 | int ret; | 166 | int ret; |
79 | 167 | ||
80 | trace_assign_type(trace, ent); | 168 | trace = (typeof(trace))ent; |
81 | |||
82 | syscall = trace->nr; | 169 | syscall = trace->nr; |
83 | |||
84 | entry = syscall_nr_to_meta(syscall); | 170 | entry = syscall_nr_to_meta(syscall); |
171 | |||
85 | if (!entry) { | 172 | if (!entry) { |
86 | trace_seq_printf(s, "\n"); | 173 | trace_seq_printf(s, "\n"); |
87 | return TRACE_TYPE_HANDLED; | 174 | return TRACE_TYPE_HANDLED; |
88 | } | 175 | } |
89 | 176 | ||
177 | if (entry->exit_event->event.type != ent->type) { | ||
178 | WARN_ON_ONCE(1); | ||
179 | return TRACE_TYPE_UNHANDLED; | ||
180 | } | ||
181 | |||
90 | ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, | 182 | ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, |
91 | trace->ret); | 183 | trace->ret); |
92 | if (!ret) | 184 | if (!ret) |
@@ -95,62 +187,127 @@ print_syscall_exit(struct trace_iterator *iter, int flags) | |||
95 | return TRACE_TYPE_HANDLED; | 187 | return TRACE_TYPE_HANDLED; |
96 | } | 188 | } |
97 | 189 | ||
98 | void start_ftrace_syscalls(void) | 190 | extern char *__bad_type_size(void); |
191 | |||
192 | #define SYSCALL_FIELD(type, name) \ | ||
193 | sizeof(type) != sizeof(trace.name) ? \ | ||
194 | __bad_type_size() : \ | ||
195 | #type, #name, offsetof(typeof(trace), name), \ | ||
196 | sizeof(trace.name), is_signed_type(type) | ||
197 | |||
198 | static | ||
199 | int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) | ||
99 | { | 200 | { |
100 | unsigned long flags; | 201 | int i; |
101 | struct task_struct *g, *t; | 202 | int pos = 0; |
102 | 203 | ||
103 | mutex_lock(&syscall_trace_lock); | 204 | /* When len=0, we just calculate the needed length */ |
205 | #define LEN_OR_ZERO (len ? len - pos : 0) | ||
206 | |||
207 | pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); | ||
208 | for (i = 0; i < entry->nb_args; i++) { | ||
209 | pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s", | ||
210 | entry->args[i], sizeof(unsigned long), | ||
211 | i == entry->nb_args - 1 ? "" : ", "); | ||
212 | } | ||
213 | pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); | ||
214 | |||
215 | for (i = 0; i < entry->nb_args; i++) { | ||
216 | pos += snprintf(buf + pos, LEN_OR_ZERO, | ||
217 | ", ((unsigned long)(REC->%s))", entry->args[i]); | ||
218 | } | ||
104 | 219 | ||
105 | /* Don't enable the flag on the tasks twice */ | 220 | #undef LEN_OR_ZERO |
106 | if (++refcount != 1) | ||
107 | goto unlock; | ||
108 | 221 | ||
109 | arch_init_ftrace_syscalls(); | 222 | /* return the length of print_fmt */ |
110 | read_lock_irqsave(&tasklist_lock, flags); | 223 | return pos; |
224 | } | ||
111 | 225 | ||
112 | do_each_thread(g, t) { | 226 | static int set_syscall_print_fmt(struct ftrace_event_call *call) |
113 | set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); | 227 | { |
114 | } while_each_thread(g, t); | 228 | char *print_fmt; |
229 | int len; | ||
230 | struct syscall_metadata *entry = call->data; | ||
115 | 231 | ||
116 | read_unlock_irqrestore(&tasklist_lock, flags); | 232 | if (entry->enter_event != call) { |
233 | call->print_fmt = "\"0x%lx\", REC->ret"; | ||
234 | return 0; | ||
235 | } | ||
117 | 236 | ||
118 | unlock: | 237 | /* First: called with 0 length to calculate the needed length */ |
119 | mutex_unlock(&syscall_trace_lock); | 238 | len = __set_enter_print_fmt(entry, NULL, 0); |
239 | |||
240 | print_fmt = kmalloc(len + 1, GFP_KERNEL); | ||
241 | if (!print_fmt) | ||
242 | return -ENOMEM; | ||
243 | |||
244 | /* Second: actually write the @print_fmt */ | ||
245 | __set_enter_print_fmt(entry, print_fmt, len + 1); | ||
246 | call->print_fmt = print_fmt; | ||
247 | |||
248 | return 0; | ||
120 | } | 249 | } |
121 | 250 | ||
122 | void stop_ftrace_syscalls(void) | 251 | static void free_syscall_print_fmt(struct ftrace_event_call *call) |
123 | { | 252 | { |
124 | unsigned long flags; | 253 | struct syscall_metadata *entry = call->data; |
125 | struct task_struct *g, *t; | ||
126 | 254 | ||
127 | mutex_lock(&syscall_trace_lock); | 255 | if (entry->enter_event == call) |
256 | kfree(call->print_fmt); | ||
257 | } | ||
128 | 258 | ||
129 | /* There are perhaps still some users */ | 259 | static int syscall_enter_define_fields(struct ftrace_event_call *call) |
130 | if (--refcount) | 260 | { |
131 | goto unlock; | 261 | struct syscall_trace_enter trace; |
262 | struct syscall_metadata *meta = call->data; | ||
263 | int ret; | ||
264 | int i; | ||
265 | int offset = offsetof(typeof(trace), args); | ||
266 | |||
267 | ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); | ||
268 | if (ret) | ||
269 | return ret; | ||
270 | |||
271 | for (i = 0; i < meta->nb_args; i++) { | ||
272 | ret = trace_define_field(call, meta->types[i], | ||
273 | meta->args[i], offset, | ||
274 | sizeof(unsigned long), 0, | ||
275 | FILTER_OTHER); | ||
276 | offset += sizeof(unsigned long); | ||
277 | } | ||
132 | 278 | ||
133 | read_lock_irqsave(&tasklist_lock, flags); | 279 | return ret; |
280 | } | ||
134 | 281 | ||
135 | do_each_thread(g, t) { | 282 | static int syscall_exit_define_fields(struct ftrace_event_call *call) |
136 | clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); | 283 | { |
137 | } while_each_thread(g, t); | 284 | struct syscall_trace_exit trace; |
285 | int ret; | ||
138 | 286 | ||
139 | read_unlock_irqrestore(&tasklist_lock, flags); | 287 | ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); |
288 | if (ret) | ||
289 | return ret; | ||
140 | 290 | ||
141 | unlock: | 291 | ret = trace_define_field(call, SYSCALL_FIELD(long, ret), |
142 | mutex_unlock(&syscall_trace_lock); | 292 | FILTER_OTHER); |
293 | |||
294 | return ret; | ||
143 | } | 295 | } |
144 | 296 | ||
145 | void ftrace_syscall_enter(struct pt_regs *regs) | 297 | void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) |
146 | { | 298 | { |
147 | struct syscall_trace_enter *entry; | 299 | struct syscall_trace_enter *entry; |
148 | struct syscall_metadata *sys_data; | 300 | struct syscall_metadata *sys_data; |
149 | struct ring_buffer_event *event; | 301 | struct ring_buffer_event *event; |
302 | struct ring_buffer *buffer; | ||
150 | int size; | 303 | int size; |
151 | int syscall_nr; | 304 | int syscall_nr; |
152 | 305 | ||
153 | syscall_nr = syscall_get_nr(current, regs); | 306 | syscall_nr = syscall_get_nr(current, regs); |
307 | if (syscall_nr < 0) | ||
308 | return; | ||
309 | if (!test_bit(syscall_nr, enabled_enter_syscalls)) | ||
310 | return; | ||
154 | 311 | ||
155 | sys_data = syscall_nr_to_meta(syscall_nr); | 312 | sys_data = syscall_nr_to_meta(syscall_nr); |
156 | if (!sys_data) | 313 | if (!sys_data) |
@@ -158,8 +315,8 @@ void ftrace_syscall_enter(struct pt_regs *regs) | |||
158 | 315 | ||
159 | size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; | 316 | size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; |
160 | 317 | ||
161 | event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size, | 318 | event = trace_current_buffer_lock_reserve(&buffer, |
162 | 0, 0); | 319 | sys_data->enter_event->event.type, size, 0, 0); |
163 | if (!event) | 320 | if (!event) |
164 | return; | 321 | return; |
165 | 322 | ||
@@ -167,25 +324,31 @@ void ftrace_syscall_enter(struct pt_regs *regs) | |||
167 | entry->nr = syscall_nr; | 324 | entry->nr = syscall_nr; |
168 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); | 325 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); |
169 | 326 | ||
170 | trace_current_buffer_unlock_commit(event, 0, 0); | 327 | if (!filter_current_check_discard(buffer, sys_data->enter_event, |
171 | trace_wake_up(); | 328 | entry, event)) |
329 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); | ||
172 | } | 330 | } |
173 | 331 | ||
174 | void ftrace_syscall_exit(struct pt_regs *regs) | 332 | void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) |
175 | { | 333 | { |
176 | struct syscall_trace_exit *entry; | 334 | struct syscall_trace_exit *entry; |
177 | struct syscall_metadata *sys_data; | 335 | struct syscall_metadata *sys_data; |
178 | struct ring_buffer_event *event; | 336 | struct ring_buffer_event *event; |
337 | struct ring_buffer *buffer; | ||
179 | int syscall_nr; | 338 | int syscall_nr; |
180 | 339 | ||
181 | syscall_nr = syscall_get_nr(current, regs); | 340 | syscall_nr = syscall_get_nr(current, regs); |
341 | if (syscall_nr < 0) | ||
342 | return; | ||
343 | if (!test_bit(syscall_nr, enabled_exit_syscalls)) | ||
344 | return; | ||
182 | 345 | ||
183 | sys_data = syscall_nr_to_meta(syscall_nr); | 346 | sys_data = syscall_nr_to_meta(syscall_nr); |
184 | if (!sys_data) | 347 | if (!sys_data) |
185 | return; | 348 | return; |
186 | 349 | ||
187 | event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT, | 350 | event = trace_current_buffer_lock_reserve(&buffer, |
188 | sizeof(*entry), 0, 0); | 351 | sys_data->exit_event->event.type, sizeof(*entry), 0, 0); |
189 | if (!event) | 352 | if (!event) |
190 | return; | 353 | return; |
191 | 354 | ||
@@ -193,58 +356,325 @@ void ftrace_syscall_exit(struct pt_regs *regs) | |||
193 | entry->nr = syscall_nr; | 356 | entry->nr = syscall_nr; |
194 | entry->ret = syscall_get_return_value(current, regs); | 357 | entry->ret = syscall_get_return_value(current, regs); |
195 | 358 | ||
196 | trace_current_buffer_unlock_commit(event, 0, 0); | 359 | if (!filter_current_check_discard(buffer, sys_data->exit_event, |
197 | trace_wake_up(); | 360 | entry, event)) |
361 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); | ||
198 | } | 362 | } |
199 | 363 | ||
200 | static int init_syscall_tracer(struct trace_array *tr) | 364 | int reg_event_syscall_enter(struct ftrace_event_call *call) |
201 | { | 365 | { |
202 | start_ftrace_syscalls(); | 366 | int ret = 0; |
367 | int num; | ||
368 | |||
369 | num = ((struct syscall_metadata *)call->data)->syscall_nr; | ||
370 | if (num < 0 || num >= NR_syscalls) | ||
371 | return -ENOSYS; | ||
372 | mutex_lock(&syscall_trace_lock); | ||
373 | if (!sys_refcount_enter) | ||
374 | ret = register_trace_sys_enter(ftrace_syscall_enter, NULL); | ||
375 | if (!ret) { | ||
376 | set_bit(num, enabled_enter_syscalls); | ||
377 | sys_refcount_enter++; | ||
378 | } | ||
379 | mutex_unlock(&syscall_trace_lock); | ||
380 | return ret; | ||
381 | } | ||
382 | |||
383 | void unreg_event_syscall_enter(struct ftrace_event_call *call) | ||
384 | { | ||
385 | int num; | ||
386 | |||
387 | num = ((struct syscall_metadata *)call->data)->syscall_nr; | ||
388 | if (num < 0 || num >= NR_syscalls) | ||
389 | return; | ||
390 | mutex_lock(&syscall_trace_lock); | ||
391 | sys_refcount_enter--; | ||
392 | clear_bit(num, enabled_enter_syscalls); | ||
393 | if (!sys_refcount_enter) | ||
394 | unregister_trace_sys_enter(ftrace_syscall_enter, NULL); | ||
395 | mutex_unlock(&syscall_trace_lock); | ||
396 | } | ||
397 | |||
398 | int reg_event_syscall_exit(struct ftrace_event_call *call) | ||
399 | { | ||
400 | int ret = 0; | ||
401 | int num; | ||
402 | |||
403 | num = ((struct syscall_metadata *)call->data)->syscall_nr; | ||
404 | if (num < 0 || num >= NR_syscalls) | ||
405 | return -ENOSYS; | ||
406 | mutex_lock(&syscall_trace_lock); | ||
407 | if (!sys_refcount_exit) | ||
408 | ret = register_trace_sys_exit(ftrace_syscall_exit, NULL); | ||
409 | if (!ret) { | ||
410 | set_bit(num, enabled_exit_syscalls); | ||
411 | sys_refcount_exit++; | ||
412 | } | ||
413 | mutex_unlock(&syscall_trace_lock); | ||
414 | return ret; | ||
415 | } | ||
416 | |||
417 | void unreg_event_syscall_exit(struct ftrace_event_call *call) | ||
418 | { | ||
419 | int num; | ||
420 | |||
421 | num = ((struct syscall_metadata *)call->data)->syscall_nr; | ||
422 | if (num < 0 || num >= NR_syscalls) | ||
423 | return; | ||
424 | mutex_lock(&syscall_trace_lock); | ||
425 | sys_refcount_exit--; | ||
426 | clear_bit(num, enabled_exit_syscalls); | ||
427 | if (!sys_refcount_exit) | ||
428 | unregister_trace_sys_exit(ftrace_syscall_exit, NULL); | ||
429 | mutex_unlock(&syscall_trace_lock); | ||
430 | } | ||
431 | |||
432 | int init_syscall_trace(struct ftrace_event_call *call) | ||
433 | { | ||
434 | int id; | ||
435 | |||
436 | if (set_syscall_print_fmt(call) < 0) | ||
437 | return -ENOMEM; | ||
438 | |||
439 | id = trace_event_raw_init(call); | ||
440 | |||
441 | if (id < 0) { | ||
442 | free_syscall_print_fmt(call); | ||
443 | return id; | ||
444 | } | ||
445 | |||
446 | return id; | ||
447 | } | ||
448 | |||
449 | unsigned long __init arch_syscall_addr(int nr) | ||
450 | { | ||
451 | return (unsigned long)sys_call_table[nr]; | ||
452 | } | ||
453 | |||
454 | int __init init_ftrace_syscalls(void) | ||
455 | { | ||
456 | struct syscall_metadata *meta; | ||
457 | unsigned long addr; | ||
458 | int i; | ||
459 | |||
460 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * | ||
461 | NR_syscalls, GFP_KERNEL); | ||
462 | if (!syscalls_metadata) { | ||
463 | WARN_ON(1); | ||
464 | return -ENOMEM; | ||
465 | } | ||
466 | |||
467 | for (i = 0; i < NR_syscalls; i++) { | ||
468 | addr = arch_syscall_addr(i); | ||
469 | meta = find_syscall_meta(addr); | ||
470 | if (!meta) | ||
471 | continue; | ||
472 | |||
473 | meta->syscall_nr = i; | ||
474 | syscalls_metadata[i] = meta; | ||
475 | } | ||
203 | 476 | ||
204 | return 0; | 477 | return 0; |
205 | } | 478 | } |
479 | core_initcall(init_ftrace_syscalls); | ||
480 | |||
481 | #ifdef CONFIG_PERF_EVENTS | ||
482 | |||
483 | static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls); | ||
484 | static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); | ||
485 | static int sys_perf_refcount_enter; | ||
486 | static int sys_perf_refcount_exit; | ||
206 | 487 | ||
207 | static void reset_syscall_tracer(struct trace_array *tr) | 488 | static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) |
208 | { | 489 | { |
209 | stop_ftrace_syscalls(); | 490 | struct syscall_metadata *sys_data; |
210 | tracing_reset_online_cpus(tr); | 491 | struct syscall_trace_enter *rec; |
492 | struct hlist_head *head; | ||
493 | int syscall_nr; | ||
494 | int rctx; | ||
495 | int size; | ||
496 | |||
497 | syscall_nr = syscall_get_nr(current, regs); | ||
498 | if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) | ||
499 | return; | ||
500 | |||
501 | sys_data = syscall_nr_to_meta(syscall_nr); | ||
502 | if (!sys_data) | ||
503 | return; | ||
504 | |||
505 | /* get the size after alignment with the u32 buffer size field */ | ||
506 | size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); | ||
507 | size = ALIGN(size + sizeof(u32), sizeof(u64)); | ||
508 | size -= sizeof(u32); | ||
509 | |||
510 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, | ||
511 | "perf buffer not large enough")) | ||
512 | return; | ||
513 | |||
514 | rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, | ||
515 | sys_data->enter_event->event.type, regs, &rctx); | ||
516 | if (!rec) | ||
517 | return; | ||
518 | |||
519 | rec->nr = syscall_nr; | ||
520 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | ||
521 | (unsigned long *)&rec->args); | ||
522 | |||
523 | head = this_cpu_ptr(sys_data->enter_event->perf_events); | ||
524 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); | ||
211 | } | 525 | } |
212 | 526 | ||
213 | static struct trace_event syscall_enter_event = { | 527 | int perf_sysenter_enable(struct ftrace_event_call *call) |
214 | .type = TRACE_SYSCALL_ENTER, | 528 | { |
215 | .trace = print_syscall_enter, | 529 | int ret = 0; |
216 | }; | 530 | int num; |
217 | 531 | ||
218 | static struct trace_event syscall_exit_event = { | 532 | num = ((struct syscall_metadata *)call->data)->syscall_nr; |
219 | .type = TRACE_SYSCALL_EXIT, | ||
220 | .trace = print_syscall_exit, | ||
221 | }; | ||
222 | 533 | ||
223 | static struct tracer syscall_tracer __read_mostly = { | 534 | mutex_lock(&syscall_trace_lock); |
224 | .name = "syscall", | 535 | if (!sys_perf_refcount_enter) |
225 | .init = init_syscall_tracer, | 536 | ret = register_trace_sys_enter(perf_syscall_enter, NULL); |
226 | .reset = reset_syscall_tracer, | 537 | if (ret) { |
227 | .flags = &syscalls_flags, | 538 | pr_info("event trace: Could not activate" |
228 | }; | 539 | "syscall entry trace point"); |
540 | } else { | ||
541 | set_bit(num, enabled_perf_enter_syscalls); | ||
542 | sys_perf_refcount_enter++; | ||
543 | } | ||
544 | mutex_unlock(&syscall_trace_lock); | ||
545 | return ret; | ||
546 | } | ||
229 | 547 | ||
230 | __init int register_ftrace_syscalls(void) | 548 | void perf_sysenter_disable(struct ftrace_event_call *call) |
231 | { | 549 | { |
232 | int ret; | 550 | int num; |
233 | 551 | ||
234 | ret = register_ftrace_event(&syscall_enter_event); | 552 | num = ((struct syscall_metadata *)call->data)->syscall_nr; |
235 | if (!ret) { | 553 | |
236 | printk(KERN_WARNING "event %d failed to register\n", | 554 | mutex_lock(&syscall_trace_lock); |
237 | syscall_enter_event.type); | 555 | sys_perf_refcount_enter--; |
238 | WARN_ON_ONCE(1); | 556 | clear_bit(num, enabled_perf_enter_syscalls); |
557 | if (!sys_perf_refcount_enter) | ||
558 | unregister_trace_sys_enter(perf_syscall_enter, NULL); | ||
559 | mutex_unlock(&syscall_trace_lock); | ||
560 | } | ||
561 | |||
562 | static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | ||
563 | { | ||
564 | struct syscall_metadata *sys_data; | ||
565 | struct syscall_trace_exit *rec; | ||
566 | struct hlist_head *head; | ||
567 | int syscall_nr; | ||
568 | int rctx; | ||
569 | int size; | ||
570 | |||
571 | syscall_nr = syscall_get_nr(current, regs); | ||
572 | if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) | ||
573 | return; | ||
574 | |||
575 | sys_data = syscall_nr_to_meta(syscall_nr); | ||
576 | if (!sys_data) | ||
577 | return; | ||
578 | |||
579 | /* We can probably do that at build time */ | ||
580 | size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); | ||
581 | size -= sizeof(u32); | ||
582 | |||
583 | /* | ||
584 | * Impossible, but be paranoid with the future | ||
585 | * How to put this check outside runtime? | ||
586 | */ | ||
587 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, | ||
588 | "exit event has grown above perf buffer size")) | ||
589 | return; | ||
590 | |||
591 | rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, | ||
592 | sys_data->exit_event->event.type, regs, &rctx); | ||
593 | if (!rec) | ||
594 | return; | ||
595 | |||
596 | rec->nr = syscall_nr; | ||
597 | rec->ret = syscall_get_return_value(current, regs); | ||
598 | |||
599 | head = this_cpu_ptr(sys_data->exit_event->perf_events); | ||
600 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); | ||
601 | } | ||
602 | |||
603 | int perf_sysexit_enable(struct ftrace_event_call *call) | ||
604 | { | ||
605 | int ret = 0; | ||
606 | int num; | ||
607 | |||
608 | num = ((struct syscall_metadata *)call->data)->syscall_nr; | ||
609 | |||
610 | mutex_lock(&syscall_trace_lock); | ||
611 | if (!sys_perf_refcount_exit) | ||
612 | ret = register_trace_sys_exit(perf_syscall_exit, NULL); | ||
613 | if (ret) { | ||
614 | pr_info("event trace: Could not activate" | ||
615 | "syscall exit trace point"); | ||
616 | } else { | ||
617 | set_bit(num, enabled_perf_exit_syscalls); | ||
618 | sys_perf_refcount_exit++; | ||
239 | } | 619 | } |
620 | mutex_unlock(&syscall_trace_lock); | ||
621 | return ret; | ||
622 | } | ||
240 | 623 | ||
241 | ret = register_ftrace_event(&syscall_exit_event); | 624 | void perf_sysexit_disable(struct ftrace_event_call *call) |
242 | if (!ret) { | 625 | { |
243 | printk(KERN_WARNING "event %d failed to register\n", | 626 | int num; |
244 | syscall_exit_event.type); | 627 | |
245 | WARN_ON_ONCE(1); | 628 | num = ((struct syscall_metadata *)call->data)->syscall_nr; |
629 | |||
630 | mutex_lock(&syscall_trace_lock); | ||
631 | sys_perf_refcount_exit--; | ||
632 | clear_bit(num, enabled_perf_exit_syscalls); | ||
633 | if (!sys_perf_refcount_exit) | ||
634 | unregister_trace_sys_exit(perf_syscall_exit, NULL); | ||
635 | mutex_unlock(&syscall_trace_lock); | ||
636 | } | ||
637 | |||
638 | #endif /* CONFIG_PERF_EVENTS */ | ||
639 | |||
640 | static int syscall_enter_register(struct ftrace_event_call *event, | ||
641 | enum trace_reg type) | ||
642 | { | ||
643 | switch (type) { | ||
644 | case TRACE_REG_REGISTER: | ||
645 | return reg_event_syscall_enter(event); | ||
646 | case TRACE_REG_UNREGISTER: | ||
647 | unreg_event_syscall_enter(event); | ||
648 | return 0; | ||
649 | |||
650 | #ifdef CONFIG_PERF_EVENTS | ||
651 | case TRACE_REG_PERF_REGISTER: | ||
652 | return perf_sysenter_enable(event); | ||
653 | case TRACE_REG_PERF_UNREGISTER: | ||
654 | perf_sysenter_disable(event); | ||
655 | return 0; | ||
656 | #endif | ||
246 | } | 657 | } |
658 | return 0; | ||
659 | } | ||
247 | 660 | ||
248 | return register_tracer(&syscall_tracer); | 661 | static int syscall_exit_register(struct ftrace_event_call *event, |
662 | enum trace_reg type) | ||
663 | { | ||
664 | switch (type) { | ||
665 | case TRACE_REG_REGISTER: | ||
666 | return reg_event_syscall_exit(event); | ||
667 | case TRACE_REG_UNREGISTER: | ||
668 | unreg_event_syscall_exit(event); | ||
669 | return 0; | ||
670 | |||
671 | #ifdef CONFIG_PERF_EVENTS | ||
672 | case TRACE_REG_PERF_REGISTER: | ||
673 | return perf_sysexit_enable(event); | ||
674 | case TRACE_REG_PERF_UNREGISTER: | ||
675 | perf_sysexit_disable(event); | ||
676 | return 0; | ||
677 | #endif | ||
678 | } | ||
679 | return 0; | ||
249 | } | 680 | } |
250 | device_initcall(register_ftrace_syscalls); | ||
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c deleted file mode 100644 index f6693969287d..000000000000 --- a/kernel/trace/trace_sysprof.c +++ /dev/null | |||
@@ -1,328 +0,0 @@ | |||
1 | /* | ||
2 | * trace stack traces | ||
3 | * | ||
4 | * Copyright (C) 2004-2008, Soeren Sandmann | ||
5 | * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com> | ||
6 | * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> | ||
7 | */ | ||
8 | #include <linux/kallsyms.h> | ||
9 | #include <linux/debugfs.h> | ||
10 | #include <linux/hrtimer.h> | ||
11 | #include <linux/uaccess.h> | ||
12 | #include <linux/ftrace.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/irq.h> | ||
15 | #include <linux/fs.h> | ||
16 | |||
17 | #include <asm/stacktrace.h> | ||
18 | |||
19 | #include "trace.h" | ||
20 | |||
21 | static struct trace_array *sysprof_trace; | ||
22 | static int __read_mostly tracer_enabled; | ||
23 | |||
24 | /* | ||
25 | * 1 msec sample interval by default: | ||
26 | */ | ||
27 | static unsigned long sample_period = 1000000; | ||
28 | static const unsigned int sample_max_depth = 512; | ||
29 | |||
30 | static DEFINE_MUTEX(sample_timer_lock); | ||
31 | /* | ||
32 | * Per CPU hrtimers that do the profiling: | ||
33 | */ | ||
34 | static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer); | ||
35 | |||
36 | struct stack_frame { | ||
37 | const void __user *next_fp; | ||
38 | unsigned long return_address; | ||
39 | }; | ||
40 | |||
41 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | ||
42 | { | ||
43 | int ret; | ||
44 | |||
45 | if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) | ||
46 | return 0; | ||
47 | |||
48 | ret = 1; | ||
49 | pagefault_disable(); | ||
50 | if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) | ||
51 | ret = 0; | ||
52 | pagefault_enable(); | ||
53 | |||
54 | return ret; | ||
55 | } | ||
56 | |||
57 | struct backtrace_info { | ||
58 | struct trace_array_cpu *data; | ||
59 | struct trace_array *tr; | ||
60 | int pos; | ||
61 | }; | ||
62 | |||
63 | static void | ||
64 | backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
65 | { | ||
66 | /* Ignore warnings */ | ||
67 | } | ||
68 | |||
69 | static void backtrace_warning(void *data, char *msg) | ||
70 | { | ||
71 | /* Ignore warnings */ | ||
72 | } | ||
73 | |||
74 | static int backtrace_stack(void *data, char *name) | ||
75 | { | ||
76 | /* Don't bother with IRQ stacks for now */ | ||
77 | return -1; | ||
78 | } | ||
79 | |||
80 | static void backtrace_address(void *data, unsigned long addr, int reliable) | ||
81 | { | ||
82 | struct backtrace_info *info = data; | ||
83 | |||
84 | if (info->pos < sample_max_depth && reliable) { | ||
85 | __trace_special(info->tr, info->data, 1, addr, 0); | ||
86 | |||
87 | info->pos++; | ||
88 | } | ||
89 | } | ||
90 | |||
91 | static const struct stacktrace_ops backtrace_ops = { | ||
92 | .warning = backtrace_warning, | ||
93 | .warning_symbol = backtrace_warning_symbol, | ||
94 | .stack = backtrace_stack, | ||
95 | .address = backtrace_address, | ||
96 | }; | ||
97 | |||
98 | static int | ||
99 | trace_kernel(struct pt_regs *regs, struct trace_array *tr, | ||
100 | struct trace_array_cpu *data) | ||
101 | { | ||
102 | struct backtrace_info info; | ||
103 | unsigned long bp; | ||
104 | char *stack; | ||
105 | |||
106 | info.tr = tr; | ||
107 | info.data = data; | ||
108 | info.pos = 1; | ||
109 | |||
110 | __trace_special(info.tr, info.data, 1, regs->ip, 0); | ||
111 | |||
112 | stack = ((char *)regs + sizeof(struct pt_regs)); | ||
113 | #ifdef CONFIG_FRAME_POINTER | ||
114 | bp = regs->bp; | ||
115 | #else | ||
116 | bp = 0; | ||
117 | #endif | ||
118 | |||
119 | dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info); | ||
120 | |||
121 | return info.pos; | ||
122 | } | ||
123 | |||
124 | static void timer_notify(struct pt_regs *regs, int cpu) | ||
125 | { | ||
126 | struct trace_array_cpu *data; | ||
127 | struct stack_frame frame; | ||
128 | struct trace_array *tr; | ||
129 | const void __user *fp; | ||
130 | int is_user; | ||
131 | int i; | ||
132 | |||
133 | if (!regs) | ||
134 | return; | ||
135 | |||
136 | tr = sysprof_trace; | ||
137 | data = tr->data[cpu]; | ||
138 | is_user = user_mode(regs); | ||
139 | |||
140 | if (!current || current->pid == 0) | ||
141 | return; | ||
142 | |||
143 | if (is_user && current->state != TASK_RUNNING) | ||
144 | return; | ||
145 | |||
146 | __trace_special(tr, data, 0, 0, current->pid); | ||
147 | |||
148 | if (!is_user) | ||
149 | i = trace_kernel(regs, tr, data); | ||
150 | else | ||
151 | i = 0; | ||
152 | |||
153 | /* | ||
154 | * Trace user stack if we are not a kernel thread | ||
155 | */ | ||
156 | if (current->mm && i < sample_max_depth) { | ||
157 | regs = (struct pt_regs *)current->thread.sp0 - 1; | ||
158 | |||
159 | fp = (void __user *)regs->bp; | ||
160 | |||
161 | __trace_special(tr, data, 2, regs->ip, 0); | ||
162 | |||
163 | while (i < sample_max_depth) { | ||
164 | frame.next_fp = NULL; | ||
165 | frame.return_address = 0; | ||
166 | if (!copy_stack_frame(fp, &frame)) | ||
167 | break; | ||
168 | if ((unsigned long)fp < regs->sp) | ||
169 | break; | ||
170 | |||
171 | __trace_special(tr, data, 2, frame.return_address, | ||
172 | (unsigned long)fp); | ||
173 | fp = frame.next_fp; | ||
174 | |||
175 | i++; | ||
176 | } | ||
177 | |||
178 | } | ||
179 | |||
180 | /* | ||
181 | * Special trace entry if we overflow the max depth: | ||
182 | */ | ||
183 | if (i == sample_max_depth) | ||
184 | __trace_special(tr, data, -1, -1, -1); | ||
185 | |||
186 | __trace_special(tr, data, 3, current->pid, i); | ||
187 | } | ||
188 | |||
189 | static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer) | ||
190 | { | ||
191 | /* trace here */ | ||
192 | timer_notify(get_irq_regs(), smp_processor_id()); | ||
193 | |||
194 | hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); | ||
195 | |||
196 | return HRTIMER_RESTART; | ||
197 | } | ||
198 | |||
199 | static void start_stack_timer(void *unused) | ||
200 | { | ||
201 | struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer); | ||
202 | |||
203 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
204 | hrtimer->function = stack_trace_timer_fn; | ||
205 | |||
206 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), | ||
207 | HRTIMER_MODE_REL_PINNED); | ||
208 | } | ||
209 | |||
210 | static void start_stack_timers(void) | ||
211 | { | ||
212 | on_each_cpu(start_stack_timer, NULL, 1); | ||
213 | } | ||
214 | |||
215 | static void stop_stack_timer(int cpu) | ||
216 | { | ||
217 | struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); | ||
218 | |||
219 | hrtimer_cancel(hrtimer); | ||
220 | } | ||
221 | |||
222 | static void stop_stack_timers(void) | ||
223 | { | ||
224 | int cpu; | ||
225 | |||
226 | for_each_online_cpu(cpu) | ||
227 | stop_stack_timer(cpu); | ||
228 | } | ||
229 | |||
230 | static void stop_stack_trace(struct trace_array *tr) | ||
231 | { | ||
232 | mutex_lock(&sample_timer_lock); | ||
233 | stop_stack_timers(); | ||
234 | tracer_enabled = 0; | ||
235 | mutex_unlock(&sample_timer_lock); | ||
236 | } | ||
237 | |||
238 | static int stack_trace_init(struct trace_array *tr) | ||
239 | { | ||
240 | sysprof_trace = tr; | ||
241 | |||
242 | tracing_start_cmdline_record(); | ||
243 | |||
244 | mutex_lock(&sample_timer_lock); | ||
245 | start_stack_timers(); | ||
246 | tracer_enabled = 1; | ||
247 | mutex_unlock(&sample_timer_lock); | ||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | static void stack_trace_reset(struct trace_array *tr) | ||
252 | { | ||
253 | tracing_stop_cmdline_record(); | ||
254 | stop_stack_trace(tr); | ||
255 | } | ||
256 | |||
257 | static struct tracer stack_trace __read_mostly = | ||
258 | { | ||
259 | .name = "sysprof", | ||
260 | .init = stack_trace_init, | ||
261 | .reset = stack_trace_reset, | ||
262 | #ifdef CONFIG_FTRACE_SELFTEST | ||
263 | .selftest = trace_selftest_startup_sysprof, | ||
264 | #endif | ||
265 | }; | ||
266 | |||
267 | __init static int init_stack_trace(void) | ||
268 | { | ||
269 | return register_tracer(&stack_trace); | ||
270 | } | ||
271 | device_initcall(init_stack_trace); | ||
272 | |||
273 | #define MAX_LONG_DIGITS 22 | ||
274 | |||
275 | static ssize_t | ||
276 | sysprof_sample_read(struct file *filp, char __user *ubuf, | ||
277 | size_t cnt, loff_t *ppos) | ||
278 | { | ||
279 | char buf[MAX_LONG_DIGITS]; | ||
280 | int r; | ||
281 | |||
282 | r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period)); | ||
283 | |||
284 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | ||
285 | } | ||
286 | |||
287 | static ssize_t | ||
288 | sysprof_sample_write(struct file *filp, const char __user *ubuf, | ||
289 | size_t cnt, loff_t *ppos) | ||
290 | { | ||
291 | char buf[MAX_LONG_DIGITS]; | ||
292 | unsigned long val; | ||
293 | |||
294 | if (cnt > MAX_LONG_DIGITS-1) | ||
295 | cnt = MAX_LONG_DIGITS-1; | ||
296 | |||
297 | if (copy_from_user(&buf, ubuf, cnt)) | ||
298 | return -EFAULT; | ||
299 | |||
300 | buf[cnt] = 0; | ||
301 | |||
302 | val = simple_strtoul(buf, NULL, 10); | ||
303 | /* | ||
304 | * Enforce a minimum sample period of 100 usecs: | ||
305 | */ | ||
306 | if (val < 100) | ||
307 | val = 100; | ||
308 | |||
309 | mutex_lock(&sample_timer_lock); | ||
310 | stop_stack_timers(); | ||
311 | sample_period = val * 1000; | ||
312 | start_stack_timers(); | ||
313 | mutex_unlock(&sample_timer_lock); | ||
314 | |||
315 | return cnt; | ||
316 | } | ||
317 | |||
318 | static const struct file_operations sysprof_sample_fops = { | ||
319 | .read = sysprof_sample_read, | ||
320 | .write = sysprof_sample_write, | ||
321 | }; | ||
322 | |||
323 | void init_tracer_sysprof_debugfs(struct dentry *d_tracer) | ||
324 | { | ||
325 | |||
326 | trace_create_file("sysprof_sample_period", 0644, | ||
327 | d_tracer, NULL, &sysprof_sample_fops); | ||
328 | } | ||
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index 97fcea4acce1..209b379a4721 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c | |||
@@ -9,6 +9,8 @@ | |||
9 | #include <trace/events/workqueue.h> | 9 | #include <trace/events/workqueue.h> |
10 | #include <linux/list.h> | 10 | #include <linux/list.h> |
11 | #include <linux/percpu.h> | 11 | #include <linux/percpu.h> |
12 | #include <linux/slab.h> | ||
13 | #include <linux/kref.h> | ||
12 | #include "trace_stat.h" | 14 | #include "trace_stat.h" |
13 | #include "trace.h" | 15 | #include "trace.h" |
14 | 16 | ||
@@ -16,6 +18,7 @@ | |||
16 | /* A cpu workqueue thread */ | 18 | /* A cpu workqueue thread */ |
17 | struct cpu_workqueue_stats { | 19 | struct cpu_workqueue_stats { |
18 | struct list_head list; | 20 | struct list_head list; |
21 | struct kref kref; | ||
19 | int cpu; | 22 | int cpu; |
20 | pid_t pid; | 23 | pid_t pid; |
21 | /* Can be inserted from interrupt or user context, need to be atomic */ | 24 | /* Can be inserted from interrupt or user context, need to be atomic */ |
@@ -39,9 +42,15 @@ struct workqueue_global_stats { | |||
39 | static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat); | 42 | static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat); |
40 | #define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu)) | 43 | #define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu)) |
41 | 44 | ||
45 | static void cpu_workqueue_stat_free(struct kref *kref) | ||
46 | { | ||
47 | kfree(container_of(kref, struct cpu_workqueue_stats, kref)); | ||
48 | } | ||
49 | |||
42 | /* Insertion of a work */ | 50 | /* Insertion of a work */ |
43 | static void | 51 | static void |
44 | probe_workqueue_insertion(struct task_struct *wq_thread, | 52 | probe_workqueue_insertion(void *ignore, |
53 | struct task_struct *wq_thread, | ||
45 | struct work_struct *work) | 54 | struct work_struct *work) |
46 | { | 55 | { |
47 | int cpu = cpumask_first(&wq_thread->cpus_allowed); | 56 | int cpu = cpumask_first(&wq_thread->cpus_allowed); |
@@ -62,7 +71,8 @@ found: | |||
62 | 71 | ||
63 | /* Execution of a work */ | 72 | /* Execution of a work */ |
64 | static void | 73 | static void |
65 | probe_workqueue_execution(struct task_struct *wq_thread, | 74 | probe_workqueue_execution(void *ignore, |
75 | struct task_struct *wq_thread, | ||
66 | struct work_struct *work) | 76 | struct work_struct *work) |
67 | { | 77 | { |
68 | int cpu = cpumask_first(&wq_thread->cpus_allowed); | 78 | int cpu = cpumask_first(&wq_thread->cpus_allowed); |
@@ -82,7 +92,8 @@ found: | |||
82 | } | 92 | } |
83 | 93 | ||
84 | /* Creation of a cpu workqueue thread */ | 94 | /* Creation of a cpu workqueue thread */ |
85 | static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) | 95 | static void probe_workqueue_creation(void *ignore, |
96 | struct task_struct *wq_thread, int cpu) | ||
86 | { | 97 | { |
87 | struct cpu_workqueue_stats *cws; | 98 | struct cpu_workqueue_stats *cws; |
88 | unsigned long flags; | 99 | unsigned long flags; |
@@ -96,8 +107,8 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) | |||
96 | return; | 107 | return; |
97 | } | 108 | } |
98 | INIT_LIST_HEAD(&cws->list); | 109 | INIT_LIST_HEAD(&cws->list); |
110 | kref_init(&cws->kref); | ||
99 | cws->cpu = cpu; | 111 | cws->cpu = cpu; |
100 | |||
101 | cws->pid = wq_thread->pid; | 112 | cws->pid = wq_thread->pid; |
102 | 113 | ||
103 | spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); | 114 | spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); |
@@ -106,7 +117,8 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) | |||
106 | } | 117 | } |
107 | 118 | ||
108 | /* Destruction of a cpu workqueue thread */ | 119 | /* Destruction of a cpu workqueue thread */ |
109 | static void probe_workqueue_destruction(struct task_struct *wq_thread) | 120 | static void |
121 | probe_workqueue_destruction(void *ignore, struct task_struct *wq_thread) | ||
110 | { | 122 | { |
111 | /* Workqueue only execute on one cpu */ | 123 | /* Workqueue only execute on one cpu */ |
112 | int cpu = cpumask_first(&wq_thread->cpus_allowed); | 124 | int cpu = cpumask_first(&wq_thread->cpus_allowed); |
@@ -118,7 +130,7 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread) | |||
118 | list) { | 130 | list) { |
119 | if (node->pid == wq_thread->pid) { | 131 | if (node->pid == wq_thread->pid) { |
120 | list_del(&node->list); | 132 | list_del(&node->list); |
121 | kfree(node); | 133 | kref_put(&node->kref, cpu_workqueue_stat_free); |
122 | goto found; | 134 | goto found; |
123 | } | 135 | } |
124 | } | 136 | } |
@@ -137,9 +149,11 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu) | |||
137 | 149 | ||
138 | spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); | 150 | spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); |
139 | 151 | ||
140 | if (!list_empty(&workqueue_cpu_stat(cpu)->list)) | 152 | if (!list_empty(&workqueue_cpu_stat(cpu)->list)) { |
141 | ret = list_entry(workqueue_cpu_stat(cpu)->list.next, | 153 | ret = list_entry(workqueue_cpu_stat(cpu)->list.next, |
142 | struct cpu_workqueue_stats, list); | 154 | struct cpu_workqueue_stats, list); |
155 | kref_get(&ret->kref); | ||
156 | } | ||
143 | 157 | ||
144 | spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); | 158 | spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); |
145 | 159 | ||
@@ -162,9 +176,9 @@ static void *workqueue_stat_start(struct tracer_stat *trace) | |||
162 | static void *workqueue_stat_next(void *prev, int idx) | 176 | static void *workqueue_stat_next(void *prev, int idx) |
163 | { | 177 | { |
164 | struct cpu_workqueue_stats *prev_cws = prev; | 178 | struct cpu_workqueue_stats *prev_cws = prev; |
179 | struct cpu_workqueue_stats *ret; | ||
165 | int cpu = prev_cws->cpu; | 180 | int cpu = prev_cws->cpu; |
166 | unsigned long flags; | 181 | unsigned long flags; |
167 | void *ret = NULL; | ||
168 | 182 | ||
169 | spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); | 183 | spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); |
170 | if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) { | 184 | if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) { |
@@ -175,11 +189,14 @@ static void *workqueue_stat_next(void *prev, int idx) | |||
175 | return NULL; | 189 | return NULL; |
176 | } while (!(ret = workqueue_stat_start_cpu(cpu))); | 190 | } while (!(ret = workqueue_stat_start_cpu(cpu))); |
177 | return ret; | 191 | return ret; |
192 | } else { | ||
193 | ret = list_entry(prev_cws->list.next, | ||
194 | struct cpu_workqueue_stats, list); | ||
195 | kref_get(&ret->kref); | ||
178 | } | 196 | } |
179 | spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); | 197 | spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); |
180 | 198 | ||
181 | return list_entry(prev_cws->list.next, struct cpu_workqueue_stats, | 199 | return ret; |
182 | list); | ||
183 | } | 200 | } |
184 | 201 | ||
185 | static int workqueue_stat_show(struct seq_file *s, void *p) | 202 | static int workqueue_stat_show(struct seq_file *s, void *p) |
@@ -203,6 +220,13 @@ static int workqueue_stat_show(struct seq_file *s, void *p) | |||
203 | return 0; | 220 | return 0; |
204 | } | 221 | } |
205 | 222 | ||
223 | static void workqueue_stat_release(void *stat) | ||
224 | { | ||
225 | struct cpu_workqueue_stats *node = stat; | ||
226 | |||
227 | kref_put(&node->kref, cpu_workqueue_stat_free); | ||
228 | } | ||
229 | |||
206 | static int workqueue_stat_headers(struct seq_file *s) | 230 | static int workqueue_stat_headers(struct seq_file *s) |
207 | { | 231 | { |
208 | seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); | 232 | seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); |
@@ -215,6 +239,7 @@ struct tracer_stat workqueue_stats __read_mostly = { | |||
215 | .stat_start = workqueue_stat_start, | 239 | .stat_start = workqueue_stat_start, |
216 | .stat_next = workqueue_stat_next, | 240 | .stat_next = workqueue_stat_next, |
217 | .stat_show = workqueue_stat_show, | 241 | .stat_show = workqueue_stat_show, |
242 | .stat_release = workqueue_stat_release, | ||
218 | .stat_headers = workqueue_stat_headers | 243 | .stat_headers = workqueue_stat_headers |
219 | }; | 244 | }; |
220 | 245 | ||
@@ -238,35 +263,35 @@ int __init trace_workqueue_early_init(void) | |||
238 | { | 263 | { |
239 | int ret, cpu; | 264 | int ret, cpu; |
240 | 265 | ||
241 | ret = register_trace_workqueue_insertion(probe_workqueue_insertion); | 266 | for_each_possible_cpu(cpu) { |
267 | spin_lock_init(&workqueue_cpu_stat(cpu)->lock); | ||
268 | INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list); | ||
269 | } | ||
270 | |||
271 | ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL); | ||
242 | if (ret) | 272 | if (ret) |
243 | goto out; | 273 | goto out; |
244 | 274 | ||
245 | ret = register_trace_workqueue_execution(probe_workqueue_execution); | 275 | ret = register_trace_workqueue_execution(probe_workqueue_execution, NULL); |
246 | if (ret) | 276 | if (ret) |
247 | goto no_insertion; | 277 | goto no_insertion; |
248 | 278 | ||
249 | ret = register_trace_workqueue_creation(probe_workqueue_creation); | 279 | ret = register_trace_workqueue_creation(probe_workqueue_creation, NULL); |
250 | if (ret) | 280 | if (ret) |
251 | goto no_execution; | 281 | goto no_execution; |
252 | 282 | ||
253 | ret = register_trace_workqueue_destruction(probe_workqueue_destruction); | 283 | ret = register_trace_workqueue_destruction(probe_workqueue_destruction, NULL); |
254 | if (ret) | 284 | if (ret) |
255 | goto no_creation; | 285 | goto no_creation; |
256 | 286 | ||
257 | for_each_possible_cpu(cpu) { | ||
258 | spin_lock_init(&workqueue_cpu_stat(cpu)->lock); | ||
259 | INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list); | ||
260 | } | ||
261 | |||
262 | return 0; | 287 | return 0; |
263 | 288 | ||
264 | no_creation: | 289 | no_creation: |
265 | unregister_trace_workqueue_creation(probe_workqueue_creation); | 290 | unregister_trace_workqueue_creation(probe_workqueue_creation, NULL); |
266 | no_execution: | 291 | no_execution: |
267 | unregister_trace_workqueue_execution(probe_workqueue_execution); | 292 | unregister_trace_workqueue_execution(probe_workqueue_execution, NULL); |
268 | no_insertion: | 293 | no_insertion: |
269 | unregister_trace_workqueue_insertion(probe_workqueue_insertion); | 294 | unregister_trace_workqueue_insertion(probe_workqueue_insertion, NULL); |
270 | out: | 295 | out: |
271 | pr_warning("trace_workqueue: unable to trace workqueues\n"); | 296 | pr_warning("trace_workqueue: unable to trace workqueues\n"); |
272 | 297 | ||