34 files changed, 5686 insertions, 1922 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 8ccbf27aead4..5abc09a93bc2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -320,6 +320,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
320 | on: enable for both 32- and 64-bit processes | 320 | on: enable for both 32- and 64-bit processes |
321 | off: disable for both 32- and 64-bit processes | 321 | off: disable for both 32- and 64-bit processes |
322 | 322 | ||
323 | alloc_snapshot [FTRACE] | ||
324 | Allocate the ftrace snapshot buffer on boot up when the | ||
325 | main buffer is allocated. This is handy when debugging | ||
326 | and you need to use tracing_snapshot() on boot up, but | ||
327 | do not want to use tracing_snapshot_alloc(), as it must | ||
328 | be called where GFP_KERNEL allocations are allowed. | ||
329 | |||
323 | amd_iommu= [HW,X86-64] | 330 | amd_iommu= [HW,X86-64] |
324 | Pass parameters to the AMD IOMMU driver in the system. | 331 | Pass parameters to the AMD IOMMU driver in the system. |
325 | Possible values are: | 332 | Possible values are: |
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index a372304aef10..bfe8c29b1f1d 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -8,6 +8,7 @@ Copyright 2008 Red Hat Inc. | |||
8 | Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton, | 8 | Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton, |
9 | John Kacur, and David Teigland. | 9 | John Kacur, and David Teigland. |
10 | Written for: 2.6.28-rc2 | 10 | Written for: 2.6.28-rc2 |
11 | Updated for: 3.10 | ||
11 | 12 | ||
12 | Introduction | 13 | Introduction |
13 | ------------ | 14 | ------------ |
@@ -17,13 +18,16 @@ designers of systems to find what is going on inside the kernel. | |||
17 | It can be used for debugging or analyzing latencies and | 18 | It can be used for debugging or analyzing latencies and |
18 | performance issues that take place outside of user-space. | 19 | performance issues that take place outside of user-space. |
19 | 20 | ||
20 | Although ftrace is the function tracer, it also includes an | 21 | Although ftrace is typically considered the function tracer, it |
21 | infrastructure that allows for other types of tracing. Some of | 22 | is really a framework of several assorted tracing utilities. |
22 | the tracers that are currently in ftrace include a tracer to | 23 | There's latency tracing to examine what occurs between interrupts |
23 | trace context switches, the time it takes for a high priority | 24 | disabled and enabled, as well as for preemption, and from the time |
24 | task to run after it was woken up, the time interrupts are | 25 | a task is woken to the time it is actually scheduled in. |
25 | disabled, and more (ftrace allows for tracer plugins, which | 26 | |
26 | means that the list of tracers can always grow). | 27 | One of the most common uses of ftrace is the event tracing. |
28 | Throughout the kernel are hundreds of static event points that | ||
29 | can be enabled via the debugfs file system to see what is | ||
30 | going on in certain parts of the kernel. | ||
27 | 31 | ||
28 | 32 | ||
29 | Implementation Details | 33 | Implementation Details |
@@ -61,7 +65,7 @@ the extended "/sys/kernel/debug/tracing" path name. | |||
61 | 65 | ||
62 | That's it! (assuming that you have ftrace configured into your kernel) | 66 | That's it! (assuming that you have ftrace configured into your kernel) |
63 | 67 | ||
64 | After mounting the debugfs, you can see a directory called | 68 | After mounting debugfs, you can see a directory called |
65 | "tracing". This directory contains the control and output files | 69 | "tracing". This directory contains the control and output files |
66 | of ftrace. Here is a list of some of the key files: | 70 | of ftrace. Here is a list of some of the key files: |
67 | 71 | ||
@@ -84,7 +88,9 @@ of ftrace. Here is a list of some of the key files: | |||
84 | 88 | ||
85 | This sets or displays whether writing to the trace | 89 | This sets or displays whether writing to the trace |
86 | ring buffer is enabled. Echo 0 into this file to disable | 90 | ring buffer is enabled. Echo 0 into this file to disable |
87 | the tracer or 1 to enable it. | 91 | the tracer or 1 to enable it. Note, this only disables |
92 | writing to the ring buffer; the tracing overhead may | ||
93 | still be occurring. | ||
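
    As an illustrative sketch (not part of the original document; the
    helper names and the absolute path, which assumes the usual debugfs
    mount point, are assumptions), a program can pause and resume
    recording around a region of interest by writing '0' and '1' here:

        #include <fcntl.h>
        #include <unistd.h>

        static int tracing_on_fd = -1;

        /* Open tracing_on once at start up. */
        void tracing_ctl_open(void)
        {
                tracing_on_fd = open("/sys/kernel/debug/tracing/tracing_on",
                                     O_WRONLY);
        }

        /* Write "1" to resume recording, "0" to pause it. */
        void tracing_set(int on)
        {
                if (tracing_on_fd < 0)
                        return;
                write(tracing_on_fd, on ? "1" : "0", 1);
        }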
88 | 94 | ||
89 | trace: | 95 | trace: |
90 | 96 | ||
@@ -109,7 +115,15 @@ of ftrace. Here is a list of some of the key files: | |||
109 | 115 | ||
110 | This file lets the user control the amount of data | 116 | This file lets the user control the amount of data |
111 | that is displayed in one of the above output | 117 | that is displayed in one of the above output |
112 | files. | 118 | files. Options also exist to modify how a tracer |
119 | or events work (stack traces, timestamps, etc). | ||
120 | |||
121 | options: | ||
122 | |||
123 | This is a directory that has a file for every available | ||
124 | trace option (also in trace_options). Options may also be set | ||
125 | or cleared by writing a "1" or "0" respectively into the | ||
126 | corresponding file with the option name. | ||
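
    As a hedged sketch (the helper name and the absolute debugfs path are
    assumptions), an option can be flipped programmatically by writing to
    its file in this directory:

        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        /* Set (enable=1) or clear (enable=0) a trace option by name,
         * e.g. set_trace_option("stacktrace", 1). */
        int set_trace_option(const char *name, int enable)
        {
                char path[256];
                int fd;

                snprintf(path, sizeof(path),
                         "/sys/kernel/debug/tracing/options/%s", name);
                fd = open(path, O_WRONLY);
                if (fd < 0)
                        return -1;
                write(fd, enable ? "1" : "0", 1);
                close(fd);
                return 0;
        }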
113 | 127 | ||
114 | tracing_max_latency: | 128 | tracing_max_latency: |
115 | 129 | ||
@@ -121,10 +135,17 @@ of ftrace. Here is a list of some of the key files: | |||
121 | latency is greater than the value in this | 135 | latency is greater than the value in this |
122 | file. (in microseconds) | 136 | file. (in microseconds) |
123 | 137 | ||
138 | tracing_thresh: | ||
139 | |||
140 | Some latency tracers will record a trace whenever the | ||
141 | latency is greater than the number in this file. | ||
142 | Only active when the file contains a number greater than 0. | ||
143 | (in microseconds) | ||
144 | |||
124 | buffer_size_kb: | 145 | buffer_size_kb: |
125 | 146 | ||
126 | This sets or displays the number of kilobytes each CPU | 147 | This sets or displays the number of kilobytes each CPU |
127 | buffer can hold. The tracer buffers are the same size | 148 | buffer holds. By default, the trace buffers are the same size |
128 | for each CPU. The displayed number is the size of the | 149 | for each CPU. The displayed number is the size of the |
129 | CPU buffer and not total size of all buffers. The | 150 | CPU buffer and not total size of all buffers. The |
130 | trace buffers are allocated in pages (blocks of memory | 151 | trace buffers are allocated in pages (blocks of memory |
@@ -133,16 +154,30 @@ of ftrace. Here is a list of some of the key files: | |||
133 | than requested, the rest of the page will be used, | 154 | than requested, the rest of the page will be used, |
134 | making the actual allocation bigger than requested. | 155 | making the actual allocation bigger than requested. |
135 | ( Note, the size may not be a multiple of the page size | 156 | ( Note, the size may not be a multiple of the page size |
136 | due to buffer management overhead. ) | 157 | due to buffer management meta-data. ) |
137 | 158 | ||
138 | This can only be updated when the current_tracer | 159 | buffer_total_size_kb: |
139 | is set to "nop". | 160 | |
161 | This displays the total combined size of all the trace buffers. | ||
162 | |||
163 | free_buffer: | ||
164 | |||
165 | If a process is performing the tracing, and the ring buffer | ||
166 | should be shrunk "freed" when the process is finished, even | ||
167 | if it were to be killed by a signal, this file can be used | ||
168 | for that purpose. On close of this file, the ring buffer will | ||
169 | be resized to its minimum size. If the process that is tracing | ||
170 | also holds this file open, then when the process exits, its | ||
171 | file descriptor for this file will be closed, and in doing so, | ||
172 | the ring buffer will be "freed". | ||
173 | |||
174 | It may also stop tracing if disable_on_free option is set. | ||
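
    For example (a hypothetical sketch, not taken from this document; the
    helper name and absolute path are assumptions), a tracing tool can
    open this file once and simply keep the descriptor for its lifetime:

        #include <fcntl.h>

        /* The returned fd is deliberately never closed by hand: process
         * exit (even via a fatal signal) closes it, and that close
         * resizes the ring buffer to its minimum size. */
        int hold_free_buffer(void)
        {
                return open("/sys/kernel/debug/tracing/free_buffer",
                            O_WRONLY);
        }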
140 | 175 | ||
141 | tracing_cpumask: | 176 | tracing_cpumask: |
142 | 177 | ||
143 | This is a mask that lets the user only trace | 178 | This is a mask that lets the user only trace |
144 | on specified CPUS. The format is a hex string | 179 | on specified CPUs. The format is a hex string |
145 | representing the CPUS. | 180 | representing the CPUs. |
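
    For instance (a hedged sketch; the helper name and path are
    assumptions), writing the hex mask "3" limits tracing to CPUs 0
    and 1, since bit 0 stands for CPU 0 and bit 1 for CPU 1:

        #include <fcntl.h>
        #include <unistd.h>

        int trace_only_cpus_0_and_1(void)
        {
                int fd = open("/sys/kernel/debug/tracing/tracing_cpumask",
                              O_WRONLY);

                if (fd < 0)
                        return -1;
                write(fd, "3", 1);      /* 0x3 = CPU 0 and CPU 1 */
                close(fd);
                return 0;
        }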
146 | 181 | ||
147 | set_ftrace_filter: | 182 | set_ftrace_filter: |
148 | 183 | ||
@@ -183,6 +218,261 @@ of ftrace. Here is a list of some of the key files: | |||
183 | "set_ftrace_notrace". (See the section "dynamic ftrace" | 218 | "set_ftrace_notrace". (See the section "dynamic ftrace" |
184 | below for more details.) | 219 | below for more details.) |
185 | 220 | ||
221 | enabled_functions: | ||
222 | |||
223 | This file is more for debugging ftrace, but can also be useful | ||
224 | in seeing if any function has a callback attached to it. | ||
225 | Not only does the trace infrastructure use the ftrace function | ||
226 | trace utility, but other subsystems might too. This file | ||
227 | displays all functions that have a callback attached to them | ||
228 | as well as the number of callbacks that have been attached. | ||
229 | Note, a callback may also call multiple functions which will | ||
230 | not be listed in this count. | ||
231 | |||
232 | If the callback registered to be traced by a function with | ||
233 | the "save regs" attribute (thus even more overhead), a 'R' | ||
234 | will be displayed on the same line as the function that | ||
235 | is returning registers. | ||
236 | |||
237 | function_profile_enabled: | ||
238 | |||
239 | When set, it will enable all functions with either the function | ||
240 | tracer, or if enabled, the function graph tracer. It will | ||
241 | keep a histogram of the number of functions that were called | ||
242 | and if run with the function graph tracer, it will also keep | ||
243 | track of the time spent in those functions. The histogram | ||
244 | content can be displayed in the files: | ||
245 | |||
246 | trace_stats/function<cpu> ( function0, function1, etc). | ||
247 | |||
248 | trace_stats: | ||
249 | |||
250 | A directory that holds different tracing stats. | ||
251 | |||
252 | kprobe_events: | ||
253 | |||
254 | Enable dynamic trace points. See kprobetrace.txt. | ||
255 | |||
256 | kprobe_profile: | ||
257 | |||
258 | Dynamic trace points stats. See kprobetrace.txt. | ||
259 | |||
260 | max_graph_depth: | ||
261 | |||
262 | Used with the function graph tracer. This is the max depth | ||
263 | it will trace into a function. Setting this to a value of | ||
264 | one will show only the first kernel function that is called | ||
265 | from user space. | ||
266 | |||
267 | printk_formats: | ||
268 | |||
269 | This is for tools that read the raw format files. If an event in | ||
270 | the ring buffer references a string (currently only trace_printk() | ||
271 | does this), only a pointer to the string is recorded into the buffer | ||
272 | and not the string itself. This prevents tools from knowing what | ||
273 | that string was. This file displays the string and address for | ||
274 | the string allowing tools to map the pointers to what the | ||
275 | strings were. | ||
276 | |||
277 | saved_cmdlines: | ||
278 | |||
279 | Only the pid of the task is recorded in a trace event unless | ||
280 | the event specifically saves the task comm as well. Ftrace | ||
281 | makes a cache of pid mappings to comms to try to display | ||
282 | comms for events. If a pid for a comm is not listed, then | ||
283 | "<...>" is displayed in the output. | ||
284 | |||
285 | snapshot: | ||
286 | |||
287 | This displays the "snapshot" buffer and also lets the user | ||
288 | take a snapshot of the current running trace. | ||
289 | See the "Snapshot" section below for more details. | ||
290 | |||
291 | stack_max_size: | ||
292 | |||
293 | When the stack tracer is activated, this will display the | ||
294 | maximum stack size it has encountered. | ||
295 | See the "Stack Trace" section below. | ||
296 | |||
297 | stack_trace: | ||
298 | |||
299 | This displays the stack back trace of the largest stack | ||
300 | that was encountered when the stack tracer is activated. | ||
301 | See the "Stack Trace" section below. | ||
302 | |||
303 | stack_trace_filter: | ||
304 | |||
305 | This is similar to "set_ftrace_filter" but it limits what | ||
306 | functions the stack tracer will check. | ||
307 | |||
308 | trace_clock: | ||
309 | |||
310 | Whenever an event is recorded into the ring buffer, a | ||
311 | "timestamp" is added. This stamp comes from a specified | ||
312 | clock. By default, ftrace uses the "local" clock. This | ||
313 | clock is very fast and strictly per cpu, but on some | ||
314 | systems it may not be monotonic with respect to other | ||
315 | CPUs. In other words, the local clocks may not be in sync | ||
316 | with local clocks on other CPUs. | ||
317 | |||
318 | Usual clocks for tracing: | ||
319 | |||
320 | # cat trace_clock | ||
321 | [local] global counter x86-tsc | ||
322 | |||
323 | local: Default clock, but may not be in sync across CPUs | ||
324 | |||
325 | global: This clock is in sync with all CPUs but may | ||
326 | be a bit slower than the local clock. | ||
327 | |||
328 | counter: This is not a clock at all, but literally an atomic | ||
329 | counter. It counts up one by one, but is in sync | ||
330 | with all CPUs. This is useful when you need to | ||
331 | know exactly the order events occurred with respect to | ||
332 | each other on different CPUs. | ||
333 | |||
334 | uptime: This uses the jiffies counter and the time stamp | ||
335 | is relative to the time since boot up. | ||
336 | |||
337 | perf: This makes ftrace use the same clock that perf uses. | ||
338 | Eventually perf will be able to read ftrace buffers | ||
339 | and this will help out in interleaving the data. | ||
340 | |||
341 | x86-tsc: Architectures may define their own clocks. For | ||
342 | example, x86 uses its own TSC cycle clock here. | ||
343 | |||
344 | To set a clock, simply echo the clock name into this file. | ||
345 | |||
346 | echo global > trace_clock | ||
347 | |||
348 | trace_marker: | ||
349 | |||
350 | This is a very useful file for synchronizing user space | ||
351 | with events happening in the kernel. Strings written into | ||
352 | this file will be recorded in the ftrace buffer. | ||
353 | |||
354 | It is useful in applications to open this file at the start | ||
355 | of the application and just reference the file descriptor | ||
356 | for the file. | ||
357 | |||
358 | void trace_write(const char *fmt, ...) | ||
359 | { | ||
360 | va_list ap; | ||
361 | char buf[256]; | ||
362 | int n; | ||
363 | |||
364 | if (trace_fd < 0) | ||
365 | return; | ||
366 | |||
367 | va_start(ap, fmt); | ||
368 | n = vsnprintf(buf, 256, fmt, ap); | ||
369 | va_end(ap); | ||
370 | |||
371 | write(trace_fd, buf, n); | ||
372 | } | ||
373 | |||
374 | start: | ||
375 | |||
376 | trace_fd = open("trace_marker", O_WRONLY); | ||
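
    A hedged usage sketch (the headers, the absolute path and the marker
    strings are assumptions, not part of the original example): the
    helper above needs <stdarg.h>, <stdio.h>, <unistd.h> and <fcntl.h>,
    and is typically used to bracket the region being investigated:

        trace_fd = open("/sys/kernel/debug/tracing/trace_marker",
                        O_WRONLY);

        trace_write("myapp: entering critical section");
        /* ... the work being measured ... */
        trace_write("myapp: leaving critical section");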
377 | |||
378 | uprobe_events: | ||
379 | |||
380 | Add dynamic tracepoints in programs. | ||
381 | See uprobetracer.txt | ||
382 | |||
383 | uprobe_profile: | ||
384 | |||
385 | Uprobe statistics. See uprobetracer.txt | ||
386 | |||
387 | instances: | ||
388 | |||
389 | This is a way to make multiple trace buffers where different | ||
390 | events can be recorded in different buffers. | ||
391 | See "Instances" section below. | ||
392 | |||
393 | events: | ||
394 | |||
395 | This is the trace event directory. It holds event tracepoints | ||
396 | (also known as static tracepoints) that have been compiled | ||
397 | into the kernel. It shows what event tracepoints exist | ||
398 | and how they are grouped by system. There are "enable" | ||
399 | files at various levels that can enable the tracepoints | ||
400 | when a "1" is written to them. | ||
401 | |||
402 | See events.txt for more information. | ||
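
    As a hedged illustration (the event name, the path and the helper are
    assumptions for the example), a tracepoint can also be enabled from a
    program by writing "1" to its "enable" file:

        #include <fcntl.h>
        #include <unistd.h>

        /* Enable the sched:sched_switch tracepoint, assuming debugfs is
         * mounted at /sys/kernel/debug and the event exists. */
        int enable_sched_switch(void)
        {
                int fd = open("/sys/kernel/debug/tracing/events/"
                              "sched/sched_switch/enable", O_WRONLY);

                if (fd < 0)
                        return -1;
                if (write(fd, "1", 1) != 1) {
                        close(fd);
                        return -1;
                }
                close(fd);
                return 0;
        }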
403 | |||
404 | per_cpu: | ||
405 | |||
406 | This is a directory that contains the trace per_cpu information. | ||
407 | |||
408 | per_cpu/cpu0/buffer_size_kb: | ||
409 | |||
410 | The ftrace buffer is defined per_cpu. That is, there's a separate | ||
411 | buffer for each CPU to allow writes to be done atomically, | ||
412 | and free from cache bouncing. These buffers may have different | ||
413 | sizes. This file is similar to the buffer_size_kb | ||
414 | file, but it only displays or sets the buffer size for the | ||
415 | specific CPU (here cpu0). | ||
416 | |||
417 | per_cpu/cpu0/trace: | ||
418 | |||
419 | This is similar to the "trace" file, but it will only display | ||
420 | the data specific for the CPU. If written to, it only clears | ||
421 | the specific CPU buffer. | ||
422 | |||
423 | per_cpu/cpu0/trace_pipe | ||
424 | |||
425 | This is similar to the "trace_pipe" file, and is a consuming | ||
426 | read, but it will only display (and consume) the data specific | ||
427 | for the CPU. | ||
428 | |||
429 | per_cpu/cpu0/trace_pipe_raw | ||
430 | |||
431 | For tools that can parse the ftrace ring buffer binary format, | ||
432 | the trace_pipe_raw file can be used to extract the data | ||
433 | from the ring buffer directly. With the use of the splice() | ||
434 | system call, the buffer data can be quickly transferred to | ||
435 | a file or to the network where a server is collecting the | ||
436 | data. | ||
437 | |||
438 | Like trace_pipe, this is a consuming reader, where multiple | ||
439 | reads will always produce different data. | ||
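
    A hedged sketch of the splice() approach described above (the paths,
    chunk size and function name are assumptions). splice() needs a pipe
    on one side, so the data is moved from trace_pipe_raw into a pipe and
    from the pipe into the output file:

        #define _GNU_SOURCE
        #include <fcntl.h>
        #include <unistd.h>

        /* Drain CPU 0's binary buffer into outpath. */
        int dump_cpu0_raw(const char *outpath)
        {
                int raw, out, p[2];
                ssize_t n;

                raw = open("/sys/kernel/debug/tracing/per_cpu/cpu0/"
                           "trace_pipe_raw", O_RDONLY);
                out = open(outpath, O_WRONLY | O_CREAT | O_TRUNC, 0644);
                if (raw < 0 || out < 0 || pipe(p) < 0)
                        return -1;

                /* Move one page-sized chunk at a time:
                 * ring buffer -> pipe -> output file. */
                while ((n = splice(raw, NULL, p[1], NULL, 4096,
                                   SPLICE_F_MOVE)) > 0)
                        splice(p[0], NULL, out, NULL, n, SPLICE_F_MOVE);

                close(p[0]);
                close(p[1]);
                close(out);
                close(raw);
                return 0;
        }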
440 | |||
441 | per_cpu/cpu0/snapshot: | ||
442 | |||
443 | This is similar to the main "snapshot" file, but will only | ||
444 | snapshot the current CPU (if supported). It only displays | ||
445 | the content of the snapshot for a given CPU, and if | ||
446 | written to, only clears this CPU buffer. | ||
447 | |||
448 | per_cpu/cpu0/snapshot_raw: | ||
449 | |||
450 | Similar to the trace_pipe_raw, but will read the binary format | ||
451 | from the snapshot buffer for the given CPU. | ||
452 | |||
453 | per_cpu/cpu0/stats: | ||
454 | |||
455 | This displays certain stats about the ring buffer: | ||
456 | |||
457 | entries: The number of events that are still in the buffer. | ||
458 | |||
459 | overrun: The number of lost events due to overwriting when | ||
460 | the buffer was full. | ||
461 | |||
462 | commit overrun: Should always be zero. | ||
463 | This gets set if so many events happened within a nested | ||
464 | event (the ring buffer is re-entrant) that it fills the | ||
465 | buffer and starts dropping events. | ||
466 | |||
467 | bytes: Bytes actually read (not overwritten). | ||
468 | |||
469 | oldest event ts: The oldest timestamp in the buffer | ||
470 | |||
471 | now ts: The current timestamp | ||
472 | |||
473 | dropped events: Events lost due to overwrite option being off. | ||
474 | |||
475 | read events: The number of events read. | ||
186 | 476 | ||
187 | The Tracers | 477 | The Tracers |
188 | ----------- | 478 | ----------- |
@@ -234,11 +524,6 @@ Here is the list of current tracers that may be configured. | |||
234 | RT tasks (as the current "wakeup" does). This is useful | 524 | RT tasks (as the current "wakeup" does). This is useful |
235 | for those interested in wake up timings of RT tasks. | 525 | for those interested in wake up timings of RT tasks. |
236 | 526 | ||
237 | "hw-branch-tracer" | ||
238 | |||
239 | Uses the BTS CPU feature on x86 CPUs to traces all | ||
240 | branches executed. | ||
241 | |||
242 | "nop" | 527 | "nop" |
243 | 528 | ||
244 | This is the "trace nothing" tracer. To remove all | 529 | This is the "trace nothing" tracer. To remove all |
@@ -261,70 +546,100 @@ Here is an example of the output format of the file "trace" | |||
261 | -------- | 546 | -------- |
262 | # tracer: function | 547 | # tracer: function |
263 | # | 548 | # |
264 | # TASK-PID CPU# TIMESTAMP FUNCTION | 549 | # entries-in-buffer/entries-written: 140080/250280 #P:4 |
265 | # | | | | | | 550 | # |
266 | bash-4251 [01] 10152.583854: path_put <-path_walk | 551 | # _-----=> irqs-off |
267 | bash-4251 [01] 10152.583855: dput <-path_put | 552 | # / _----=> need-resched |
268 | bash-4251 [01] 10152.583855: _atomic_dec_and_lock <-dput | 553 | # | / _---=> hardirq/softirq |
554 | # || / _--=> preempt-depth | ||
555 | # ||| / delay | ||
556 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION | ||
557 | # | | | |||| | | | ||
558 | bash-1977 [000] .... 17284.993652: sys_close <-system_call_fastpath | ||
559 | bash-1977 [000] .... 17284.993653: __close_fd <-sys_close | ||
560 | bash-1977 [000] .... 17284.993653: _raw_spin_lock <-__close_fd | ||
561 | sshd-1974 [003] .... 17284.993653: __srcu_read_unlock <-fsnotify | ||
562 | bash-1977 [000] .... 17284.993654: add_preempt_count <-_raw_spin_lock | ||
563 | bash-1977 [000] ...1 17284.993655: _raw_spin_unlock <-__close_fd | ||
564 | bash-1977 [000] ...1 17284.993656: sub_preempt_count <-_raw_spin_unlock | ||
565 | bash-1977 [000] .... 17284.993657: filp_close <-__close_fd | ||
566 | bash-1977 [000] .... 17284.993657: dnotify_flush <-filp_close | ||
567 | sshd-1974 [003] .... 17284.993658: sys_select <-system_call_fastpath | ||
269 | -------- | 568 | -------- |
270 | 569 | ||
271 | A header is printed with the tracer name that is represented by | 570 | A header is printed with the tracer name that is represented by |
272 | the trace. In this case the tracer is "function". Then a header | 571 | the trace. In this case the tracer is "function". Then it shows the |
273 | showing the format. Task name "bash", the task PID "4251", the | 572 | number of events in the buffer as well as the total number of entries |
274 | CPU that it was running on "01", the timestamp in <secs>.<usecs> | 573 | that were written. The difference is the number of entries that were |
275 | format, the function name that was traced "path_put" and the | 574 | lost due to the buffer filling up (250280 - 140080 = 110200 events |
276 | parent function that called this function "path_walk". The | 575 | lost). |
277 | timestamp is the time at which the function was entered. | 576 | |
577 | The header explains the content of the events. Task name "bash", the task | ||
578 | PID "1977", the CPU that it was running on "000", the latency format | ||
579 | (explained below), the timestamp in <secs>.<usecs> format, the | ||
580 | function name that was traced "sys_close" and the parent function that | ||
581 | called this function "system_call_fastpath". The timestamp is the time | ||
582 | at which the function was entered. | ||
278 | 583 | ||
279 | Latency trace format | 584 | Latency trace format |
280 | -------------------- | 585 | -------------------- |
281 | 586 | ||
282 | When the latency-format option is enabled, the trace file gives | 587 | When the latency-format option is enabled or when one of the latency |
283 | somewhat more information to see why a latency happened. | 588 | tracers is set, the trace file gives somewhat more information to see |
284 | Here is a typical trace. | 589 | why a latency happened. Here is a typical trace. |
285 | 590 | ||
286 | # tracer: irqsoff | 591 | # tracer: irqsoff |
287 | # | 592 | # |
288 | irqsoff latency trace v1.1.5 on 2.6.26-rc8 | 593 | # irqsoff latency trace v1.1.5 on 3.8.0-test+ |
289 | -------------------------------------------------------------------- | 594 | # -------------------------------------------------------------------- |
290 | latency: 97 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 595 | # latency: 259 us, #4/4, CPU#2 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
291 | ----------------- | 596 | # ----------------- |
292 | | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) | 597 | # | task: ps-6143 (uid:0 nice:0 policy:0 rt_prio:0) |
293 | ----------------- | 598 | # ----------------- |
294 | => started at: apic_timer_interrupt | 599 | # => started at: __lock_task_sighand |
295 | => ended at: do_softirq | 600 | # => ended at: _raw_spin_unlock_irqrestore |
296 | 601 | # | |
297 | # _------=> CPU# | 602 | # |
298 | # / _-----=> irqs-off | 603 | # _------=> CPU# |
299 | # | / _----=> need-resched | 604 | # / _-----=> irqs-off |
300 | # || / _---=> hardirq/softirq | 605 | # | / _----=> need-resched |
301 | # ||| / _--=> preempt-depth | 606 | # || / _---=> hardirq/softirq |
302 | # |||| / | 607 | # ||| / _--=> preempt-depth |
303 | # ||||| delay | 608 | # |||| / delay |
304 | # cmd pid ||||| time | caller | 609 | # cmd pid ||||| time | caller |
305 | # \ / ||||| \ | / | 610 | # \ / ||||| \ | / |
306 | <idle>-0 0d..1 0us+: trace_hardirqs_off_thunk (apic_timer_interrupt) | 611 | ps-6143 2d... 0us!: trace_hardirqs_off <-__lock_task_sighand |
307 | <idle>-0 0d.s. 97us : __do_softirq (do_softirq) | 612 | ps-6143 2d..1 259us+: trace_hardirqs_on <-_raw_spin_unlock_irqrestore |
308 | <idle>-0 0d.s1 98us : trace_hardirqs_on (do_softirq) | 613 | ps-6143 2d..1 263us+: time_hardirqs_on <-_raw_spin_unlock_irqrestore |
614 | ps-6143 2d..1 306us : <stack trace> | ||
615 | => trace_hardirqs_on_caller | ||
616 | => trace_hardirqs_on | ||
617 | => _raw_spin_unlock_irqrestore | ||
618 | => do_task_stat | ||
619 | => proc_tgid_stat | ||
620 | => proc_single_show | ||
621 | => seq_read | ||
622 | => vfs_read | ||
623 | => sys_read | ||
624 | => system_call_fastpath | ||
309 | 625 | ||
310 | 626 | ||
311 | This shows that the current tracer is "irqsoff" tracing the time | 627 | This shows that the current tracer is "irqsoff" tracing the time |
312 | for which interrupts were disabled. It gives the trace version | 628 | for which interrupts were disabled. It gives the trace version (which |
313 | and the version of the kernel upon which this was executed on | 629 | never changes) and the version of the kernel upon which this was executed on |
314 | (2.6.26-rc8). Then it displays the max latency in microsecs (97 | 630 | (3.10). Then it displays the max latency in microseconds (259 us). The number |
315 | us). The number of trace entries displayed and the total number | 631 | of trace entries displayed and the total number (both are four: #4/4). |
316 | recorded (both are three: #3/3). The type of preemption that was | 632 | VP, KP, SP, and HP are always zero and are reserved for later use. |
317 | used (PREEMPT). VP, KP, SP, and HP are always zero and are | 633 | #P is the number of online CPUs (#P:4). |
318 | reserved for later use. #P is the number of online CPUS (#P:2). | ||
319 | 634 | ||
320 | The task is the process that was running when the latency | 635 | The task is the process that was running when the latency |
321 | occurred. (swapper pid: 0). | 636 | occurred. (ps pid: 6143). |
322 | 637 | ||
323 | The start and stop (the functions in which the interrupts were | 638 | The start and stop (the functions in which the interrupts were |
324 | disabled and enabled respectively) that caused the latencies: | 639 | disabled and enabled respectively) that caused the latencies: |
325 | 640 | ||
326 | apic_timer_interrupt is where the interrupts were disabled. | 641 | __lock_task_sighand is where the interrupts were disabled. |
327 | do_softirq is where they were enabled again. | 642 | _raw_spin_unlock_irqrestore is where they were enabled again. |
328 | 643 | ||
329 | The next lines after the header are the trace itself. The header | 644 | The next lines after the header are the trace itself. The header |
330 | explains which is which. | 645 | explains which is which. |
@@ -367,16 +682,43 @@ The above is mostly meaningful for kernel developers. | |||
367 | 682 | ||
368 | The rest is the same as the 'trace' file. | 683 | The rest is the same as the 'trace' file. |
369 | 684 | ||
685 | Note, the latency tracers will usually end with a back trace | ||
686 | to easily find where the latency occurred. | ||
370 | 687 | ||
371 | trace_options | 688 | trace_options |
372 | ------------- | 689 | ------------- |
373 | 690 | ||
374 | The trace_options file is used to control what gets printed in | 691 | The trace_options file (or the options directory) is used to control |
375 | the trace output. To see what is available, simply cat the file: | 692 | what gets printed in the trace output, or manipulate the tracers. |
693 | To see what is available, simply cat the file: | ||
376 | 694 | ||
377 | cat trace_options | 695 | cat trace_options |
378 | print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ | 696 | print-parent |
379 | noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj | 697 | nosym-offset |
698 | nosym-addr | ||
699 | noverbose | ||
700 | noraw | ||
701 | nohex | ||
702 | nobin | ||
703 | noblock | ||
704 | nostacktrace | ||
705 | trace_printk | ||
706 | noftrace_preempt | ||
707 | nobranch | ||
708 | annotate | ||
709 | nouserstacktrace | ||
710 | nosym-userobj | ||
711 | noprintk-msg-only | ||
712 | context-info | ||
713 | latency-format | ||
714 | sleep-time | ||
715 | graph-time | ||
716 | record-cmd | ||
717 | overwrite | ||
718 | nodisable_on_free | ||
719 | irq-info | ||
720 | markers | ||
721 | function-trace | ||
380 | 722 | ||
381 | To disable one of the options, echo in the option prepended with | 723 | To disable one of the options, echo in the option prepended with |
382 | "no". | 724 | "no". |
@@ -428,13 +770,34 @@ Here are the available options: | |||
428 | 770 | ||
429 | bin - This will print out the formats in raw binary. | 771 | bin - This will print out the formats in raw binary. |
430 | 772 | ||
431 | block - TBD (needs update) | 773 | block - When set, reading trace_pipe will not block when polled. |
432 | 774 | ||
433 | stacktrace - This is one of the options that changes the trace | 775 | stacktrace - This is one of the options that changes the trace |
434 | itself. When a trace is recorded, so is the stack | 776 | itself. When a trace is recorded, so is the stack |
435 | of functions. This allows for back traces of | 777 | of functions. This allows for back traces of |
436 | trace sites. | 778 | trace sites. |
437 | 779 | ||
780 | trace_printk - Can disable trace_printk() from writing into the buffer. | ||
781 | |||
782 | branch - Enable branch tracing with the tracer. | ||
783 | |||
784 | annotate - It is sometimes confusing when the CPU buffers are full | ||
785 | and one CPU buffer had a lot of events recently, thus | ||
786 | a shorter time frame, where another CPU may have only had | ||
787 | a few events, which lets it have older events. When | ||
788 | the trace is reported, it shows the oldest events first, | ||
789 | and it may look like only one CPU ran (the one with the | ||
790 | oldest events). When the annotate option is set, it will | ||
791 | display when a new CPU buffer started: | ||
792 | |||
793 | <idle>-0 [001] dNs4 21169.031481: wake_up_idle_cpu <-add_timer_on | ||
794 | <idle>-0 [001] dNs4 21169.031482: _raw_spin_unlock_irqrestore <-add_timer_on | ||
795 | <idle>-0 [001] .Ns4 21169.031484: sub_preempt_count <-_raw_spin_unlock_irqrestore | ||
796 | ##### CPU 2 buffer started #### | ||
797 | <idle>-0 [002] .N.1 21169.031484: rcu_idle_exit <-cpu_idle | ||
798 | <idle>-0 [001] .Ns3 21169.031484: _raw_spin_unlock <-clocksource_watchdog | ||
799 | <idle>-0 [001] .Ns3 21169.031485: sub_preempt_count <-_raw_spin_unlock | ||
800 | |||
438 | userstacktrace - This option changes the trace. It records a | 801 | userstacktrace - This option changes the trace. It records a |
439 | stacktrace of the current userspace thread. | 802 | stacktrace of the current userspace thread. |
440 | 803 | ||
@@ -451,9 +814,13 @@ Here are the available options: | |||
451 | a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0 | 814 | a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0 |
452 | x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] | 815 | x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] |
453 | 816 | ||
454 | sched-tree - trace all tasks that are on the runqueue, at | 817 | |
455 | every scheduling event. Will add overhead if | 818 | printk-msg-only - When set, trace_printk()s will only show the format |
456 | there's a lot of tasks running at once. | 819 | and not their parameters (if trace_bprintk() or |
820 | trace_bputs() was used to save the trace_printk()). | ||
821 | |||
822 | context-info - Show only the event data. Hides the comm, PID, | ||
823 | timestamp, CPU, and other useful data. | ||
457 | 824 | ||
458 | latency-format - This option changes the trace. When | 825 | latency-format - This option changes the trace. When |
459 | it is enabled, the trace displays | 826 | it is enabled, the trace displays |
@@ -461,31 +828,61 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] | |||
461 | latencies, as described in "Latency | 828 | latencies, as described in "Latency |
462 | trace format". | 829 | trace format". |
463 | 830 | ||
831 | sleep-time - When running the function graph tracer, include | ||
832 | the time a task schedules out in its function. | ||
833 | When enabled, it will account time the task has been | ||
834 | scheduled out as part of the function call. | ||
835 | |||
836 | graph-time - When running the function graph tracer, include the | ||
837 | time to call nested functions. When this is not set, | ||
838 | the time reported for the function will only include | ||
839 | the time the function itself executed for, not the time | ||
840 | for functions that it called. | ||
841 | |||
842 | record-cmd - When any event or tracer is enabled, a hook is enabled | ||
843 | in the sched_switch trace point to fill comm cache | ||
844 | with mapped pids and comms. But this may cause some | ||
845 | overhead, and if you only care about pids, and not the | ||
846 | name of the task, disabling this option can lower the | ||
847 | impact of tracing. | ||
848 | |||
464 | overwrite - This controls what happens when the trace buffer is | 849 | overwrite - This controls what happens when the trace buffer is |
465 | full. If "1" (default), the oldest events are | 850 | full. If "1" (default), the oldest events are |
466 | discarded and overwritten. If "0", then the newest | 851 | discarded and overwritten. If "0", then the newest |
467 | events are discarded. | 852 | events are discarded. |
853 | (see per_cpu/cpu0/stats for overrun and dropped) | ||
468 | 854 | ||
469 | ftrace_enabled | 855 | disable_on_free - When the free_buffer is closed, tracing will |
470 | -------------- | 856 | stop (tracing_on set to 0). |
471 | 857 | ||
472 | The following tracers (listed below) give different output | 858 | irq-info - Shows the interrupt, preempt count, need resched data. |
473 | depending on whether or not the sysctl ftrace_enabled is set. To | 859 | When disabled, the trace looks like: |
474 | set ftrace_enabled, one can either use the sysctl function or | ||
475 | set it via the proc file system interface. | ||
476 | 860 | ||
477 | sysctl kernel.ftrace_enabled=1 | 861 | # tracer: function |
862 | # | ||
863 | # entries-in-buffer/entries-written: 144405/9452052 #P:4 | ||
864 | # | ||
865 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
866 | # | | | | | | ||
867 | <idle>-0 [002] 23636.756054: ttwu_do_activate.constprop.89 <-try_to_wake_up | ||
868 | <idle>-0 [002] 23636.756054: activate_task <-ttwu_do_activate.constprop.89 | ||
869 | <idle>-0 [002] 23636.756055: enqueue_task <-activate_task | ||
478 | 870 | ||
479 | or | ||
480 | 871 | ||
481 | echo 1 > /proc/sys/kernel/ftrace_enabled | 872 | markers - When set, the trace_marker is writable (only by root). |
873 | When disabled, the trace_marker will error with EINVAL | ||
874 | on write. | ||
875 | |||
876 | |||
877 | function-trace - The latency tracers will enable function tracing | ||
878 | if this option is enabled (which it is by default). When | ||
879 | it is disabled, the latency tracers do not trace | ||
880 | functions. This keeps the overhead of the tracer down | ||
881 | when performing latency tests. | ||
482 | 882 | ||
483 | To disable ftrace_enabled simply replace the '1' with '0' in the | 883 | Note: Some tracers have their own options. They only appear |
484 | above commands. | 884 | when the tracer is active. |
485 | 885 | ||
486 | When ftrace_enabled is set the tracers will also record the | ||
487 | functions that are within the trace. The descriptions of the | ||
488 | tracers will also show an example with ftrace enabled. | ||
489 | 886 | ||
490 | 887 | ||
491 | irqsoff | 888 | irqsoff |
@@ -506,95 +903,133 @@ new trace is saved. | |||
506 | To reset the maximum, echo 0 into tracing_max_latency. Here is | 903 | To reset the maximum, echo 0 into tracing_max_latency. Here is |
507 | an example: | 904 | an example: |
508 | 905 | ||
906 | # echo 0 > options/function-trace | ||
509 | # echo irqsoff > current_tracer | 907 | # echo irqsoff > current_tracer |
510 | # echo latency-format > trace_options | ||
511 | # echo 0 > tracing_max_latency | ||
512 | # echo 1 > tracing_on | 908 | # echo 1 > tracing_on |
909 | # echo 0 > tracing_max_latency | ||
513 | # ls -ltr | 910 | # ls -ltr |
514 | [...] | 911 | [...] |
515 | # echo 0 > tracing_on | 912 | # echo 0 > tracing_on |
516 | # cat trace | 913 | # cat trace |
517 | # tracer: irqsoff | 914 | # tracer: irqsoff |
518 | # | 915 | # |
519 | irqsoff latency trace v1.1.5 on 2.6.26 | 916 | # irqsoff latency trace v1.1.5 on 3.8.0-test+ |
520 | -------------------------------------------------------------------- | 917 | # -------------------------------------------------------------------- |
521 | latency: 12 us, #3/3, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 918 | # latency: 16 us, #4/4, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
522 | ----------------- | 919 | # ----------------- |
523 | | task: bash-3730 (uid:0 nice:0 policy:0 rt_prio:0) | 920 | # | task: swapper/0-0 (uid:0 nice:0 policy:0 rt_prio:0) |
524 | ----------------- | 921 | # ----------------- |
525 | => started at: sys_setpgid | 922 | # => started at: run_timer_softirq |
526 | => ended at: sys_setpgid | 923 | # => ended at: run_timer_softirq |
527 | 924 | # | |
528 | # _------=> CPU# | 925 | # |
529 | # / _-----=> irqs-off | 926 | # _------=> CPU# |
530 | # | / _----=> need-resched | 927 | # / _-----=> irqs-off |
531 | # || / _---=> hardirq/softirq | 928 | # | / _----=> need-resched |
532 | # ||| / _--=> preempt-depth | 929 | # || / _---=> hardirq/softirq |
533 | # |||| / | 930 | # ||| / _--=> preempt-depth |
534 | # ||||| delay | 931 | # |||| / delay |
535 | # cmd pid ||||| time | caller | 932 | # cmd pid ||||| time | caller |
536 | # \ / ||||| \ | / | 933 | # \ / ||||| \ | / |
537 | bash-3730 1d... 0us : _write_lock_irq (sys_setpgid) | 934 | <idle>-0 0d.s2 0us+: _raw_spin_lock_irq <-run_timer_softirq |
538 | bash-3730 1d..1 1us+: _write_unlock_irq (sys_setpgid) | 935 | <idle>-0 0dNs3 17us : _raw_spin_unlock_irq <-run_timer_softirq |
539 | bash-3730 1d..2 14us : trace_hardirqs_on (sys_setpgid) | 936 | <idle>-0 0dNs3 17us+: trace_hardirqs_on <-run_timer_softirq |
540 | 937 | <idle>-0 0dNs3 25us : <stack trace> | |
541 | 938 | => _raw_spin_unlock_irq | |
542 | Here we see that that we had a latency of 12 microsecs (which is | 939 | => run_timer_softirq |
543 | very good). The _write_lock_irq in sys_setpgid disabled | 940 | => __do_softirq |
544 | interrupts. The difference between the 12 and the displayed | 941 | => call_softirq |
545 | timestamp 14us occurred because the clock was incremented | 942 | => do_softirq |
943 | => irq_exit | ||
944 | => smp_apic_timer_interrupt | ||
945 | => apic_timer_interrupt | ||
946 | => rcu_idle_exit | ||
947 | => cpu_idle | ||
948 | => rest_init | ||
949 | => start_kernel | ||
950 | => x86_64_start_reservations | ||
951 | => x86_64_start_kernel | ||
952 | |||
953 | Here we see that we had a latency of 16 microseconds (which is | ||
954 | very good). The _raw_spin_lock_irq in run_timer_softirq disabled | ||
955 | interrupts. The difference between the 16 and the displayed | ||
956 | timestamp 25us occurred because the clock was incremented | ||
546 | between the time of recording the max latency and the time of | 957 | between the time of recording the max latency and the time of |
547 | recording the function that had that latency. | 958 | recording the function that had that latency. |
548 | 959 | ||
549 | Note the above example had ftrace_enabled not set. If we set the | 960 | Note the above example had function-trace not set. If we set |
550 | ftrace_enabled, we get a much larger output: | 961 | function-trace, we get a much larger output: |
962 | |||
963 | with echo 1 > options/function-trace | ||
551 | 964 | ||
552 | # tracer: irqsoff | 965 | # tracer: irqsoff |
553 | # | 966 | # |
554 | irqsoff latency trace v1.1.5 on 2.6.26-rc8 | 967 | # irqsoff latency trace v1.1.5 on 3.8.0-test+ |
555 | -------------------------------------------------------------------- | 968 | # -------------------------------------------------------------------- |
556 | latency: 50 us, #101/101, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 969 | # latency: 71 us, #168/168, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
557 | ----------------- | 970 | # ----------------- |
558 | | task: ls-4339 (uid:0 nice:0 policy:0 rt_prio:0) | 971 | # | task: bash-2042 (uid:0 nice:0 policy:0 rt_prio:0) |
559 | ----------------- | 972 | # ----------------- |
560 | => started at: __alloc_pages_internal | 973 | # => started at: ata_scsi_queuecmd |
561 | => ended at: __alloc_pages_internal | 974 | # => ended at: ata_scsi_queuecmd |
562 | 975 | # | |
563 | # _------=> CPU# | 976 | # |
564 | # / _-----=> irqs-off | 977 | # _------=> CPU# |
565 | # | / _----=> need-resched | 978 | # / _-----=> irqs-off |
566 | # || / _---=> hardirq/softirq | 979 | # | / _----=> need-resched |
567 | # ||| / _--=> preempt-depth | 980 | # || / _---=> hardirq/softirq |
568 | # |||| / | 981 | # ||| / _--=> preempt-depth |
569 | # ||||| delay | 982 | # |||| / delay |
570 | # cmd pid ||||| time | caller | 983 | # cmd pid ||||| time | caller |
571 | # \ / ||||| \ | / | 984 | # \ / ||||| \ | / |
572 | ls-4339 0...1 0us+: get_page_from_freelist (__alloc_pages_internal) | 985 | bash-2042 3d... 0us : _raw_spin_lock_irqsave <-ata_scsi_queuecmd |
573 | ls-4339 0d..1 3us : rmqueue_bulk (get_page_from_freelist) | 986 | bash-2042 3d... 0us : add_preempt_count <-_raw_spin_lock_irqsave |
574 | ls-4339 0d..1 3us : _spin_lock (rmqueue_bulk) | 987 | bash-2042 3d..1 1us : ata_scsi_find_dev <-ata_scsi_queuecmd |
575 | ls-4339 0d..1 4us : add_preempt_count (_spin_lock) | 988 | bash-2042 3d..1 1us : __ata_scsi_find_dev <-ata_scsi_find_dev |
576 | ls-4339 0d..2 4us : __rmqueue (rmqueue_bulk) | 989 | bash-2042 3d..1 2us : ata_find_dev.part.14 <-__ata_scsi_find_dev |
577 | ls-4339 0d..2 5us : __rmqueue_smallest (__rmqueue) | 990 | bash-2042 3d..1 2us : ata_qc_new_init <-__ata_scsi_queuecmd |
578 | ls-4339 0d..2 5us : __mod_zone_page_state (__rmqueue_smallest) | 991 | bash-2042 3d..1 3us : ata_sg_init <-__ata_scsi_queuecmd |
579 | ls-4339 0d..2 6us : __rmqueue (rmqueue_bulk) | 992 | bash-2042 3d..1 4us : ata_scsi_rw_xlat <-__ata_scsi_queuecmd |
580 | ls-4339 0d..2 6us : __rmqueue_smallest (__rmqueue) | 993 | bash-2042 3d..1 4us : ata_build_rw_tf <-ata_scsi_rw_xlat |
581 | ls-4339 0d..2 7us : __mod_zone_page_state (__rmqueue_smallest) | ||
582 | ls-4339 0d..2 7us : __rmqueue (rmqueue_bulk) | ||
583 | ls-4339 0d..2 8us : __rmqueue_smallest (__rmqueue) | ||
584 | [...] | 994 | [...] |
585 | ls-4339 0d..2 46us : __rmqueue_smallest (__rmqueue) | 995 | bash-2042 3d..1 67us : delay_tsc <-__delay |
586 | ls-4339 0d..2 47us : __mod_zone_page_state (__rmqueue_smallest) | 996 | bash-2042 3d..1 67us : add_preempt_count <-delay_tsc |
587 | ls-4339 0d..2 47us : __rmqueue (rmqueue_bulk) | 997 | bash-2042 3d..2 67us : sub_preempt_count <-delay_tsc |
588 | ls-4339 0d..2 48us : __rmqueue_smallest (__rmqueue) | 998 | bash-2042 3d..1 67us : add_preempt_count <-delay_tsc |
589 | ls-4339 0d..2 48us : __mod_zone_page_state (__rmqueue_smallest) | 999 | bash-2042 3d..2 68us : sub_preempt_count <-delay_tsc |
590 | ls-4339 0d..2 49us : _spin_unlock (rmqueue_bulk) | 1000 | bash-2042 3d..1 68us+: ata_bmdma_start <-ata_bmdma_qc_issue |
591 | ls-4339 0d..2 49us : sub_preempt_count (_spin_unlock) | 1001 | bash-2042 3d..1 71us : _raw_spin_unlock_irqrestore <-ata_scsi_queuecmd |
592 | ls-4339 0d..1 50us : get_page_from_freelist (__alloc_pages_internal) | 1002 | bash-2042 3d..1 71us : _raw_spin_unlock_irqrestore <-ata_scsi_queuecmd |
593 | ls-4339 0d..2 51us : trace_hardirqs_on (__alloc_pages_internal) | 1003 | bash-2042 3d..1 72us+: trace_hardirqs_on <-ata_scsi_queuecmd |
594 | 1004 | bash-2042 3d..1 120us : <stack trace> | |
595 | 1005 | => _raw_spin_unlock_irqrestore | |
596 | 1006 | => ata_scsi_queuecmd | |
597 | Here we traced a 50 microsecond latency. But we also see all the | 1007 | => scsi_dispatch_cmd |
1008 | => scsi_request_fn | ||
1009 | => __blk_run_queue_uncond | ||
1010 | => __blk_run_queue | ||
1011 | => blk_queue_bio | ||
1012 | => generic_make_request | ||
1013 | => submit_bio | ||
1014 | => submit_bh | ||
1015 | => __ext3_get_inode_loc | ||
1016 | => ext3_iget | ||
1017 | => ext3_lookup | ||
1018 | => lookup_real | ||
1019 | => __lookup_hash | ||
1020 | => walk_component | ||
1021 | => lookup_last | ||
1022 | => path_lookupat | ||
1023 | => filename_lookup | ||
1024 | => user_path_at_empty | ||
1025 | => user_path_at | ||
1026 | => vfs_fstatat | ||
1027 | => vfs_stat | ||
1028 | => sys_newstat | ||
1029 | => system_call_fastpath | ||
1030 | |||
1031 | |||
1032 | Here we traced a 71 microsecond latency. But we also see all the | ||
598 | functions that were called during that time. Note that by | 1033 | functions that were called during that time. Note that by |
599 | enabling function tracing, we incur an added overhead. This | 1034 | enabling function tracing, we incur an added overhead. This |
600 | overhead may extend the latency times. But nevertheless, this | 1035 | overhead may extend the latency times. But nevertheless, this |
@@ -614,120 +1049,122 @@ Like the irqsoff tracer, it records the maximum latency for | |||
614 | which preemption was disabled. The control of preemptoff tracer | 1049 | which preemption was disabled. The control of preemptoff tracer |
615 | is much like the irqsoff tracer. | 1050 | is much like the irqsoff tracer. |
616 | 1051 | ||
1052 | # echo 0 > options/function-trace | ||
617 | # echo preemptoff > current_tracer | 1053 | # echo preemptoff > current_tracer |
618 | # echo latency-format > trace_options | ||
619 | # echo 0 > tracing_max_latency | ||
620 | # echo 1 > tracing_on | 1054 | # echo 1 > tracing_on |
1055 | # echo 0 > tracing_max_latency | ||
621 | # ls -ltr | 1056 | # ls -ltr |
622 | [...] | 1057 | [...] |
623 | # echo 0 > tracing_on | 1058 | # echo 0 > tracing_on |
624 | # cat trace | 1059 | # cat trace |
625 | # tracer: preemptoff | 1060 | # tracer: preemptoff |
626 | # | 1061 | # |
627 | preemptoff latency trace v1.1.5 on 2.6.26-rc8 | 1062 | # preemptoff latency trace v1.1.5 on 3.8.0-test+ |
628 | -------------------------------------------------------------------- | 1063 | # -------------------------------------------------------------------- |
629 | latency: 29 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 1064 | # latency: 46 us, #4/4, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
630 | ----------------- | 1065 | # ----------------- |
631 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | 1066 | # | task: sshd-1991 (uid:0 nice:0 policy:0 rt_prio:0) |
632 | ----------------- | 1067 | # ----------------- |
633 | => started at: do_IRQ | 1068 | # => started at: do_IRQ |
634 | => ended at: __do_softirq | 1069 | # => ended at: do_IRQ |
635 | 1070 | # | |
636 | # _------=> CPU# | 1071 | # |
637 | # / _-----=> irqs-off | 1072 | # _------=> CPU# |
638 | # | / _----=> need-resched | 1073 | # / _-----=> irqs-off |
639 | # || / _---=> hardirq/softirq | 1074 | # | / _----=> need-resched |
640 | # ||| / _--=> preempt-depth | 1075 | # || / _---=> hardirq/softirq |
641 | # |||| / | 1076 | # ||| / _--=> preempt-depth |
642 | # ||||| delay | 1077 | # |||| / delay |
643 | # cmd pid ||||| time | caller | 1078 | # cmd pid ||||| time | caller |
644 | # \ / ||||| \ | / | 1079 | # \ / ||||| \ | / |
645 | sshd-4261 0d.h. 0us+: irq_enter (do_IRQ) | 1080 | sshd-1991 1d.h. 0us+: irq_enter <-do_IRQ |
646 | sshd-4261 0d.s. 29us : _local_bh_enable (__do_softirq) | 1081 | sshd-1991 1d..1 46us : irq_exit <-do_IRQ |
647 | sshd-4261 0d.s1 30us : trace_preempt_on (__do_softirq) | 1082 | sshd-1991 1d..1 47us+: trace_preempt_on <-do_IRQ |
1083 | sshd-1991 1d..1 52us : <stack trace> | ||
1084 | => sub_preempt_count | ||
1085 | => irq_exit | ||
1086 | => do_IRQ | ||
1087 | => ret_from_intr | ||
648 | 1088 | ||
649 | 1089 | ||
650 | This has some more changes. Preemption was disabled when an | 1090 | This has some more changes. Preemption was disabled when an |
651 | interrupt came in (notice the 'h'), and was enabled while doing | 1091 | interrupt came in (notice the 'h'), and was enabled on exit. |
652 | a softirq. (notice the 's'). But we also see that interrupts | 1092 | But we also see that interrupts have been disabled when entering |
653 | have been disabled when entering the preempt off section and | 1093 | the preempt off section and leaving it (the 'd'). We do not know if |
654 | leaving it (the 'd'). We do not know if interrupts were enabled | 1094 | interrupts were enabled in the meantime or shortly after this |
655 | in the mean time. | 1095 | was over. |
656 | 1096 | ||
657 | # tracer: preemptoff | 1097 | # tracer: preemptoff |
658 | # | 1098 | # |
659 | preemptoff latency trace v1.1.5 on 2.6.26-rc8 | 1099 | # preemptoff latency trace v1.1.5 on 3.8.0-test+ |
660 | -------------------------------------------------------------------- | 1100 | # -------------------------------------------------------------------- |
661 | latency: 63 us, #87/87, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 1101 | # latency: 83 us, #241/241, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
662 | ----------------- | 1102 | # ----------------- |
663 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | 1103 | # | task: bash-1994 (uid:0 nice:0 policy:0 rt_prio:0) |
664 | ----------------- | 1104 | # ----------------- |
665 | => started at: remove_wait_queue | 1105 | # => started at: wake_up_new_task |
666 | => ended at: __do_softirq | 1106 | # => ended at: task_rq_unlock |
667 | 1107 | # | |
668 | # _------=> CPU# | 1108 | # |
669 | # / _-----=> irqs-off | 1109 | # _------=> CPU# |
670 | # | / _----=> need-resched | 1110 | # / _-----=> irqs-off |
671 | # || / _---=> hardirq/softirq | 1111 | # | / _----=> need-resched |
672 | # ||| / _--=> preempt-depth | 1112 | # || / _---=> hardirq/softirq |
673 | # |||| / | 1113 | # ||| / _--=> preempt-depth |
674 | # ||||| delay | 1114 | # |||| / delay |
675 | # cmd pid ||||| time | caller | 1115 | # cmd pid ||||| time | caller |
676 | # \ / ||||| \ | / | 1116 | # \ / ||||| \ | / |
677 | sshd-4261 0d..1 0us : _spin_lock_irqsave (remove_wait_queue) | 1117 | bash-1994 1d..1 0us : _raw_spin_lock_irqsave <-wake_up_new_task |
678 | sshd-4261 0d..1 1us : _spin_unlock_irqrestore (remove_wait_queue) | 1118 | bash-1994 1d..1 0us : select_task_rq_fair <-select_task_rq |
679 | sshd-4261 0d..1 2us : do_IRQ (common_interrupt) | 1119 | bash-1994 1d..1 1us : __rcu_read_lock <-select_task_rq_fair |
680 | sshd-4261 0d..1 2us : irq_enter (do_IRQ) | 1120 | bash-1994 1d..1 1us : source_load <-select_task_rq_fair |
681 | sshd-4261 0d..1 2us : idle_cpu (irq_enter) | 1121 | bash-1994 1d..1 1us : source_load <-select_task_rq_fair |
682 | sshd-4261 0d..1 3us : add_preempt_count (irq_enter) | ||
683 | sshd-4261 0d.h1 3us : idle_cpu (irq_enter) | ||
684 | sshd-4261 0d.h. 4us : handle_fasteoi_irq (do_IRQ) | ||
685 | [...] | 1122 | [...] |
686 | sshd-4261 0d.h. 12us : add_preempt_count (_spin_lock) | 1123 | bash-1994 1d..1 12us : irq_enter <-smp_apic_timer_interrupt |
687 | sshd-4261 0d.h1 12us : ack_ioapic_quirk_irq (handle_fasteoi_irq) | 1124 | bash-1994 1d..1 12us : rcu_irq_enter <-irq_enter |
688 | sshd-4261 0d.h1 13us : move_native_irq (ack_ioapic_quirk_irq) | 1125 | bash-1994 1d..1 13us : add_preempt_count <-irq_enter |
689 | sshd-4261 0d.h1 13us : _spin_unlock (handle_fasteoi_irq) | 1126 | bash-1994 1d.h1 13us : exit_idle <-smp_apic_timer_interrupt |
690 | sshd-4261 0d.h1 14us : sub_preempt_count (_spin_unlock) | 1127 | bash-1994 1d.h1 13us : hrtimer_interrupt <-smp_apic_timer_interrupt |
691 | sshd-4261 0d.h1 14us : irq_exit (do_IRQ) | 1128 | bash-1994 1d.h1 13us : _raw_spin_lock <-hrtimer_interrupt |
692 | sshd-4261 0d.h1 15us : sub_preempt_count (irq_exit) | 1129 | bash-1994 1d.h1 14us : add_preempt_count <-_raw_spin_lock |
693 | sshd-4261 0d..2 15us : do_softirq (irq_exit) | 1130 | bash-1994 1d.h2 14us : ktime_get_update_offsets <-hrtimer_interrupt |
694 | sshd-4261 0d... 15us : __do_softirq (do_softirq) | ||
695 | sshd-4261 0d... 16us : __local_bh_disable (__do_softirq) | ||
696 | sshd-4261 0d... 16us+: add_preempt_count (__local_bh_disable) | ||
697 | sshd-4261 0d.s4 20us : add_preempt_count (__local_bh_disable) | ||
698 | sshd-4261 0d.s4 21us : sub_preempt_count (local_bh_enable) | ||
699 | sshd-4261 0d.s5 21us : sub_preempt_count (local_bh_enable) | ||
700 | [...] | 1131 | [...] |
701 | sshd-4261 0d.s6 41us : add_preempt_count (__local_bh_disable) | 1132 | bash-1994 1d.h1 35us : lapic_next_event <-clockevents_program_event |
702 | sshd-4261 0d.s6 42us : sub_preempt_count (local_bh_enable) | 1133 | bash-1994 1d.h1 35us : irq_exit <-smp_apic_timer_interrupt |
703 | sshd-4261 0d.s7 42us : sub_preempt_count (local_bh_enable) | 1134 | bash-1994 1d.h1 36us : sub_preempt_count <-irq_exit |
704 | sshd-4261 0d.s5 43us : add_preempt_count (__local_bh_disable) | 1135 | bash-1994 1d..2 36us : do_softirq <-irq_exit |
705 | sshd-4261 0d.s5 43us : sub_preempt_count (local_bh_enable_ip) | 1136 | bash-1994 1d..2 36us : __do_softirq <-call_softirq |
706 | sshd-4261 0d.s6 44us : sub_preempt_count (local_bh_enable_ip) | 1137 | bash-1994 1d..2 36us : __local_bh_disable <-__do_softirq |
707 | sshd-4261 0d.s5 44us : add_preempt_count (__local_bh_disable) | 1138 | bash-1994 1d.s2 37us : add_preempt_count <-_raw_spin_lock_irq |
708 | sshd-4261 0d.s5 45us : sub_preempt_count (local_bh_enable) | 1139 | bash-1994 1d.s3 38us : _raw_spin_unlock <-run_timer_softirq |
1140 | bash-1994 1d.s3 39us : sub_preempt_count <-_raw_spin_unlock | ||
1141 | bash-1994 1d.s2 39us : call_timer_fn <-run_timer_softirq | ||
709 | [...] | 1142 | [...] |
710 | sshd-4261 0d.s. 63us : _local_bh_enable (__do_softirq) | 1143 | bash-1994 1dNs2 81us : cpu_needs_another_gp <-rcu_process_callbacks |
711 | sshd-4261 0d.s1 64us : trace_preempt_on (__do_softirq) | 1144 | bash-1994 1dNs2 82us : __local_bh_enable <-__do_softirq |
1145 | bash-1994 1dNs2 82us : sub_preempt_count <-__local_bh_enable | ||
1146 | bash-1994 1dN.2 82us : idle_cpu <-irq_exit | ||
1147 | bash-1994 1dN.2 83us : rcu_irq_exit <-irq_exit | ||
1148 | bash-1994 1dN.2 83us : sub_preempt_count <-irq_exit | ||
1149 | bash-1994 1.N.1 84us : _raw_spin_unlock_irqrestore <-task_rq_unlock | ||
1150 | bash-1994 1.N.1 84us+: trace_preempt_on <-task_rq_unlock | ||
1151 | bash-1994 1.N.1 104us : <stack trace> | ||
1152 | => sub_preempt_count | ||
1153 | => _raw_spin_unlock_irqrestore | ||
1154 | => task_rq_unlock | ||
1155 | => wake_up_new_task | ||
1156 | => do_fork | ||
1157 | => sys_clone | ||
1158 | => stub_clone | ||
712 | 1159 | ||
713 | 1160 | ||
714 | The above is an example of the preemptoff trace with | 1161 | The above is an example of the preemptoff trace with |
715 | ftrace_enabled set. Here we see that interrupts were disabled | 1162 | function-trace set. Here we see that interrupts were not disabled |
716 | the entire time. The irq_enter code lets us know that we entered | 1163 | the entire time. The irq_enter code lets us know that we entered |
717 | an interrupt 'h'. Before that, the functions being traced still | 1164 | an interrupt 'h'. Before that, the functions being traced still |
718 | show that it is not in an interrupt, but we can see from the | 1165 | show that it is not in an interrupt, but we can see from the |
719 | functions themselves that this is not the case. | 1166 | functions themselves that this is not the case. |
720 | 1167 | ||
721 | Notice that __do_softirq when called does not have a | ||
722 | preempt_count. It may seem that we missed a preempt enabling. | ||
723 | What really happened is that the preempt count is held on the | ||
724 | thread's stack and we switched to the softirq stack (4K stacks | ||
725 | in effect). The code does not copy the preempt count, but | ||
726 | because interrupts are disabled, we do not need to worry about | ||
727 | it. Having a tracer like this is good for letting people know | ||
728 | what really happens inside the kernel. | ||
729 | |||
730 | |||
731 | preemptirqsoff | 1168 | preemptirqsoff |
732 | -------------- | 1169 | -------------- |
733 | 1170 | ||
@@ -762,38 +1199,57 @@ tracer. | |||
762 | Again, using this trace is much like the irqsoff and preemptoff | 1199 | Again, using this trace is much like the irqsoff and preemptoff |
763 | tracers. | 1200 | tracers. |
764 | 1201 | ||
1202 | # echo 0 > options/function-trace | ||
765 | # echo preemptirqsoff > current_tracer | 1203 | # echo preemptirqsoff > current_tracer |
766 | # echo latency-format > trace_options | ||
767 | # echo 0 > tracing_max_latency | ||
768 | # echo 1 > tracing_on | 1204 | # echo 1 > tracing_on |
1205 | # echo 0 > tracing_max_latency | ||
769 | # ls -ltr | 1206 | # ls -ltr |
770 | [...] | 1207 | [...] |
771 | # echo 0 > tracing_on | 1208 | # echo 0 > tracing_on |
772 | # cat trace | 1209 | # cat trace |
773 | # tracer: preemptirqsoff | 1210 | # tracer: preemptirqsoff |
774 | # | 1211 | # |
775 | preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 | 1212 | # preemptirqsoff latency trace v1.1.5 on 3.8.0-test+ |
776 | -------------------------------------------------------------------- | 1213 | # -------------------------------------------------------------------- |
777 | latency: 293 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 1214 | # latency: 100 us, #4/4, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
778 | ----------------- | 1215 | # ----------------- |
779 | | task: ls-4860 (uid:0 nice:0 policy:0 rt_prio:0) | 1216 | # | task: ls-2230 (uid:0 nice:0 policy:0 rt_prio:0) |
780 | ----------------- | 1217 | # ----------------- |
781 | => started at: apic_timer_interrupt | 1218 | # => started at: ata_scsi_queuecmd |
782 | => ended at: __do_softirq | 1219 | # => ended at: ata_scsi_queuecmd |
783 | 1220 | # | |
784 | # _------=> CPU# | 1221 | # |
785 | # / _-----=> irqs-off | 1222 | # _------=> CPU# |
786 | # | / _----=> need-resched | 1223 | # / _-----=> irqs-off |
787 | # || / _---=> hardirq/softirq | 1224 | # | / _----=> need-resched |
788 | # ||| / _--=> preempt-depth | 1225 | # || / _---=> hardirq/softirq |
789 | # |||| / | 1226 | # ||| / _--=> preempt-depth |
790 | # ||||| delay | 1227 | # |||| / delay |
791 | # cmd pid ||||| time | caller | 1228 | # cmd pid ||||| time | caller |
792 | # \ / ||||| \ | / | 1229 | # \ / ||||| \ | / |
793 | ls-4860 0d... 0us!: trace_hardirqs_off_thunk (apic_timer_interrupt) | 1230 | ls-2230 3d... 0us+: _raw_spin_lock_irqsave <-ata_scsi_queuecmd |
794 | ls-4860 0d.s. 294us : _local_bh_enable (__do_softirq) | 1231 | ls-2230 3...1 100us : _raw_spin_unlock_irqrestore <-ata_scsi_queuecmd |
795 | ls-4860 0d.s1 294us : trace_preempt_on (__do_softirq) | 1232 | ls-2230 3...1 101us+: trace_preempt_on <-ata_scsi_queuecmd |
796 | 1233 | ls-2230 3...1 111us : <stack trace> | |
1234 | => sub_preempt_count | ||
1235 | => _raw_spin_unlock_irqrestore | ||
1236 | => ata_scsi_queuecmd | ||
1237 | => scsi_dispatch_cmd | ||
1238 | => scsi_request_fn | ||
1239 | => __blk_run_queue_uncond | ||
1240 | => __blk_run_queue | ||
1241 | => blk_queue_bio | ||
1242 | => generic_make_request | ||
1243 | => submit_bio | ||
1244 | => submit_bh | ||
1245 | => ext3_bread | ||
1246 | => ext3_dir_bread | ||
1247 | => htree_dirblock_to_tree | ||
1248 | => ext3_htree_fill_tree | ||
1249 | => ext3_readdir | ||
1250 | => vfs_readdir | ||
1251 | => sys_getdents | ||
1252 | => system_call_fastpath | ||
797 | 1253 | ||
798 | 1254 | ||
799 | The trace_hardirqs_off_thunk is called from assembly on x86 when | 1255 | The trace_hardirqs_off_thunk is called from assembly on x86 when |
@@ -802,105 +1258,158 @@ function tracing, we do not know if interrupts were enabled | |||
802 | within the preemption points. We do see that it started with | 1258 | within the preemption points. We do see that it started with |
803 | preemption enabled. | 1259 | preemption enabled. |
804 | 1260 | ||
805 | Here is a trace with ftrace_enabled set: | 1261 | Here is a trace with function-trace set: |
806 | |||
807 | 1262 | ||
808 | # tracer: preemptirqsoff | 1263 | # tracer: preemptirqsoff |
809 | # | 1264 | # |
810 | preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 | 1265 | # preemptirqsoff latency trace v1.1.5 on 3.8.0-test+ |
811 | -------------------------------------------------------------------- | 1266 | # -------------------------------------------------------------------- |
812 | latency: 105 us, #183/183, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 1267 | # latency: 161 us, #339/339, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
813 | ----------------- | 1268 | # ----------------- |
814 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | 1269 | # | task: ls-2269 (uid:0 nice:0 policy:0 rt_prio:0) |
815 | ----------------- | 1270 | # ----------------- |
816 | => started at: write_chan | 1271 | # => started at: schedule |
817 | => ended at: __do_softirq | 1272 | # => ended at: mutex_unlock |
818 | 1273 | # | |
819 | # _------=> CPU# | 1274 | # |
820 | # / _-----=> irqs-off | 1275 | # _------=> CPU# |
821 | # | / _----=> need-resched | 1276 | # / _-----=> irqs-off |
822 | # || / _---=> hardirq/softirq | 1277 | # | / _----=> need-resched |
823 | # ||| / _--=> preempt-depth | 1278 | # || / _---=> hardirq/softirq |
824 | # |||| / | 1279 | # ||| / _--=> preempt-depth |
825 | # ||||| delay | 1280 | # |||| / delay |
826 | # cmd pid ||||| time | caller | 1281 | # cmd pid ||||| time | caller |
827 | # \ / ||||| \ | / | 1282 | # \ / ||||| \ | / |
828 | ls-4473 0.N.. 0us : preempt_schedule (write_chan) | 1283 | kworker/-59 3...1 0us : __schedule <-schedule |
829 | ls-4473 0dN.1 1us : _spin_lock (schedule) | 1284 | kworker/-59 3d..1 0us : rcu_preempt_qs <-rcu_note_context_switch |
830 | ls-4473 0dN.1 2us : add_preempt_count (_spin_lock) | 1285 | kworker/-59 3d..1 1us : add_preempt_count <-_raw_spin_lock_irq |
831 | ls-4473 0d..2 2us : put_prev_task_fair (schedule) | 1286 | kworker/-59 3d..2 1us : deactivate_task <-__schedule |
832 | [...] | 1287 | kworker/-59 3d..2 1us : dequeue_task <-deactivate_task |
833 | ls-4473 0d..2 13us : set_normalized_timespec (ktime_get_ts) | 1288 | kworker/-59 3d..2 2us : update_rq_clock <-dequeue_task |
834 | ls-4473 0d..2 13us : __switch_to (schedule) | 1289 | kworker/-59 3d..2 2us : dequeue_task_fair <-dequeue_task |
835 | sshd-4261 0d..2 14us : finish_task_switch (schedule) | 1290 | kworker/-59 3d..2 2us : update_curr <-dequeue_task_fair |
836 | sshd-4261 0d..2 14us : _spin_unlock_irq (finish_task_switch) | 1291 | kworker/-59 3d..2 2us : update_min_vruntime <-update_curr |
837 | sshd-4261 0d..1 15us : add_preempt_count (_spin_lock_irqsave) | 1292 | kworker/-59 3d..2 3us : cpuacct_charge <-update_curr |
838 | sshd-4261 0d..2 16us : _spin_unlock_irqrestore (hrtick_set) | 1293 | kworker/-59 3d..2 3us : __rcu_read_lock <-cpuacct_charge |
839 | sshd-4261 0d..2 16us : do_IRQ (common_interrupt) | 1294 | kworker/-59 3d..2 3us : __rcu_read_unlock <-cpuacct_charge |
840 | sshd-4261 0d..2 17us : irq_enter (do_IRQ) | 1295 | kworker/-59 3d..2 3us : update_cfs_rq_blocked_load <-dequeue_task_fair |
841 | sshd-4261 0d..2 17us : idle_cpu (irq_enter) | 1296 | kworker/-59 3d..2 4us : clear_buddies <-dequeue_task_fair |
842 | sshd-4261 0d..2 18us : add_preempt_count (irq_enter) | 1297 | kworker/-59 3d..2 4us : account_entity_dequeue <-dequeue_task_fair |
843 | sshd-4261 0d.h2 18us : idle_cpu (irq_enter) | 1298 | kworker/-59 3d..2 4us : update_min_vruntime <-dequeue_task_fair |
844 | sshd-4261 0d.h. 18us : handle_fasteoi_irq (do_IRQ) | 1299 | kworker/-59 3d..2 4us : update_cfs_shares <-dequeue_task_fair |
845 | sshd-4261 0d.h. 19us : _spin_lock (handle_fasteoi_irq) | 1300 | kworker/-59 3d..2 5us : hrtick_update <-dequeue_task_fair |
846 | sshd-4261 0d.h. 19us : add_preempt_count (_spin_lock) | 1301 | kworker/-59 3d..2 5us : wq_worker_sleeping <-__schedule |
847 | sshd-4261 0d.h1 20us : _spin_unlock (handle_fasteoi_irq) | 1302 | kworker/-59 3d..2 5us : kthread_data <-wq_worker_sleeping |
848 | sshd-4261 0d.h1 20us : sub_preempt_count (_spin_unlock) | 1303 | kworker/-59 3d..2 5us : put_prev_task_fair <-__schedule |
849 | [...] | 1304 | kworker/-59 3d..2 6us : pick_next_task_fair <-pick_next_task |
850 | sshd-4261 0d.h1 28us : _spin_unlock (handle_fasteoi_irq) | 1305 | kworker/-59 3d..2 6us : clear_buddies <-pick_next_task_fair |
851 | sshd-4261 0d.h1 29us : sub_preempt_count (_spin_unlock) | 1306 | kworker/-59 3d..2 6us : set_next_entity <-pick_next_task_fair |
852 | sshd-4261 0d.h2 29us : irq_exit (do_IRQ) | 1307 | kworker/-59 3d..2 6us : update_stats_wait_end <-set_next_entity |
853 | sshd-4261 0d.h2 29us : sub_preempt_count (irq_exit) | 1308 | ls-2269 3d..2 7us : finish_task_switch <-__schedule |
854 | sshd-4261 0d..3 30us : do_softirq (irq_exit) | 1309 | ls-2269 3d..2 7us : _raw_spin_unlock_irq <-finish_task_switch |
855 | sshd-4261 0d... 30us : __do_softirq (do_softirq) | 1310 | ls-2269 3d..2 8us : do_IRQ <-ret_from_intr |
856 | sshd-4261 0d... 31us : __local_bh_disable (__do_softirq) | 1311 | ls-2269 3d..2 8us : irq_enter <-do_IRQ |
857 | sshd-4261 0d... 31us+: add_preempt_count (__local_bh_disable) | 1312 | ls-2269 3d..2 8us : rcu_irq_enter <-irq_enter |
858 | sshd-4261 0d.s4 34us : add_preempt_count (__local_bh_disable) | 1313 | ls-2269 3d..2 9us : add_preempt_count <-irq_enter |
1314 | ls-2269 3d.h2 9us : exit_idle <-do_IRQ | ||
859 | [...] | 1315 | [...] |
860 | sshd-4261 0d.s3 43us : sub_preempt_count (local_bh_enable_ip) | 1316 | ls-2269 3d.h3 20us : sub_preempt_count <-_raw_spin_unlock |
861 | sshd-4261 0d.s4 44us : sub_preempt_count (local_bh_enable_ip) | 1317 | ls-2269 3d.h2 20us : irq_exit <-do_IRQ |
862 | sshd-4261 0d.s3 44us : smp_apic_timer_interrupt (apic_timer_interrupt) | 1318 | ls-2269 3d.h2 21us : sub_preempt_count <-irq_exit |
863 | sshd-4261 0d.s3 45us : irq_enter (smp_apic_timer_interrupt) | 1319 | ls-2269 3d..3 21us : do_softirq <-irq_exit |
864 | sshd-4261 0d.s3 45us : idle_cpu (irq_enter) | 1320 | ls-2269 3d..3 21us : __do_softirq <-call_softirq |
865 | sshd-4261 0d.s3 46us : add_preempt_count (irq_enter) | 1321 | ls-2269 3d..3 21us+: __local_bh_disable <-__do_softirq |
866 | sshd-4261 0d.H3 46us : idle_cpu (irq_enter) | 1322 | ls-2269 3d.s4 29us : sub_preempt_count <-_local_bh_enable_ip |
867 | sshd-4261 0d.H3 47us : hrtimer_interrupt (smp_apic_timer_interrupt) | 1323 | ls-2269 3d.s5 29us : sub_preempt_count <-_local_bh_enable_ip |
868 | sshd-4261 0d.H3 47us : ktime_get (hrtimer_interrupt) | 1324 | ls-2269 3d.s5 31us : do_IRQ <-ret_from_intr |
1325 | ls-2269 3d.s5 31us : irq_enter <-do_IRQ | ||
1326 | ls-2269 3d.s5 31us : rcu_irq_enter <-irq_enter | ||
869 | [...] | 1327 | [...] |
870 | sshd-4261 0d.H3 81us : tick_program_event (hrtimer_interrupt) | 1328 | ls-2269 3d.s5 31us : rcu_irq_enter <-irq_enter |
871 | sshd-4261 0d.H3 82us : ktime_get (tick_program_event) | 1329 | ls-2269 3d.s5 32us : add_preempt_count <-irq_enter |
872 | sshd-4261 0d.H3 82us : ktime_get_ts (ktime_get) | 1330 | ls-2269 3d.H5 32us : exit_idle <-do_IRQ |
873 | sshd-4261 0d.H3 83us : getnstimeofday (ktime_get_ts) | 1331 | ls-2269 3d.H5 32us : handle_irq <-do_IRQ |
874 | sshd-4261 0d.H3 83us : set_normalized_timespec (ktime_get_ts) | 1332 | ls-2269 3d.H5 32us : irq_to_desc <-handle_irq |
875 | sshd-4261 0d.H3 84us : clockevents_program_event (tick_program_event) | 1333 | ls-2269 3d.H5 33us : handle_fasteoi_irq <-handle_irq |
876 | sshd-4261 0d.H3 84us : lapic_next_event (clockevents_program_event) | ||
877 | sshd-4261 0d.H3 85us : irq_exit (smp_apic_timer_interrupt) | ||
878 | sshd-4261 0d.H3 85us : sub_preempt_count (irq_exit) | ||
879 | sshd-4261 0d.s4 86us : sub_preempt_count (irq_exit) | ||
880 | sshd-4261 0d.s3 86us : add_preempt_count (__local_bh_disable) | ||
881 | [...] | 1334 | [...] |
882 | sshd-4261 0d.s1 98us : sub_preempt_count (net_rx_action) | 1335 | ls-2269 3d.s5 158us : _raw_spin_unlock_irqrestore <-rtl8139_poll |
883 | sshd-4261 0d.s. 99us : add_preempt_count (_spin_lock_irq) | 1336 | ls-2269 3d.s3 158us : net_rps_action_and_irq_enable.isra.65 <-net_rx_action |
884 | sshd-4261 0d.s1 99us+: _spin_unlock_irq (run_timer_softirq) | 1337 | ls-2269 3d.s3 159us : __local_bh_enable <-__do_softirq |
885 | sshd-4261 0d.s. 104us : _local_bh_enable (__do_softirq) | 1338 | ls-2269 3d.s3 159us : sub_preempt_count <-__local_bh_enable |
886 | sshd-4261 0d.s. 104us : sub_preempt_count (_local_bh_enable) | 1339 | ls-2269 3d..3 159us : idle_cpu <-irq_exit |
887 | sshd-4261 0d.s. 105us : _local_bh_enable (__do_softirq) | 1340 | ls-2269 3d..3 159us : rcu_irq_exit <-irq_exit |
888 | sshd-4261 0d.s1 105us : trace_preempt_on (__do_softirq) | 1341 | ls-2269 3d..3 160us : sub_preempt_count <-irq_exit |
889 | 1342 | ls-2269 3d... 161us : __mutex_unlock_slowpath <-mutex_unlock | |
890 | 1343 | ls-2269 3d... 162us+: trace_hardirqs_on <-mutex_unlock | |
891 | This is a very interesting trace. It started with the preemption | 1344 | ls-2269 3d... 186us : <stack trace> |
892 | of the ls task. We see that the task had the "need_resched" bit | 1345 | => __mutex_unlock_slowpath |
893 | set via the 'N' in the trace. Interrupts were disabled before | 1346 | => mutex_unlock |
894 | the spin_lock at the beginning of the trace. We see that a | 1347 | => process_output |
895 | schedule took place to run sshd. When the interrupts were | 1348 | => n_tty_write |
896 | enabled, we took an interrupt. On return from the interrupt | 1349 | => tty_write |
897 | handler, the softirq ran. We took another interrupt while | 1350 | => vfs_write |
898 | running the softirq as we see from the capital 'H'. | 1351 | => sys_write |
1352 | => system_call_fastpath | ||
1353 | |||
1354 | This is an interesting trace. It started with the kworker running, | ||
1355 | scheduling out, and ls taking over. But as soon as ls released the | ||
1356 | rq lock and enabled interrupts (but not preemption), an interrupt | ||
1357 | triggered. When the interrupt finished, it started running softirqs. | ||
1358 | But while the softirq was running, another interrupt triggered. | ||
1359 | When an interrupt is running inside a softirq, the annotation is 'H'. | ||
899 | 1360 | ||
900 | 1361 | ||
901 | wakeup | 1362 | wakeup |
902 | ------ | 1363 | ------ |
903 | 1364 | ||
1365 | One common case that people are interested in tracing is the | ||
1366 | time it takes for a task that is woken to actually start running. | ||
1367 | For non Real-Time tasks this can be arbitrary, but tracing it | ||
1368 | is nonetheless interesting. | ||
1369 | |||
1370 | Without function tracing: | ||
1371 | |||
1372 | # echo 0 > options/function-trace | ||
1373 | # echo wakeup > current_tracer | ||
1374 | # echo 1 > tracing_on | ||
1375 | # echo 0 > tracing_max_latency | ||
1376 | # chrt -f 5 sleep 1 | ||
1377 | # echo 0 > tracing_on | ||
1378 | # cat trace | ||
1379 | # tracer: wakeup | ||
1380 | # | ||
1381 | # wakeup latency trace v1.1.5 on 3.8.0-test+ | ||
1382 | # -------------------------------------------------------------------- | ||
1383 | # latency: 15 us, #4/4, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) | ||
1384 | # ----------------- | ||
1385 | # | task: kworker/3:1H-312 (uid:0 nice:-20 policy:0 rt_prio:0) | ||
1386 | # ----------------- | ||
1387 | # | ||
1388 | # _------=> CPU# | ||
1389 | # / _-----=> irqs-off | ||
1390 | # | / _----=> need-resched | ||
1391 | # || / _---=> hardirq/softirq | ||
1392 | # ||| / _--=> preempt-depth | ||
1393 | # |||| / delay | ||
1394 | # cmd pid ||||| time | caller | ||
1395 | # \ / ||||| \ | / | ||
1396 | <idle>-0 3dNs7 0us : 0:120:R + [003] 312:100:R kworker/3:1H | ||
1397 | <idle>-0 3dNs7 1us+: ttwu_do_activate.constprop.87 <-try_to_wake_up | ||
1398 | <idle>-0 3d..3 15us : __schedule <-schedule | ||
1399 | <idle>-0 3d..3 15us : 0:120:R ==> [003] 312:100:R kworker/3:1H | ||
1400 | |||
1401 | The tracer only traces the highest priority task in the system | ||
1402 | to avoid tracing all the ordinary wakeups. Here we see that | ||
1403 | the kworker with a nice priority of -20 (not very nice) took | ||
1404 | just 15 microseconds from the time it woke up to the time it | ||
1405 | ran. | ||
1406 | |||
1407 | Non Real-Time tasks are not that interesting. A more interesting | ||
1408 | trace comes from concentrating only on Real-Time tasks. | ||
1409 | |||
1410 | wakeup_rt | ||
1411 | --------- | ||
1412 | |||
904 | In a Real-Time environment it is very important to know the | 1413 | In a Real-Time environment it is very important to know the |
905 | wakeup time it takes for the highest priority task that is woken | 1414 | wakeup time it takes for the highest priority task that is woken |
906 | up to the time that it executes. This is also known as "schedule | 1415 | up to the time that it executes. This is also known as "schedule |
@@ -914,124 +1423,229 @@ Real-Time environments are interested in the worst case latency. | |||
914 | That is the longest latency it takes for something to happen, | 1423 | That is the longest latency it takes for something to happen, |
915 | and not the average. We can have a very fast scheduler that may | 1424 | and not the average. We can have a very fast scheduler that may |
916 | only have a large latency once in a while, but that would not | 1425 | only have a large latency once in a while, but that would not |
917 | work well with Real-Time tasks. The wakeup tracer was designed | 1426 | work well with Real-Time tasks. The wakeup_rt tracer was designed |
918 | to record the worst case wakeups of RT tasks. Non-RT tasks are | 1427 | to record the worst case wakeups of RT tasks. Non-RT tasks are |
919 | not recorded because the tracer only records one worst case and | 1428 | not recorded because the tracer only records one worst case and |
920 | tracing non-RT tasks that are unpredictable will overwrite the | 1429 | tracing non-RT tasks that are unpredictable will overwrite the |
921 | worst case latency of RT tasks. | 1430 | worst case latency of RT tasks (just run the normal wakeup |
1431 | tracer for a while to see that effect). | ||
922 | 1432 | ||
923 | Since this tracer only deals with RT tasks, we will run this | 1433 | Since this tracer only deals with RT tasks, we will run this |
924 | slightly differently than we did with the previous tracers. | 1434 | slightly differently than we did with the previous tracers. |
925 | Instead of performing an 'ls', we will run 'sleep 1' under | 1435 | Instead of performing an 'ls', we will run 'sleep 1' under |
926 | 'chrt' which changes the priority of the task. | 1436 | 'chrt' which changes the priority of the task. |
927 | 1437 | ||
928 | # echo wakeup > current_tracer | 1438 | # echo 0 > options/function-trace |
929 | # echo latency-format > trace_options | 1439 | # echo wakeup_rt > current_tracer |
930 | # echo 0 > tracing_max_latency | ||
931 | # echo 1 > tracing_on | 1440 | # echo 1 > tracing_on |
1441 | # echo 0 > tracing_max_latency | ||
932 | # chrt -f 5 sleep 1 | 1442 | # chrt -f 5 sleep 1 |
933 | # echo 0 > tracing_on | 1443 | # echo 0 > tracing_on |
934 | # cat trace | 1444 | # cat trace |
935 | # tracer: wakeup | 1445 | # tracer: wakeup |
936 | # | 1446 | # |
937 | wakeup latency trace v1.1.5 on 2.6.26-rc8 | 1447 | # tracer: wakeup_rt |
938 | -------------------------------------------------------------------- | 1448 | # |
939 | latency: 4 us, #2/2, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 1449 | # wakeup_rt latency trace v1.1.5 on 3.8.0-test+ |
940 | ----------------- | 1450 | # -------------------------------------------------------------------- |
941 | | task: sleep-4901 (uid:0 nice:0 policy:1 rt_prio:5) | 1451 | # latency: 5 us, #4/4, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
942 | ----------------- | 1452 | # ----------------- |
943 | 1453 | # | task: sleep-2389 (uid:0 nice:0 policy:1 rt_prio:5) | |
944 | # _------=> CPU# | 1454 | # ----------------- |
945 | # / _-----=> irqs-off | 1455 | # |
946 | # | / _----=> need-resched | 1456 | # _------=> CPU# |
947 | # || / _---=> hardirq/softirq | 1457 | # / _-----=> irqs-off |
948 | # ||| / _--=> preempt-depth | 1458 | # | / _----=> need-resched |
949 | # |||| / | 1459 | # || / _---=> hardirq/softirq |
950 | # ||||| delay | 1460 | # ||| / _--=> preempt-depth |
951 | # cmd pid ||||| time | caller | 1461 | # |||| / delay |
952 | # \ / ||||| \ | / | 1462 | # cmd pid ||||| time | caller |
953 | <idle>-0 1d.h4 0us+: try_to_wake_up (wake_up_process) | 1463 | # \ / ||||| \ | / |
954 | <idle>-0 1d..4 4us : schedule (cpu_idle) | 1464 | <idle>-0 3d.h4 0us : 0:120:R + [003] 2389: 94:R sleep |
955 | 1465 | <idle>-0 3d.h4 1us+: ttwu_do_activate.constprop.87 <-try_to_wake_up | |
956 | 1466 | <idle>-0 3d..3 5us : __schedule <-schedule | |
957 | Running this on an idle system, we see that it only took 4 | 1467 | <idle>-0 3d..3 5us : 0:120:R ==> [003] 2389: 94:R sleep |
958 | microseconds to perform the task switch. Note, since the trace | 1468 | |
959 | marker in the schedule is before the actual "switch", we stop | 1469 | |
960 | the tracing when the recorded task is about to schedule in. This | 1470 | Running this on an idle system, we see that it only took 5 microseconds |
961 | may change if we add a new marker at the end of the scheduler. | 1471 | to perform the task switch. Note, since the trace point in the schedule |
962 | 1472 | is before the actual "switch", we stop the tracing when the recorded task | |
963 | Notice that the recorded task is 'sleep' with the PID of 4901 | 1473 | is about to schedule in. This may change if we add a new marker at the |
1474 | end of the scheduler. | ||
1475 | |||
1476 | Notice that the recorded task is 'sleep' with the PID of 2389 | ||
964 | and it has an rt_prio of 5. This priority is user-space priority | 1477 | and it has an rt_prio of 5. This priority is user-space priority |
965 | and not the internal kernel priority. The policy is 1 for | 1478 | and not the internal kernel priority. The policy is 1 for |
966 | SCHED_FIFO and 2 for SCHED_RR. | 1479 | SCHED_FIFO and 2 for SCHED_RR. |
967 | 1480 | ||
968 | Doing the same with chrt -r 5 and ftrace_enabled set. | 1481 | Note that the trace data shows the internal priority (99 - rtprio). |
969 | 1482 | ||
970 | # tracer: wakeup | 1483 | <idle>-0 3d..3 5us : 0:120:R ==> [003] 2389: 94:R sleep |
1484 | |||
1485 | The 0:120:R means idle was running with a nice priority of 0 (120 - 120) | ||
1486 | and in the running state 'R'. The sleep task was scheduled in with | ||
1487 | 2389: 94:R. That is, the priority is the internal kernel priority (99 - 5 = 94) | ||
1488 | and it too is in the running state. | ||
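
As a quick cross-check of that arithmetic (a rough sketch; the pid and the
exact chrt output will vary), the user-space priority behind the 94 above
can be read back with chrt -p. A task started with 'chrt -f 5' reports a
scheduling priority of 5, which the trace renders as 99 - 5 = 94.

  # chrt -f 5 sleep 10 &
  # chrt -p $!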
1489 | |||
1490 | Doing the same with chrt -r 5 and function-trace set. | ||
1491 | |||
1492 | echo 1 > options/function-trace | ||
1493 | |||
1494 | # tracer: wakeup_rt | ||
971 | # | 1495 | # |
972 | wakeup latency trace v1.1.5 on 2.6.26-rc8 | 1496 | # wakeup_rt latency trace v1.1.5 on 3.8.0-test+ |
973 | -------------------------------------------------------------------- | 1497 | # -------------------------------------------------------------------- |
974 | latency: 50 us, #60/60, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 1498 | # latency: 29 us, #85/85, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
975 | ----------------- | 1499 | # ----------------- |
976 | | task: sleep-4068 (uid:0 nice:0 policy:2 rt_prio:5) | 1500 | # | task: sleep-2448 (uid:0 nice:0 policy:1 rt_prio:5) |
977 | ----------------- | 1501 | # ----------------- |
978 | 1502 | # | |
979 | # _------=> CPU# | 1503 | # _------=> CPU# |
980 | # / _-----=> irqs-off | 1504 | # / _-----=> irqs-off |
981 | # | / _----=> need-resched | 1505 | # | / _----=> need-resched |
982 | # || / _---=> hardirq/softirq | 1506 | # || / _---=> hardirq/softirq |
983 | # ||| / _--=> preempt-depth | 1507 | # ||| / _--=> preempt-depth |
984 | # |||| / | 1508 | # |||| / delay |
985 | # ||||| delay | 1509 | # cmd pid ||||| time | caller |
986 | # cmd pid ||||| time | caller | 1510 | # \ / ||||| \ | / |
987 | # \ / ||||| \ | / | 1511 | <idle>-0 3d.h4 1us+: 0:120:R + [003] 2448: 94:R sleep |
988 | ksoftirq-7 1d.H3 0us : try_to_wake_up (wake_up_process) | 1512 | <idle>-0 3d.h4 2us : ttwu_do_activate.constprop.87 <-try_to_wake_up |
989 | ksoftirq-7 1d.H4 1us : sub_preempt_count (marker_probe_cb) | 1513 | <idle>-0 3d.h3 3us : check_preempt_curr <-ttwu_do_wakeup |
990 | ksoftirq-7 1d.H3 2us : check_preempt_wakeup (try_to_wake_up) | 1514 | <idle>-0 3d.h3 3us : resched_task <-check_preempt_curr |
991 | ksoftirq-7 1d.H3 3us : update_curr (check_preempt_wakeup) | 1515 | <idle>-0 3dNh3 4us : task_woken_rt <-ttwu_do_wakeup |
992 | ksoftirq-7 1d.H3 4us : calc_delta_mine (update_curr) | 1516 | <idle>-0 3dNh3 4us : _raw_spin_unlock <-try_to_wake_up |
993 | ksoftirq-7 1d.H3 5us : __resched_task (check_preempt_wakeup) | 1517 | <idle>-0 3dNh3 4us : sub_preempt_count <-_raw_spin_unlock |
994 | ksoftirq-7 1d.H3 6us : task_wake_up_rt (try_to_wake_up) | 1518 | <idle>-0 3dNh2 5us : ttwu_stat <-try_to_wake_up |
995 | ksoftirq-7 1d.H3 7us : _spin_unlock_irqrestore (try_to_wake_up) | 1519 | <idle>-0 3dNh2 5us : _raw_spin_unlock_irqrestore <-try_to_wake_up |
996 | [...] | 1520 | <idle>-0 3dNh2 6us : sub_preempt_count <-_raw_spin_unlock_irqrestore |
997 | ksoftirq-7 1d.H2 17us : irq_exit (smp_apic_timer_interrupt) | 1521 | <idle>-0 3dNh1 6us : _raw_spin_lock <-__run_hrtimer |
998 | ksoftirq-7 1d.H2 18us : sub_preempt_count (irq_exit) | 1522 | <idle>-0 3dNh1 6us : add_preempt_count <-_raw_spin_lock |
999 | ksoftirq-7 1d.s3 19us : sub_preempt_count (irq_exit) | 1523 | <idle>-0 3dNh2 7us : _raw_spin_unlock <-hrtimer_interrupt |
1000 | ksoftirq-7 1..s2 20us : rcu_process_callbacks (__do_softirq) | 1524 | <idle>-0 3dNh2 7us : sub_preempt_count <-_raw_spin_unlock |
1001 | [...] | 1525 | <idle>-0 3dNh1 7us : tick_program_event <-hrtimer_interrupt |
1002 | ksoftirq-7 1..s2 26us : __rcu_process_callbacks (rcu_process_callbacks) | 1526 | <idle>-0 3dNh1 7us : clockevents_program_event <-tick_program_event |
1003 | ksoftirq-7 1d.s2 27us : _local_bh_enable (__do_softirq) | 1527 | <idle>-0 3dNh1 8us : ktime_get <-clockevents_program_event |
1004 | ksoftirq-7 1d.s2 28us : sub_preempt_count (_local_bh_enable) | 1528 | <idle>-0 3dNh1 8us : lapic_next_event <-clockevents_program_event |
1005 | ksoftirq-7 1.N.3 29us : sub_preempt_count (ksoftirqd) | 1529 | <idle>-0 3dNh1 8us : irq_exit <-smp_apic_timer_interrupt |
1006 | ksoftirq-7 1.N.2 30us : _cond_resched (ksoftirqd) | 1530 | <idle>-0 3dNh1 9us : sub_preempt_count <-irq_exit |
1007 | ksoftirq-7 1.N.2 31us : __cond_resched (_cond_resched) | 1531 | <idle>-0 3dN.2 9us : idle_cpu <-irq_exit |
1008 | ksoftirq-7 1.N.2 32us : add_preempt_count (__cond_resched) | 1532 | <idle>-0 3dN.2 9us : rcu_irq_exit <-irq_exit |
1009 | ksoftirq-7 1.N.2 33us : schedule (__cond_resched) | 1533 | <idle>-0 3dN.2 10us : rcu_eqs_enter_common.isra.45 <-rcu_irq_exit |
1010 | ksoftirq-7 1.N.2 33us : add_preempt_count (schedule) | 1534 | <idle>-0 3dN.2 10us : sub_preempt_count <-irq_exit |
1011 | ksoftirq-7 1.N.3 34us : hrtick_clear (schedule) | 1535 | <idle>-0 3.N.1 11us : rcu_idle_exit <-cpu_idle |
1012 | ksoftirq-7 1dN.3 35us : _spin_lock (schedule) | 1536 | <idle>-0 3dN.1 11us : rcu_eqs_exit_common.isra.43 <-rcu_idle_exit |
1013 | ksoftirq-7 1dN.3 36us : add_preempt_count (_spin_lock) | 1537 | <idle>-0 3.N.1 11us : tick_nohz_idle_exit <-cpu_idle |
1014 | ksoftirq-7 1d..4 37us : put_prev_task_fair (schedule) | 1538 | <idle>-0 3dN.1 12us : menu_hrtimer_cancel <-tick_nohz_idle_exit |
1015 | ksoftirq-7 1d..4 38us : update_curr (put_prev_task_fair) | 1539 | <idle>-0 3dN.1 12us : ktime_get <-tick_nohz_idle_exit |
1016 | [...] | 1540 | <idle>-0 3dN.1 12us : tick_do_update_jiffies64 <-tick_nohz_idle_exit |
1017 | ksoftirq-7 1d..5 47us : _spin_trylock (tracing_record_cmdline) | 1541 | <idle>-0 3dN.1 13us : update_cpu_load_nohz <-tick_nohz_idle_exit |
1018 | ksoftirq-7 1d..5 48us : add_preempt_count (_spin_trylock) | 1542 | <idle>-0 3dN.1 13us : _raw_spin_lock <-update_cpu_load_nohz |
1019 | ksoftirq-7 1d..6 49us : _spin_unlock (tracing_record_cmdline) | 1543 | <idle>-0 3dN.1 13us : add_preempt_count <-_raw_spin_lock |
1020 | ksoftirq-7 1d..6 49us : sub_preempt_count (_spin_unlock) | 1544 | <idle>-0 3dN.2 13us : __update_cpu_load <-update_cpu_load_nohz |
1021 | ksoftirq-7 1d..4 50us : schedule (__cond_resched) | 1545 | <idle>-0 3dN.2 14us : sched_avg_update <-__update_cpu_load |
1022 | 1546 | <idle>-0 3dN.2 14us : _raw_spin_unlock <-update_cpu_load_nohz | |
1023 | The interrupt went off while running ksoftirqd. This task runs | 1547 | <idle>-0 3dN.2 14us : sub_preempt_count <-_raw_spin_unlock |
1024 | at SCHED_OTHER. Why did not we see the 'N' set early? This may | 1548 | <idle>-0 3dN.1 15us : calc_load_exit_idle <-tick_nohz_idle_exit |
1025 | be a harmless bug with x86_32 and 4K stacks. On x86_32 with 4K | 1549 | <idle>-0 3dN.1 15us : touch_softlockup_watchdog <-tick_nohz_idle_exit |
1026 | stacks configured, the interrupt and softirq run with their own | 1550 | <idle>-0 3dN.1 15us : hrtimer_cancel <-tick_nohz_idle_exit |
1027 | stack. Some information is held on the top of the task's stack | 1551 | <idle>-0 3dN.1 15us : hrtimer_try_to_cancel <-hrtimer_cancel |
1028 | (need_resched and preempt_count are both stored there). The | 1552 | <idle>-0 3dN.1 16us : lock_hrtimer_base.isra.18 <-hrtimer_try_to_cancel |
1029 | setting of the NEED_RESCHED bit is done directly to the task's | 1553 | <idle>-0 3dN.1 16us : _raw_spin_lock_irqsave <-lock_hrtimer_base.isra.18 |
1030 | stack, but the reading of the NEED_RESCHED is done by looking at | 1554 | <idle>-0 3dN.1 16us : add_preempt_count <-_raw_spin_lock_irqsave |
1031 | the current stack, which in this case is the stack for the hard | 1555 | <idle>-0 3dN.2 17us : __remove_hrtimer <-remove_hrtimer.part.16 |
1032 | interrupt. This hides the fact that NEED_RESCHED has been set. | 1556 | <idle>-0 3dN.2 17us : hrtimer_force_reprogram <-__remove_hrtimer |
1033 | We do not see the 'N' until we switch back to the task's | 1557 | <idle>-0 3dN.2 17us : tick_program_event <-hrtimer_force_reprogram |
1034 | assigned stack. | 1558 | <idle>-0 3dN.2 18us : clockevents_program_event <-tick_program_event |
1559 | <idle>-0 3dN.2 18us : ktime_get <-clockevents_program_event | ||
1560 | <idle>-0 3dN.2 18us : lapic_next_event <-clockevents_program_event | ||
1561 | <idle>-0 3dN.2 19us : _raw_spin_unlock_irqrestore <-hrtimer_try_to_cancel | ||
1562 | <idle>-0 3dN.2 19us : sub_preempt_count <-_raw_spin_unlock_irqrestore | ||
1563 | <idle>-0 3dN.1 19us : hrtimer_forward <-tick_nohz_idle_exit | ||
1564 | <idle>-0 3dN.1 20us : ktime_add_safe <-hrtimer_forward | ||
1565 | <idle>-0 3dN.1 20us : ktime_add_safe <-hrtimer_forward | ||
1566 | <idle>-0 3dN.1 20us : hrtimer_start_range_ns <-hrtimer_start_expires.constprop.11 | ||
1567 | <idle>-0 3dN.1 20us : __hrtimer_start_range_ns <-hrtimer_start_range_ns | ||
1568 | <idle>-0 3dN.1 21us : lock_hrtimer_base.isra.18 <-__hrtimer_start_range_ns | ||
1569 | <idle>-0 3dN.1 21us : _raw_spin_lock_irqsave <-lock_hrtimer_base.isra.18 | ||
1570 | <idle>-0 3dN.1 21us : add_preempt_count <-_raw_spin_lock_irqsave | ||
1571 | <idle>-0 3dN.2 22us : ktime_add_safe <-__hrtimer_start_range_ns | ||
1572 | <idle>-0 3dN.2 22us : enqueue_hrtimer <-__hrtimer_start_range_ns | ||
1573 | <idle>-0 3dN.2 22us : tick_program_event <-__hrtimer_start_range_ns | ||
1574 | <idle>-0 3dN.2 23us : clockevents_program_event <-tick_program_event | ||
1575 | <idle>-0 3dN.2 23us : ktime_get <-clockevents_program_event | ||
1576 | <idle>-0 3dN.2 23us : lapic_next_event <-clockevents_program_event | ||
1577 | <idle>-0 3dN.2 24us : _raw_spin_unlock_irqrestore <-__hrtimer_start_range_ns | ||
1578 | <idle>-0 3dN.2 24us : sub_preempt_count <-_raw_spin_unlock_irqrestore | ||
1579 | <idle>-0 3dN.1 24us : account_idle_ticks <-tick_nohz_idle_exit | ||
1580 | <idle>-0 3dN.1 24us : account_idle_time <-account_idle_ticks | ||
1581 | <idle>-0 3.N.1 25us : sub_preempt_count <-cpu_idle | ||
1582 | <idle>-0 3.N.. 25us : schedule <-cpu_idle | ||
1583 | <idle>-0 3.N.. 25us : __schedule <-preempt_schedule | ||
1584 | <idle>-0 3.N.. 26us : add_preempt_count <-__schedule | ||
1585 | <idle>-0 3.N.1 26us : rcu_note_context_switch <-__schedule | ||
1586 | <idle>-0 3.N.1 26us : rcu_sched_qs <-rcu_note_context_switch | ||
1587 | <idle>-0 3dN.1 27us : rcu_preempt_qs <-rcu_note_context_switch | ||
1588 | <idle>-0 3.N.1 27us : _raw_spin_lock_irq <-__schedule | ||
1589 | <idle>-0 3dN.1 27us : add_preempt_count <-_raw_spin_lock_irq | ||
1590 | <idle>-0 3dN.2 28us : put_prev_task_idle <-__schedule | ||
1591 | <idle>-0 3dN.2 28us : pick_next_task_stop <-pick_next_task | ||
1592 | <idle>-0 3dN.2 28us : pick_next_task_rt <-pick_next_task | ||
1593 | <idle>-0 3dN.2 29us : dequeue_pushable_task <-pick_next_task_rt | ||
1594 | <idle>-0 3d..3 29us : __schedule <-preempt_schedule | ||
1595 | <idle>-0 3d..3 30us : 0:120:R ==> [003] 2448: 94:R sleep | ||
1596 | |||
1597 | This isn't that big of a trace, even with function tracing enabled, | ||
1598 | so I included the entire trace. | ||
1599 | |||
1600 | The interrupt went off while the system was idle. Somewhere | ||
1601 | before task_woken_rt() was called, the NEED_RESCHED flag was set; | ||
1602 | this is indicated by the first occurrence of the 'N' flag. | ||
1603 | |||
1604 | Latency tracing and events | ||
1605 | -------------------------- | ||
1606 | Function tracing can induce a much larger latency, but without | ||
1607 | seeing what happens within the latency it is hard to know what | ||
1608 | caused it. There is a middle ground, and that is to enable | ||
1609 | events. | ||
1610 | |||
1611 | # echo 0 > options/function-trace | ||
1612 | # echo wakeup_rt > current_tracer | ||
1613 | # echo 1 > events/enable | ||
1614 | # echo 1 > tracing_on | ||
1615 | # echo 0 > tracing_max_latency | ||
1616 | # chrt -f 5 sleep 1 | ||
1617 | # echo 0 > tracing_on | ||
1618 | # cat trace | ||
1619 | # tracer: wakeup_rt | ||
1620 | # | ||
1621 | # wakeup_rt latency trace v1.1.5 on 3.8.0-test+ | ||
1622 | # -------------------------------------------------------------------- | ||
1623 | # latency: 6 us, #12/12, CPU#2 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) | ||
1624 | # ----------------- | ||
1625 | # | task: sleep-5882 (uid:0 nice:0 policy:1 rt_prio:5) | ||
1626 | # ----------------- | ||
1627 | # | ||
1628 | # _------=> CPU# | ||
1629 | # / _-----=> irqs-off | ||
1630 | # | / _----=> need-resched | ||
1631 | # || / _---=> hardirq/softirq | ||
1632 | # ||| / _--=> preempt-depth | ||
1633 | # |||| / delay | ||
1634 | # cmd pid ||||| time | caller | ||
1635 | # \ / ||||| \ | / | ||
1636 | <idle>-0 2d.h4 0us : 0:120:R + [002] 5882: 94:R sleep | ||
1637 | <idle>-0 2d.h4 0us : ttwu_do_activate.constprop.87 <-try_to_wake_up | ||
1638 | <idle>-0 2d.h4 1us : sched_wakeup: comm=sleep pid=5882 prio=94 success=1 target_cpu=002 | ||
1639 | <idle>-0 2dNh2 1us : hrtimer_expire_exit: hrtimer=ffff88007796feb8 | ||
1640 | <idle>-0 2.N.2 2us : power_end: cpu_id=2 | ||
1641 | <idle>-0 2.N.2 3us : cpu_idle: state=4294967295 cpu_id=2 | ||
1642 | <idle>-0 2dN.3 4us : hrtimer_cancel: hrtimer=ffff88007d50d5e0 | ||
1643 | <idle>-0 2dN.3 4us : hrtimer_start: hrtimer=ffff88007d50d5e0 function=tick_sched_timer expires=34311211000000 softexpires=34311211000000 | ||
1644 | <idle>-0 2.N.2 5us : rcu_utilization: Start context switch | ||
1645 | <idle>-0 2.N.2 5us : rcu_utilization: End context switch | ||
1646 | <idle>-0 2d..3 6us : __schedule <-schedule | ||
1647 | <idle>-0 2d..3 6us : 0:120:R ==> [002] 5882: 94:R sleep | ||
1648 | |||
1035 | 1649 | ||
1036 | function | 1650 | function |
1037 | -------- | 1651 | -------- |
@@ -1039,6 +1653,7 @@ function | |||
1039 | This tracer is the function tracer. Enabling the function tracer | 1653 | This tracer is the function tracer. Enabling the function tracer |
1040 | can be done from the debug file system. Make sure the | 1654 | can be done from the debug file system. Make sure the |
1041 | ftrace_enabled is set; otherwise this tracer is a nop. | 1655 | ftrace_enabled is set; otherwise this tracer is a nop. |
1656 | See the "ftrace_enabled" section below. | ||
1042 | 1657 | ||
1043 | # sysctl kernel.ftrace_enabled=1 | 1658 | # sysctl kernel.ftrace_enabled=1 |
1044 | # echo function > current_tracer | 1659 | # echo function > current_tracer |
@@ -1048,23 +1663,23 @@ ftrace_enabled is set; otherwise this tracer is a nop. | |||
1048 | # cat trace | 1663 | # cat trace |
1049 | # tracer: function | 1664 | # tracer: function |
1050 | # | 1665 | # |
1051 | # TASK-PID CPU# TIMESTAMP FUNCTION | 1666 | # entries-in-buffer/entries-written: 24799/24799 #P:4 |
1052 | # | | | | | | 1667 | # |
1053 | bash-4003 [00] 123.638713: finish_task_switch <-schedule | 1668 | # _-----=> irqs-off |
1054 | bash-4003 [00] 123.638714: _spin_unlock_irq <-finish_task_switch | 1669 | # / _----=> need-resched |
1055 | bash-4003 [00] 123.638714: sub_preempt_count <-_spin_unlock_irq | 1670 | # | / _---=> hardirq/softirq |
1056 | bash-4003 [00] 123.638715: hrtick_set <-schedule | 1671 | # || / _--=> preempt-depth |
1057 | bash-4003 [00] 123.638715: _spin_lock_irqsave <-hrtick_set | 1672 | # ||| / delay |
1058 | bash-4003 [00] 123.638716: add_preempt_count <-_spin_lock_irqsave | 1673 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION |
1059 | bash-4003 [00] 123.638716: _spin_unlock_irqrestore <-hrtick_set | 1674 | # | | | |||| | | |
1060 | bash-4003 [00] 123.638717: sub_preempt_count <-_spin_unlock_irqrestore | 1675 | bash-1994 [002] .... 3082.063030: mutex_unlock <-rb_simple_write |
1061 | bash-4003 [00] 123.638717: hrtick_clear <-hrtick_set | 1676 | bash-1994 [002] .... 3082.063031: __mutex_unlock_slowpath <-mutex_unlock |
1062 | bash-4003 [00] 123.638718: sub_preempt_count <-schedule | 1677 | bash-1994 [002] .... 3082.063031: __fsnotify_parent <-fsnotify_modify |
1063 | bash-4003 [00] 123.638718: sub_preempt_count <-preempt_schedule | 1678 | bash-1994 [002] .... 3082.063032: fsnotify <-fsnotify_modify |
1064 | bash-4003 [00] 123.638719: wait_for_completion <-__stop_machine_run | 1679 | bash-1994 [002] .... 3082.063032: __srcu_read_lock <-fsnotify |
1065 | bash-4003 [00] 123.638719: wait_for_common <-wait_for_completion | 1680 | bash-1994 [002] .... 3082.063032: add_preempt_count <-__srcu_read_lock |
1066 | bash-4003 [00] 123.638720: _spin_lock_irq <-wait_for_common | 1681 | bash-1994 [002] ...1 3082.063032: sub_preempt_count <-__srcu_read_lock |
1067 | bash-4003 [00] 123.638720: add_preempt_count <-_spin_lock_irq | 1682 | bash-1994 [002] .... 3082.063033: __srcu_read_unlock <-fsnotify |
1068 | [...] | 1683 | [...] |
1069 | 1684 | ||
1070 | 1685 | ||
@@ -1214,79 +1829,19 @@ int main (int argc, char **argv) | |||
1214 | return 0; | 1829 | return 0; |
1215 | } | 1830 | } |
1216 | 1831 | ||
1832 | Or this simple script! | ||
1217 | 1833 | ||
1218 | hw-branch-tracer (x86 only) | 1834 | ------ |
1219 | --------------------------- | 1835 | #!/bin/bash |
1220 | 1836 | ||
1221 | This tracer uses the x86 last branch tracing hardware feature to | 1837 | debugfs=`sed -ne 's/^debugfs \(.*\) debugfs.*/\1/p' /proc/mounts` |
1222 | collect a branch trace on all cpus with relatively low overhead. | 1838 | echo nop > $debugfs/tracing/current_tracer |
1223 | 1839 | echo 0 > $debugfs/tracing/tracing_on | |
1224 | The tracer uses a fixed-size circular buffer per cpu and only | 1840 | echo $$ > $debugfs/tracing/set_ftrace_pid |
1225 | traces ring 0 branches. The trace file dumps that buffer in the | 1841 | echo function > $debugfs/tracing/current_tracer |
1226 | following format: | 1842 | echo 1 > $debugfs/tracing/tracing_on |
1227 | 1843 | exec "$@" | |
1228 | # tracer: hw-branch-tracer | 1844 | ------ |
1229 | # | ||
1230 | # CPU# TO <- FROM | ||
1231 | 0 scheduler_tick+0xb5/0x1bf <- task_tick_idle+0x5/0x6 | ||
1232 | 2 run_posix_cpu_timers+0x2b/0x72a <- run_posix_cpu_timers+0x25/0x72a | ||
1233 | 0 scheduler_tick+0x139/0x1bf <- scheduler_tick+0xed/0x1bf | ||
1234 | 0 scheduler_tick+0x17c/0x1bf <- scheduler_tick+0x148/0x1bf | ||
1235 | 2 run_posix_cpu_timers+0x9e/0x72a <- run_posix_cpu_timers+0x5e/0x72a | ||
1236 | 0 scheduler_tick+0x1b6/0x1bf <- scheduler_tick+0x1aa/0x1bf | ||
1237 | |||
1238 | |||
1239 | The tracer may be used to dump the trace for the oops'ing cpu on | ||
1240 | a kernel oops into the system log. To enable this, | ||
1241 | ftrace_dump_on_oops must be set. To set ftrace_dump_on_oops, one | ||
1242 | can either use the sysctl function or set it via the proc system | ||
1243 | interface. | ||
1244 | |||
1245 | sysctl kernel.ftrace_dump_on_oops=n | ||
1246 | |||
1247 | or | ||
1248 | |||
1249 | echo n > /proc/sys/kernel/ftrace_dump_on_oops | ||
1250 | |||
1251 | If n = 1, ftrace will dump buffers of all CPUs, if n = 2 ftrace will | ||
1252 | only dump the buffer of the CPU that triggered the oops. | ||
1253 | |||
1254 | Here's an example of such a dump after a null pointer | ||
1255 | dereference in a kernel module: | ||
1256 | |||
1257 | [57848.105921] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 | ||
1258 | [57848.106019] IP: [<ffffffffa0000006>] open+0x6/0x14 [oops] | ||
1259 | [57848.106019] PGD 2354e9067 PUD 2375e7067 PMD 0 | ||
1260 | [57848.106019] Oops: 0002 [#1] SMP | ||
1261 | [57848.106019] last sysfs file: /sys/devices/pci0000:00/0000:00:1e.0/0000:20:05.0/local_cpus | ||
1262 | [57848.106019] Dumping ftrace buffer: | ||
1263 | [57848.106019] --------------------------------- | ||
1264 | [...] | ||
1265 | [57848.106019] 0 chrdev_open+0xe6/0x165 <- cdev_put+0x23/0x24 | ||
1266 | [57848.106019] 0 chrdev_open+0x117/0x165 <- chrdev_open+0xfa/0x165 | ||
1267 | [57848.106019] 0 chrdev_open+0x120/0x165 <- chrdev_open+0x11c/0x165 | ||
1268 | [57848.106019] 0 chrdev_open+0x134/0x165 <- chrdev_open+0x12b/0x165 | ||
1269 | [57848.106019] 0 open+0x0/0x14 [oops] <- chrdev_open+0x144/0x165 | ||
1270 | [57848.106019] 0 page_fault+0x0/0x30 <- open+0x6/0x14 [oops] | ||
1271 | [57848.106019] 0 error_entry+0x0/0x5b <- page_fault+0x4/0x30 | ||
1272 | [57848.106019] 0 error_kernelspace+0x0/0x31 <- error_entry+0x59/0x5b | ||
1273 | [57848.106019] 0 error_sti+0x0/0x1 <- error_kernelspace+0x2d/0x31 | ||
1274 | [57848.106019] 0 page_fault+0x9/0x30 <- error_sti+0x0/0x1 | ||
1275 | [57848.106019] 0 do_page_fault+0x0/0x881 <- page_fault+0x1a/0x30 | ||
1276 | [...] | ||
1277 | [57848.106019] 0 do_page_fault+0x66b/0x881 <- is_prefetch+0x1ee/0x1f2 | ||
1278 | [57848.106019] 0 do_page_fault+0x6e0/0x881 <- do_page_fault+0x67a/0x881 | ||
1279 | [57848.106019] 0 oops_begin+0x0/0x96 <- do_page_fault+0x6e0/0x881 | ||
1280 | [57848.106019] 0 trace_hw_branch_oops+0x0/0x2d <- oops_begin+0x9/0x96 | ||
1281 | [...] | ||
1282 | [57848.106019] 0 ds_suspend_bts+0x2a/0xe3 <- ds_suspend_bts+0x1a/0xe3 | ||
1283 | [57848.106019] --------------------------------- | ||
1284 | [57848.106019] CPU 0 | ||
1285 | [57848.106019] Modules linked in: oops | ||
1286 | [57848.106019] Pid: 5542, comm: cat Tainted: G W 2.6.28 #23 | ||
1287 | [57848.106019] RIP: 0010:[<ffffffffa0000006>] [<ffffffffa0000006>] open+0x6/0x14 [oops] | ||
1288 | [57848.106019] RSP: 0018:ffff880235457d48 EFLAGS: 00010246 | ||
1289 | [...] | ||
1290 | 1845 | ||
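As a usage sketch for the script above (the file name ftrace-me.sh is
arbitrary, and the debugfs mount is assumed to be the usual
/sys/kernel/debug), save it, make it executable, and run the command to
be traced through it. Only that command's functions end up in the buffer,
because of the set_ftrace_pid write:

  # ./ftrace-me.sh ls -l /tmp > /dev/null
  # cat /sys/kernel/debug/tracing/trace | head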
1291 | 1846 | ||
1292 | function graph tracer | 1847 | function graph tracer |
@@ -1473,16 +2028,18 @@ starts of pointing to a simple return. (Enabling FTRACE will | |||
1473 | include the -pg switch in the compiling of the kernel.) | 2028 | include the -pg switch in the compiling of the kernel.) |
1474 | 2029 | ||
1475 | At compile time every C file object is run through the | 2030 | At compile time every C file object is run through the |
1476 | recordmcount.pl script (located in the scripts directory). This | 2031 | recordmcount program (located in the scripts directory). This |
1477 | script will process the C object using objdump to find all the | 2032 | program will parse the ELF headers in the C object to find all |
1478 | locations in the .text section that call mcount. (Note, only the | 2033 | the locations in the .text section that call mcount. (Note, only |
1479 | .text section is processed, since processing other sections like | 2034 | white listed .text sections are processed, since processing other |
1480 | .init.text may cause races due to those sections being freed). | 2035 | sections like .init.text may cause races due to those sections |
2036 | being freed unexpectedly). | ||
1481 | 2037 | ||
1482 | A new section called "__mcount_loc" is created that holds | 2038 | A new section called "__mcount_loc" is created that holds |
1483 | references to all the mcount call sites in the .text section. | 2039 | references to all the mcount call sites in the .text section. |
1484 | This section is compiled back into the original object. The | 2040 | The recordmcount program re-links this section back into the |
1485 | final linker will add all these references into a single table. | 2041 | original object. The final linking stage of the kernel will add all these |
2042 | references into a single table. | ||
1486 | 2043 | ||
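A rough way to see the result on a built object (a sketch; the object
name is only an example and the section size will vary) is to list its
section headers with objdump and look for __mcount_loc. The section only
shows up in objects compiled with -pg and processed by recordmcount:

  # objdump -h kernel/sched/core.o | grep __mcount_loc
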
1487 | On boot up, before SMP is initialized, the dynamic ftrace code | 2044 | On boot up, before SMP is initialized, the dynamic ftrace code |
1488 | scans this table and updates all the locations into nops. It | 2045 | scans this table and updates all the locations into nops. It |
@@ -1493,13 +2050,25 @@ unloaded, it also removes its functions from the ftrace function | |||
1493 | list. This is automatic in the module unload code, and the | 2050 | list. This is automatic in the module unload code, and the |
1494 | module author does not need to worry about it. | 2051 | module author does not need to worry about it. |
1495 | 2052 | ||
1496 | When tracing is enabled, kstop_machine is called to prevent | 2053 | When tracing is enabled, the process of modifying the function |
1497 | races with the CPUS executing code being modified (which can | 2054 | tracepoints is dependent on architecture. The old method is to use |
1498 | cause the CPU to do undesirable things), and the nops are | 2055 | kstop_machine to prevent races with the CPUs executing code being |
2056 | modified (which can cause the CPU to do undesirable things, especially | ||
2057 | if the modified code crosses cache (or page) boundaries), and the nops are | ||
1499 | patched back to calls. But this time, they do not call mcount | 2058 | patched back to calls. But this time, they do not call mcount |
1500 | (which is just a function stub). They now call into the ftrace | 2059 | (which is just a function stub). They now call into the ftrace |
1501 | infrastructure. | 2060 | infrastructure. |
1502 | 2061 | ||
2062 | The new method of modifying the function tracepoints is to place | ||
2063 | a breakpoint at the location to be modified, sync all CPUs, and | ||
2064 | modify the rest of the instruction not covered by the breakpoint. | ||
2065 | The CPUs are synced again, and then the breakpoint is replaced | ||
2066 | with the finished ftrace call. | ||
2067 | |||
2068 | Some archs do not even need to monkey around with the synchronization, | ||
2069 | and can just slap the new code on top of the old without any | ||
2070 | problems with other CPUs executing it at the same time. | ||
2071 | |||
1503 | One special side-effect to the recording of the functions being | 2072 | One special side-effect to the recording of the functions being |
1504 | traced is that we can now selectively choose which functions we | 2073 | traced is that we can now selectively choose which functions we |
1505 | wish to trace and which ones we want the mcount calls to remain | 2074 | wish to trace and which ones we want the mcount calls to remain |
@@ -1530,20 +2099,28 @@ mutex_lock | |||
1530 | 2099 | ||
1531 | If I am only interested in sys_nanosleep and hrtimer_interrupt: | 2100 | If I am only interested in sys_nanosleep and hrtimer_interrupt: |
1532 | 2101 | ||
1533 | # echo sys_nanosleep hrtimer_interrupt \ | 2102 | # echo sys_nanosleep hrtimer_interrupt > set_ftrace_filter |
1534 | > set_ftrace_filter | ||
1535 | # echo function > current_tracer | 2103 | # echo function > current_tracer |
1536 | # echo 1 > tracing_on | 2104 | # echo 1 > tracing_on |
1537 | # usleep 1 | 2105 | # usleep 1 |
1538 | # echo 0 > tracing_on | 2106 | # echo 0 > tracing_on |
1539 | # cat trace | 2107 | # cat trace |
1540 | # tracer: ftrace | 2108 | # tracer: function |
2109 | # | ||
2110 | # entries-in-buffer/entries-written: 5/5 #P:4 | ||
1541 | # | 2111 | # |
1542 | # TASK-PID CPU# TIMESTAMP FUNCTION | 2112 | # _-----=> irqs-off |
1543 | # | | | | | | 2113 | # / _----=> need-resched |
1544 | usleep-4134 [00] 1317.070017: hrtimer_interrupt <-smp_apic_timer_interrupt | 2114 | # | / _---=> hardirq/softirq |
1545 | usleep-4134 [00] 1317.070111: sys_nanosleep <-syscall_call | 2115 | # || / _--=> preempt-depth |
1546 | <idle>-0 [00] 1317.070115: hrtimer_interrupt <-smp_apic_timer_interrupt | 2116 | # ||| / delay |
2117 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION | ||
2118 | # | | | |||| | | | ||
2119 | usleep-2665 [001] .... 4186.475355: sys_nanosleep <-system_call_fastpath | ||
2120 | <idle>-0 [001] d.h1 4186.475409: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
2121 | usleep-2665 [001] d.h1 4186.475426: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
2122 | <idle>-0 [003] d.h1 4186.475426: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
2123 | <idle>-0 [002] d.h1 4186.475427: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
1547 | 2124 | ||
1548 | To see which functions are being traced, you can cat the file: | 2125 | To see which functions are being traced, you can cat the file: |
1549 | 2126 | ||
@@ -1571,20 +2148,25 @@ Note: It is better to use quotes to enclose the wild cards, | |||
1571 | 2148 | ||
1572 | Produces: | 2149 | Produces: |
1573 | 2150 | ||
1574 | # tracer: ftrace | 2151 | # tracer: function |
1575 | # | 2152 | # |
1576 | # TASK-PID CPU# TIMESTAMP FUNCTION | 2153 | # entries-in-buffer/entries-written: 897/897 #P:4 |
1577 | # | | | | | | 2154 | # |
1578 | bash-4003 [00] 1480.611794: hrtimer_init <-copy_process | 2155 | # _-----=> irqs-off |
1579 | bash-4003 [00] 1480.611941: hrtimer_start <-hrtick_set | 2156 | # / _----=> need-resched |
1580 | bash-4003 [00] 1480.611956: hrtimer_cancel <-hrtick_clear | 2157 | # | / _---=> hardirq/softirq |
1581 | bash-4003 [00] 1480.611956: hrtimer_try_to_cancel <-hrtimer_cancel | 2158 | # || / _--=> preempt-depth |
1582 | <idle>-0 [00] 1480.612019: hrtimer_get_next_event <-get_next_timer_interrupt | 2159 | # ||| / delay |
1583 | <idle>-0 [00] 1480.612025: hrtimer_get_next_event <-get_next_timer_interrupt | 2160 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION |
1584 | <idle>-0 [00] 1480.612032: hrtimer_get_next_event <-get_next_timer_interrupt | 2161 | # | | | |||| | | |
1585 | <idle>-0 [00] 1480.612037: hrtimer_get_next_event <-get_next_timer_interrupt | 2162 | <idle>-0 [003] dN.1 4228.547803: hrtimer_cancel <-tick_nohz_idle_exit |
1586 | <idle>-0 [00] 1480.612382: hrtimer_get_next_event <-get_next_timer_interrupt | 2163 | <idle>-0 [003] dN.1 4228.547804: hrtimer_try_to_cancel <-hrtimer_cancel |
1587 | 2164 | <idle>-0 [003] dN.2 4228.547805: hrtimer_force_reprogram <-__remove_hrtimer | |
2165 | <idle>-0 [003] dN.1 4228.547805: hrtimer_forward <-tick_nohz_idle_exit | ||
2166 | <idle>-0 [003] dN.1 4228.547805: hrtimer_start_range_ns <-hrtimer_start_expires.constprop.11 | ||
2167 | <idle>-0 [003] d..1 4228.547858: hrtimer_get_next_event <-get_next_timer_interrupt | ||
2168 | <idle>-0 [003] d..1 4228.547859: hrtimer_start <-__tick_nohz_idle_enter | ||
2169 | <idle>-0 [003] d..2 4228.547860: hrtimer_force_reprogram <-__rem | ||
1588 | 2170 | ||
1589 | Notice that we lost the sys_nanosleep. | 2171 | Notice that we lost the sys_nanosleep. |
1590 | 2172 | ||
@@ -1651,19 +2233,29 @@ traced. | |||
1651 | 2233 | ||
1652 | Produces: | 2234 | Produces: |
1653 | 2235 | ||
1654 | # tracer: ftrace | 2236 | # tracer: function |
2237 | # | ||
2238 | # entries-in-buffer/entries-written: 39608/39608 #P:4 | ||
1655 | # | 2239 | # |
1656 | # TASK-PID CPU# TIMESTAMP FUNCTION | 2240 | # _-----=> irqs-off |
1657 | # | | | | | | 2241 | # / _----=> need-resched |
1658 | bash-4043 [01] 115.281644: finish_task_switch <-schedule | 2242 | # | / _---=> hardirq/softirq |
1659 | bash-4043 [01] 115.281645: hrtick_set <-schedule | 2243 | # || / _--=> preempt-depth |
1660 | bash-4043 [01] 115.281645: hrtick_clear <-hrtick_set | 2244 | # ||| / delay |
1661 | bash-4043 [01] 115.281646: wait_for_completion <-__stop_machine_run | 2245 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION |
1662 | bash-4043 [01] 115.281647: wait_for_common <-wait_for_completion | 2246 | # | | | |||| | | |
1663 | bash-4043 [01] 115.281647: kthread_stop <-stop_machine_run | 2247 | bash-1994 [000] .... 4342.324896: file_ra_state_init <-do_dentry_open |
1664 | bash-4043 [01] 115.281648: init_waitqueue_head <-kthread_stop | 2248 | bash-1994 [000] .... 4342.324897: open_check_o_direct <-do_last |
1665 | bash-4043 [01] 115.281648: wake_up_process <-kthread_stop | 2249 | bash-1994 [000] .... 4342.324897: ima_file_check <-do_last |
1666 | bash-4043 [01] 115.281649: try_to_wake_up <-wake_up_process | 2250 | bash-1994 [000] .... 4342.324898: process_measurement <-ima_file_check |
2251 | bash-1994 [000] .... 4342.324898: ima_get_action <-process_measurement | ||
2252 | bash-1994 [000] .... 4342.324898: ima_match_policy <-ima_get_action | ||
2253 | bash-1994 [000] .... 4342.324899: do_truncate <-do_last | ||
2254 | bash-1994 [000] .... 4342.324899: should_remove_suid <-do_truncate | ||
2255 | bash-1994 [000] .... 4342.324899: notify_change <-do_truncate | ||
2256 | bash-1994 [000] .... 4342.324900: current_fs_time <-notify_change | ||
2257 | bash-1994 [000] .... 4342.324900: current_kernel_time <-current_fs_time | ||
2258 | bash-1994 [000] .... 4342.324900: timespec_trunc <-current_fs_time | ||
1667 | 2259 | ||
1668 | We can see that there's no more lock or preempt tracing. | 2260 | We can see that there's no more lock or preempt tracing. |
1669 | 2261 | ||
@@ -1729,6 +2321,28 @@ this special filter via: | |||
1729 | echo > set_graph_function | 2321 | echo > set_graph_function |
1730 | 2322 | ||
1731 | 2323 | ||
2324 | ftrace_enabled | ||
2325 | -------------- | ||
2326 | |||
2327 | Note, the proc sysctl ftrace_enabled is a big on/off switch for the | ||
2328 | function tracer. By default it is enabled (when function tracing is | ||
2329 | enabled in the kernel). If it is disabled, all function tracing is | ||
2330 | disabled. This affects not only ftrace's function tracers, but also | ||
2331 | any other users of function tracing (perf, kprobes, stack tracing, profiling, etc). | ||
2332 | |||
2333 | Please disable this with care. | ||
2334 | |||
2335 | This can be disabled (and enabled) with: | ||
2336 | |||
2337 | sysctl kernel.ftrace_enabled=0 | ||
2338 | sysctl kernel.ftrace_enabled=1 | ||
2339 | |||
2340 | or | ||
2341 | |||
2342 | echo 0 > /proc/sys/kernel/ftrace_enabled | ||
2343 | echo 1 > /proc/sys/kernel/ftrace_enabled | ||
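
For example (a brief sketch, run from the tracing directory), with the
switch off the function tracer can still be selected, but it records
nothing:

  # sysctl kernel.ftrace_enabled=0
  # echo function > current_tracer
  # echo 1 > tracing_on
  # usleep 1
  # echo 0 > tracing_on
  # cat trace
  # sysctl kernel.ftrace_enabled=1

The trace output contains only the header, since no functions were
traced while the switch was off.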
2344 | |||
2345 | |||
1732 | Filter commands | 2346 | Filter commands |
1733 | --------------- | 2347 | --------------- |
1734 | 2348 | ||
@@ -1763,12 +2377,58 @@ The following commands are supported: | |||
1763 | 2377 | ||
1764 | echo '__schedule_bug:traceoff:5' > set_ftrace_filter | 2378 | echo '__schedule_bug:traceoff:5' > set_ftrace_filter |
1765 | 2379 | ||
2380 | To always disable tracing when __schedule_bug is hit: | ||
2381 | |||
2382 | echo '__schedule_bug:traceoff' > set_ftrace_filter | ||
2383 | |||
1766 | These commands are cumulative whether or not they are appended | 2384 | These commands are cumulative whether or not they are appended |
1767 | to set_ftrace_filter. To remove a command, prepend it by '!' | 2385 | to set_ftrace_filter. To remove a command, prepend it by '!' |
1768 | and drop the parameter: | 2386 | and drop the parameter: |
1769 | 2387 | ||
2388 | echo '!__schedule_bug:traceoff:0' > set_ftrace_filter | ||
2389 | |||
2390 | The above removes the traceoff command for __schedule_bug | ||
2391 | that has a counter. To remove commands without counters: | ||
2392 | |||
1770 | echo '!__schedule_bug:traceoff' > set_ftrace_filter | 2393 | echo '!__schedule_bug:traceoff' > set_ftrace_filter |
1771 | 2394 | ||
2395 | - snapshot | ||
2396 | Will cause a snapshot to be triggered when the function is hit. | ||
2397 | |||
2398 | echo 'native_flush_tlb_others:snapshot' > set_ftrace_filter | ||
2399 | |||
2400 | To only snapshot once: | ||
2401 | |||
2402 | echo 'native_flush_tlb_others:snapshot:1' > set_ftrace_filter | ||
2403 | |||
2404 | To remove the above commands: | ||
2405 | |||
2406 | echo '!native_flush_tlb_others:snapshot' > set_ftrace_filter | ||
2407 | echo '!native_flush_tlb_others:snapshot:0' > set_ftrace_filter | ||
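
When the trigger fires, the captured data can be read from the snapshot
file (a brief sketch; the snapshot file sits in the same tracing
directory on kernels that provide the snapshot feature):

  # cat snapshot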
2408 | |||
2409 | - enable_event/disable_event | ||
2410 | These commands can enable or disable a trace event. Note, because | ||
2411 | function tracing callbacks are very sensitive, when these commands | ||
2412 | are registered, the tracepoint is activated, but disabled in | ||
2413 | a "soft" mode. That is, the tracepoint will be called, but | ||
2414 | just will not be traced. The event tracepoint stays in this mode | ||
2415 | as long as there's a command that triggers it. | ||
2416 | |||
2417 | echo 'try_to_wake_up:enable_event:sched:sched_switch:2' > \ | ||
2418 | set_ftrace_filter | ||
2419 | |||
2420 | The format is: | ||
2421 | |||
2422 | <function>:enable_event:<system>:<event>[:count] | ||
2423 | <function>:disable_event:<system>:<event>[:count] | ||
2424 | |||
2425 | To remove the events commands: | ||
2426 | |||
2427 | |||
2428 | echo '!try_to_wake_up:enable_event:sched:sched_switch:0' > \ | ||
2429 | set_ftrace_filter | ||
2430 | echo '!schedule:disable_event:sched:sched_switch' > \ | ||
2431 | set_ftrace_filter | ||
1772 | 2432 | ||
1773 | trace_pipe | 2433 | trace_pipe |
1774 | ---------- | 2434 | ---------- |
@@ -1787,28 +2447,31 @@ different. The trace is live. | |||
1787 | # cat trace | 2447 | # cat trace |
1788 | # tracer: function | 2448 | # tracer: function |
1789 | # | 2449 | # |
1790 | # TASK-PID CPU# TIMESTAMP FUNCTION | 2450 | # entries-in-buffer/entries-written: 0/0 #P:4 |
1791 | # | | | | | | 2451 | # |
2452 | # _-----=> irqs-off | ||
2453 | # / _----=> need-resched | ||
2454 | # | / _---=> hardirq/softirq | ||
2455 | # || / _--=> preempt-depth | ||
2456 | # ||| / delay | ||
2457 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION | ||
2458 | # | | | |||| | | | ||
1792 | 2459 | ||
1793 | # | 2460 | # |
1794 | # cat /tmp/trace.out | 2461 | # cat /tmp/trace.out |
1795 | bash-4043 [00] 41.267106: finish_task_switch <-schedule | 2462 | bash-1994 [000] .... 5281.568961: mutex_unlock <-rb_simple_write |
1796 | bash-4043 [00] 41.267106: hrtick_set <-schedule | 2463 | bash-1994 [000] .... 5281.568963: __mutex_unlock_slowpath <-mutex_unlock |
1797 | bash-4043 [00] 41.267107: hrtick_clear <-hrtick_set | 2464 | bash-1994 [000] .... 5281.568963: __fsnotify_parent <-fsnotify_modify |
1798 | bash-4043 [00] 41.267108: wait_for_completion <-__stop_machine_run | 2465 | bash-1994 [000] .... 5281.568964: fsnotify <-fsnotify_modify |
1799 | bash-4043 [00] 41.267108: wait_for_common <-wait_for_completion | 2466 | bash-1994 [000] .... 5281.568964: __srcu_read_lock <-fsnotify |
1800 | bash-4043 [00] 41.267109: kthread_stop <-stop_machine_run | 2467 | bash-1994 [000] .... 5281.568964: add_preempt_count <-__srcu_read_lock |
1801 | bash-4043 [00] 41.267109: init_waitqueue_head <-kthread_stop | 2468 | bash-1994 [000] ...1 5281.568965: sub_preempt_count <-__srcu_read_lock |
1802 | bash-4043 [00] 41.267110: wake_up_process <-kthread_stop | 2469 | bash-1994 [000] .... 5281.568965: __srcu_read_unlock <-fsnotify |
1803 | bash-4043 [00] 41.267110: try_to_wake_up <-wake_up_process | 2470 | bash-1994 [000] .... 5281.568967: sys_dup2 <-system_call_fastpath |
1804 | bash-4043 [00] 41.267111: select_task_rq_rt <-try_to_wake_up | ||
1805 | 2471 | ||
1806 | 2472 | ||
1807 | Note, reading the trace_pipe file will block until more input is | 2473 | Note, reading the trace_pipe file will block until more input is |
1808 | added. By changing the tracer, trace_pipe will issue an EOF. We | 2474 | added. |
1809 | needed to set the function tracer _before_ we "cat" the | ||
1810 | trace_pipe file. | ||
1811 | |||
1812 | 2475 | ||
1813 | trace entries | 2476 | trace entries |
1814 | ------------- | 2477 | ------------- |
@@ -1817,31 +2480,50 @@ Having too much or not enough data can be troublesome in | |||
1817 | diagnosing an issue in the kernel. The file buffer_size_kb is | 2480 | diagnosing an issue in the kernel. The file buffer_size_kb is |
1818 | used to modify the size of the internal trace buffers. The | 2481 | used to modify the size of the internal trace buffers. The |
1819 | number listed is the number of entries that can be recorded per | 2482 | number listed is the number of entries that can be recorded per |
1820 | CPU. To know the full size, multiply the number of possible CPUS | 2483 | CPU. To know the full size, multiply the number of possible CPUs |
1821 | with the number of entries. | 2484 | with the number of entries. |
1822 | 2485 | ||
1823 | # cat buffer_size_kb | 2486 | # cat buffer_size_kb |
1824 | 1408 (units kilobytes) | 2487 | 1408 (units kilobytes) |
1825 | 2488 | ||
1826 | Note, to modify this, you must have tracing completely disabled. | 2489 | Or simply read buffer_total_size_kb |
1827 | To do that, echo "nop" into the current_tracer. If the | 2490 | |
1828 | current_tracer is not set to "nop", an EINVAL error will be | 2491 | # cat buffer_total_size_kb |
1829 | returned. | 2492 | 5632 |
2493 | |||
2494 | To modify the buffer, simply echo in a number (in 1024-byte segments). | ||
1830 | 2495 | ||
1831 | # echo nop > current_tracer | ||
1832 | # echo 10000 > buffer_size_kb | 2496 | # echo 10000 > buffer_size_kb |
1833 | # cat buffer_size_kb | 2497 | # cat buffer_size_kb |
1834 | 10000 (units kilobytes) | 2498 | 10000 (units kilobytes) |
1835 | 2499 | ||
1836 | The number of pages which will be allocated is limited to a | 2500 | It will try to allocate as much as possible. If you allocate too |
1837 | percentage of available memory. Allocating too much will produce | 2501 | much, it can trigger the Out-Of-Memory killer. |
1838 | an error. | ||
1839 | 2502 | ||
1840 | # echo 1000000000000 > buffer_size_kb | 2503 | # echo 1000000000000 > buffer_size_kb |
1841 | -bash: echo: write error: Cannot allocate memory | 2504 | -bash: echo: write error: Cannot allocate memory |
1842 | # cat buffer_size_kb | 2505 | # cat buffer_size_kb |
1843 | 85 | 2506 | 85 |
1844 | 2507 | ||
2508 | The per_cpu buffers can be changed individually as well: | ||
2509 | |||
2510 | # echo 10000 > per_cpu/cpu0/buffer_size_kb | ||
2511 | # echo 100 > per_cpu/cpu1/buffer_size_kb | ||
2512 | |||
2513 | When the per_cpu buffers are not the same, the buffer_size_kb | ||
2514 | at the top level will just show an X | ||
2515 | |||
2516 | # cat buffer_size_kb | ||
2517 | X | ||
2518 | |||
2519 | This is where the buffer_total_size_kb is useful: | ||
2520 | |||
2521 | # cat buffer_total_size_kb | ||
2522 | 12916 | ||
2523 | |||
2524 | Writing to the top level buffer_size_kb will reset all the buffers | ||
2525 | to be the same again. | ||
2526 | |||
1845 | Snapshot | 2527 | Snapshot |
1846 | -------- | 2528 | -------- |
1847 | CONFIG_TRACER_SNAPSHOT makes a generic snapshot feature | 2529 | CONFIG_TRACER_SNAPSHOT makes a generic snapshot feature |
@@ -1925,7 +2607,188 @@ bash: echo: write error: Device or resource busy | |||
1925 | # cat snapshot | 2607 | # cat snapshot |
1926 | cat: snapshot: Device or resource busy | 2608 | cat: snapshot: Device or resource busy |
1927 | 2609 | ||
2610 | |||
2611 | Instances | ||
2612 | --------- | ||
2613 | In the debugfs tracing directory is a directory called "instances". | ||
2614 | This directory can have new directories created inside of it using | ||
2615 | mkdir, and removing directories with rmdir. The directory created | ||
2616 | with mkdir in this directory will already contain files and other | ||
2617 | directories after it is created. | ||
2618 | |||
2619 | # mkdir instances/foo | ||
2620 | # ls instances/foo | ||
2621 | buffer_size_kb buffer_total_size_kb events free_buffer per_cpu | ||
2622 | set_event snapshot trace trace_clock trace_marker trace_options | ||
2623 | trace_pipe tracing_on | ||
2624 | |||
2625 | As you can see, the new directory looks similar to the tracing directory | ||
2626 | itself. In fact, it is very similar, except that the buffer and | ||
2627 | events are independent of the main directory, or of any other | ||
2628 | instances that are created. | ||
2629 | |||
2630 | The files in the new directory work just like the files with the | ||
2631 | same name in the tracing directory except the buffer that is used | ||
2632 | is a separate and new buffer. The files affect that buffer but do not | ||
2633 | affect the main buffer with the exception of trace_options. Currently, | ||
2634 | the trace_options affect all instances and the top level buffer | ||
2635 | the same, but this may change in future releases. That is, options | ||
2636 | may become specific to the instance they reside in. | ||
2637 | |||
2638 | Notice that none of the function tracer files are there, nor are | ||
2639 | current_tracer and available_tracers. This is because the buffers | ||
2640 | can currently only have events enabled for them. | ||
2641 | |||
2642 | # mkdir instances/foo | ||
2643 | # mkdir instances/bar | ||
2644 | # mkdir instances/zoot | ||
2645 | # echo 100000 > buffer_size_kb | ||
2646 | # echo 1000 > instances/foo/buffer_size_kb | ||
2647 | # echo 5000 > instances/bar/per_cpu/cpu1/buffer_size_kb | ||
2648 | # echo function > current_tracer | ||
2649 | # echo 1 > instances/foo/events/sched/sched_wakeup/enable | ||
2650 | # echo 1 > instances/foo/events/sched/sched_wakeup_new/enable | ||
2651 | # echo 1 > instances/foo/events/sched/sched_switch/enable | ||
2652 | # echo 1 > instances/bar/events/irq/enable | ||
2653 | # echo 1 > instances/zoot/events/syscalls/enable | ||
2654 | # cat trace_pipe | ||
2655 | CPU:2 [LOST 11745 EVENTS] | ||
2656 | bash-2044 [002] .... 10594.481032: _raw_spin_lock_irqsave <-get_page_from_freelist | ||
2657 | bash-2044 [002] d... 10594.481032: add_preempt_count <-_raw_spin_lock_irqsave | ||
2658 | bash-2044 [002] d..1 10594.481032: __rmqueue <-get_page_from_freelist | ||
2659 | bash-2044 [002] d..1 10594.481033: _raw_spin_unlock <-get_page_from_freelist | ||
2660 | bash-2044 [002] d..1 10594.481033: sub_preempt_count <-_raw_spin_unlock | ||
2661 | bash-2044 [002] d... 10594.481033: get_pageblock_flags_group <-get_pageblock_migratetype | ||
2662 | bash-2044 [002] d... 10594.481034: __mod_zone_page_state <-get_page_from_freelist | ||
2663 | bash-2044 [002] d... 10594.481034: zone_statistics <-get_page_from_freelist | ||
2664 | bash-2044 [002] d... 10594.481034: __inc_zone_state <-zone_statistics | ||
2665 | bash-2044 [002] d... 10594.481034: __inc_zone_state <-zone_statistics | ||
2666 | bash-2044 [002] .... 10594.481035: arch_dup_task_struct <-copy_process | ||
2667 | [...] | ||
2668 | |||
2669 | # cat instances/foo/trace_pipe | ||
2670 | bash-1998 [000] d..4 136.676759: sched_wakeup: comm=kworker/0:1 pid=59 prio=120 success=1 target_cpu=000 | ||
2671 | bash-1998 [000] dN.4 136.676760: sched_wakeup: comm=bash pid=1998 prio=120 success=1 target_cpu=000 | ||
2672 | <idle>-0 [003] d.h3 136.676906: sched_wakeup: comm=rcu_preempt pid=9 prio=120 success=1 target_cpu=003 | ||
2673 | <idle>-0 [003] d..3 136.676909: sched_switch: prev_comm=swapper/3 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=rcu_preempt next_pid=9 next_prio=120 | ||
2674 | rcu_preempt-9 [003] d..3 136.676916: sched_switch: prev_comm=rcu_preempt prev_pid=9 prev_prio=120 prev_state=S ==> next_comm=swapper/3 next_pid=0 next_prio=120 | ||
2675 | bash-1998 [000] d..4 136.677014: sched_wakeup: comm=kworker/0:1 pid=59 prio=120 success=1 target_cpu=000 | ||
2676 | bash-1998 [000] dN.4 136.677016: sched_wakeup: comm=bash pid=1998 prio=120 success=1 target_cpu=000 | ||
2677 | bash-1998 [000] d..3 136.677018: sched_switch: prev_comm=bash prev_pid=1998 prev_prio=120 prev_state=R+ ==> next_comm=kworker/0:1 next_pid=59 next_prio=120 | ||
2678 | kworker/0:1-59 [000] d..4 136.677022: sched_wakeup: comm=sshd pid=1995 prio=120 success=1 target_cpu=001 | ||
2679 | kworker/0:1-59 [000] d..3 136.677025: sched_switch: prev_comm=kworker/0:1 prev_pid=59 prev_prio=120 prev_state=S ==> next_comm=bash next_pid=1998 next_prio=120 | ||
2680 | [...] | ||
2681 | |||
2682 | # cat instances/bar/trace_pipe | ||
2683 | migration/1-14 [001] d.h3 138.732674: softirq_raise: vec=3 [action=NET_RX] | ||
2684 | <idle>-0 [001] dNh3 138.732725: softirq_raise: vec=3 [action=NET_RX] | ||
2685 | bash-1998 [000] d.h1 138.733101: softirq_raise: vec=1 [action=TIMER] | ||
2686 | bash-1998 [000] d.h1 138.733102: softirq_raise: vec=9 [action=RCU] | ||
2687 | bash-1998 [000] ..s2 138.733105: softirq_entry: vec=1 [action=TIMER] | ||
2688 | bash-1998 [000] ..s2 138.733106: softirq_exit: vec=1 [action=TIMER] | ||
2689 | bash-1998 [000] ..s2 138.733106: softirq_entry: vec=9 [action=RCU] | ||
2690 | bash-1998 [000] ..s2 138.733109: softirq_exit: vec=9 [action=RCU] | ||
2691 | sshd-1995 [001] d.h1 138.733278: irq_handler_entry: irq=21 name=uhci_hcd:usb4 | ||
2692 | sshd-1995 [001] d.h1 138.733280: irq_handler_exit: irq=21 ret=unhandled | ||
2693 | sshd-1995 [001] d.h1 138.733281: irq_handler_entry: irq=21 name=eth0 | ||
2694 | sshd-1995 [001] d.h1 138.733283: irq_handler_exit: irq=21 ret=handled | ||
2695 | [...] | ||
2696 | |||
2697 | # cat instances/zoot/trace | ||
2698 | # tracer: nop | ||
2699 | # | ||
2700 | # entries-in-buffer/entries-written: 18996/18996 #P:4 | ||
2701 | # | ||
2702 | # _-----=> irqs-off | ||
2703 | # / _----=> need-resched | ||
2704 | # | / _---=> hardirq/softirq | ||
2705 | # || / _--=> preempt-depth | ||
2706 | # ||| / delay | ||
2707 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION | ||
2708 | # | | | |||| | | | ||
2709 | bash-1998 [000] d... 140.733501: sys_write -> 0x2 | ||
2710 | bash-1998 [000] d... 140.733504: sys_dup2(oldfd: a, newfd: 1) | ||
2711 | bash-1998 [000] d... 140.733506: sys_dup2 -> 0x1 | ||
2712 | bash-1998 [000] d... 140.733508: sys_fcntl(fd: a, cmd: 1, arg: 0) | ||
2713 | bash-1998 [000] d... 140.733509: sys_fcntl -> 0x1 | ||
2714 | bash-1998 [000] d... 140.733510: sys_close(fd: a) | ||
2715 | bash-1998 [000] d... 140.733510: sys_close -> 0x0 | ||
2716 | bash-1998 [000] d... 140.733514: sys_rt_sigprocmask(how: 0, nset: 0, oset: 6e2768, sigsetsize: 8) | ||
2717 | bash-1998 [000] d... 140.733515: sys_rt_sigprocmask -> 0x0 | ||
2718 | bash-1998 [000] d... 140.733516: sys_rt_sigaction(sig: 2, act: 7fff718846f0, oact: 7fff71884650, sigsetsize: 8) | ||
2719 | bash-1998 [000] d... 140.733516: sys_rt_sigaction -> 0x0 | ||
2720 | |||
2721 | You can see that the trace of the top level trace buffer shows only | ||
2722 | the function tracing. The foo instance displays wakeups and task | ||
2723 | switches. | ||
2724 | |||
2725 | To remove the instances, simply delete their directories: | ||
2726 | |||
2727 | # rmdir instances/foo | ||
2728 | # rmdir instances/bar | ||
2729 | # rmdir instances/zoot | ||
2730 | |||
2731 | Note, if a process has a trace file open in one of the instance | ||
2732 | directories, the rmdir will fail with EBUSY. | ||
2733 | |||
2734 | |||
2735 | Stack trace | ||
1928 | ----------- | 2736 | ----------- |
2737 | Since the kernel has a fixed-size stack, it is important not to | ||
2738 | waste it in functions. A kernel developer must be conscious of | ||
2739 | what they allocate on the stack. If they add too much, the system | ||
2740 | can be in danger of a stack overflow, and corruption will occur, | ||
2741 | usually leading to a system panic. | ||
2742 | |||
2743 | There are some tools that check this, usually with interrupts | ||
2744 | periodically checking usage. But a check performed at every | ||
2745 | function call is far more thorough. As ftrace provides | ||
2746 | a function tracer, it is convenient to check the stack size | ||
2747 | at every function call. This is enabled via the stack tracer. | ||
2748 | |||
2749 | CONFIG_STACK_TRACER enables the ftrace stack tracing functionality. | ||
2750 | To enable it, write a '1' into /proc/sys/kernel/stack_tracer_enabled. | ||
2751 | |||
2752 | # echo 1 > /proc/sys/kernel/stack_tracer_enabled | ||
2753 | |||
2754 | You can also enable it from the kernel command line to trace | ||
2755 | the stack size of the kernel during boot up, by adding "stacktrace" | ||
2756 | to the kernel command line. | ||
2757 | |||
2758 | After running it for a few minutes, the output looks like: | ||
2759 | |||
2760 | # cat stack_max_size | ||
2761 | 2928 | ||
2762 | |||
2763 | # cat stack_trace | ||
2764 | Depth Size Location (18 entries) | ||
2765 | ----- ---- -------- | ||
2766 | 0) 2928 224 update_sd_lb_stats+0xbc/0x4ac | ||
2767 | 1) 2704 160 find_busiest_group+0x31/0x1f1 | ||
2768 | 2) 2544 256 load_balance+0xd9/0x662 | ||
2769 | 3) 2288 80 idle_balance+0xbb/0x130 | ||
2770 | 4) 2208 128 __schedule+0x26e/0x5b9 | ||
2771 | 5) 2080 16 schedule+0x64/0x66 | ||
2772 | 6) 2064 128 schedule_timeout+0x34/0xe0 | ||
2773 | 7) 1936 112 wait_for_common+0x97/0xf1 | ||
2774 | 8) 1824 16 wait_for_completion+0x1d/0x1f | ||
2775 | 9) 1808 128 flush_work+0xfe/0x119 | ||
2776 | 10) 1680 16 tty_flush_to_ldisc+0x1e/0x20 | ||
2777 | 11) 1664 48 input_available_p+0x1d/0x5c | ||
2778 | 12) 1616 48 n_tty_poll+0x6d/0x134 | ||
2779 | 13) 1568 64 tty_poll+0x64/0x7f | ||
2780 | 14) 1504 880 do_select+0x31e/0x511 | ||
2781 | 15) 624 400 core_sys_select+0x177/0x216 | ||
2782 | 16) 224 96 sys_select+0x91/0xb9 | ||
2783 | 17) 128 128 system_call_fastpath+0x16/0x1b | ||
2784 | |||
2785 | Note, if -mfentry is being used by gcc, functions get traced before | ||
2786 | they set up the stack frame. This means that leaf level functions | ||
2787 | are not tested by the stack tracer when -mfentry is used. | ||
2788 | |||
2789 | Currently, -mfentry is used by gcc 4.6.0 and above on x86 only. | ||
2790 | |||
2791 | --------- | ||
1929 | 2792 | ||
1930 | More details can be found in the source code, in the | 2793 | More details can be found in the source code, in the |
1931 | kernel/trace/*.c files. | 2794 | kernel/trace/*.c files. |
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 52da2a250795..f83e17a40e8b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h | |||
@@ -261,8 +261,10 @@ struct ftrace_probe_ops { | |||
261 | void (*func)(unsigned long ip, | 261 | void (*func)(unsigned long ip, |
262 | unsigned long parent_ip, | 262 | unsigned long parent_ip, |
263 | void **data); | 263 | void **data); |
264 | int (*callback)(unsigned long ip, void **data); | 264 | int (*init)(struct ftrace_probe_ops *ops, |
265 | void (*free)(void **data); | 265 | unsigned long ip, void **data); |
266 | void (*free)(struct ftrace_probe_ops *ops, | ||
267 | unsigned long ip, void **data); | ||
266 | int (*print)(struct seq_file *m, | 268 | int (*print)(struct seq_file *m, |
267 | unsigned long ip, | 269 | unsigned long ip, |
268 | struct ftrace_probe_ops *ops, | 270 | struct ftrace_probe_ops *ops, |
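For illustration, a hedged sketch (not taken from this patch) of a probe written against the new init/free signatures; the count_probe_* names are hypothetical:

    #include <linux/ftrace.h>
    #include <linux/slab.h>

    /* Hypothetical probe: count how often each matched function is hit. */
    static int count_probe_init(struct ftrace_probe_ops *ops,
                                unsigned long ip, void **data)
    {
            long *count = kzalloc(sizeof(*count), GFP_KERNEL);

            if (!count)
                    return -ENOMEM;  /* a negative return skips this function */
            *data = count;
            return 0;
    }

    static void count_probe_free(struct ftrace_probe_ops *ops,
                                 unsigned long ip, void **data)
    {
            kfree(*data);
            *data = NULL;
    }

    static void count_probe_func(unsigned long ip, unsigned long parent_ip,
                                 void **data)
    {
            (*(long *)*data)++;
    }

    static struct ftrace_probe_ops count_probe_ops = {
            .func = count_probe_func,
            .init = count_probe_init,
            .free = count_probe_free,
    };

    /* Registered for all sched* functions with, e.g.:
     *   register_ftrace_function_probe("sched*", &count_probe_ops, NULL);
     */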
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 13a54d0bdfa8..34e00fb49bec 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/perf_event.h> | 8 | #include <linux/perf_event.h> |
9 | 9 | ||
10 | struct trace_array; | 10 | struct trace_array; |
11 | struct trace_buffer; | ||
11 | struct tracer; | 12 | struct tracer; |
12 | struct dentry; | 13 | struct dentry; |
13 | 14 | ||
@@ -38,6 +39,12 @@ const char *ftrace_print_symbols_seq_u64(struct trace_seq *p, | |||
38 | const char *ftrace_print_hex_seq(struct trace_seq *p, | 39 | const char *ftrace_print_hex_seq(struct trace_seq *p, |
39 | const unsigned char *buf, int len); | 40 | const unsigned char *buf, int len); |
40 | 41 | ||
42 | struct trace_iterator; | ||
43 | struct trace_event; | ||
44 | |||
45 | int ftrace_raw_output_prep(struct trace_iterator *iter, | ||
46 | struct trace_event *event); | ||
47 | |||
41 | /* | 48 | /* |
42 | * The trace entry - the most basic unit of tracing. This is what | 49 | * The trace entry - the most basic unit of tracing. This is what |
43 | * is printed in the end as a single line in the trace output, such as: | 50 | * is printed in the end as a single line in the trace output, such as: |
@@ -61,6 +68,7 @@ struct trace_entry { | |||
61 | struct trace_iterator { | 68 | struct trace_iterator { |
62 | struct trace_array *tr; | 69 | struct trace_array *tr; |
63 | struct tracer *trace; | 70 | struct tracer *trace; |
71 | struct trace_buffer *trace_buffer; | ||
64 | void *private; | 72 | void *private; |
65 | int cpu_file; | 73 | int cpu_file; |
66 | struct mutex mutex; | 74 | struct mutex mutex; |
@@ -95,8 +103,6 @@ enum trace_iter_flags { | |||
95 | }; | 103 | }; |
96 | 104 | ||
97 | 105 | ||
98 | struct trace_event; | ||
99 | |||
100 | typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, | 106 | typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, |
101 | int flags, struct trace_event *event); | 107 | int flags, struct trace_event *event); |
102 | 108 | ||
@@ -128,6 +134,13 @@ enum print_line_t { | |||
128 | void tracing_generic_entry_update(struct trace_entry *entry, | 134 | void tracing_generic_entry_update(struct trace_entry *entry, |
129 | unsigned long flags, | 135 | unsigned long flags, |
130 | int pc); | 136 | int pc); |
137 | struct ftrace_event_file; | ||
138 | |||
139 | struct ring_buffer_event * | ||
140 | trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer, | ||
141 | struct ftrace_event_file *ftrace_file, | ||
142 | int type, unsigned long len, | ||
143 | unsigned long flags, int pc); | ||
131 | struct ring_buffer_event * | 144 | struct ring_buffer_event * |
132 | trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, | 145 | trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, |
133 | int type, unsigned long len, | 146 | int type, unsigned long len, |
@@ -182,53 +195,49 @@ extern int ftrace_event_reg(struct ftrace_event_call *event, | |||
182 | enum trace_reg type, void *data); | 195 | enum trace_reg type, void *data); |
183 | 196 | ||
184 | enum { | 197 | enum { |
185 | TRACE_EVENT_FL_ENABLED_BIT, | ||
186 | TRACE_EVENT_FL_FILTERED_BIT, | 198 | TRACE_EVENT_FL_FILTERED_BIT, |
187 | TRACE_EVENT_FL_RECORDED_CMD_BIT, | ||
188 | TRACE_EVENT_FL_CAP_ANY_BIT, | 199 | TRACE_EVENT_FL_CAP_ANY_BIT, |
189 | TRACE_EVENT_FL_NO_SET_FILTER_BIT, | 200 | TRACE_EVENT_FL_NO_SET_FILTER_BIT, |
190 | TRACE_EVENT_FL_IGNORE_ENABLE_BIT, | 201 | TRACE_EVENT_FL_IGNORE_ENABLE_BIT, |
202 | TRACE_EVENT_FL_WAS_ENABLED_BIT, | ||
191 | }; | 203 | }; |
192 | 204 | ||
205 | /* | ||
206 | * Event flags: | ||
207 | * FILTERED - The event has a filter attached | ||
208 | * CAP_ANY - Any user can enable for perf | ||
209 | * NO_SET_FILTER - Set when filter has error and is to be ignored | ||
210 | * IGNORE_ENABLE - For ftrace internal events, do not enable with debugfs file | ||
211 | * WAS_ENABLED - Set and stays set when an event was ever enabled | ||
212 | * (used for module unloading, if a module event is enabled, | ||
213 | * it is best to clear the buffers that used it). | ||
214 | */ | ||
193 | enum { | 215 | enum { |
194 | TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), | ||
195 | TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), | 216 | TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), |
196 | TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), | ||
197 | TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), | 217 | TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), |
198 | TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), | 218 | TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), |
199 | TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), | 219 | TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), |
220 | TRACE_EVENT_FL_WAS_ENABLED = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT), | ||
200 | }; | 221 | }; |
201 | 222 | ||
202 | struct ftrace_event_call { | 223 | struct ftrace_event_call { |
203 | struct list_head list; | 224 | struct list_head list; |
204 | struct ftrace_event_class *class; | 225 | struct ftrace_event_class *class; |
205 | char *name; | 226 | char *name; |
206 | struct dentry *dir; | ||
207 | struct trace_event event; | 227 | struct trace_event event; |
208 | const char *print_fmt; | 228 | const char *print_fmt; |
209 | struct event_filter *filter; | 229 | struct event_filter *filter; |
230 | struct list_head *files; | ||
210 | void *mod; | 231 | void *mod; |
211 | void *data; | 232 | void *data; |
212 | |||
213 | /* | 233 | /* |
214 | * 32 bit flags: | 234 | * bit 0: filter_active |
215 | * bit 1: enabled | 235 | * bit 1: allow trace by non root (cap any) |
216 | * bit 2: filter_active | 236 | * bit 2: failed to apply filter |
217 | * bit 3: enabled cmd record | 237 | * bit 3: ftrace internal event (do not enable) |
218 | * bit 4: allow trace by non root (cap any) | 238 | * bit 4: Event was enabled by module |
219 | * bit 5: failed to apply filter | ||
220 | * bit 6: ftrace internal event (do not enable) | ||
221 | * | ||
222 | * Changes to flags must hold the event_mutex. | ||
223 | * | ||
224 | * Note: Reads of flags do not hold the event_mutex since | ||
225 | * they occur in critical sections. But the way flags | ||
226 | * is currently used, these changes do no affect the code | ||
227 | * except that when a change is made, it may have a slight | ||
228 | * delay in propagating the changes to other CPUs due to | ||
229 | * caching and such. | ||
230 | */ | 239 | */ |
231 | unsigned int flags; | 240 | int flags; /* static flags of different events */ |
232 | 241 | ||
233 | #ifdef CONFIG_PERF_EVENTS | 242 | #ifdef CONFIG_PERF_EVENTS |
234 | int perf_refcount; | 243 | int perf_refcount; |
@@ -236,6 +245,56 @@ struct ftrace_event_call { | |||
236 | #endif | 245 | #endif |
237 | }; | 246 | }; |
238 | 247 | ||
248 | struct trace_array; | ||
249 | struct ftrace_subsystem_dir; | ||
250 | |||
251 | enum { | ||
252 | FTRACE_EVENT_FL_ENABLED_BIT, | ||
253 | FTRACE_EVENT_FL_RECORDED_CMD_BIT, | ||
254 | FTRACE_EVENT_FL_SOFT_MODE_BIT, | ||
255 | FTRACE_EVENT_FL_SOFT_DISABLED_BIT, | ||
256 | }; | ||
257 | |||
258 | /* | ||
259 | * Ftrace event file flags: | ||
260 | * ENABLED - The event is enabled | ||
261 | * RECORDED_CMD - The comms should be recorded at sched_switch | ||
262 | * SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED | ||
263 | * SOFT_DISABLED - When set, do not trace the event (even though its | ||
264 | * tracepoint may be enabled) | ||
265 | */ | ||
266 | enum { | ||
267 | FTRACE_EVENT_FL_ENABLED = (1 << FTRACE_EVENT_FL_ENABLED_BIT), | ||
268 | FTRACE_EVENT_FL_RECORDED_CMD = (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT), | ||
269 | FTRACE_EVENT_FL_SOFT_MODE = (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT), | ||
270 | FTRACE_EVENT_FL_SOFT_DISABLED = (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT), | ||
271 | }; | ||
272 | |||
273 | struct ftrace_event_file { | ||
274 | struct list_head list; | ||
275 | struct ftrace_event_call *event_call; | ||
276 | struct dentry *dir; | ||
277 | struct trace_array *tr; | ||
278 | struct ftrace_subsystem_dir *system; | ||
279 | |||
280 | /* | ||
281 | * 32 bit flags: | ||
282 | * bit 0: enabled | ||
283 | * bit 1: enabled cmd record | ||
284 | * bit 2: enable/disable with the soft disable bit | ||
285 | * bit 3: soft disabled | ||
286 | * | ||
287 | * Note: The bits must be set atomically to prevent races | ||
288 | * from other writers. Reads of flags do not need to be in | ||
289 | * sync as they occur in critical sections. But the way flags | ||
290 | * is currently used, these changes do not affect the code | ||
291 | * except that when a change is made, it may have a slight | ||
292 | * delay in propagating the changes to other CPUs due to | ||
293 | * caching and such. Which is mostly OK ;-) | ||
294 | */ | ||
295 | unsigned long flags; | ||
296 | }; | ||
297 | |||
239 | #define __TRACE_EVENT_FLAGS(name, value) \ | 298 | #define __TRACE_EVENT_FLAGS(name, value) \ |
240 | static int __init trace_init_flags_##name(void) \ | 299 | static int __init trace_init_flags_##name(void) \ |
241 | { \ | 300 | { \ |
@@ -274,7 +333,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type, | |||
274 | extern int trace_add_event_call(struct ftrace_event_call *call); | 333 | extern int trace_add_event_call(struct ftrace_event_call *call); |
275 | extern void trace_remove_event_call(struct ftrace_event_call *call); | 334 | extern void trace_remove_event_call(struct ftrace_event_call *call); |
276 | 335 | ||
277 | #define is_signed_type(type) (((type)(-1)) < (type)0) | 336 | #define is_signed_type(type) (((type)(-1)) < (type)1) |
278 | 337 | ||
279 | int trace_set_clr_event(const char *system, const char *event, int set); | 338 | int trace_set_clr_event(const char *system, const char *event, int set); |
280 | 339 | ||
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 79fdd80a42d4..2dac79c39199 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
@@ -486,6 +486,8 @@ enum ftrace_dump_mode { | |||
486 | void tracing_on(void); | 486 | void tracing_on(void); |
487 | void tracing_off(void); | 487 | void tracing_off(void); |
488 | int tracing_is_on(void); | 488 | int tracing_is_on(void); |
489 | void tracing_snapshot(void); | ||
490 | void tracing_snapshot_alloc(void); | ||
489 | 491 | ||
490 | extern void tracing_start(void); | 492 | extern void tracing_start(void); |
491 | extern void tracing_stop(void); | 493 | extern void tracing_stop(void); |
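A rough usage sketch of the new snapshot hooks (the my_debug_* names are hypothetical; the assumption is that the snapshot buffer is allocated from a context that may sleep, before tracing_snapshot() is ever called):

    #include <linux/kernel.h>
    #include <linux/init.h>

    static bool snapshot_ready;

    static int __init my_debug_init(void)
    {
            /* Allocate the snapshot buffer once, where sleeping is allowed. */
            tracing_snapshot_alloc();
            snapshot_ready = true;
            return 0;
    }
    late_initcall(my_debug_init);

    static void my_check_result(int err)
    {
            /* Freeze the current trace contents for later inspection via
             * the "snapshot" file when something suspicious happens. */
            if (snapshot_ready && err < 0)
                    tracing_snapshot();
    }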
@@ -515,10 +517,32 @@ do { \ | |||
515 | * | 517 | * |
516 | * This is intended as a debugging tool for the developer only. | 518 | * This is intended as a debugging tool for the developer only. |
517 | * Please refrain from leaving trace_printks scattered around in | 519 | * Please refrain from leaving trace_printks scattered around in |
518 | * your code. | 520 | * your code. (Extra memory is used for special buffers that are |
521 | * allocated when trace_printk() is used) | ||
522 | * | ||
523 | * A little optimization trick is done here. If there's only one | ||
524 | * argument, there's no need to scan the string for printf formats. | ||
525 | * The trace_puts() will suffice. But how can we take advantage of | ||
526 | * using trace_puts() when trace_printk() has only one argument? | ||
527 | * By stringifying the args and checking the size we can tell | ||
528 | * whether or not there are args. __stringify((__VA_ARGS__)) will | ||
529 | * turn into "()\0" with a size of 3 when there are no args, anything | ||
530 | * else will be bigger. All we need to do is define a string to this, | ||
531 | * and then take its size and compare to 3. If it's bigger, use | ||
532 | * do_trace_printk() otherwise, optimize it to trace_puts(). Then just | ||
533 | * let gcc optimize the rest. | ||
519 | */ | 534 | */ |
520 | 535 | ||
521 | #define trace_printk(fmt, args...) \ | 536 | #define trace_printk(fmt, ...) \ |
537 | do { \ | ||
538 | char _______STR[] = __stringify((__VA_ARGS__)); \ | ||
539 | if (sizeof(_______STR) > 3) \ | ||
540 | do_trace_printk(fmt, ##__VA_ARGS__); \ | ||
541 | else \ | ||
542 | trace_puts(fmt); \ | ||
543 | } while (0) | ||
544 | |||
545 | #define do_trace_printk(fmt, args...) \ | ||
522 | do { \ | 546 | do { \ |
523 | static const char *trace_printk_fmt \ | 547 | static const char *trace_printk_fmt \ |
524 | __attribute__((section("__trace_printk_fmt"))) = \ | 548 | __attribute__((section("__trace_printk_fmt"))) = \ |
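To see the stringify trick in isolation, here is a small stand-alone sketch (plain userspace C, with __stringify re-defined locally in the same spirit as the kernel's) showing how the size of the stringified argument list tells the two cases apart:

    #include <stdio.h>

    #define __stringify_1(x...)  #x
    #define __stringify(x...)    __stringify_1(x)

    /* Mirrors the trace_printk() decision: "()" stringifies to 3 bytes
     * (two parens plus the terminating NUL) when there are no extra args. */
    #define which_path(fmt, ...)                                        \
    do {                                                                \
            char _______STR[] = __stringify((__VA_ARGS__));             \
            if (sizeof(_______STR) > 3)                                 \
                    printf("do_trace_printk path: %s", fmt);            \
            else                                                        \
                    printf("trace_puts path: %s", fmt);                 \
    } while (0)

    int main(void)
    {
            which_path("just a string\n");    /* "()"   -> size 3 -> puts   */
            which_path("value = %d\n", 42);   /* "(42)" -> size 5 -> printk */
            return 0;
    }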
@@ -538,7 +562,45 @@ int __trace_bprintk(unsigned long ip, const char *fmt, ...); | |||
538 | extern __printf(2, 3) | 562 | extern __printf(2, 3) |
539 | int __trace_printk(unsigned long ip, const char *fmt, ...); | 563 | int __trace_printk(unsigned long ip, const char *fmt, ...); |
540 | 564 | ||
541 | extern void trace_dump_stack(void); | 565 | /** |
566 | * trace_puts - write a string into the ftrace buffer | ||
567 | * @str: the string to record | ||
568 | * | ||
569 | * Note: __trace_bputs is an internal function for trace_puts and | ||
570 | * the @ip is passed in via the trace_puts macro. | ||
571 | * | ||
572 | * This is similar to trace_printk() but is made for those really fast | ||
573 | * paths where a developer wants the least amount of "Heisenbug" effects, | ||
574 | * where the processing of the print format is still too much. | ||
575 | * | ||
576 | * This function allows a kernel developer to debug fast path sections | ||
577 | * that printk is not appropriate for. By scattering in various | ||
578 | * printk like tracing in the code, a developer can quickly see | ||
579 | * where problems are occurring. | ||
580 | * | ||
581 | * This is intended as a debugging tool for the developer only. | ||
582 | * Please refrain from leaving trace_puts scattered around in | ||
583 | * your code. (Extra memory is used for special buffers that are | ||
584 | * allocated when trace_puts() is used) | ||
585 | * | ||
586 | * Returns: 0 if nothing was written, positive # if string was. | ||
587 | * (1 when __trace_bputs is used, strlen(str) when __trace_puts is used) | ||
588 | */ | ||
589 | |||
590 | extern int __trace_bputs(unsigned long ip, const char *str); | ||
591 | extern int __trace_puts(unsigned long ip, const char *str, int size); | ||
592 | #define trace_puts(str) ({ \ | ||
593 | static const char *trace_printk_fmt \ | ||
594 | __attribute__((section("__trace_printk_fmt"))) = \ | ||
595 | __builtin_constant_p(str) ? str : NULL; \ | ||
596 | \ | ||
597 | if (__builtin_constant_p(str)) \ | ||
598 | __trace_bputs(_THIS_IP_, trace_printk_fmt); \ | ||
599 | else \ | ||
600 | __trace_puts(_THIS_IP_, str, strlen(str)); \ | ||
601 | }) | ||
602 | |||
603 | extern void trace_dump_stack(int skip); | ||
542 | 604 | ||
543 | /* | 605 | /* |
544 | * The double __builtin_constant_p is because gcc will give us an error | 606 | * The double __builtin_constant_p is because gcc will give us an error |
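A minimal kernel-side usage sketch, assuming a hypothetical my_fast_path() function, showing which path each call resolves to under the macros above:

    #include <linux/kernel.h>

    static void my_fast_path(int cpu)
    {
            /* Constant literal: becomes __trace_bputs(), so only a pointer
             * to the string needs to be recorded. */
            trace_puts("entered fast path\n");

            /* A single-argument trace_printk() is optimized down to trace_puts(). */
            trace_printk("still just a literal\n");

            /* With real format arguments the full do_trace_printk() path is taken. */
            trace_printk("cpu = %d\n", cpu);
    }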
@@ -573,6 +635,8 @@ static inline void trace_dump_stack(void) { } | |||
573 | static inline void tracing_on(void) { } | 635 | static inline void tracing_on(void) { } |
574 | static inline void tracing_off(void) { } | 636 | static inline void tracing_off(void) { } |
575 | static inline int tracing_is_on(void) { return 0; } | 637 | static inline int tracing_is_on(void) { return 0; } |
638 | static inline void tracing_snapshot(void) { } | ||
639 | static inline void tracing_snapshot_alloc(void) { } | ||
576 | 640 | ||
577 | static inline __printf(1, 2) | 641 | static inline __printf(1, 2) |
578 | int trace_printk(const char *fmt, ...) | 642 | int trace_printk(const char *fmt, ...) |
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 1342e69542f3..d69cf637a15a 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/kmemcheck.h> | 4 | #include <linux/kmemcheck.h> |
5 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
6 | #include <linux/seq_file.h> | 6 | #include <linux/seq_file.h> |
7 | #include <linux/poll.h> | ||
7 | 8 | ||
8 | struct ring_buffer; | 9 | struct ring_buffer; |
9 | struct ring_buffer_iter; | 10 | struct ring_buffer_iter; |
@@ -96,6 +97,11 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k | |||
96 | __ring_buffer_alloc((size), (flags), &__key); \ | 97 | __ring_buffer_alloc((size), (flags), &__key); \ |
97 | }) | 98 | }) |
98 | 99 | ||
100 | void ring_buffer_wait(struct ring_buffer *buffer, int cpu); | ||
101 | int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, | ||
102 | struct file *filp, poll_table *poll_table); | ||
103 | |||
104 | |||
99 | #define RING_BUFFER_ALL_CPUS -1 | 105 | #define RING_BUFFER_ALL_CPUS -1 |
100 | 106 | ||
101 | void ring_buffer_free(struct ring_buffer *buffer); | 107 | void ring_buffer_free(struct ring_buffer *buffer); |
diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h index d563f37e1a1d..1d7ca2739272 100644 --- a/include/linux/trace_clock.h +++ b/include/linux/trace_clock.h | |||
@@ -16,6 +16,7 @@ | |||
16 | 16 | ||
17 | extern u64 notrace trace_clock_local(void); | 17 | extern u64 notrace trace_clock_local(void); |
18 | extern u64 notrace trace_clock(void); | 18 | extern u64 notrace trace_clock(void); |
19 | extern u64 notrace trace_clock_jiffies(void); | ||
19 | extern u64 notrace trace_clock_global(void); | 20 | extern u64 notrace trace_clock_global(void); |
20 | extern u64 notrace trace_clock_counter(void); | 21 | extern u64 notrace trace_clock_counter(void); |
21 | 22 | ||
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 40dc5e8fe340..19edd7facaa1 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h | |||
@@ -227,29 +227,18 @@ static notrace enum print_line_t \ | |||
227 | ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \ | 227 | ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \ |
228 | struct trace_event *trace_event) \ | 228 | struct trace_event *trace_event) \ |
229 | { \ | 229 | { \ |
230 | struct ftrace_event_call *event; \ | ||
231 | struct trace_seq *s = &iter->seq; \ | 230 | struct trace_seq *s = &iter->seq; \ |
231 | struct trace_seq __maybe_unused *p = &iter->tmp_seq; \ | ||
232 | struct ftrace_raw_##call *field; \ | 232 | struct ftrace_raw_##call *field; \ |
233 | struct trace_entry *entry; \ | ||
234 | struct trace_seq *p = &iter->tmp_seq; \ | ||
235 | int ret; \ | 233 | int ret; \ |
236 | \ | 234 | \ |
237 | event = container_of(trace_event, struct ftrace_event_call, \ | 235 | field = (typeof(field))iter->ent; \ |
238 | event); \ | ||
239 | \ | ||
240 | entry = iter->ent; \ | ||
241 | \ | ||
242 | if (entry->type != event->event.type) { \ | ||
243 | WARN_ON_ONCE(1); \ | ||
244 | return TRACE_TYPE_UNHANDLED; \ | ||
245 | } \ | ||
246 | \ | ||
247 | field = (typeof(field))entry; \ | ||
248 | \ | 236 | \ |
249 | trace_seq_init(p); \ | 237 | ret = ftrace_raw_output_prep(iter, trace_event); \ |
250 | ret = trace_seq_printf(s, "%s: ", event->name); \ | ||
251 | if (ret) \ | 238 | if (ret) \ |
252 | ret = trace_seq_printf(s, print); \ | 239 | return ret; \ |
240 | \ | ||
241 | ret = trace_seq_printf(s, print); \ | ||
253 | if (!ret) \ | 242 | if (!ret) \ |
254 | return TRACE_TYPE_PARTIAL_LINE; \ | 243 | return TRACE_TYPE_PARTIAL_LINE; \ |
255 | \ | 244 | \ |
@@ -335,7 +324,7 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \ | |||
335 | 324 | ||
336 | #undef DECLARE_EVENT_CLASS | 325 | #undef DECLARE_EVENT_CLASS |
337 | #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ | 326 | #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ |
338 | static int notrace \ | 327 | static int notrace __init \ |
339 | ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ | 328 | ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ |
340 | { \ | 329 | { \ |
341 | struct ftrace_raw_##call field; \ | 330 | struct ftrace_raw_##call field; \ |
@@ -414,7 +403,8 @@ static inline notrace int ftrace_get_offsets_##call( \ | |||
414 | * | 403 | * |
415 | * static void ftrace_raw_event_<call>(void *__data, proto) | 404 | * static void ftrace_raw_event_<call>(void *__data, proto) |
416 | * { | 405 | * { |
417 | * struct ftrace_event_call *event_call = __data; | 406 | * struct ftrace_event_file *ftrace_file = __data; |
407 | * struct ftrace_event_call *event_call = ftrace_file->event_call; | ||
418 | * struct ftrace_data_offsets_<call> __maybe_unused __data_offsets; | 408 | * struct ftrace_data_offsets_<call> __maybe_unused __data_offsets; |
419 | * struct ring_buffer_event *event; | 409 | * struct ring_buffer_event *event; |
420 | * struct ftrace_raw_<call> *entry; <-- defined in stage 1 | 410 | * struct ftrace_raw_<call> *entry; <-- defined in stage 1 |
@@ -423,12 +413,16 @@ static inline notrace int ftrace_get_offsets_##call( \ | |||
423 | * int __data_size; | 413 | * int __data_size; |
424 | * int pc; | 414 | * int pc; |
425 | * | 415 | * |
416 | * if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, | ||
417 | * &ftrace_file->flags)) | ||
418 | * return; | ||
419 | * | ||
426 | * local_save_flags(irq_flags); | 420 | * local_save_flags(irq_flags); |
427 | * pc = preempt_count(); | 421 | * pc = preempt_count(); |
428 | * | 422 | * |
429 | * __data_size = ftrace_get_offsets_<call>(&__data_offsets, args); | 423 | * __data_size = ftrace_get_offsets_<call>(&__data_offsets, args); |
430 | * | 424 | * |
431 | * event = trace_current_buffer_lock_reserve(&buffer, | 425 | * event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, |
432 | * event_<call>->event.type, | 426 | * event_<call>->event.type, |
433 | * sizeof(*entry) + __data_size, | 427 | * sizeof(*entry) + __data_size, |
434 | * irq_flags, pc); | 428 | * irq_flags, pc); |
@@ -440,7 +434,7 @@ static inline notrace int ftrace_get_offsets_##call( \ | |||
440 | * __array macros. | 434 | * __array macros. |
441 | * | 435 | * |
442 | * if (!filter_current_check_discard(buffer, event_call, entry, event)) | 436 | * if (!filter_current_check_discard(buffer, event_call, entry, event)) |
443 | * trace_current_buffer_unlock_commit(buffer, | 437 | * trace_nowake_buffer_unlock_commit(buffer, |
444 | * event, irq_flags, pc); | 438 | * event, irq_flags, pc); |
445 | * } | 439 | * } |
446 | * | 440 | * |
@@ -518,7 +512,8 @@ static inline notrace int ftrace_get_offsets_##call( \ | |||
518 | static notrace void \ | 512 | static notrace void \ |
519 | ftrace_raw_event_##call(void *__data, proto) \ | 513 | ftrace_raw_event_##call(void *__data, proto) \ |
520 | { \ | 514 | { \ |
521 | struct ftrace_event_call *event_call = __data; \ | 515 | struct ftrace_event_file *ftrace_file = __data; \ |
516 | struct ftrace_event_call *event_call = ftrace_file->event_call; \ | ||
522 | struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ | 517 | struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ |
523 | struct ring_buffer_event *event; \ | 518 | struct ring_buffer_event *event; \ |
524 | struct ftrace_raw_##call *entry; \ | 519 | struct ftrace_raw_##call *entry; \ |
@@ -527,12 +522,16 @@ ftrace_raw_event_##call(void *__data, proto) \ | |||
527 | int __data_size; \ | 522 | int __data_size; \ |
528 | int pc; \ | 523 | int pc; \ |
529 | \ | 524 | \ |
525 | if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, \ | ||
526 | &ftrace_file->flags)) \ | ||
527 | return; \ | ||
528 | \ | ||
530 | local_save_flags(irq_flags); \ | 529 | local_save_flags(irq_flags); \ |
531 | pc = preempt_count(); \ | 530 | pc = preempt_count(); \ |
532 | \ | 531 | \ |
533 | __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ | 532 | __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ |
534 | \ | 533 | \ |
535 | event = trace_current_buffer_lock_reserve(&buffer, \ | 534 | event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, \ |
536 | event_call->event.type, \ | 535 | event_call->event.type, \ |
537 | sizeof(*entry) + __data_size, \ | 536 | sizeof(*entry) + __data_size, \ |
538 | irq_flags, pc); \ | 537 | irq_flags, pc); \ |
@@ -581,7 +580,7 @@ static inline void ftrace_test_probe_##call(void) \ | |||
581 | #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ | 580 | #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ |
582 | _TRACE_PERF_PROTO(call, PARAMS(proto)); \ | 581 | _TRACE_PERF_PROTO(call, PARAMS(proto)); \ |
583 | static const char print_fmt_##call[] = print; \ | 582 | static const char print_fmt_##call[] = print; \ |
584 | static struct ftrace_event_class __used event_class_##call = { \ | 583 | static struct ftrace_event_class __used __refdata event_class_##call = { \ |
585 | .system = __stringify(TRACE_SYSTEM), \ | 584 | .system = __stringify(TRACE_SYSTEM), \ |
586 | .define_fields = ftrace_define_fields_##call, \ | 585 | .define_fields = ftrace_define_fields_##call, \ |
587 | .fields = LIST_HEAD_INIT(event_class_##call.fields),\ | 586 | .fields = LIST_HEAD_INIT(event_class_##call.fields),\ |
@@ -705,5 +704,3 @@ static inline void perf_test_probe_##call(void) \ | |||
705 | #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) | 704 | #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) |
706 | #endif /* CONFIG_PERF_EVENTS */ | 705 | #endif /* CONFIG_PERF_EVENTS */ |
707 | 706 | ||
708 | #undef _TRACE_PROFILE_INIT | ||
709 | |||
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index fc382d6e2765..5e9efd4b83a4 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -176,6 +176,8 @@ config IRQSOFF_TRACER | |||
176 | select GENERIC_TRACER | 176 | select GENERIC_TRACER |
177 | select TRACER_MAX_TRACE | 177 | select TRACER_MAX_TRACE |
178 | select RING_BUFFER_ALLOW_SWAP | 178 | select RING_BUFFER_ALLOW_SWAP |
179 | select TRACER_SNAPSHOT | ||
180 | select TRACER_SNAPSHOT_PER_CPU_SWAP | ||
179 | help | 181 | help |
180 | This option measures the time spent in irqs-off critical | 182 | This option measures the time spent in irqs-off critical |
181 | sections, with microsecond accuracy. | 183 | sections, with microsecond accuracy. |
@@ -198,6 +200,8 @@ config PREEMPT_TRACER | |||
198 | select GENERIC_TRACER | 200 | select GENERIC_TRACER |
199 | select TRACER_MAX_TRACE | 201 | select TRACER_MAX_TRACE |
200 | select RING_BUFFER_ALLOW_SWAP | 202 | select RING_BUFFER_ALLOW_SWAP |
203 | select TRACER_SNAPSHOT | ||
204 | select TRACER_SNAPSHOT_PER_CPU_SWAP | ||
201 | help | 205 | help |
202 | This option measures the time spent in preemption-off critical | 206 | This option measures the time spent in preemption-off critical |
203 | sections, with microsecond accuracy. | 207 | sections, with microsecond accuracy. |
@@ -217,6 +221,7 @@ config SCHED_TRACER | |||
217 | select GENERIC_TRACER | 221 | select GENERIC_TRACER |
218 | select CONTEXT_SWITCH_TRACER | 222 | select CONTEXT_SWITCH_TRACER |
219 | select TRACER_MAX_TRACE | 223 | select TRACER_MAX_TRACE |
224 | select TRACER_SNAPSHOT | ||
220 | help | 225 | help |
221 | This tracer tracks the latency of the highest priority task | 226 | This tracer tracks the latency of the highest priority task |
222 | to be scheduled in, starting from the point it has woken up. | 227 | to be scheduled in, starting from the point it has woken up. |
@@ -248,6 +253,27 @@ config TRACER_SNAPSHOT | |||
248 | echo 1 > /sys/kernel/debug/tracing/snapshot | 253 | echo 1 > /sys/kernel/debug/tracing/snapshot |
249 | cat snapshot | 254 | cat snapshot |
250 | 255 | ||
256 | config TRACER_SNAPSHOT_PER_CPU_SWAP | ||
257 | bool "Allow snapshot to swap per CPU" | ||
258 | depends on TRACER_SNAPSHOT | ||
259 | select RING_BUFFER_ALLOW_SWAP | ||
260 | help | ||
261 | Allow doing a snapshot of a single CPU buffer instead of a | ||
262 | full swap (all buffers). If this is set, then the following is | ||
263 | allowed: | ||
264 | |||
265 | echo 1 > /sys/kernel/debug/tracing/per_cpu/cpu2/snapshot | ||
266 | |||
267 | After which, only the tracing buffer for CPU 2 is swapped with | ||
268 | the main tracing buffer, and the other CPU buffers remain the same. | ||
269 | |||
270 | When this is enabled, it adds a little more overhead to the | ||
271 | trace recording, as it needs to add some checks to synchronize | ||
272 | recording with swaps. But this does not affect the performance | ||
273 | of the overall system. This is enabled by default when the preempt | ||
274 | or irq latency tracers are enabled, as those need to swap as well | ||
275 | and already add the overhead (plus a lot more). | ||
276 | |||
251 | config TRACE_BRANCH_PROFILING | 277 | config TRACE_BRANCH_PROFILING |
252 | bool | 278 | bool |
253 | select GENERIC_TRACER | 279 | select GENERIC_TRACER |
@@ -524,6 +550,29 @@ config RING_BUFFER_BENCHMARK | |||
524 | 550 | ||
525 | If unsure, say N. | 551 | If unsure, say N. |
526 | 552 | ||
553 | config RING_BUFFER_STARTUP_TEST | ||
554 | bool "Ring buffer startup self test" | ||
555 | depends on RING_BUFFER | ||
556 | help | ||
557 | Run a simple self test on the ring buffer on boot up. Late in the | ||
558 | kernel boot sequence, the test will start and kick off | ||
559 | a thread per cpu. Each thread will write events of various sizes | ||
560 | into the ring buffer. Another thread is created to send IPIs | ||
561 | to each of the threads, where the IPI handler will also write | ||
562 | to the ring buffer, to test/stress the nesting ability. | ||
563 | If any anomalies are discovered, a warning will be displayed | ||
564 | and all ring buffers will be disabled. | ||
565 | |||
566 | The test runs for 10 seconds. This will slow your boot time | ||
567 | by at least 10 more seconds. | ||
568 | |||
569 | At the end of the test, statistics and more checks are done. | ||
570 | It will output the stats of each per cpu buffer: what | ||
571 | was written, the sizes, what was read, what was lost, and | ||
572 | other similar details. | ||
573 | |||
574 | If unsure, say N. | ||
575 | |||
527 | endif # FTRACE | 576 | endif # FTRACE |
528 | 577 | ||
529 | endif # TRACING_SUPPORT | 578 | endif # TRACING_SUPPORT |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 5a0f781cd729..ed58a3216a6d 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -72,7 +72,7 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, | |||
72 | bool blk_tracer = blk_tracer_enabled; | 72 | bool blk_tracer = blk_tracer_enabled; |
73 | 73 | ||
74 | if (blk_tracer) { | 74 | if (blk_tracer) { |
75 | buffer = blk_tr->buffer; | 75 | buffer = blk_tr->trace_buffer.buffer; |
76 | pc = preempt_count(); | 76 | pc = preempt_count(); |
77 | event = trace_buffer_lock_reserve(buffer, TRACE_BLK, | 77 | event = trace_buffer_lock_reserve(buffer, TRACE_BLK, |
78 | sizeof(*t) + len, | 78 | sizeof(*t) + len, |
@@ -218,7 +218,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
218 | if (blk_tracer) { | 218 | if (blk_tracer) { |
219 | tracing_record_cmdline(current); | 219 | tracing_record_cmdline(current); |
220 | 220 | ||
221 | buffer = blk_tr->buffer; | 221 | buffer = blk_tr->trace_buffer.buffer; |
222 | pc = preempt_count(); | 222 | pc = preempt_count(); |
223 | event = trace_buffer_lock_reserve(buffer, TRACE_BLK, | 223 | event = trace_buffer_lock_reserve(buffer, TRACE_BLK, |
224 | sizeof(*t) + pdu_len, | 224 | sizeof(*t) + pdu_len, |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b3fde6d7b7fc..8a5c017bb50c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -486,7 +486,6 @@ struct ftrace_profile_stat { | |||
486 | #define PROFILES_PER_PAGE \ | 486 | #define PROFILES_PER_PAGE \ |
487 | (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) | 487 | (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) |
488 | 488 | ||
489 | static int ftrace_profile_bits __read_mostly; | ||
490 | static int ftrace_profile_enabled __read_mostly; | 489 | static int ftrace_profile_enabled __read_mostly; |
491 | 490 | ||
492 | /* ftrace_profile_lock - synchronize the enable and disable of the profiler */ | 491 | /* ftrace_profile_lock - synchronize the enable and disable of the profiler */ |
@@ -494,7 +493,8 @@ static DEFINE_MUTEX(ftrace_profile_lock); | |||
494 | 493 | ||
495 | static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats); | 494 | static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats); |
496 | 495 | ||
497 | #define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */ | 496 | #define FTRACE_PROFILE_HASH_BITS 10 |
497 | #define FTRACE_PROFILE_HASH_SIZE (1 << FTRACE_PROFILE_HASH_BITS) | ||
498 | 498 | ||
499 | static void * | 499 | static void * |
500 | function_stat_next(void *v, int idx) | 500 | function_stat_next(void *v, int idx) |
@@ -676,7 +676,7 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) | |||
676 | 676 | ||
677 | pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE); | 677 | pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE); |
678 | 678 | ||
679 | for (i = 0; i < pages; i++) { | 679 | for (i = 1; i < pages; i++) { |
680 | pg->next = (void *)get_zeroed_page(GFP_KERNEL); | 680 | pg->next = (void *)get_zeroed_page(GFP_KERNEL); |
681 | if (!pg->next) | 681 | if (!pg->next) |
682 | goto out_free; | 682 | goto out_free; |
@@ -724,13 +724,6 @@ static int ftrace_profile_init_cpu(int cpu) | |||
724 | if (!stat->hash) | 724 | if (!stat->hash) |
725 | return -ENOMEM; | 725 | return -ENOMEM; |
726 | 726 | ||
727 | if (!ftrace_profile_bits) { | ||
728 | size--; | ||
729 | |||
730 | for (; size; size >>= 1) | ||
731 | ftrace_profile_bits++; | ||
732 | } | ||
733 | |||
734 | /* Preallocate the function profiling pages */ | 727 | /* Preallocate the function profiling pages */ |
735 | if (ftrace_profile_pages_init(stat) < 0) { | 728 | if (ftrace_profile_pages_init(stat) < 0) { |
736 | kfree(stat->hash); | 729 | kfree(stat->hash); |
@@ -763,7 +756,7 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) | |||
763 | struct hlist_head *hhd; | 756 | struct hlist_head *hhd; |
764 | unsigned long key; | 757 | unsigned long key; |
765 | 758 | ||
766 | key = hash_long(ip, ftrace_profile_bits); | 759 | key = hash_long(ip, FTRACE_PROFILE_HASH_BITS); |
767 | hhd = &stat->hash[key]; | 760 | hhd = &stat->hash[key]; |
768 | 761 | ||
769 | if (hlist_empty(hhd)) | 762 | if (hlist_empty(hhd)) |
@@ -782,7 +775,7 @@ static void ftrace_add_profile(struct ftrace_profile_stat *stat, | |||
782 | { | 775 | { |
783 | unsigned long key; | 776 | unsigned long key; |
784 | 777 | ||
785 | key = hash_long(rec->ip, ftrace_profile_bits); | 778 | key = hash_long(rec->ip, FTRACE_PROFILE_HASH_BITS); |
786 | hlist_add_head_rcu(&rec->node, &stat->hash[key]); | 779 | hlist_add_head_rcu(&rec->node, &stat->hash[key]); |
787 | } | 780 | } |
788 | 781 | ||
@@ -1079,7 +1072,7 @@ struct ftrace_func_probe { | |||
1079 | unsigned long flags; | 1072 | unsigned long flags; |
1080 | unsigned long ip; | 1073 | unsigned long ip; |
1081 | void *data; | 1074 | void *data; |
1082 | struct rcu_head rcu; | 1075 | struct list_head free_list; |
1083 | }; | 1076 | }; |
1084 | 1077 | ||
1085 | struct ftrace_func_entry { | 1078 | struct ftrace_func_entry { |
@@ -1329,7 +1322,6 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, | |||
1329 | struct hlist_head *hhd; | 1322 | struct hlist_head *hhd; |
1330 | struct ftrace_hash *old_hash; | 1323 | struct ftrace_hash *old_hash; |
1331 | struct ftrace_hash *new_hash; | 1324 | struct ftrace_hash *new_hash; |
1332 | unsigned long key; | ||
1333 | int size = src->count; | 1325 | int size = src->count; |
1334 | int bits = 0; | 1326 | int bits = 0; |
1335 | int ret; | 1327 | int ret; |
@@ -1372,10 +1364,6 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, | |||
1372 | for (i = 0; i < size; i++) { | 1364 | for (i = 0; i < size; i++) { |
1373 | hhd = &src->buckets[i]; | 1365 | hhd = &src->buckets[i]; |
1374 | hlist_for_each_entry_safe(entry, tn, hhd, hlist) { | 1366 | hlist_for_each_entry_safe(entry, tn, hhd, hlist) { |
1375 | if (bits > 0) | ||
1376 | key = hash_long(entry->ip, bits); | ||
1377 | else | ||
1378 | key = 0; | ||
1379 | remove_hash_entry(src, entry); | 1367 | remove_hash_entry(src, entry); |
1380 | __add_hash_entry(new_hash, entry); | 1368 | __add_hash_entry(new_hash, entry); |
1381 | } | 1369 | } |
@@ -2973,28 +2961,27 @@ static void __disable_ftrace_function_probe(void) | |||
2973 | } | 2961 | } |
2974 | 2962 | ||
2975 | 2963 | ||
2976 | static void ftrace_free_entry_rcu(struct rcu_head *rhp) | 2964 | static void ftrace_free_entry(struct ftrace_func_probe *entry) |
2977 | { | 2965 | { |
2978 | struct ftrace_func_probe *entry = | ||
2979 | container_of(rhp, struct ftrace_func_probe, rcu); | ||
2980 | |||
2981 | if (entry->ops->free) | 2966 | if (entry->ops->free) |
2982 | entry->ops->free(&entry->data); | 2967 | entry->ops->free(entry->ops, entry->ip, &entry->data); |
2983 | kfree(entry); | 2968 | kfree(entry); |
2984 | } | 2969 | } |
2985 | 2970 | ||
2986 | |||
2987 | int | 2971 | int |
2988 | register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, | 2972 | register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, |
2989 | void *data) | 2973 | void *data) |
2990 | { | 2974 | { |
2991 | struct ftrace_func_probe *entry; | 2975 | struct ftrace_func_probe *entry; |
2976 | struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash; | ||
2977 | struct ftrace_hash *hash; | ||
2992 | struct ftrace_page *pg; | 2978 | struct ftrace_page *pg; |
2993 | struct dyn_ftrace *rec; | 2979 | struct dyn_ftrace *rec; |
2994 | int type, len, not; | 2980 | int type, len, not; |
2995 | unsigned long key; | 2981 | unsigned long key; |
2996 | int count = 0; | 2982 | int count = 0; |
2997 | char *search; | 2983 | char *search; |
2984 | int ret; | ||
2998 | 2985 | ||
2999 | type = filter_parse_regex(glob, strlen(glob), &search, ¬); | 2986 | type = filter_parse_regex(glob, strlen(glob), &search, ¬); |
3000 | len = strlen(search); | 2987 | len = strlen(search); |
@@ -3005,8 +2992,16 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, | |||
3005 | 2992 | ||
3006 | mutex_lock(&ftrace_lock); | 2993 | mutex_lock(&ftrace_lock); |
3007 | 2994 | ||
3008 | if (unlikely(ftrace_disabled)) | 2995 | hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); |
2996 | if (!hash) { | ||
2997 | count = -ENOMEM; | ||
3009 | goto out_unlock; | 2998 | goto out_unlock; |
2999 | } | ||
3000 | |||
3001 | if (unlikely(ftrace_disabled)) { | ||
3002 | count = -ENODEV; | ||
3003 | goto out_unlock; | ||
3004 | } | ||
3010 | 3005 | ||
3011 | do_for_each_ftrace_rec(pg, rec) { | 3006 | do_for_each_ftrace_rec(pg, rec) { |
3012 | 3007 | ||
@@ -3030,14 +3025,21 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, | |||
3030 | * for each function we find. We call the callback | 3025 | * for each function we find. We call the callback |
3031 | * to give the caller an opportunity to do so. | 3026 | * to give the caller an opportunity to do so. |
3032 | */ | 3027 | */ |
3033 | if (ops->callback) { | 3028 | if (ops->init) { |
3034 | if (ops->callback(rec->ip, &entry->data) < 0) { | 3029 | if (ops->init(ops, rec->ip, &entry->data) < 0) { |
3035 | /* caller does not like this func */ | 3030 | /* caller does not like this func */ |
3036 | kfree(entry); | 3031 | kfree(entry); |
3037 | continue; | 3032 | continue; |
3038 | } | 3033 | } |
3039 | } | 3034 | } |
3040 | 3035 | ||
3036 | ret = enter_record(hash, rec, 0); | ||
3037 | if (ret < 0) { | ||
3038 | kfree(entry); | ||
3039 | count = ret; | ||
3040 | goto out_unlock; | ||
3041 | } | ||
3042 | |||
3041 | entry->ops = ops; | 3043 | entry->ops = ops; |
3042 | entry->ip = rec->ip; | 3044 | entry->ip = rec->ip; |
3043 | 3045 | ||
@@ -3045,10 +3047,16 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, | |||
3045 | hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]); | 3047 | hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]); |
3046 | 3048 | ||
3047 | } while_for_each_ftrace_rec(); | 3049 | } while_for_each_ftrace_rec(); |
3050 | |||
3051 | ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); | ||
3052 | if (ret < 0) | ||
3053 | count = ret; | ||
3054 | |||
3048 | __enable_ftrace_function_probe(); | 3055 | __enable_ftrace_function_probe(); |
3049 | 3056 | ||
3050 | out_unlock: | 3057 | out_unlock: |
3051 | mutex_unlock(&ftrace_lock); | 3058 | mutex_unlock(&ftrace_lock); |
3059 | free_ftrace_hash(hash); | ||
3052 | 3060 | ||
3053 | return count; | 3061 | return count; |
3054 | } | 3062 | } |
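
The hunk above changes the probe callback interface: the old ->callback(ip, &data) hook becomes ->init(ops, ip, &data), and ->free() now also receives the ops and the ip. A minimal, hypothetical user of the updated interface could look like the sketch below; the .func signature and the my_* names are assumptions for illustration, not part of this patch.

    #include <linux/ftrace.h>
    #include <linux/slab.h>

    /* Sketch only: count hits on every function matched by the glob. */
    static void
    my_probe_func(unsigned long ip, unsigned long parent_ip, void **data)
    {
            unsigned long *count = *data;

            (*count)++;
    }

    static int
    my_probe_init(struct ftrace_probe_ops *ops, unsigned long ip, void **data)
    {
            /* Called once per matched function, under ftrace_lock (may sleep). */
            *data = kzalloc(sizeof(unsigned long), GFP_KERNEL);
            return *data ? 0 : -ENOMEM;
    }

    static void
    my_probe_free(struct ftrace_probe_ops *ops, unsigned long ip, void **data)
    {
            kfree(*data);
    }

    static struct ftrace_probe_ops my_probe_ops = {
            .func = my_probe_func,
            .init = my_probe_init,
            .free = my_probe_free,
    };

    /* register_ftrace_function_probe("vfs_*", &my_probe_ops, NULL); */
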
@@ -3062,7 +3070,12 @@ static void | |||
3062 | __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, | 3070 | __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, |
3063 | void *data, int flags) | 3071 | void *data, int flags) |
3064 | { | 3072 | { |
3073 | struct ftrace_func_entry *rec_entry; | ||
3065 | struct ftrace_func_probe *entry; | 3074 | struct ftrace_func_probe *entry; |
3075 | struct ftrace_func_probe *p; | ||
3076 | struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash; | ||
3077 | struct list_head free_list; | ||
3078 | struct ftrace_hash *hash; | ||
3066 | struct hlist_node *tmp; | 3079 | struct hlist_node *tmp; |
3067 | char str[KSYM_SYMBOL_LEN]; | 3080 | char str[KSYM_SYMBOL_LEN]; |
3068 | int type = MATCH_FULL; | 3081 | int type = MATCH_FULL; |
@@ -3083,6 +3096,14 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, | |||
3083 | } | 3096 | } |
3084 | 3097 | ||
3085 | mutex_lock(&ftrace_lock); | 3098 | mutex_lock(&ftrace_lock); |
3099 | |||
3100 | hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); | ||
3101 | if (!hash) | ||
3102 | /* Hmm, should report this somehow */ | ||
3103 | goto out_unlock; | ||
3104 | |||
3105 | INIT_LIST_HEAD(&free_list); | ||
3106 | |||
3086 | for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { | 3107 | for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { |
3087 | struct hlist_head *hhd = &ftrace_func_hash[i]; | 3108 | struct hlist_head *hhd = &ftrace_func_hash[i]; |
3088 | 3109 | ||
@@ -3103,12 +3124,30 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, | |||
3103 | continue; | 3124 | continue; |
3104 | } | 3125 | } |
3105 | 3126 | ||
3127 | rec_entry = ftrace_lookup_ip(hash, entry->ip); | ||
3128 | /* It is possible more than one entry had this ip */ | ||
3129 | if (rec_entry) | ||
3130 | free_hash_entry(hash, rec_entry); | ||
3131 | |||
3106 | hlist_del_rcu(&entry->node); | 3132 | hlist_del_rcu(&entry->node); |
3107 | call_rcu_sched(&entry->rcu, ftrace_free_entry_rcu); | 3133 | list_add(&entry->free_list, &free_list); |
3108 | } | 3134 | } |
3109 | } | 3135 | } |
3110 | __disable_ftrace_function_probe(); | 3136 | __disable_ftrace_function_probe(); |
3137 | /* | ||
3138 | * Remove after the disable is called. Otherwise, if the last | ||
3139 | * probe is removed, a null hash means *all enabled*. | ||
3140 | */ | ||
3141 | ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); | ||
3142 | synchronize_sched(); | ||
3143 | list_for_each_entry_safe(entry, p, &free_list, free_list) { | ||
3144 | list_del(&entry->free_list); | ||
3145 | ftrace_free_entry(entry); | ||
3146 | } | ||
3147 | |||
3148 | out_unlock: | ||
3111 | mutex_unlock(&ftrace_lock); | 3149 | mutex_unlock(&ftrace_lock); |
3150 | free_ftrace_hash(hash); | ||
3112 | } | 3151 | } |
3113 | 3152 | ||
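
The unregister path above replaces the old per-entry call_rcu_sched() with a batched teardown: matching entries are unlinked with hlist_del_rcu() and parked on a local list, the probes are disabled, the filter hash is moved, and only after a single synchronize_sched() are the entries actually freed. Stripped of the ftrace specifics, the ordering looks roughly like the sketch below; struct probe, matches(), disable_probes(), head and probe_lock are placeholders.

    struct probe {
            struct hlist_node node;         /* published, RCU-protected  */
            struct list_head free_list;     /* private, teardown only    */
            /* ... payload ... */
    };

    LIST_HEAD(free_list);
    struct hlist_node *tmp;
    struct probe *entry, *next;

    mutex_lock(&probe_lock);
    hlist_for_each_entry_safe(entry, tmp, head, node) {
            if (!matches(entry))
                    continue;
            hlist_del_rcu(&entry->node);            /* unpublish from readers   */
            list_add(&entry->free_list, &free_list);
    }
    disable_probes();                               /* no new callers enter     */
    synchronize_sched();                            /* wait for in-flight readers */
    list_for_each_entry_safe(entry, next, &free_list, free_list) {
            list_del(&entry->free_list);
            kfree(entry);                           /* now safe to free         */
    }
    mutex_unlock(&probe_lock);
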
3114 | void | 3153 | void |
@@ -3736,7 +3775,8 @@ out: | |||
3736 | if (fail) | 3775 | if (fail) |
3737 | return -EINVAL; | 3776 | return -EINVAL; |
3738 | 3777 | ||
3739 | ftrace_graph_filter_enabled = 1; | 3778 | ftrace_graph_filter_enabled = !!(*idx); |
3779 | |||
3740 | return 0; | 3780 | return 0; |
3741 | } | 3781 | } |
3742 | 3782 | ||
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 6989df2ba194..b59aea2c48c2 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -8,13 +8,16 @@ | |||
8 | #include <linux/trace_clock.h> | 8 | #include <linux/trace_clock.h> |
9 | #include <linux/trace_seq.h> | 9 | #include <linux/trace_seq.h> |
10 | #include <linux/spinlock.h> | 10 | #include <linux/spinlock.h> |
11 | #include <linux/irq_work.h> | ||
11 | #include <linux/debugfs.h> | 12 | #include <linux/debugfs.h> |
12 | #include <linux/uaccess.h> | 13 | #include <linux/uaccess.h> |
13 | #include <linux/hardirq.h> | 14 | #include <linux/hardirq.h> |
15 | #include <linux/kthread.h> /* for self test */ | ||
14 | #include <linux/kmemcheck.h> | 16 | #include <linux/kmemcheck.h> |
15 | #include <linux/module.h> | 17 | #include <linux/module.h> |
16 | #include <linux/percpu.h> | 18 | #include <linux/percpu.h> |
17 | #include <linux/mutex.h> | 19 | #include <linux/mutex.h> |
20 | #include <linux/delay.h> | ||
18 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
19 | #include <linux/init.h> | 22 | #include <linux/init.h> |
20 | #include <linux/hash.h> | 23 | #include <linux/hash.h> |
@@ -444,6 +447,12 @@ int ring_buffer_print_page_header(struct trace_seq *s) | |||
444 | return ret; | 447 | return ret; |
445 | } | 448 | } |
446 | 449 | ||
450 | struct rb_irq_work { | ||
451 | struct irq_work work; | ||
452 | wait_queue_head_t waiters; | ||
453 | bool waiters_pending; | ||
454 | }; | ||
455 | |||
447 | /* | 456 | /* |
448 | * head_page == tail_page && head == tail then buffer is empty. | 457 | * head_page == tail_page && head == tail then buffer is empty. |
449 | */ | 458 | */ |
@@ -478,6 +487,8 @@ struct ring_buffer_per_cpu { | |||
478 | struct list_head new_pages; /* new pages to add */ | 487 | struct list_head new_pages; /* new pages to add */ |
479 | struct work_struct update_pages_work; | 488 | struct work_struct update_pages_work; |
480 | struct completion update_done; | 489 | struct completion update_done; |
490 | |||
491 | struct rb_irq_work irq_work; | ||
481 | }; | 492 | }; |
482 | 493 | ||
483 | struct ring_buffer { | 494 | struct ring_buffer { |
@@ -497,6 +508,8 @@ struct ring_buffer { | |||
497 | struct notifier_block cpu_notify; | 508 | struct notifier_block cpu_notify; |
498 | #endif | 509 | #endif |
499 | u64 (*clock)(void); | 510 | u64 (*clock)(void); |
511 | |||
512 | struct rb_irq_work irq_work; | ||
500 | }; | 513 | }; |
501 | 514 | ||
502 | struct ring_buffer_iter { | 515 | struct ring_buffer_iter { |
@@ -508,6 +521,118 @@ struct ring_buffer_iter { | |||
508 | u64 read_stamp; | 521 | u64 read_stamp; |
509 | }; | 522 | }; |
510 | 523 | ||
524 | /* | ||
525 | * rb_wake_up_waiters - wake up tasks waiting for ring buffer input | ||
526 | * | ||
527 | * Wakes up all tasks that are blocked on the ring buffer waiters queue. | ||
528 | * This runs as an irq_work callback, so it can be queued from any context. | ||
529 | */ | ||
530 | static void rb_wake_up_waiters(struct irq_work *work) | ||
531 | { | ||
532 | struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work); | ||
533 | |||
534 | wake_up_all(&rbwork->waiters); | ||
535 | } | ||
536 | |||
537 | /** | ||
538 | * ring_buffer_wait - wait for input to the ring buffer | ||
539 | * @buffer: buffer to wait on | ||
540 | * @cpu: the cpu buffer to wait on | ||
541 | * | ||
542 | * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon | ||
543 | * as data is added to any of the @buffer's cpu buffers. Otherwise | ||
544 | * it will wait for data to be added to a specific cpu buffer. | ||
545 | */ | ||
546 | void ring_buffer_wait(struct ring_buffer *buffer, int cpu) | ||
547 | { | ||
548 | struct ring_buffer_per_cpu *cpu_buffer; | ||
549 | DEFINE_WAIT(wait); | ||
550 | struct rb_irq_work *work; | ||
551 | |||
552 | /* | ||
553 | * Depending on what the caller is waiting for, either any | ||
554 | * data in any cpu buffer, or a specific buffer, put the | ||
555 | * caller on the appropriate wait queue. | ||
556 | */ | ||
557 | if (cpu == RING_BUFFER_ALL_CPUS) | ||
558 | work = &buffer->irq_work; | ||
559 | else { | ||
560 | cpu_buffer = buffer->buffers[cpu]; | ||
561 | work = &cpu_buffer->irq_work; | ||
562 | } | ||
563 | |||
564 | |||
565 | prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); | ||
566 | |||
567 | /* | ||
568 | * The events can happen in critical sections where | ||
569 | * checking a work queue can cause deadlocks. | ||
570 | * After adding a task to the queue, this flag is set | ||
571 | * only to notify events to try to wake up the queue | ||
572 | * using irq_work. | ||
573 | * | ||
574 | * We don't clear it even if the buffer is no longer | ||
575 | * empty. The flag only causes the next event to run | ||
576 | * irq_work to do the work queue wake up. The worst | ||
577 | * that can happen if we race with !trace_empty() is that | ||
578 | * an event will cause an irq_work to try to wake up | ||
579 | * an empty queue. | ||
580 | * | ||
581 | * There's no reason to protect this flag either, as | ||
582 | * the work queue and irq_work logic will do the necessary | ||
583 | * synchronization for the wake ups. The only thing | ||
584 | * that is necessary is that the wake up happens after | ||
585 | * a task has been queued. It's OK for spurious wake ups. | ||
586 | */ | ||
587 | work->waiters_pending = true; | ||
588 | |||
589 | if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) || | ||
590 | (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu))) | ||
591 | schedule(); | ||
592 | |||
593 | finish_wait(&work->waiters, &wait); | ||
594 | } | ||
595 | |||
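
The long comment above is the heart of this change: the sleeper publishes waiters_pending before its final emptiness check, and the writer checks that flag after committing data, deferring the actual wake_up_all() to irq_work so that it is safe even when the commit happens with interrupts disabled. The two halves, reduced to a miniature (buffer_is_empty(), commit_event() and work are illustrative, not the patch's exact code):

    /* reader side (process context) */
    DEFINE_WAIT(wait);

    prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
    work->waiters_pending = true;           /* publish intent to sleep ...      */
    if (buffer_is_empty())                  /* ... before the final empty check */
            schedule();
    finish_wait(&work->waiters, &wait);

    /* writer side (any context, possibly with IRQs off) */
    commit_event();
    if (work->waiters_pending) {
            work->waiters_pending = false;
            irq_work_queue(&work->work);    /* wake_up_all() runs later, safely */
    }
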
596 | /** | ||
597 | * ring_buffer_poll_wait - poll on buffer input | ||
598 | * @buffer: buffer to wait on | ||
599 | * @cpu: the cpu buffer to wait on | ||
600 | * @filp: the file descriptor | ||
601 | * @poll_table: The poll descriptor | ||
602 | * | ||
603 | * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon | ||
604 | * as data is added to any of the @buffer's cpu buffers. Otherwise | ||
605 | * it will wait for data to be added to a specific cpu buffer. | ||
606 | * | ||
607 | * Returns POLLIN | POLLRDNORM if data exists in the buffers, | ||
608 | * zero otherwise. | ||
609 | */ | ||
610 | int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, | ||
611 | struct file *filp, poll_table *poll_table) | ||
612 | { | ||
613 | struct ring_buffer_per_cpu *cpu_buffer; | ||
614 | struct rb_irq_work *work; | ||
615 | |||
616 | if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || | ||
617 | (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) | ||
618 | return POLLIN | POLLRDNORM; | ||
619 | |||
620 | if (cpu == RING_BUFFER_ALL_CPUS) | ||
621 | work = &buffer->irq_work; | ||
622 | else { | ||
623 | cpu_buffer = buffer->buffers[cpu]; | ||
624 | work = &cpu_buffer->irq_work; | ||
625 | } | ||
626 | |||
627 | work->waiters_pending = true; | ||
628 | poll_wait(filp, &work->waiters, poll_table); | ||
629 | |||
630 | if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || | ||
631 | (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) | ||
632 | return POLLIN | POLLRDNORM; | ||
633 | return 0; | ||
634 | } | ||
635 | |||
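
ring_buffer_poll_wait() is written to back a debugfs file's .poll method; note the emptiness check both before and after poll_wait(), which closes the race with a writer committing in between. A hypothetical caller (my_iter and its fields are illustrative):

    struct my_iter {
            struct ring_buffer *buffer;
            int cpu_file;                   /* a CPU number or RING_BUFFER_ALL_CPUS */
    };

    static unsigned int
    my_trace_poll(struct file *filp, poll_table *poll_table)
    {
            struct my_iter *iter = filp->private_data;

            return ring_buffer_poll_wait(iter->buffer, iter->cpu_file,
                                         filp, poll_table);
    }
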
511 | /* buffer may be either ring_buffer or ring_buffer_per_cpu */ | 636 | /* buffer may be either ring_buffer or ring_buffer_per_cpu */ |
512 | #define RB_WARN_ON(b, cond) \ | 637 | #define RB_WARN_ON(b, cond) \ |
513 | ({ \ | 638 | ({ \ |
@@ -1063,6 +1188,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu) | |||
1063 | cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; | 1188 | cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
1064 | INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler); | 1189 | INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler); |
1065 | init_completion(&cpu_buffer->update_done); | 1190 | init_completion(&cpu_buffer->update_done); |
1191 | init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters); | ||
1192 | init_waitqueue_head(&cpu_buffer->irq_work.waiters); | ||
1066 | 1193 | ||
1067 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 1194 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), |
1068 | GFP_KERNEL, cpu_to_node(cpu)); | 1195 | GFP_KERNEL, cpu_to_node(cpu)); |
@@ -1158,6 +1285,9 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, | |||
1158 | buffer->clock = trace_clock_local; | 1285 | buffer->clock = trace_clock_local; |
1159 | buffer->reader_lock_key = key; | 1286 | buffer->reader_lock_key = key; |
1160 | 1287 | ||
1288 | init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters); | ||
1289 | init_waitqueue_head(&buffer->irq_work.waiters); | ||
1290 | |||
1161 | /* need at least two pages */ | 1291 | /* need at least two pages */ |
1162 | if (nr_pages < 2) | 1292 | if (nr_pages < 2) |
1163 | nr_pages = 2; | 1293 | nr_pages = 2; |
@@ -1553,11 +1683,22 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, | |||
1553 | if (!cpu_buffer->nr_pages_to_update) | 1683 | if (!cpu_buffer->nr_pages_to_update) |
1554 | continue; | 1684 | continue; |
1555 | 1685 | ||
1556 | if (cpu_online(cpu)) | 1686 | /* The update must run on the CPU that is being updated. */ |
1687 | preempt_disable(); | ||
1688 | if (cpu == smp_processor_id() || !cpu_online(cpu)) { | ||
1689 | rb_update_pages(cpu_buffer); | ||
1690 | cpu_buffer->nr_pages_to_update = 0; | ||
1691 | } else { | ||
1692 | /* | ||
1693 | * Can not disable preemption for schedule_work_on() | ||
1694 | * on PREEMPT_RT. | ||
1695 | */ | ||
1696 | preempt_enable(); | ||
1557 | schedule_work_on(cpu, | 1697 | schedule_work_on(cpu, |
1558 | &cpu_buffer->update_pages_work); | 1698 | &cpu_buffer->update_pages_work); |
1559 | else | 1699 | preempt_disable(); |
1560 | rb_update_pages(cpu_buffer); | 1700 | } |
1701 | preempt_enable(); | ||
1561 | } | 1702 | } |
1562 | 1703 | ||
1563 | /* wait for all the updates to complete */ | 1704 | /* wait for all the updates to complete */ |
@@ -1595,12 +1736,22 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, | |||
1595 | 1736 | ||
1596 | get_online_cpus(); | 1737 | get_online_cpus(); |
1597 | 1738 | ||
1598 | if (cpu_online(cpu_id)) { | 1739 | preempt_disable(); |
1740 | /* The update must run on the CPU that is being updated. */ | ||
1741 | if (cpu_id == smp_processor_id() || !cpu_online(cpu_id)) | ||
1742 | rb_update_pages(cpu_buffer); | ||
1743 | else { | ||
1744 | /* | ||
1745 | * Can not disable preemption for schedule_work_on() | ||
1746 | * on PREEMPT_RT. | ||
1747 | */ | ||
1748 | preempt_enable(); | ||
1599 | schedule_work_on(cpu_id, | 1749 | schedule_work_on(cpu_id, |
1600 | &cpu_buffer->update_pages_work); | 1750 | &cpu_buffer->update_pages_work); |
1601 | wait_for_completion(&cpu_buffer->update_done); | 1751 | wait_for_completion(&cpu_buffer->update_done); |
1602 | } else | 1752 | preempt_disable(); |
1603 | rb_update_pages(cpu_buffer); | 1753 | } |
1754 | preempt_enable(); | ||
1604 | 1755 | ||
1605 | cpu_buffer->nr_pages_to_update = 0; | 1756 | cpu_buffer->nr_pages_to_update = 0; |
1606 | put_online_cpus(); | 1757 | put_online_cpus(); |
@@ -2612,6 +2763,22 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
2612 | rb_end_commit(cpu_buffer); | 2763 | rb_end_commit(cpu_buffer); |
2613 | } | 2764 | } |
2614 | 2765 | ||
2766 | static __always_inline void | ||
2767 | rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) | ||
2768 | { | ||
2769 | if (buffer->irq_work.waiters_pending) { | ||
2770 | buffer->irq_work.waiters_pending = false; | ||
2771 | /* irq_work_queue() supplies its own memory barriers */ | ||
2772 | irq_work_queue(&buffer->irq_work.work); | ||
2773 | } | ||
2774 | |||
2775 | if (cpu_buffer->irq_work.waiters_pending) { | ||
2776 | cpu_buffer->irq_work.waiters_pending = false; | ||
2777 | /* irq_work_queue() supplies its own memory barriers */ | ||
2778 | irq_work_queue(&cpu_buffer->irq_work.work); | ||
2779 | } | ||
2780 | } | ||
2781 | |||
2615 | /** | 2782 | /** |
2616 | * ring_buffer_unlock_commit - commit a reserved | 2783 | * ring_buffer_unlock_commit - commit a reserved |
2617 | * @buffer: The buffer to commit to | 2784 | * @buffer: The buffer to commit to |
@@ -2631,6 +2798,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, | |||
2631 | 2798 | ||
2632 | rb_commit(cpu_buffer, event); | 2799 | rb_commit(cpu_buffer, event); |
2633 | 2800 | ||
2801 | rb_wakeups(buffer, cpu_buffer); | ||
2802 | |||
2634 | trace_recursive_unlock(); | 2803 | trace_recursive_unlock(); |
2635 | 2804 | ||
2636 | preempt_enable_notrace(); | 2805 | preempt_enable_notrace(); |
@@ -2803,6 +2972,8 @@ int ring_buffer_write(struct ring_buffer *buffer, | |||
2803 | 2972 | ||
2804 | rb_commit(cpu_buffer, event); | 2973 | rb_commit(cpu_buffer, event); |
2805 | 2974 | ||
2975 | rb_wakeups(buffer, cpu_buffer); | ||
2976 | |||
2806 | ret = 0; | 2977 | ret = 0; |
2807 | out: | 2978 | out: |
2808 | preempt_enable_notrace(); | 2979 | preempt_enable_notrace(); |
@@ -4467,3 +4638,320 @@ static int rb_cpu_notify(struct notifier_block *self, | |||
4467 | return NOTIFY_OK; | 4638 | return NOTIFY_OK; |
4468 | } | 4639 | } |
4469 | #endif | 4640 | #endif |
4641 | |||
4642 | #ifdef CONFIG_RING_BUFFER_STARTUP_TEST | ||
4643 | /* | ||
4644 | * This is a basic integrity check of the ring buffer. | ||
4645 | * Late in the boot cycle this test will run when configured in. | ||
4646 | * It will kick off a thread per CPU that will go into a loop | ||
4647 | * writing to the per cpu ring buffer various sizes of data. | ||
4648 | * Some of the data will be large items, some small. | ||
4649 | * | ||
4650 | * Another thread is created that goes into a spin, sending out | ||
4651 | * IPIs to the other CPUs to also write into the ring buffer. | ||
4652 | * This is to test the nesting ability of the buffer. | ||
4653 | * | ||
4654 | * Basic stats are recorded and reported. If something in the | ||
4655 | * ring buffer should happen that's not expected, a big warning | ||
4656 | * is displayed and all ring buffers are disabled. | ||
4657 | */ | ||
4658 | static struct task_struct *rb_threads[NR_CPUS] __initdata; | ||
4659 | |||
4660 | struct rb_test_data { | ||
4661 | struct ring_buffer *buffer; | ||
4662 | unsigned long events; | ||
4663 | unsigned long bytes_written; | ||
4664 | unsigned long bytes_alloc; | ||
4665 | unsigned long bytes_dropped; | ||
4666 | unsigned long events_nested; | ||
4667 | unsigned long bytes_written_nested; | ||
4668 | unsigned long bytes_alloc_nested; | ||
4669 | unsigned long bytes_dropped_nested; | ||
4670 | int min_size_nested; | ||
4671 | int max_size_nested; | ||
4672 | int max_size; | ||
4673 | int min_size; | ||
4674 | int cpu; | ||
4675 | int cnt; | ||
4676 | }; | ||
4677 | |||
4678 | static struct rb_test_data rb_data[NR_CPUS] __initdata; | ||
4679 | |||
4680 | /* 1 meg per cpu */ | ||
4681 | #define RB_TEST_BUFFER_SIZE 1048576 | ||
4682 | |||
4683 | static char rb_string[] __initdata = | ||
4684 | "abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\" | ||
4685 | "?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890" | ||
4686 | "!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv"; | ||
4687 | |||
4688 | static bool rb_test_started __initdata; | ||
4689 | |||
4690 | struct rb_item { | ||
4691 | int size; | ||
4692 | char str[]; | ||
4693 | }; | ||
4694 | |||
4695 | static __init int rb_write_something(struct rb_test_data *data, bool nested) | ||
4696 | { | ||
4697 | struct ring_buffer_event *event; | ||
4698 | struct rb_item *item; | ||
4699 | bool started; | ||
4700 | int event_len; | ||
4701 | int size; | ||
4702 | int len; | ||
4703 | int cnt; | ||
4704 | |||
4705 | /* Have nested writes different than what is written */ | ||
4706 | cnt = data->cnt + (nested ? 27 : 0); | ||
4707 | |||
4708 | /* Multiply cnt by ~e, to make some unique increment */ | ||
4709 | size = (cnt * 68 / 25) % (sizeof(rb_string) - 1); | ||
4710 | |||
4711 | len = size + sizeof(struct rb_item); | ||
4712 | |||
4713 | started = rb_test_started; | ||
4714 | /* read rb_test_started before checking buffer enabled */ | ||
4715 | smp_rmb(); | ||
4716 | |||
4717 | event = ring_buffer_lock_reserve(data->buffer, len); | ||
4718 | if (!event) { | ||
4719 | /* Ignore dropped events before test starts. */ | ||
4720 | if (started) { | ||
4721 | if (nested) | ||
4722 | data->bytes_dropped_nested += len; | ||
4723 | else | ||
4724 | data->bytes_dropped += len; | ||
4725 | } | ||
4726 | return len; | ||
4727 | } | ||
4728 | |||
4729 | event_len = ring_buffer_event_length(event); | ||
4730 | |||
4731 | if (RB_WARN_ON(data->buffer, event_len < len)) | ||
4732 | goto out; | ||
4733 | |||
4734 | item = ring_buffer_event_data(event); | ||
4735 | item->size = size; | ||
4736 | memcpy(item->str, rb_string, size); | ||
4737 | |||
4738 | if (nested) { | ||
4739 | data->bytes_alloc_nested += event_len; | ||
4740 | data->bytes_written_nested += len; | ||
4741 | data->events_nested++; | ||
4742 | if (!data->min_size_nested || len < data->min_size_nested) | ||
4743 | data->min_size_nested = len; | ||
4744 | if (len > data->max_size_nested) | ||
4745 | data->max_size_nested = len; | ||
4746 | } else { | ||
4747 | data->bytes_alloc += event_len; | ||
4748 | data->bytes_written += len; | ||
4749 | data->events++; | ||
4750 | if (!data->min_size || len < data->min_size) | ||
4751 | data->min_size = len; | ||
4752 | if (len > data->max_size) | ||
4753 | data->max_size = len; | ||
4754 | } | ||
4755 | |||
4756 | out: | ||
4757 | ring_buffer_unlock_commit(data->buffer, event); | ||
4758 | |||
4759 | return 0; | ||
4760 | } | ||
4761 | |||
4762 | static __init int rb_test(void *arg) | ||
4763 | { | ||
4764 | struct rb_test_data *data = arg; | ||
4765 | |||
4766 | while (!kthread_should_stop()) { | ||
4767 | rb_write_something(data, false); | ||
4768 | data->cnt++; | ||
4769 | |||
4770 | set_current_state(TASK_INTERRUPTIBLE); | ||
4771 | /* Now sleep between a min of 100-300us and a max of 1ms */ | ||
4772 | usleep_range(((data->cnt % 3) + 1) * 100, 1000); | ||
4773 | } | ||
4774 | |||
4775 | return 0; | ||
4776 | } | ||
4777 | |||
4778 | static __init void rb_ipi(void *ignore) | ||
4779 | { | ||
4780 | struct rb_test_data *data; | ||
4781 | int cpu = smp_processor_id(); | ||
4782 | |||
4783 | data = &rb_data[cpu]; | ||
4784 | rb_write_something(data, true); | ||
4785 | } | ||
4786 | |||
4787 | static __init int rb_hammer_test(void *arg) | ||
4788 | { | ||
4789 | while (!kthread_should_stop()) { | ||
4790 | |||
4791 | /* Send an IPI to all cpus to write data! */ | ||
4792 | smp_call_function(rb_ipi, NULL, 1); | ||
4793 | /* No sleep, but for non preempt, let others run */ | ||
4794 | schedule(); | ||
4795 | } | ||
4796 | |||
4797 | return 0; | ||
4798 | } | ||
4799 | |||
4800 | static __init int test_ringbuffer(void) | ||
4801 | { | ||
4802 | struct task_struct *rb_hammer; | ||
4803 | struct ring_buffer *buffer; | ||
4804 | int cpu; | ||
4805 | int ret = 0; | ||
4806 | |||
4807 | pr_info("Running ring buffer tests...\n"); | ||
4808 | |||
4809 | buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE); | ||
4810 | if (WARN_ON(!buffer)) | ||
4811 | return 0; | ||
4812 | |||
4813 | /* Disable buffer so that threads can't write to it yet */ | ||
4814 | ring_buffer_record_off(buffer); | ||
4815 | |||
4816 | for_each_online_cpu(cpu) { | ||
4817 | rb_data[cpu].buffer = buffer; | ||
4818 | rb_data[cpu].cpu = cpu; | ||
4819 | rb_data[cpu].cnt = cpu; | ||
4820 | rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu], | ||
4821 | "rbtester/%d", cpu); | ||
4822 | if (WARN_ON(!rb_threads[cpu])) { | ||
4823 | pr_cont("FAILED\n"); | ||
4824 | ret = -1; | ||
4825 | goto out_free; | ||
4826 | } | ||
4827 | |||
4828 | kthread_bind(rb_threads[cpu], cpu); | ||
4829 | wake_up_process(rb_threads[cpu]); | ||
4830 | } | ||
4831 | |||
4832 | /* Now create the rb hammer! */ | ||
4833 | rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer"); | ||
4834 | if (WARN_ON(!rb_hammer)) { | ||
4835 | pr_cont("FAILED\n"); | ||
4836 | ret = -1; | ||
4837 | goto out_free; | ||
4838 | } | ||
4839 | |||
4840 | ring_buffer_record_on(buffer); | ||
4841 | /* | ||
4842 | * Show buffer is enabled before setting rb_test_started. | ||
4843 | * Yes there's a small race window where events could be | ||
4844 | * dropped and the thread won't catch it. But when a ring | ||
4845 | * buffer gets enabled, there will always be some kind of | ||
4846 | * delay before other CPUs see it. Thus, we don't care about | ||
4847 | * those dropped events. We care about events dropped after | ||
4848 | * the threads see that the buffer is active. | ||
4849 | */ | ||
4850 | smp_wmb(); | ||
4851 | rb_test_started = true; | ||
4852 | |||
4853 | set_current_state(TASK_INTERRUPTIBLE); | ||
4854 | /* Just run for 10 seconds */ | ||
4855 | schedule_timeout(10 * HZ); | ||
4856 | |||
4857 | kthread_stop(rb_hammer); | ||
4858 | |||
4859 | out_free: | ||
4860 | for_each_online_cpu(cpu) { | ||
4861 | if (!rb_threads[cpu]) | ||
4862 | break; | ||
4863 | kthread_stop(rb_threads[cpu]); | ||
4864 | } | ||
4865 | if (ret) { | ||
4866 | ring_buffer_free(buffer); | ||
4867 | return ret; | ||
4868 | } | ||
4869 | |||
4870 | /* Report! */ | ||
4871 | pr_info("finished\n"); | ||
4872 | for_each_online_cpu(cpu) { | ||
4873 | struct ring_buffer_event *event; | ||
4874 | struct rb_test_data *data = &rb_data[cpu]; | ||
4875 | struct rb_item *item; | ||
4876 | unsigned long total_events; | ||
4877 | unsigned long total_dropped; | ||
4878 | unsigned long total_written; | ||
4879 | unsigned long total_alloc; | ||
4880 | unsigned long total_read = 0; | ||
4881 | unsigned long total_size = 0; | ||
4882 | unsigned long total_len = 0; | ||
4883 | unsigned long total_lost = 0; | ||
4884 | unsigned long lost; | ||
4885 | int big_event_size; | ||
4886 | int small_event_size; | ||
4887 | |||
4888 | ret = -1; | ||
4889 | |||
4890 | total_events = data->events + data->events_nested; | ||
4891 | total_written = data->bytes_written + data->bytes_written_nested; | ||
4892 | total_alloc = data->bytes_alloc + data->bytes_alloc_nested; | ||
4893 | total_dropped = data->bytes_dropped + data->bytes_dropped_nested; | ||
4894 | |||
4895 | big_event_size = data->max_size + data->max_size_nested; | ||
4896 | small_event_size = data->min_size + data->min_size_nested; | ||
4897 | |||
4898 | pr_info("CPU %d:\n", cpu); | ||
4899 | pr_info(" events: %ld\n", total_events); | ||
4900 | pr_info(" dropped bytes: %ld\n", total_dropped); | ||
4901 | pr_info(" alloced bytes: %ld\n", total_alloc); | ||
4902 | pr_info(" written bytes: %ld\n", total_written); | ||
4903 | pr_info(" biggest event: %d\n", big_event_size); | ||
4904 | pr_info(" smallest event: %d\n", small_event_size); | ||
4905 | |||
4906 | if (RB_WARN_ON(buffer, total_dropped)) | ||
4907 | break; | ||
4908 | |||
4909 | ret = 0; | ||
4910 | |||
4911 | while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) { | ||
4912 | total_lost += lost; | ||
4913 | item = ring_buffer_event_data(event); | ||
4914 | total_len += ring_buffer_event_length(event); | ||
4915 | total_size += item->size + sizeof(struct rb_item); | ||
4916 | if (memcmp(&item->str[0], rb_string, item->size) != 0) { | ||
4917 | pr_info("FAILED!\n"); | ||
4918 | pr_info("buffer had: %.*s\n", item->size, item->str); | ||
4919 | pr_info("expected: %.*s\n", item->size, rb_string); | ||
4920 | RB_WARN_ON(buffer, 1); | ||
4921 | ret = -1; | ||
4922 | break; | ||
4923 | } | ||
4924 | total_read++; | ||
4925 | } | ||
4926 | if (ret) | ||
4927 | break; | ||
4928 | |||
4929 | ret = -1; | ||
4930 | |||
4931 | pr_info(" read events: %ld\n", total_read); | ||
4932 | pr_info(" lost events: %ld\n", total_lost); | ||
4933 | pr_info(" total events: %ld\n", total_lost + total_read); | ||
4934 | pr_info(" recorded len bytes: %ld\n", total_len); | ||
4935 | pr_info(" recorded size bytes: %ld\n", total_size); | ||
4936 | if (total_lost) | ||
4937 | pr_info(" With dropped events, record len and size may not match\n" | ||
4938 | " alloced and written from above\n"); | ||
4939 | if (!total_lost) { | ||
4940 | if (RB_WARN_ON(buffer, total_len != total_alloc || | ||
4941 | total_size != total_written)) | ||
4942 | break; | ||
4943 | } | ||
4944 | if (RB_WARN_ON(buffer, total_lost + total_read != total_events)) | ||
4945 | break; | ||
4946 | |||
4947 | ret = 0; | ||
4948 | } | ||
4949 | if (!ret) | ||
4950 | pr_info("Ring buffer PASSED!\n"); | ||
4951 | |||
4952 | ring_buffer_free(buffer); | ||
4953 | return 0; | ||
4954 | } | ||
4955 | |||
4956 | late_initcall(test_ringbuffer); | ||
4957 | #endif /* CONFIG_RING_BUFFER_STARTUP_TEST */ | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 66338c4f7f4b..581630a6387d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * ring buffer based function tracer | 2 | * ring buffer based function tracer |
3 | * | 3 | * |
4 | * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> | 4 | * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com> |
5 | * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> | 5 | * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> |
6 | * | 6 | * |
7 | * Originally taken from the RT patch by: | 7 | * Originally taken from the RT patch by: |
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
20 | #include <linux/notifier.h> | 20 | #include <linux/notifier.h> |
21 | #include <linux/irqflags.h> | 21 | #include <linux/irqflags.h> |
22 | #include <linux/irq_work.h> | ||
23 | #include <linux/debugfs.h> | 22 | #include <linux/debugfs.h> |
24 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
25 | #include <linux/hardirq.h> | 24 | #include <linux/hardirq.h> |
@@ -48,7 +47,7 @@ | |||
48 | * On boot up, the ring buffer is set to the minimum size, so that | 47 | * On boot up, the ring buffer is set to the minimum size, so that |
49 | * we do not waste memory on systems that are not using tracing. | 48 | * we do not waste memory on systems that are not using tracing. |
50 | */ | 49 | */ |
51 | int ring_buffer_expanded; | 50 | bool ring_buffer_expanded; |
52 | 51 | ||
53 | /* | 52 | /* |
54 | * We need to change this state when a selftest is running. | 53 | * We need to change this state when a selftest is running. |
@@ -87,14 +86,6 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set) | |||
87 | static DEFINE_PER_CPU(bool, trace_cmdline_save); | 86 | static DEFINE_PER_CPU(bool, trace_cmdline_save); |
88 | 87 | ||
89 | /* | 88 | /* |
90 | * When a reader is waiting for data, then this variable is | ||
91 | * set to true. | ||
92 | */ | ||
93 | static bool trace_wakeup_needed; | ||
94 | |||
95 | static struct irq_work trace_work_wakeup; | ||
96 | |||
97 | /* | ||
98 | * Kill all tracing for good (never come back). | 89 | * Kill all tracing for good (never come back). |
99 | * It is initialized to 1 but will turn to zero if the initialization | 90 | * It is initialized to 1 but will turn to zero if the initialization |
100 | * of the tracer is successful. But that is the only place that sets | 91 | * of the tracer is successful. But that is the only place that sets |
@@ -130,12 +121,14 @@ static int tracing_set_tracer(const char *buf); | |||
130 | static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; | 121 | static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; |
131 | static char *default_bootup_tracer; | 122 | static char *default_bootup_tracer; |
132 | 123 | ||
124 | static bool allocate_snapshot; | ||
125 | |||
133 | static int __init set_cmdline_ftrace(char *str) | 126 | static int __init set_cmdline_ftrace(char *str) |
134 | { | 127 | { |
135 | strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); | 128 | strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); |
136 | default_bootup_tracer = bootup_tracer_buf; | 129 | default_bootup_tracer = bootup_tracer_buf; |
137 | /* We are using ftrace early, expand it */ | 130 | /* We are using ftrace early, expand it */ |
138 | ring_buffer_expanded = 1; | 131 | ring_buffer_expanded = true; |
139 | return 1; | 132 | return 1; |
140 | } | 133 | } |
141 | __setup("ftrace=", set_cmdline_ftrace); | 134 | __setup("ftrace=", set_cmdline_ftrace); |
@@ -156,6 +149,15 @@ static int __init set_ftrace_dump_on_oops(char *str) | |||
156 | } | 149 | } |
157 | __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); | 150 | __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); |
158 | 151 | ||
152 | static int __init boot_alloc_snapshot(char *str) | ||
153 | { | ||
154 | allocate_snapshot = true; | ||
155 | /* We also need the main ring buffer expanded */ | ||
156 | ring_buffer_expanded = true; | ||
157 | return 1; | ||
158 | } | ||
159 | __setup("alloc_snapshot", boot_alloc_snapshot); | ||
160 | |||
159 | 161 | ||
160 | static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; | 162 | static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; |
161 | static char *trace_boot_options __initdata; | 163 | static char *trace_boot_options __initdata; |
@@ -189,7 +191,7 @@ unsigned long long ns2usecs(cycle_t nsec) | |||
189 | */ | 191 | */ |
190 | static struct trace_array global_trace; | 192 | static struct trace_array global_trace; |
191 | 193 | ||
192 | static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); | 194 | LIST_HEAD(ftrace_trace_arrays); |
193 | 195 | ||
194 | int filter_current_check_discard(struct ring_buffer *buffer, | 196 | int filter_current_check_discard(struct ring_buffer *buffer, |
195 | struct ftrace_event_call *call, void *rec, | 197 | struct ftrace_event_call *call, void *rec, |
@@ -204,29 +206,15 @@ cycle_t ftrace_now(int cpu) | |||
204 | u64 ts; | 206 | u64 ts; |
205 | 207 | ||
206 | /* Early boot up does not have a buffer yet */ | 208 | /* Early boot up does not have a buffer yet */ |
207 | if (!global_trace.buffer) | 209 | if (!global_trace.trace_buffer.buffer) |
208 | return trace_clock_local(); | 210 | return trace_clock_local(); |
209 | 211 | ||
210 | ts = ring_buffer_time_stamp(global_trace.buffer, cpu); | 212 | ts = ring_buffer_time_stamp(global_trace.trace_buffer.buffer, cpu); |
211 | ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts); | 213 | ring_buffer_normalize_time_stamp(global_trace.trace_buffer.buffer, cpu, &ts); |
212 | 214 | ||
213 | return ts; | 215 | return ts; |
214 | } | 216 | } |
215 | 217 | ||
216 | /* | ||
217 | * The max_tr is used to snapshot the global_trace when a maximum | ||
218 | * latency is reached. Some tracers will use this to store a maximum | ||
219 | * trace while it continues examining live traces. | ||
220 | * | ||
221 | * The buffers for the max_tr are set up the same as the global_trace. | ||
222 | * When a snapshot is taken, the link list of the max_tr is swapped | ||
223 | * with the link list of the global_trace and the buffers are reset for | ||
224 | * the global_trace so the tracing can continue. | ||
225 | */ | ||
226 | static struct trace_array max_tr; | ||
227 | |||
228 | static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data); | ||
229 | |||
230 | int tracing_is_enabled(void) | 218 | int tracing_is_enabled(void) |
231 | { | 219 | { |
232 | return tracing_is_on(); | 220 | return tracing_is_on(); |
@@ -249,9 +237,6 @@ static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT; | |||
249 | /* trace_types holds a link list of available tracers. */ | 237 | /* trace_types holds a link list of available tracers. */ |
250 | static struct tracer *trace_types __read_mostly; | 238 | static struct tracer *trace_types __read_mostly; |
251 | 239 | ||
252 | /* current_trace points to the tracer that is currently active */ | ||
253 | static struct tracer *current_trace __read_mostly = &nop_trace; | ||
254 | |||
255 | /* | 240 | /* |
256 | * trace_types_lock is used to protect the trace_types list. | 241 | * trace_types_lock is used to protect the trace_types list. |
257 | */ | 242 | */ |
@@ -285,13 +270,13 @@ static DEFINE_PER_CPU(struct mutex, cpu_access_lock); | |||
285 | 270 | ||
286 | static inline void trace_access_lock(int cpu) | 271 | static inline void trace_access_lock(int cpu) |
287 | { | 272 | { |
288 | if (cpu == TRACE_PIPE_ALL_CPU) { | 273 | if (cpu == RING_BUFFER_ALL_CPUS) { |
289 | /* gain it for accessing the whole ring buffer. */ | 274 | /* gain it for accessing the whole ring buffer. */ |
290 | down_write(&all_cpu_access_lock); | 275 | down_write(&all_cpu_access_lock); |
291 | } else { | 276 | } else { |
292 | /* gain it for accessing a cpu ring buffer. */ | 277 | /* gain it for accessing a cpu ring buffer. */ |
293 | 278 | ||
294 | /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */ | 279 | /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */ |
295 | down_read(&all_cpu_access_lock); | 280 | down_read(&all_cpu_access_lock); |
296 | 281 | ||
297 | /* Secondly block other access to this @cpu ring buffer. */ | 282 | /* Secondly block other access to this @cpu ring buffer. */ |
@@ -301,7 +286,7 @@ static inline void trace_access_lock(int cpu) | |||
301 | 286 | ||
302 | static inline void trace_access_unlock(int cpu) | 287 | static inline void trace_access_unlock(int cpu) |
303 | { | 288 | { |
304 | if (cpu == TRACE_PIPE_ALL_CPU) { | 289 | if (cpu == RING_BUFFER_ALL_CPUS) { |
305 | up_write(&all_cpu_access_lock); | 290 | up_write(&all_cpu_access_lock); |
306 | } else { | 291 | } else { |
307 | mutex_unlock(&per_cpu(cpu_access_lock, cpu)); | 292 | mutex_unlock(&per_cpu(cpu_access_lock, cpu)); |
@@ -339,30 +324,11 @@ static inline void trace_access_lock_init(void) | |||
339 | 324 | ||
340 | #endif | 325 | #endif |
341 | 326 | ||
342 | /* trace_wait is a waitqueue for tasks blocked on trace_poll */ | ||
343 | static DECLARE_WAIT_QUEUE_HEAD(trace_wait); | ||
344 | |||
345 | /* trace_flags holds trace_options default values */ | 327 | /* trace_flags holds trace_options default values */ |
346 | unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | | 328 | unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | |
347 | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | | 329 | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | |
348 | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | | 330 | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | |
349 | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS; | 331 | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION; |
350 | |||
351 | static int trace_stop_count; | ||
352 | static DEFINE_RAW_SPINLOCK(tracing_start_lock); | ||
353 | |||
354 | /** | ||
355 | * trace_wake_up - wake up tasks waiting for trace input | ||
356 | * | ||
357 | * Schedules a delayed work to wake up any task that is blocked on the | ||
358 | * trace_wait queue. These is used with trace_poll for tasks polling the | ||
359 | * trace. | ||
360 | */ | ||
361 | static void trace_wake_up(struct irq_work *work) | ||
362 | { | ||
363 | wake_up_all(&trace_wait); | ||
364 | |||
365 | } | ||
366 | 332 | ||
367 | /** | 333 | /** |
368 | * tracing_on - enable tracing buffers | 334 | * tracing_on - enable tracing buffers |
@@ -372,8 +338,8 @@ static void trace_wake_up(struct irq_work *work) | |||
372 | */ | 338 | */ |
373 | void tracing_on(void) | 339 | void tracing_on(void) |
374 | { | 340 | { |
375 | if (global_trace.buffer) | 341 | if (global_trace.trace_buffer.buffer) |
376 | ring_buffer_record_on(global_trace.buffer); | 342 | ring_buffer_record_on(global_trace.trace_buffer.buffer); |
377 | /* | 343 | /* |
378 | * This flag is only looked at when buffers haven't been | 344 | * This flag is only looked at when buffers haven't been |
379 | * allocated yet. We don't really care about the race | 345 | * allocated yet. We don't really care about the race |
@@ -385,6 +351,196 @@ void tracing_on(void) | |||
385 | EXPORT_SYMBOL_GPL(tracing_on); | 351 | EXPORT_SYMBOL_GPL(tracing_on); |
386 | 352 | ||
387 | /** | 353 | /** |
354 | * __trace_puts - write a constant string into the trace buffer. | ||
355 | * @ip: The address of the caller | ||
356 | * @str: The constant string to write | ||
357 | * @size: The size of the string. | ||
358 | */ | ||
359 | int __trace_puts(unsigned long ip, const char *str, int size) | ||
360 | { | ||
361 | struct ring_buffer_event *event; | ||
362 | struct ring_buffer *buffer; | ||
363 | struct print_entry *entry; | ||
364 | unsigned long irq_flags; | ||
365 | int alloc; | ||
366 | |||
367 | alloc = sizeof(*entry) + size + 2; /* possible \n added */ | ||
368 | |||
369 | local_save_flags(irq_flags); | ||
370 | buffer = global_trace.trace_buffer.buffer; | ||
371 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, | ||
372 | irq_flags, preempt_count()); | ||
373 | if (!event) | ||
374 | return 0; | ||
375 | |||
376 | entry = ring_buffer_event_data(event); | ||
377 | entry->ip = ip; | ||
378 | |||
379 | memcpy(&entry->buf, str, size); | ||
380 | |||
381 | /* Add a newline if necessary */ | ||
382 | if (entry->buf[size - 1] != '\n') { | ||
383 | entry->buf[size] = '\n'; | ||
384 | entry->buf[size + 1] = '\0'; | ||
385 | } else | ||
386 | entry->buf[size] = '\0'; | ||
387 | |||
388 | __buffer_unlock_commit(buffer, event); | ||
389 | |||
390 | return size; | ||
391 | } | ||
392 | EXPORT_SYMBOL_GPL(__trace_puts); | ||
393 | |||
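
__trace_puts() takes the caller's address explicitly and writes the string straight into the global trace buffer. A direct call from kernel code might look like the fragment below; the trace_puts() convenience macro that normally wraps this (and picks __trace_bputs() for constant strings) is assumed to live elsewhere in the series and is not shown here. my_debug_hook is an illustrative name.

    static void my_debug_hook(void)
    {
            const char *msg = "my_debug_hook: hit the slow path";

            /* record the string plus the caller's address */
            __trace_puts(_THIS_IP_, msg, strlen(msg));
    }
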
394 | /** | ||
395 | * __trace_bputs - write the pointer to a constant string into trace buffer | ||
396 | * @ip: The address of the caller | ||
397 | * @str: The constant string to write to the buffer to | ||
398 | */ | ||
399 | int __trace_bputs(unsigned long ip, const char *str) | ||
400 | { | ||
401 | struct ring_buffer_event *event; | ||
402 | struct ring_buffer *buffer; | ||
403 | struct bputs_entry *entry; | ||
404 | unsigned long irq_flags; | ||
405 | int size = sizeof(struct bputs_entry); | ||
406 | |||
407 | local_save_flags(irq_flags); | ||
408 | buffer = global_trace.trace_buffer.buffer; | ||
409 | event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, | ||
410 | irq_flags, preempt_count()); | ||
411 | if (!event) | ||
412 | return 0; | ||
413 | |||
414 | entry = ring_buffer_event_data(event); | ||
415 | entry->ip = ip; | ||
416 | entry->str = str; | ||
417 | |||
418 | __buffer_unlock_commit(buffer, event); | ||
419 | |||
420 | return 1; | ||
421 | } | ||
422 | EXPORT_SYMBOL_GPL(__trace_bputs); | ||
423 | |||
424 | #ifdef CONFIG_TRACER_SNAPSHOT | ||
425 | /** | ||
426 | * trace_snapshot - take a snapshot of the current buffer. | ||
427 | * | ||
428 | * This causes a swap between the snapshot buffer and the current live | ||
429 | * tracing buffer. You can use this to take snapshots of the live | ||
430 | * trace when some condition is triggered, but continue to trace. | ||
431 | * | ||
432 | * Note, make sure to allocate the snapshot buffer beforehand, either | ||
433 | * with tracing_snapshot_alloc() or manually | ||
434 | * with: echo 1 > /sys/kernel/debug/tracing/snapshot | ||
435 | * | ||
436 | * If the snapshot buffer is not allocated, this will stop tracing, | ||
437 | * basically turning the current buffer into a permanent snapshot. | ||
438 | */ | ||
439 | void tracing_snapshot(void) | ||
440 | { | ||
441 | struct trace_array *tr = &global_trace; | ||
442 | struct tracer *tracer = tr->current_trace; | ||
443 | unsigned long flags; | ||
444 | |||
445 | if (in_nmi()) { | ||
446 | internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); | ||
447 | internal_trace_puts("*** snapshot is being ignored ***\n"); | ||
448 | return; | ||
449 | } | ||
450 | |||
451 | if (!tr->allocated_snapshot) { | ||
452 | internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n"); | ||
453 | internal_trace_puts("*** stopping trace here! ***\n"); | ||
454 | tracing_off(); | ||
455 | return; | ||
456 | } | ||
457 | |||
458 | /* Note, snapshot can not be used when the tracer uses it */ | ||
459 | if (tracer->use_max_tr) { | ||
460 | internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n"); | ||
461 | internal_trace_puts("*** Can not use snapshot (sorry) ***\n"); | ||
462 | return; | ||
463 | } | ||
464 | |||
465 | local_irq_save(flags); | ||
466 | update_max_tr(tr, current, smp_processor_id()); | ||
467 | local_irq_restore(flags); | ||
468 | } | ||
469 | EXPORT_SYMBOL_GPL(tracing_snapshot); | ||
470 | |||
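
Per the kernel-doc above, tracing_snapshot() is meant to be called when some condition of interest fires, from any context except NMI, and it requires the snapshot buffer to already exist (allocated at boot, via the snapshot debugfs file, or with tracing_snapshot_alloc()). A hypothetical call site, where retries and MAX_RETRIES are illustrative only:

    /* Sketch: freeze the live trace the moment a rare condition is seen. */
    if (unlikely(retries > MAX_RETRIES)) {
            trace_printk("too many retries (%d), taking snapshot\n", retries);
            tracing_snapshot();             /* swap live buffer with the spare */
    }
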
471 | static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, | ||
472 | struct trace_buffer *size_buf, int cpu_id); | ||
473 | static void set_buffer_entries(struct trace_buffer *buf, unsigned long val); | ||
474 | |||
475 | static int alloc_snapshot(struct trace_array *tr) | ||
476 | { | ||
477 | int ret; | ||
478 | |||
479 | if (!tr->allocated_snapshot) { | ||
480 | |||
481 | /* allocate spare buffer */ | ||
482 | ret = resize_buffer_duplicate_size(&tr->max_buffer, | ||
483 | &tr->trace_buffer, RING_BUFFER_ALL_CPUS); | ||
484 | if (ret < 0) | ||
485 | return ret; | ||
486 | |||
487 | tr->allocated_snapshot = true; | ||
488 | } | ||
489 | |||
490 | return 0; | ||
491 | } | ||
492 | |||
493 | void free_snapshot(struct trace_array *tr) | ||
494 | { | ||
495 | /* | ||
496 | * We don't free the ring buffer. instead, resize it because | ||
497 | * The max_tr ring buffer has some state (e.g. ring->clock) and | ||
498 | * we want preserve it. | ||
499 | */ | ||
500 | ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); | ||
501 | set_buffer_entries(&tr->max_buffer, 1); | ||
502 | tracing_reset_online_cpus(&tr->max_buffer); | ||
503 | tr->allocated_snapshot = false; | ||
504 | } | ||
505 | |||
506 | /** | ||
507 | * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer. | ||
508 | * | ||
509 | * This is similar to tracing_snapshot(), but it will allocate the | ||
510 | * snapshot buffer if it isn't already allocated. Use this only | ||
511 | * where it is safe to sleep, as the allocation may sleep. | ||
512 | * | ||
513 | * This causes a swap between the snapshot buffer and the current live | ||
514 | * tracing buffer. You can use this to take snapshots of the live | ||
515 | * trace when some condition is triggered, but continue to trace. | ||
516 | */ | ||
517 | void tracing_snapshot_alloc(void) | ||
518 | { | ||
519 | struct trace_array *tr = &global_trace; | ||
520 | int ret; | ||
521 | |||
522 | ret = alloc_snapshot(tr); | ||
523 | if (WARN_ON(ret < 0)) | ||
524 | return; | ||
525 | |||
526 | tracing_snapshot(); | ||
527 | } | ||
528 | EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); | ||
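
Because the spare buffer is allocated by resizing (GFP_KERNEL), tracing_snapshot_alloc() must be called where sleeping is allowed; a common pattern is to allocate once at init time so that later tracing_snapshot() calls from atomic context just work. A hypothetical example (my_module_init is an illustrative name):

    static int __init my_module_init(void)
    {
            /* Allocate the spare buffer up front, then take a first snapshot. */
            tracing_snapshot_alloc();
            return 0;
    }
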
529 | #else | ||
530 | void tracing_snapshot(void) | ||
531 | { | ||
532 | WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used"); | ||
533 | } | ||
534 | EXPORT_SYMBOL_GPL(tracing_snapshot); | ||
535 | void tracing_snapshot_alloc(void) | ||
536 | { | ||
537 | /* Give warning */ | ||
538 | tracing_snapshot(); | ||
539 | } | ||
540 | EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); | ||
541 | #endif /* CONFIG_TRACER_SNAPSHOT */ | ||
542 | |||
543 | /** | ||
388 | * tracing_off - turn off tracing buffers | 544 | * tracing_off - turn off tracing buffers |
389 | * | 545 | * |
390 | * This function stops the tracing buffers from recording data. | 546 | * This function stops the tracing buffers from recording data. |
@@ -394,8 +550,8 @@ EXPORT_SYMBOL_GPL(tracing_on); | |||
394 | */ | 550 | */ |
395 | void tracing_off(void) | 551 | void tracing_off(void) |
396 | { | 552 | { |
397 | if (global_trace.buffer) | 553 | if (global_trace.trace_buffer.buffer) |
398 | ring_buffer_record_off(global_trace.buffer); | 554 | ring_buffer_record_off(global_trace.trace_buffer.buffer); |
399 | /* | 555 | /* |
400 | * This flag is only looked at when buffers haven't been | 556 | * This flag is only looked at when buffers haven't been |
401 | * allocated yet. We don't really care about the race | 557 | * allocated yet. We don't really care about the race |
@@ -411,8 +567,8 @@ EXPORT_SYMBOL_GPL(tracing_off); | |||
411 | */ | 567 | */ |
412 | int tracing_is_on(void) | 568 | int tracing_is_on(void) |
413 | { | 569 | { |
414 | if (global_trace.buffer) | 570 | if (global_trace.trace_buffer.buffer) |
415 | return ring_buffer_record_is_on(global_trace.buffer); | 571 | return ring_buffer_record_is_on(global_trace.trace_buffer.buffer); |
416 | return !global_trace.buffer_disabled; | 572 | return !global_trace.buffer_disabled; |
417 | } | 573 | } |
418 | EXPORT_SYMBOL_GPL(tracing_is_on); | 574 | EXPORT_SYMBOL_GPL(tracing_is_on); |
@@ -479,6 +635,7 @@ static const char *trace_options[] = { | |||
479 | "disable_on_free", | 635 | "disable_on_free", |
480 | "irq-info", | 636 | "irq-info", |
481 | "markers", | 637 | "markers", |
638 | "function-trace", | ||
482 | NULL | 639 | NULL |
483 | }; | 640 | }; |
484 | 641 | ||
@@ -490,6 +647,8 @@ static struct { | |||
490 | { trace_clock_local, "local", 1 }, | 647 | { trace_clock_local, "local", 1 }, |
491 | { trace_clock_global, "global", 1 }, | 648 | { trace_clock_global, "global", 1 }, |
492 | { trace_clock_counter, "counter", 0 }, | 649 | { trace_clock_counter, "counter", 0 }, |
650 | { trace_clock_jiffies, "uptime", 1 }, | ||
651 | { trace_clock, "perf", 1 }, | ||
493 | ARCH_TRACE_CLOCKS | 652 | ARCH_TRACE_CLOCKS |
494 | }; | 653 | }; |
495 | 654 | ||
@@ -670,13 +829,14 @@ unsigned long __read_mostly tracing_max_latency; | |||
670 | static void | 829 | static void |
671 | __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) | 830 | __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) |
672 | { | 831 | { |
673 | struct trace_array_cpu *data = tr->data[cpu]; | 832 | struct trace_buffer *trace_buf = &tr->trace_buffer; |
674 | struct trace_array_cpu *max_data; | 833 | struct trace_buffer *max_buf = &tr->max_buffer; |
834 | struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); | ||
835 | struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu); | ||
675 | 836 | ||
676 | max_tr.cpu = cpu; | 837 | max_buf->cpu = cpu; |
677 | max_tr.time_start = data->preempt_timestamp; | 838 | max_buf->time_start = data->preempt_timestamp; |
678 | 839 | ||
679 | max_data = max_tr.data[cpu]; | ||
680 | max_data->saved_latency = tracing_max_latency; | 840 | max_data->saved_latency = tracing_max_latency; |
681 | max_data->critical_start = data->critical_start; | 841 | max_data->critical_start = data->critical_start; |
682 | max_data->critical_end = data->critical_end; | 842 | max_data->critical_end = data->critical_end; |
@@ -706,22 +866,22 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) | |||
706 | { | 866 | { |
707 | struct ring_buffer *buf; | 867 | struct ring_buffer *buf; |
708 | 868 | ||
709 | if (trace_stop_count) | 869 | if (tr->stop_count) |
710 | return; | 870 | return; |
711 | 871 | ||
712 | WARN_ON_ONCE(!irqs_disabled()); | 872 | WARN_ON_ONCE(!irqs_disabled()); |
713 | 873 | ||
714 | if (!current_trace->allocated_snapshot) { | 874 | if (!tr->allocated_snapshot) { |
715 | /* Only the nop tracer should hit this when disabling */ | 875 | /* Only the nop tracer should hit this when disabling */ |
716 | WARN_ON_ONCE(current_trace != &nop_trace); | 876 | WARN_ON_ONCE(tr->current_trace != &nop_trace); |
717 | return; | 877 | return; |
718 | } | 878 | } |
719 | 879 | ||
720 | arch_spin_lock(&ftrace_max_lock); | 880 | arch_spin_lock(&ftrace_max_lock); |
721 | 881 | ||
722 | buf = tr->buffer; | 882 | buf = tr->trace_buffer.buffer; |
723 | tr->buffer = max_tr.buffer; | 883 | tr->trace_buffer.buffer = tr->max_buffer.buffer; |
724 | max_tr.buffer = buf; | 884 | tr->max_buffer.buffer = buf; |
725 | 885 | ||
726 | __update_max_tr(tr, tsk, cpu); | 886 | __update_max_tr(tr, tsk, cpu); |
727 | arch_spin_unlock(&ftrace_max_lock); | 887 | arch_spin_unlock(&ftrace_max_lock); |
@@ -740,19 +900,19 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) | |||
740 | { | 900 | { |
741 | int ret; | 901 | int ret; |
742 | 902 | ||
743 | if (trace_stop_count) | 903 | if (tr->stop_count) |
744 | return; | 904 | return; |
745 | 905 | ||
746 | WARN_ON_ONCE(!irqs_disabled()); | 906 | WARN_ON_ONCE(!irqs_disabled()); |
747 | if (!current_trace->allocated_snapshot) { | 907 | if (!tr->allocated_snapshot) {
748 | /* Only the nop tracer should hit this when disabling */ | 908 | /* Only the nop tracer should hit this when disabling */ |
749 | WARN_ON_ONCE(current_trace != &nop_trace); | 909 | WARN_ON_ONCE(tr->current_trace != &nop_trace); |
750 | return; | 910 | return; |
751 | } | 911 | } |
752 | 912 | ||
753 | arch_spin_lock(&ftrace_max_lock); | 913 | arch_spin_lock(&ftrace_max_lock); |
754 | 914 | ||
755 | ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); | 915 | ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu); |
756 | 916 | ||
757 | if (ret == -EBUSY) { | 917 | if (ret == -EBUSY) { |
758 | /* | 918 | /* |
@@ -761,7 +921,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) | |||
761 | * the max trace buffer (no one writes directly to it) | 921 | * the max trace buffer (no one writes directly to it) |
762 | * and flag that it failed. | 922 | * and flag that it failed. |
763 | */ | 923 | */ |
764 | trace_array_printk(&max_tr, _THIS_IP_, | 924 | trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_, |
765 | "Failed to swap buffers due to commit in progress\n"); | 925 | "Failed to swap buffers due to commit in progress\n"); |
766 | } | 926 | } |
767 | 927 | ||
@@ -774,37 +934,78 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) | |||
774 | 934 | ||
775 | static void default_wait_pipe(struct trace_iterator *iter) | 935 | static void default_wait_pipe(struct trace_iterator *iter) |
776 | { | 936 | { |
777 | DEFINE_WAIT(wait); | 937 | /* Iterators are static, they should be filled or empty */ |
938 | if (trace_buffer_iter(iter, iter->cpu_file)) | ||
939 | return; | ||
940 | |||
941 | ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file); | ||
942 | } | ||
943 | |||
944 | #ifdef CONFIG_FTRACE_STARTUP_TEST | ||
945 | static int run_tracer_selftest(struct tracer *type) | ||
946 | { | ||
947 | struct trace_array *tr = &global_trace; | ||
948 | struct tracer *saved_tracer = tr->current_trace; | ||
949 | int ret; | ||
778 | 950 | ||
779 | prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); | 951 | if (!type->selftest || tracing_selftest_disabled) |
952 | return 0; | ||
780 | 953 | ||
781 | /* | 954 | /* |
782 | * The events can happen in critical sections where | 955 | * Run a selftest on this tracer. |
783 | * checking a work queue can cause deadlocks. | 956 | * Here we reset the trace buffer, and set the current |
784 | * After adding a task to the queue, this flag is set | 957 | * tracer to be this tracer. The tracer can then run some |
785 | * only to notify events to try to wake up the queue | 958 | * internal tracing to verify that everything is in order. |
786 | * using irq_work. | 959 | * If we fail, we do not register this tracer. |
787 | * | ||
788 | * We don't clear it even if the buffer is no longer | ||
789 | * empty. The flag only causes the next event to run | ||
790 | * irq_work to do the work queue wake up. The worse | ||
791 | * that can happen if we race with !trace_empty() is that | ||
792 | * an event will cause an irq_work to try to wake up | ||
793 | * an empty queue. | ||
794 | * | ||
795 | * There's no reason to protect this flag either, as | ||
796 | * the work queue and irq_work logic will do the necessary | ||
797 | * synchronization for the wake ups. The only thing | ||
798 | * that is necessary is that the wake up happens after | ||
799 | * a task has been queued. It's OK for spurious wake ups. | ||
800 | */ | 960 | */ |
801 | trace_wakeup_needed = true; | 961 | tracing_reset_online_cpus(&tr->trace_buffer); |
802 | 962 | ||
803 | if (trace_empty(iter)) | 963 | tr->current_trace = type; |
804 | schedule(); | ||
805 | 964 | ||
806 | finish_wait(&trace_wait, &wait); | 965 | #ifdef CONFIG_TRACER_MAX_TRACE |
966 | if (type->use_max_tr) { | ||
967 | /* If we expanded the buffers, make sure the max is expanded too */ | ||
968 | if (ring_buffer_expanded) | ||
969 | ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size, | ||
970 | RING_BUFFER_ALL_CPUS); | ||
971 | tr->allocated_snapshot = true; | ||
972 | } | ||
973 | #endif | ||
974 | |||
975 | /* the test is responsible for initializing and enabling */ | ||
976 | pr_info("Testing tracer %s: ", type->name); | ||
977 | ret = type->selftest(type, tr); | ||
978 | /* the test is responsible for resetting too */ | ||
979 | tr->current_trace = saved_tracer; | ||
980 | if (ret) { | ||
981 | printk(KERN_CONT "FAILED!\n"); | ||
982 | /* Add the warning after printing 'FAILED' */ | ||
983 | WARN_ON(1); | ||
984 | return -1; | ||
985 | } | ||
986 | /* Only reset on passing, to avoid touching corrupted buffers */ | ||
987 | tracing_reset_online_cpus(&tr->trace_buffer); | ||
988 | |||
989 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
990 | if (type->use_max_tr) { | ||
991 | tr->allocated_snapshot = false; | ||
992 | |||
993 | /* Shrink the max buffer again */ | ||
994 | if (ring_buffer_expanded) | ||
995 | ring_buffer_resize(tr->max_buffer.buffer, 1, | ||
996 | RING_BUFFER_ALL_CPUS); | ||
997 | } | ||
998 | #endif | ||
999 | |||
1000 | printk(KERN_CONT "PASSED\n"); | ||
1001 | return 0; | ||
1002 | } | ||
1003 | #else | ||
1004 | static inline int run_tracer_selftest(struct tracer *type) | ||
1005 | { | ||
1006 | return 0; | ||
807 | } | 1007 | } |
1008 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ | ||
808 | 1009 | ||
809 | /** | 1010 | /** |
810 | * register_tracer - register a tracer with the ftrace system. | 1011 | * register_tracer - register a tracer with the ftrace system. |
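Editor's note: the hunk above pulls the startup self-test out of register_tracer() into run_tracer_selftest(), so it is worth recalling what a tracer actually hands to register_tracer(). The sketch below is hypothetical (my_tracer and the my_tracer_* helpers are illustrative, not from this patch) and would have to live in kernel/trace/ to see the private "trace.h":

        /* Minimal sketch of a tracer registration; all names are hypothetical. */
        #include <linux/module.h>
        #include "trace.h"

        static int my_tracer_init(struct trace_array *tr)
        {
                return 0;               /* hook up whatever the tracer needs */
        }

        static void my_tracer_reset(struct trace_array *tr)
        {
                /* undo my_tracer_init() */
        }

        #ifdef CONFIG_FTRACE_STARTUP_TEST
        static int my_tracer_selftest(struct tracer *trace, struct trace_array *tr)
        {
                return 0;       /* nonzero makes run_tracer_selftest() reject us */
        }
        #endif

        static struct tracer my_tracer __read_mostly = {
                .name           = "mytracer",
                .init           = my_tracer_init,
                .reset          = my_tracer_reset,
        #ifdef CONFIG_FTRACE_STARTUP_TEST
                .selftest       = my_tracer_selftest,   /* run against &global_trace above */
        #endif
        };

        static int __init my_tracer_register(void)
        {
                return register_tracer(&my_tracer);
        }
        core_initcall(my_tracer_register);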
@@ -851,57 +1052,9 @@ int register_tracer(struct tracer *type) | |||
851 | if (!type->wait_pipe) | 1052 | if (!type->wait_pipe) |
852 | type->wait_pipe = default_wait_pipe; | 1053 | type->wait_pipe = default_wait_pipe; |
853 | 1054 | ||
854 | 1055 | ret = run_tracer_selftest(type); | |
855 | #ifdef CONFIG_FTRACE_STARTUP_TEST | 1056 | if (ret < 0) |
856 | if (type->selftest && !tracing_selftest_disabled) { | 1057 | goto out; |
857 | struct tracer *saved_tracer = current_trace; | ||
858 | struct trace_array *tr = &global_trace; | ||
859 | |||
860 | /* | ||
861 | * Run a selftest on this tracer. | ||
862 | * Here we reset the trace buffer, and set the current | ||
863 | * tracer to be this tracer. The tracer can then run some | ||
864 | * internal tracing to verify that everything is in order. | ||
865 | * If we fail, we do not register this tracer. | ||
866 | */ | ||
867 | tracing_reset_online_cpus(tr); | ||
868 | |||
869 | current_trace = type; | ||
870 | |||
871 | if (type->use_max_tr) { | ||
872 | /* If we expanded the buffers, make sure the max is expanded too */ | ||
873 | if (ring_buffer_expanded) | ||
874 | ring_buffer_resize(max_tr.buffer, trace_buf_size, | ||
875 | RING_BUFFER_ALL_CPUS); | ||
876 | type->allocated_snapshot = true; | ||
877 | } | ||
878 | |||
879 | /* the test is responsible for initializing and enabling */ | ||
880 | pr_info("Testing tracer %s: ", type->name); | ||
881 | ret = type->selftest(type, tr); | ||
882 | /* the test is responsible for resetting too */ | ||
883 | current_trace = saved_tracer; | ||
884 | if (ret) { | ||
885 | printk(KERN_CONT "FAILED!\n"); | ||
886 | /* Add the warning after printing 'FAILED' */ | ||
887 | WARN_ON(1); | ||
888 | goto out; | ||
889 | } | ||
890 | /* Only reset on passing, to avoid touching corrupted buffers */ | ||
891 | tracing_reset_online_cpus(tr); | ||
892 | |||
893 | if (type->use_max_tr) { | ||
894 | type->allocated_snapshot = false; | ||
895 | |||
896 | /* Shrink the max buffer again */ | ||
897 | if (ring_buffer_expanded) | ||
898 | ring_buffer_resize(max_tr.buffer, 1, | ||
899 | RING_BUFFER_ALL_CPUS); | ||
900 | } | ||
901 | |||
902 | printk(KERN_CONT "PASSED\n"); | ||
903 | } | ||
904 | #endif | ||
905 | 1058 | ||
906 | type->next = trace_types; | 1059 | type->next = trace_types; |
907 | trace_types = type; | 1060 | trace_types = type; |
@@ -921,7 +1074,7 @@ int register_tracer(struct tracer *type) | |||
921 | tracing_set_tracer(type->name); | 1074 | tracing_set_tracer(type->name); |
922 | default_bootup_tracer = NULL; | 1075 | default_bootup_tracer = NULL; |
923 | /* disable other selftests, since this will break it. */ | 1076 | /* disable other selftests, since this will break it. */ |
924 | tracing_selftest_disabled = 1; | 1077 | tracing_selftest_disabled = true; |
925 | #ifdef CONFIG_FTRACE_STARTUP_TEST | 1078 | #ifdef CONFIG_FTRACE_STARTUP_TEST |
926 | printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n", | 1079 | printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n", |
927 | type->name); | 1080 | type->name); |
@@ -931,9 +1084,9 @@ int register_tracer(struct tracer *type) | |||
931 | return ret; | 1084 | return ret; |
932 | } | 1085 | } |
933 | 1086 | ||
934 | void tracing_reset(struct trace_array *tr, int cpu) | 1087 | void tracing_reset(struct trace_buffer *buf, int cpu) |
935 | { | 1088 | { |
936 | struct ring_buffer *buffer = tr->buffer; | 1089 | struct ring_buffer *buffer = buf->buffer; |
937 | 1090 | ||
938 | if (!buffer) | 1091 | if (!buffer) |
939 | return; | 1092 | return; |
@@ -947,9 +1100,9 @@ void tracing_reset(struct trace_array *tr, int cpu) | |||
947 | ring_buffer_record_enable(buffer); | 1100 | ring_buffer_record_enable(buffer); |
948 | } | 1101 | } |
949 | 1102 | ||
950 | void tracing_reset_online_cpus(struct trace_array *tr) | 1103 | void tracing_reset_online_cpus(struct trace_buffer *buf) |
951 | { | 1104 | { |
952 | struct ring_buffer *buffer = tr->buffer; | 1105 | struct ring_buffer *buffer = buf->buffer; |
953 | int cpu; | 1106 | int cpu; |
954 | 1107 | ||
955 | if (!buffer) | 1108 | if (!buffer) |
@@ -960,7 +1113,7 @@ void tracing_reset_online_cpus(struct trace_array *tr) | |||
960 | /* Make sure all commits have finished */ | 1113 | /* Make sure all commits have finished */ |
961 | synchronize_sched(); | 1114 | synchronize_sched(); |
962 | 1115 | ||
963 | tr->time_start = ftrace_now(tr->cpu); | 1116 | buf->time_start = ftrace_now(buf->cpu); |
964 | 1117 | ||
965 | for_each_online_cpu(cpu) | 1118 | for_each_online_cpu(cpu) |
966 | ring_buffer_reset_cpu(buffer, cpu); | 1119 | ring_buffer_reset_cpu(buffer, cpu); |
@@ -970,12 +1123,21 @@ void tracing_reset_online_cpus(struct trace_array *tr) | |||
970 | 1123 | ||
971 | void tracing_reset_current(int cpu) | 1124 | void tracing_reset_current(int cpu) |
972 | { | 1125 | { |
973 | tracing_reset(&global_trace, cpu); | 1126 | tracing_reset(&global_trace.trace_buffer, cpu); |
974 | } | 1127 | } |
975 | 1128 | ||
976 | void tracing_reset_current_online_cpus(void) | 1129 | void tracing_reset_all_online_cpus(void) |
977 | { | 1130 | { |
978 | tracing_reset_online_cpus(&global_trace); | 1131 | struct trace_array *tr; |
1132 | |||
1133 | mutex_lock(&trace_types_lock); | ||
1134 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { | ||
1135 | tracing_reset_online_cpus(&tr->trace_buffer); | ||
1136 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
1137 | tracing_reset_online_cpus(&tr->max_buffer); | ||
1138 | #endif | ||
1139 | } | ||
1140 | mutex_unlock(&trace_types_lock); | ||
979 | } | 1141 | } |
980 | 1142 | ||
981 | #define SAVED_CMDLINES 128 | 1143 | #define SAVED_CMDLINES 128 |
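Editor's note: tracing_reset() and tracing_reset_online_cpus() now name a struct trace_buffer rather than a trace_array, and the renamed tracing_reset_all_online_cpus() walks every instance on ftrace_trace_arrays. A sketch of the new calling convention as used inside trace.c (example_resets is a hypothetical helper):

        static void example_resets(struct trace_array *tr, int cpu)
        {
                /* one CPU of one instance */
                tracing_reset(&tr->trace_buffer, cpu);

                /* all online CPUs of one instance, including its snapshot buffer */
                tracing_reset_online_cpus(&tr->trace_buffer);
        #ifdef CONFIG_TRACER_MAX_TRACE
                tracing_reset_online_cpus(&tr->max_buffer);
        #endif

                /* every registered instance (takes trace_types_lock internally) */
                tracing_reset_all_online_cpus();
        }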
@@ -998,7 +1160,7 @@ static void trace_init_cmdlines(void) | |||
998 | 1160 | ||
999 | int is_tracing_stopped(void) | 1161 | int is_tracing_stopped(void) |
1000 | { | 1162 | { |
1001 | return trace_stop_count; | 1163 | return global_trace.stop_count; |
1002 | } | 1164 | } |
1003 | 1165 | ||
1004 | /** | 1166 | /** |
@@ -1030,12 +1192,12 @@ void tracing_start(void) | |||
1030 | if (tracing_disabled) | 1192 | if (tracing_disabled) |
1031 | return; | 1193 | return; |
1032 | 1194 | ||
1033 | raw_spin_lock_irqsave(&tracing_start_lock, flags); | 1195 | raw_spin_lock_irqsave(&global_trace.start_lock, flags); |
1034 | if (--trace_stop_count) { | 1196 | if (--global_trace.stop_count) { |
1035 | if (trace_stop_count < 0) { | 1197 | if (global_trace.stop_count < 0) { |
1036 | /* Someone screwed up their debugging */ | 1198 | /* Someone screwed up their debugging */ |
1037 | WARN_ON_ONCE(1); | 1199 | WARN_ON_ONCE(1); |
1038 | trace_stop_count = 0; | 1200 | global_trace.stop_count = 0; |
1039 | } | 1201 | } |
1040 | goto out; | 1202 | goto out; |
1041 | } | 1203 | } |
@@ -1043,19 +1205,52 @@ void tracing_start(void) | |||
1043 | /* Prevent the buffers from switching */ | 1205 | /* Prevent the buffers from switching */ |
1044 | arch_spin_lock(&ftrace_max_lock); | 1206 | arch_spin_lock(&ftrace_max_lock); |
1045 | 1207 | ||
1046 | buffer = global_trace.buffer; | 1208 | buffer = global_trace.trace_buffer.buffer; |
1047 | if (buffer) | 1209 | if (buffer) |
1048 | ring_buffer_record_enable(buffer); | 1210 | ring_buffer_record_enable(buffer); |
1049 | 1211 | ||
1050 | buffer = max_tr.buffer; | 1212 | #ifdef CONFIG_TRACER_MAX_TRACE |
1213 | buffer = global_trace.max_buffer.buffer; | ||
1051 | if (buffer) | 1214 | if (buffer) |
1052 | ring_buffer_record_enable(buffer); | 1215 | ring_buffer_record_enable(buffer); |
1216 | #endif | ||
1053 | 1217 | ||
1054 | arch_spin_unlock(&ftrace_max_lock); | 1218 | arch_spin_unlock(&ftrace_max_lock); |
1055 | 1219 | ||
1056 | ftrace_start(); | 1220 | ftrace_start(); |
1057 | out: | 1221 | out: |
1058 | raw_spin_unlock_irqrestore(&tracing_start_lock, flags); | 1222 | raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); |
1223 | } | ||
1224 | |||
1225 | static void tracing_start_tr(struct trace_array *tr) | ||
1226 | { | ||
1227 | struct ring_buffer *buffer; | ||
1228 | unsigned long flags; | ||
1229 | |||
1230 | if (tracing_disabled) | ||
1231 | return; | ||
1232 | |||
1233 | /* If global, we need to also start the max tracer */ | ||
1234 | if (tr->flags & TRACE_ARRAY_FL_GLOBAL) | ||
1235 | return tracing_start(); | ||
1236 | |||
1237 | raw_spin_lock_irqsave(&tr->start_lock, flags); | ||
1238 | |||
1239 | if (--tr->stop_count) { | ||
1240 | if (tr->stop_count < 0) { | ||
1241 | /* Someone screwed up their debugging */ | ||
1242 | WARN_ON_ONCE(1); | ||
1243 | tr->stop_count = 0; | ||
1244 | } | ||
1245 | goto out; | ||
1246 | } | ||
1247 | |||
1248 | buffer = tr->trace_buffer.buffer; | ||
1249 | if (buffer) | ||
1250 | ring_buffer_record_enable(buffer); | ||
1251 | |||
1252 | out: | ||
1253 | raw_spin_unlock_irqrestore(&tr->start_lock, flags); | ||
1059 | } | 1254 | } |
1060 | 1255 | ||
1061 | /** | 1256 | /** |
@@ -1070,25 +1265,48 @@ void tracing_stop(void) | |||
1070 | unsigned long flags; | 1265 | unsigned long flags; |
1071 | 1266 | ||
1072 | ftrace_stop(); | 1267 | ftrace_stop(); |
1073 | raw_spin_lock_irqsave(&tracing_start_lock, flags); | 1268 | raw_spin_lock_irqsave(&global_trace.start_lock, flags); |
1074 | if (trace_stop_count++) | 1269 | if (global_trace.stop_count++) |
1075 | goto out; | 1270 | goto out; |
1076 | 1271 | ||
1077 | /* Prevent the buffers from switching */ | 1272 | /* Prevent the buffers from switching */ |
1078 | arch_spin_lock(&ftrace_max_lock); | 1273 | arch_spin_lock(&ftrace_max_lock); |
1079 | 1274 | ||
1080 | buffer = global_trace.buffer; | 1275 | buffer = global_trace.trace_buffer.buffer; |
1081 | if (buffer) | 1276 | if (buffer) |
1082 | ring_buffer_record_disable(buffer); | 1277 | ring_buffer_record_disable(buffer); |
1083 | 1278 | ||
1084 | buffer = max_tr.buffer; | 1279 | #ifdef CONFIG_TRACER_MAX_TRACE |
1280 | buffer = global_trace.max_buffer.buffer; | ||
1085 | if (buffer) | 1281 | if (buffer) |
1086 | ring_buffer_record_disable(buffer); | 1282 | ring_buffer_record_disable(buffer); |
1283 | #endif | ||
1087 | 1284 | ||
1088 | arch_spin_unlock(&ftrace_max_lock); | 1285 | arch_spin_unlock(&ftrace_max_lock); |
1089 | 1286 | ||
1090 | out: | 1287 | out: |
1091 | raw_spin_unlock_irqrestore(&tracing_start_lock, flags); | 1288 | raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); |
1289 | } | ||
1290 | |||
1291 | static void tracing_stop_tr(struct trace_array *tr) | ||
1292 | { | ||
1293 | struct ring_buffer *buffer; | ||
1294 | unsigned long flags; | ||
1295 | |||
1296 | /* If global, we need to also stop the max tracer */ | ||
1297 | if (tr->flags & TRACE_ARRAY_FL_GLOBAL) | ||
1298 | return tracing_stop(); | ||
1299 | |||
1300 | raw_spin_lock_irqsave(&tr->start_lock, flags); | ||
1301 | if (tr->stop_count++) | ||
1302 | goto out; | ||
1303 | |||
1304 | buffer = tr->trace_buffer.buffer; | ||
1305 | if (buffer) | ||
1306 | ring_buffer_record_disable(buffer); | ||
1307 | |||
1308 | out: | ||
1309 | raw_spin_unlock_irqrestore(&tr->start_lock, flags); | ||
1092 | } | 1310 | } |
1093 | 1311 | ||
1094 | void trace_stop_cmdline_recording(void); | 1312 | void trace_stop_cmdline_recording(void); |
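Editor's note: tracing_start_tr()/tracing_stop_tr() mirror tracing_start()/tracing_stop() but scope the stop_count nesting to a single instance; only the global array also touches the max buffer. Both helpers are static to trace.c, and later in this patch __tracing_open()/tracing_release() pair them. A hypothetical illustration of that pairing:

        static void read_quiesced(struct trace_array *tr)
        {
                tracing_stop_tr(tr);    /* bumps tr->stop_count, disables recording */

                /* ... walk the buffer while no new events can be committed ... */

                tracing_start_tr(tr);   /* recording resumes once stop_count drops to 0 */
        }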
@@ -1221,11 +1439,6 @@ void | |||
1221 | __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) | 1439 | __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) |
1222 | { | 1440 | { |
1223 | __this_cpu_write(trace_cmdline_save, true); | 1441 | __this_cpu_write(trace_cmdline_save, true); |
1224 | if (trace_wakeup_needed) { | ||
1225 | trace_wakeup_needed = false; | ||
1226 | /* irq_work_queue() supplies it's own memory barriers */ | ||
1227 | irq_work_queue(&trace_work_wakeup); | ||
1228 | } | ||
1229 | ring_buffer_unlock_commit(buffer, event); | 1442 | ring_buffer_unlock_commit(buffer, event); |
1230 | } | 1443 | } |
1231 | 1444 | ||
@@ -1249,11 +1462,23 @@ void trace_buffer_unlock_commit(struct ring_buffer *buffer, | |||
1249 | EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); | 1462 | EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); |
1250 | 1463 | ||
1251 | struct ring_buffer_event * | 1464 | struct ring_buffer_event * |
1465 | trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, | ||
1466 | struct ftrace_event_file *ftrace_file, | ||
1467 | int type, unsigned long len, | ||
1468 | unsigned long flags, int pc) | ||
1469 | { | ||
1470 | *current_rb = ftrace_file->tr->trace_buffer.buffer; | ||
1471 | return trace_buffer_lock_reserve(*current_rb, | ||
1472 | type, len, flags, pc); | ||
1473 | } | ||
1474 | EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); | ||
1475 | |||
1476 | struct ring_buffer_event * | ||
1252 | trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, | 1477 | trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, |
1253 | int type, unsigned long len, | 1478 | int type, unsigned long len, |
1254 | unsigned long flags, int pc) | 1479 | unsigned long flags, int pc) |
1255 | { | 1480 | { |
1256 | *current_rb = global_trace.buffer; | 1481 | *current_rb = global_trace.trace_buffer.buffer; |
1257 | return trace_buffer_lock_reserve(*current_rb, | 1482 | return trace_buffer_lock_reserve(*current_rb, |
1258 | type, len, flags, pc); | 1483 | type, len, flags, pc); |
1259 | } | 1484 | } |
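Editor's note: trace_event_buffer_lock_reserve() is what lets static trace events land in the buffer of whichever instance enabled them, since the ftrace_event_file carries a pointer back to its trace_array. A hedged sketch of the reserve/fill/commit pattern an event probe would use; the entry layout and all my_* names are illustrative, not from this patch:

        #include <linux/ftrace_event.h>
        #include <linux/ring_buffer.h>

        static void my_event_probe_sketch(struct ftrace_event_file *ftrace_file,
                                          int event_type, unsigned long value,
                                          unsigned long irq_flags, int pc)
        {
                struct ring_buffer_event *event;
                struct ring_buffer *buffer;
                struct my_entry {
                        struct trace_entry      ent;
                        unsigned long           value;
                } *entry;

                /* reserves space in ftrace_file->tr's buffer, not the global one */
                event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
                                                        event_type, sizeof(*entry),
                                                        irq_flags, pc);
                if (!event)
                        return;
                entry = ring_buffer_event_data(event);
                entry->value = value;
                trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
        }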
@@ -1292,7 +1517,7 @@ trace_function(struct trace_array *tr, | |||
1292 | int pc) | 1517 | int pc) |
1293 | { | 1518 | { |
1294 | struct ftrace_event_call *call = &event_function; | 1519 | struct ftrace_event_call *call = &event_function; |
1295 | struct ring_buffer *buffer = tr->buffer; | 1520 | struct ring_buffer *buffer = tr->trace_buffer.buffer; |
1296 | struct ring_buffer_event *event; | 1521 | struct ring_buffer_event *event; |
1297 | struct ftrace_entry *entry; | 1522 | struct ftrace_entry *entry; |
1298 | 1523 | ||
@@ -1433,13 +1658,14 @@ void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, | |||
1433 | void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, | 1658 | void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, |
1434 | int pc) | 1659 | int pc) |
1435 | { | 1660 | { |
1436 | __ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL); | 1661 | __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL); |
1437 | } | 1662 | } |
1438 | 1663 | ||
1439 | /** | 1664 | /** |
1440 | * trace_dump_stack - record a stack back trace in the trace buffer | 1665 | * trace_dump_stack - record a stack back trace in the trace buffer |
1666 | * @skip: Number of functions to skip (helper handlers) | ||
1441 | */ | 1667 | */ |
1442 | void trace_dump_stack(void) | 1668 | void trace_dump_stack(int skip) |
1443 | { | 1669 | { |
1444 | unsigned long flags; | 1670 | unsigned long flags; |
1445 | 1671 | ||
@@ -1448,8 +1674,13 @@ void trace_dump_stack(void) | |||
1448 | 1674 | ||
1449 | local_save_flags(flags); | 1675 | local_save_flags(flags); |
1450 | 1676 | ||
1451 | /* skipping 3 traces, seems to get us at the caller of this function */ | 1677 | /* |
1452 | __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL); | 1678 | * Skip 3 more, seems to get us at the caller of |
1679 | * this function. | ||
1680 | */ | ||
1681 | skip += 3; | ||
1682 | __ftrace_trace_stack(global_trace.trace_buffer.buffer, | ||
1683 | flags, skip, preempt_count(), NULL); | ||
1453 | } | 1684 | } |
1454 | 1685 | ||
1455 | static DEFINE_PER_CPU(int, user_stack_count); | 1686 | static DEFINE_PER_CPU(int, user_stack_count); |
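Editor's note: trace_dump_stack() grows a skip argument so callers can hide their own helper frames; passing 0 keeps the old behaviour of starting at the immediate caller. A hypothetical call site:

        #include <linux/kernel.h>

        static void note_odd_state(void)
        {
                /* 0 = start at our caller; a wrapper would pass 1 to hide itself */
                trace_dump_stack(0);
        }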
@@ -1619,7 +1850,7 @@ void trace_printk_init_buffers(void) | |||
1619 | * directly here. If the global_trace.buffer is already | 1850 | * directly here. If the global_trace.buffer is already |
1620 | * allocated here, then this was called by module code. | 1851 | * allocated here, then this was called by module code. |
1621 | */ | 1852 | */ |
1622 | if (global_trace.buffer) | 1853 | if (global_trace.trace_buffer.buffer) |
1623 | tracing_start_cmdline_record(); | 1854 | tracing_start_cmdline_record(); |
1624 | } | 1855 | } |
1625 | 1856 | ||
@@ -1679,7 +1910,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) | |||
1679 | 1910 | ||
1680 | local_save_flags(flags); | 1911 | local_save_flags(flags); |
1681 | size = sizeof(*entry) + sizeof(u32) * len; | 1912 | size = sizeof(*entry) + sizeof(u32) * len; |
1682 | buffer = tr->buffer; | 1913 | buffer = tr->trace_buffer.buffer; |
1683 | event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, | 1914 | event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, |
1684 | flags, pc); | 1915 | flags, pc); |
1685 | if (!event) | 1916 | if (!event) |
@@ -1702,27 +1933,12 @@ out: | |||
1702 | } | 1933 | } |
1703 | EXPORT_SYMBOL_GPL(trace_vbprintk); | 1934 | EXPORT_SYMBOL_GPL(trace_vbprintk); |
1704 | 1935 | ||
1705 | int trace_array_printk(struct trace_array *tr, | 1936 | static int |
1706 | unsigned long ip, const char *fmt, ...) | 1937 | __trace_array_vprintk(struct ring_buffer *buffer, |
1707 | { | 1938 | unsigned long ip, const char *fmt, va_list args) |
1708 | int ret; | ||
1709 | va_list ap; | ||
1710 | |||
1711 | if (!(trace_flags & TRACE_ITER_PRINTK)) | ||
1712 | return 0; | ||
1713 | |||
1714 | va_start(ap, fmt); | ||
1715 | ret = trace_array_vprintk(tr, ip, fmt, ap); | ||
1716 | va_end(ap); | ||
1717 | return ret; | ||
1718 | } | ||
1719 | |||
1720 | int trace_array_vprintk(struct trace_array *tr, | ||
1721 | unsigned long ip, const char *fmt, va_list args) | ||
1722 | { | 1939 | { |
1723 | struct ftrace_event_call *call = &event_print; | 1940 | struct ftrace_event_call *call = &event_print; |
1724 | struct ring_buffer_event *event; | 1941 | struct ring_buffer_event *event; |
1725 | struct ring_buffer *buffer; | ||
1726 | int len = 0, size, pc; | 1942 | int len = 0, size, pc; |
1727 | struct print_entry *entry; | 1943 | struct print_entry *entry; |
1728 | unsigned long flags; | 1944 | unsigned long flags; |
@@ -1750,7 +1966,6 @@ int trace_array_vprintk(struct trace_array *tr, | |||
1750 | 1966 | ||
1751 | local_save_flags(flags); | 1967 | local_save_flags(flags); |
1752 | size = sizeof(*entry) + len + 1; | 1968 | size = sizeof(*entry) + len + 1; |
1753 | buffer = tr->buffer; | ||
1754 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, | 1969 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, |
1755 | flags, pc); | 1970 | flags, pc); |
1756 | if (!event) | 1971 | if (!event) |
@@ -1771,6 +1986,42 @@ int trace_array_vprintk(struct trace_array *tr, | |||
1771 | return len; | 1986 | return len; |
1772 | } | 1987 | } |
1773 | 1988 | ||
1989 | int trace_array_vprintk(struct trace_array *tr, | ||
1990 | unsigned long ip, const char *fmt, va_list args) | ||
1991 | { | ||
1992 | return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args); | ||
1993 | } | ||
1994 | |||
1995 | int trace_array_printk(struct trace_array *tr, | ||
1996 | unsigned long ip, const char *fmt, ...) | ||
1997 | { | ||
1998 | int ret; | ||
1999 | va_list ap; | ||
2000 | |||
2001 | if (!(trace_flags & TRACE_ITER_PRINTK)) | ||
2002 | return 0; | ||
2003 | |||
2004 | va_start(ap, fmt); | ||
2005 | ret = trace_array_vprintk(tr, ip, fmt, ap); | ||
2006 | va_end(ap); | ||
2007 | return ret; | ||
2008 | } | ||
2009 | |||
2010 | int trace_array_printk_buf(struct ring_buffer *buffer, | ||
2011 | unsigned long ip, const char *fmt, ...) | ||
2012 | { | ||
2013 | int ret; | ||
2014 | va_list ap; | ||
2015 | |||
2016 | if (!(trace_flags & TRACE_ITER_PRINTK)) | ||
2017 | return 0; | ||
2018 | |||
2019 | va_start(ap, fmt); | ||
2020 | ret = __trace_array_vprintk(buffer, ip, fmt, ap); | ||
2021 | va_end(ap); | ||
2022 | return ret; | ||
2023 | } | ||
2024 | |||
1774 | int trace_vprintk(unsigned long ip, const char *fmt, va_list args) | 2025 | int trace_vprintk(unsigned long ip, const char *fmt, va_list args) |
1775 | { | 2026 | { |
1776 | return trace_array_vprintk(&global_trace, ip, fmt, args); | 2027 | return trace_array_vprintk(&global_trace, ip, fmt, args); |
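Editor's note: __trace_array_vprintk() now takes the ring buffer directly, and trace_array_printk_buf() wraps it so a message can be aimed at a specific buffer, which is how the swap-failure warning earlier in this patch writes into the snapshot buffer. A hypothetical use inside kernel/trace code (note_swap_skip is illustrative):

        #ifdef CONFIG_TRACER_MAX_TRACE
        static void note_swap_skip(struct trace_array *tr, int cpu)
        {
                /* aim the message at the snapshot (max) buffer, not the live one */
                trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
                                       "snapshot swap skipped on cpu %d\n", cpu);
        }
        #endif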
@@ -1796,7 +2047,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, | |||
1796 | if (buf_iter) | 2047 | if (buf_iter) |
1797 | event = ring_buffer_iter_peek(buf_iter, ts); | 2048 | event = ring_buffer_iter_peek(buf_iter, ts); |
1798 | else | 2049 | else |
1799 | event = ring_buffer_peek(iter->tr->buffer, cpu, ts, | 2050 | event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts, |
1800 | lost_events); | 2051 | lost_events); |
1801 | 2052 | ||
1802 | if (event) { | 2053 | if (event) { |
@@ -1811,7 +2062,7 @@ static struct trace_entry * | |||
1811 | __find_next_entry(struct trace_iterator *iter, int *ent_cpu, | 2062 | __find_next_entry(struct trace_iterator *iter, int *ent_cpu, |
1812 | unsigned long *missing_events, u64 *ent_ts) | 2063 | unsigned long *missing_events, u64 *ent_ts) |
1813 | { | 2064 | { |
1814 | struct ring_buffer *buffer = iter->tr->buffer; | 2065 | struct ring_buffer *buffer = iter->trace_buffer->buffer; |
1815 | struct trace_entry *ent, *next = NULL; | 2066 | struct trace_entry *ent, *next = NULL; |
1816 | unsigned long lost_events = 0, next_lost = 0; | 2067 | unsigned long lost_events = 0, next_lost = 0; |
1817 | int cpu_file = iter->cpu_file; | 2068 | int cpu_file = iter->cpu_file; |
@@ -1824,7 +2075,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, | |||
1824 | * If we are in a per_cpu trace file, don't bother by iterating over | 2075 | * If we are in a per_cpu trace file, don't bother by iterating over |
1825 | * all cpu and peek directly. | 2076 | * all cpu and peek directly. |
1826 | */ | 2077 | */ |
1827 | if (cpu_file > TRACE_PIPE_ALL_CPU) { | 2078 | if (cpu_file > RING_BUFFER_ALL_CPUS) { |
1828 | if (ring_buffer_empty_cpu(buffer, cpu_file)) | 2079 | if (ring_buffer_empty_cpu(buffer, cpu_file)) |
1829 | return NULL; | 2080 | return NULL; |
1830 | ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); | 2081 | ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); |
@@ -1888,7 +2139,7 @@ void *trace_find_next_entry_inc(struct trace_iterator *iter) | |||
1888 | 2139 | ||
1889 | static void trace_consume(struct trace_iterator *iter) | 2140 | static void trace_consume(struct trace_iterator *iter) |
1890 | { | 2141 | { |
1891 | ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts, | 2142 | ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts, |
1892 | &iter->lost_events); | 2143 | &iter->lost_events); |
1893 | } | 2144 | } |
1894 | 2145 | ||
@@ -1921,13 +2172,12 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos) | |||
1921 | 2172 | ||
1922 | void tracing_iter_reset(struct trace_iterator *iter, int cpu) | 2173 | void tracing_iter_reset(struct trace_iterator *iter, int cpu) |
1923 | { | 2174 | { |
1924 | struct trace_array *tr = iter->tr; | ||
1925 | struct ring_buffer_event *event; | 2175 | struct ring_buffer_event *event; |
1926 | struct ring_buffer_iter *buf_iter; | 2176 | struct ring_buffer_iter *buf_iter; |
1927 | unsigned long entries = 0; | 2177 | unsigned long entries = 0; |
1928 | u64 ts; | 2178 | u64 ts; |
1929 | 2179 | ||
1930 | tr->data[cpu]->skipped_entries = 0; | 2180 | per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0; |
1931 | 2181 | ||
1932 | buf_iter = trace_buffer_iter(iter, cpu); | 2182 | buf_iter = trace_buffer_iter(iter, cpu); |
1933 | if (!buf_iter) | 2183 | if (!buf_iter) |
@@ -1941,13 +2191,13 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) | |||
1941 | * by the timestamp being before the start of the buffer. | 2191 | * by the timestamp being before the start of the buffer. |
1942 | */ | 2192 | */ |
1943 | while ((event = ring_buffer_iter_peek(buf_iter, &ts))) { | 2193 | while ((event = ring_buffer_iter_peek(buf_iter, &ts))) { |
1944 | if (ts >= iter->tr->time_start) | 2194 | if (ts >= iter->trace_buffer->time_start) |
1945 | break; | 2195 | break; |
1946 | entries++; | 2196 | entries++; |
1947 | ring_buffer_read(buf_iter, NULL); | 2197 | ring_buffer_read(buf_iter, NULL); |
1948 | } | 2198 | } |
1949 | 2199 | ||
1950 | tr->data[cpu]->skipped_entries = entries; | 2200 | per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries; |
1951 | } | 2201 | } |
1952 | 2202 | ||
1953 | /* | 2203 | /* |
@@ -1957,6 +2207,7 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) | |||
1957 | static void *s_start(struct seq_file *m, loff_t *pos) | 2207 | static void *s_start(struct seq_file *m, loff_t *pos) |
1958 | { | 2208 | { |
1959 | struct trace_iterator *iter = m->private; | 2209 | struct trace_iterator *iter = m->private; |
2210 | struct trace_array *tr = iter->tr; | ||
1960 | int cpu_file = iter->cpu_file; | 2211 | int cpu_file = iter->cpu_file; |
1961 | void *p = NULL; | 2212 | void *p = NULL; |
1962 | loff_t l = 0; | 2213 | loff_t l = 0; |
@@ -1969,12 +2220,14 @@ static void *s_start(struct seq_file *m, loff_t *pos) | |||
1969 | * will point to the same string as current_trace->name. | 2220 | * will point to the same string as current_trace->name. |
1970 | */ | 2221 | */ |
1971 | mutex_lock(&trace_types_lock); | 2222 | mutex_lock(&trace_types_lock); |
1972 | if (unlikely(current_trace && iter->trace->name != current_trace->name)) | 2223 | if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) |
1973 | *iter->trace = *current_trace; | 2224 | *iter->trace = *tr->current_trace; |
1974 | mutex_unlock(&trace_types_lock); | 2225 | mutex_unlock(&trace_types_lock); |
1975 | 2226 | ||
2227 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
1976 | if (iter->snapshot && iter->trace->use_max_tr) | 2228 | if (iter->snapshot && iter->trace->use_max_tr) |
1977 | return ERR_PTR(-EBUSY); | 2229 | return ERR_PTR(-EBUSY); |
2230 | #endif | ||
1978 | 2231 | ||
1979 | if (!iter->snapshot) | 2232 | if (!iter->snapshot) |
1980 | atomic_inc(&trace_record_cmdline_disabled); | 2233 | atomic_inc(&trace_record_cmdline_disabled); |
@@ -1984,7 +2237,7 @@ static void *s_start(struct seq_file *m, loff_t *pos) | |||
1984 | iter->cpu = 0; | 2237 | iter->cpu = 0; |
1985 | iter->idx = -1; | 2238 | iter->idx = -1; |
1986 | 2239 | ||
1987 | if (cpu_file == TRACE_PIPE_ALL_CPU) { | 2240 | if (cpu_file == RING_BUFFER_ALL_CPUS) { |
1988 | for_each_tracing_cpu(cpu) | 2241 | for_each_tracing_cpu(cpu) |
1989 | tracing_iter_reset(iter, cpu); | 2242 | tracing_iter_reset(iter, cpu); |
1990 | } else | 2243 | } else |
@@ -2016,17 +2269,21 @@ static void s_stop(struct seq_file *m, void *p) | |||
2016 | { | 2269 | { |
2017 | struct trace_iterator *iter = m->private; | 2270 | struct trace_iterator *iter = m->private; |
2018 | 2271 | ||
2272 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
2019 | if (iter->snapshot && iter->trace->use_max_tr) | 2273 | if (iter->snapshot && iter->trace->use_max_tr) |
2020 | return; | 2274 | return; |
2275 | #endif | ||
2021 | 2276 | ||
2022 | if (!iter->snapshot) | 2277 | if (!iter->snapshot) |
2023 | atomic_dec(&trace_record_cmdline_disabled); | 2278 | atomic_dec(&trace_record_cmdline_disabled); |
2279 | |||
2024 | trace_access_unlock(iter->cpu_file); | 2280 | trace_access_unlock(iter->cpu_file); |
2025 | trace_event_read_unlock(); | 2281 | trace_event_read_unlock(); |
2026 | } | 2282 | } |
2027 | 2283 | ||
2028 | static void | 2284 | static void |
2029 | get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *entries) | 2285 | get_total_entries(struct trace_buffer *buf, |
2286 | unsigned long *total, unsigned long *entries) | ||
2030 | { | 2287 | { |
2031 | unsigned long count; | 2288 | unsigned long count; |
2032 | int cpu; | 2289 | int cpu; |
@@ -2035,19 +2292,19 @@ get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *e | |||
2035 | *entries = 0; | 2292 | *entries = 0; |
2036 | 2293 | ||
2037 | for_each_tracing_cpu(cpu) { | 2294 | for_each_tracing_cpu(cpu) { |
2038 | count = ring_buffer_entries_cpu(tr->buffer, cpu); | 2295 | count = ring_buffer_entries_cpu(buf->buffer, cpu); |
2039 | /* | 2296 | /* |
2040 | * If this buffer has skipped entries, then we hold all | 2297 | * If this buffer has skipped entries, then we hold all |
2041 | * entries for the trace and we need to ignore the | 2298 | * entries for the trace and we need to ignore the |
2042 | * ones before the time stamp. | 2299 | * ones before the time stamp. |
2043 | */ | 2300 | */ |
2044 | if (tr->data[cpu]->skipped_entries) { | 2301 | if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { |
2045 | count -= tr->data[cpu]->skipped_entries; | 2302 | count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; |
2046 | /* total is the same as the entries */ | 2303 | /* total is the same as the entries */ |
2047 | *total += count; | 2304 | *total += count; |
2048 | } else | 2305 | } else |
2049 | *total += count + | 2306 | *total += count + |
2050 | ring_buffer_overrun_cpu(tr->buffer, cpu); | 2307 | ring_buffer_overrun_cpu(buf->buffer, cpu); |
2051 | *entries += count; | 2308 | *entries += count; |
2052 | } | 2309 | } |
2053 | } | 2310 | } |
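Editor's note: the per-CPU bookkeeping moves from the old tr->data[cpu] array into a percpu allocation hanging off struct trace_buffer, so accesses go through per_cpu_ptr(). A hedged sketch of the implied setup and access pattern; the allocation presumably happens with alloc_percpu() in the buffer-init paths, which this hunk does not show, and example_percpu_usage is a hypothetical helper:

        static int example_percpu_usage(struct trace_buffer *buf, int cpu)
        {
                /* assumed setup path: one trace_array_cpu per possible CPU */
                buf->data = alloc_percpu(struct trace_array_cpu);
                if (!buf->data)
                        return -ENOMEM;

                /* old style was tr->data[cpu]->skipped_entries = 0; */
                per_cpu_ptr(buf->data, cpu)->skipped_entries = 0;
                return 0;
        }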
@@ -2064,27 +2321,27 @@ static void print_lat_help_header(struct seq_file *m) | |||
2064 | seq_puts(m, "# \\ / ||||| \\ | / \n"); | 2321 | seq_puts(m, "# \\ / ||||| \\ | / \n"); |
2065 | } | 2322 | } |
2066 | 2323 | ||
2067 | static void print_event_info(struct trace_array *tr, struct seq_file *m) | 2324 | static void print_event_info(struct trace_buffer *buf, struct seq_file *m) |
2068 | { | 2325 | { |
2069 | unsigned long total; | 2326 | unsigned long total; |
2070 | unsigned long entries; | 2327 | unsigned long entries; |
2071 | 2328 | ||
2072 | get_total_entries(tr, &total, &entries); | 2329 | get_total_entries(buf, &total, &entries); |
2073 | seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n", | 2330 | seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n", |
2074 | entries, total, num_online_cpus()); | 2331 | entries, total, num_online_cpus()); |
2075 | seq_puts(m, "#\n"); | 2332 | seq_puts(m, "#\n"); |
2076 | } | 2333 | } |
2077 | 2334 | ||
2078 | static void print_func_help_header(struct trace_array *tr, struct seq_file *m) | 2335 | static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m) |
2079 | { | 2336 | { |
2080 | print_event_info(tr, m); | 2337 | print_event_info(buf, m); |
2081 | seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); | 2338 | seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); |
2082 | seq_puts(m, "# | | | | |\n"); | 2339 | seq_puts(m, "# | | | | |\n"); |
2083 | } | 2340 | } |
2084 | 2341 | ||
2085 | static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m) | 2342 | static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m) |
2086 | { | 2343 | { |
2087 | print_event_info(tr, m); | 2344 | print_event_info(buf, m); |
2088 | seq_puts(m, "# _-----=> irqs-off\n"); | 2345 | seq_puts(m, "# _-----=> irqs-off\n"); |
2089 | seq_puts(m, "# / _----=> need-resched\n"); | 2346 | seq_puts(m, "# / _----=> need-resched\n"); |
2090 | seq_puts(m, "# | / _---=> hardirq/softirq\n"); | 2347 | seq_puts(m, "# | / _---=> hardirq/softirq\n"); |
@@ -2098,16 +2355,16 @@ void | |||
2098 | print_trace_header(struct seq_file *m, struct trace_iterator *iter) | 2355 | print_trace_header(struct seq_file *m, struct trace_iterator *iter) |
2099 | { | 2356 | { |
2100 | unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); | 2357 | unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); |
2101 | struct trace_array *tr = iter->tr; | 2358 | struct trace_buffer *buf = iter->trace_buffer; |
2102 | struct trace_array_cpu *data = tr->data[tr->cpu]; | 2359 | struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu); |
2103 | struct tracer *type = current_trace; | 2360 | struct tracer *type = iter->trace; |
2104 | unsigned long entries; | 2361 | unsigned long entries; |
2105 | unsigned long total; | 2362 | unsigned long total; |
2106 | const char *name = "preemption"; | 2363 | const char *name = "preemption"; |
2107 | 2364 | ||
2108 | name = type->name; | 2365 | name = type->name; |
2109 | 2366 | ||
2110 | get_total_entries(tr, &total, &entries); | 2367 | get_total_entries(buf, &total, &entries); |
2111 | 2368 | ||
2112 | seq_printf(m, "# %s latency trace v1.1.5 on %s\n", | 2369 | seq_printf(m, "# %s latency trace v1.1.5 on %s\n", |
2113 | name, UTS_RELEASE); | 2370 | name, UTS_RELEASE); |
@@ -2118,7 +2375,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) | |||
2118 | nsecs_to_usecs(data->saved_latency), | 2375 | nsecs_to_usecs(data->saved_latency), |
2119 | entries, | 2376 | entries, |
2120 | total, | 2377 | total, |
2121 | tr->cpu, | 2378 | buf->cpu, |
2122 | #if defined(CONFIG_PREEMPT_NONE) | 2379 | #if defined(CONFIG_PREEMPT_NONE) |
2123 | "server", | 2380 | "server", |
2124 | #elif defined(CONFIG_PREEMPT_VOLUNTARY) | 2381 | #elif defined(CONFIG_PREEMPT_VOLUNTARY) |
@@ -2169,7 +2426,7 @@ static void test_cpu_buff_start(struct trace_iterator *iter) | |||
2169 | if (cpumask_test_cpu(iter->cpu, iter->started)) | 2426 | if (cpumask_test_cpu(iter->cpu, iter->started)) |
2170 | return; | 2427 | return; |
2171 | 2428 | ||
2172 | if (iter->tr->data[iter->cpu]->skipped_entries) | 2429 | if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries) |
2173 | return; | 2430 | return; |
2174 | 2431 | ||
2175 | cpumask_set_cpu(iter->cpu, iter->started); | 2432 | cpumask_set_cpu(iter->cpu, iter->started); |
@@ -2292,14 +2549,14 @@ int trace_empty(struct trace_iterator *iter) | |||
2292 | int cpu; | 2549 | int cpu; |
2293 | 2550 | ||
2294 | /* If we are looking at one CPU buffer, only check that one */ | 2551 | /* If we are looking at one CPU buffer, only check that one */ |
2295 | if (iter->cpu_file != TRACE_PIPE_ALL_CPU) { | 2552 | if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { |
2296 | cpu = iter->cpu_file; | 2553 | cpu = iter->cpu_file; |
2297 | buf_iter = trace_buffer_iter(iter, cpu); | 2554 | buf_iter = trace_buffer_iter(iter, cpu); |
2298 | if (buf_iter) { | 2555 | if (buf_iter) { |
2299 | if (!ring_buffer_iter_empty(buf_iter)) | 2556 | if (!ring_buffer_iter_empty(buf_iter)) |
2300 | return 0; | 2557 | return 0; |
2301 | } else { | 2558 | } else { |
2302 | if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) | 2559 | if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu)) |
2303 | return 0; | 2560 | return 0; |
2304 | } | 2561 | } |
2305 | return 1; | 2562 | return 1; |
@@ -2311,7 +2568,7 @@ int trace_empty(struct trace_iterator *iter) | |||
2311 | if (!ring_buffer_iter_empty(buf_iter)) | 2568 | if (!ring_buffer_iter_empty(buf_iter)) |
2312 | return 0; | 2569 | return 0; |
2313 | } else { | 2570 | } else { |
2314 | if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) | 2571 | if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu)) |
2315 | return 0; | 2572 | return 0; |
2316 | } | 2573 | } |
2317 | } | 2574 | } |
@@ -2335,6 +2592,11 @@ enum print_line_t print_trace_line(struct trace_iterator *iter) | |||
2335 | return ret; | 2592 | return ret; |
2336 | } | 2593 | } |
2337 | 2594 | ||
2595 | if (iter->ent->type == TRACE_BPUTS && | ||
2596 | trace_flags & TRACE_ITER_PRINTK && | ||
2597 | trace_flags & TRACE_ITER_PRINTK_MSGONLY) | ||
2598 | return trace_print_bputs_msg_only(iter); | ||
2599 | |||
2338 | if (iter->ent->type == TRACE_BPRINT && | 2600 | if (iter->ent->type == TRACE_BPRINT && |
2339 | trace_flags & TRACE_ITER_PRINTK && | 2601 | trace_flags & TRACE_ITER_PRINTK && |
2340 | trace_flags & TRACE_ITER_PRINTK_MSGONLY) | 2602 | trace_flags & TRACE_ITER_PRINTK_MSGONLY) |
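Editor's note: TRACE_BPUTS entries come from the trace_puts() fast path introduced alongside this series, which records only a pointer to a constant string instead of formatting anything at runtime; print_trace_line() above learns to honour the printk-msg-only option for them too. A usage sketch (the call site is hypothetical):

        #include <linux/kernel.h>

        static void my_fast_marker(void)
        {
                /* constant string: becomes a TRACE_BPUTS entry, cheaper than
                 * trace_printk() because no formatting happens at runtime */
                trace_puts("reached the interesting path\n");
        }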
@@ -2389,9 +2651,9 @@ void trace_default_header(struct seq_file *m) | |||
2389 | } else { | 2651 | } else { |
2390 | if (!(trace_flags & TRACE_ITER_VERBOSE)) { | 2652 | if (!(trace_flags & TRACE_ITER_VERBOSE)) { |
2391 | if (trace_flags & TRACE_ITER_IRQ_INFO) | 2653 | if (trace_flags & TRACE_ITER_IRQ_INFO) |
2392 | print_func_help_header_irq(iter->tr, m); | 2654 | print_func_help_header_irq(iter->trace_buffer, m); |
2393 | else | 2655 | else |
2394 | print_func_help_header(iter->tr, m); | 2656 | print_func_help_header(iter->trace_buffer, m); |
2395 | } | 2657 | } |
2396 | } | 2658 | } |
2397 | } | 2659 | } |
@@ -2405,14 +2667,8 @@ static void test_ftrace_alive(struct seq_file *m) | |||
2405 | } | 2667 | } |
2406 | 2668 | ||
2407 | #ifdef CONFIG_TRACER_MAX_TRACE | 2669 | #ifdef CONFIG_TRACER_MAX_TRACE |
2408 | static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) | 2670 | static void show_snapshot_main_help(struct seq_file *m) |
2409 | { | 2671 | { |
2410 | if (iter->trace->allocated_snapshot) | ||
2411 | seq_printf(m, "#\n# * Snapshot is allocated *\n#\n"); | ||
2412 | else | ||
2413 | seq_printf(m, "#\n# * Snapshot is freed *\n#\n"); | ||
2414 | |||
2415 | seq_printf(m, "# Snapshot commands:\n"); | ||
2416 | seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"); | 2672 | seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"); |
2417 | seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); | 2673 | seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); |
2418 | seq_printf(m, "# Takes a snapshot of the main buffer.\n"); | 2674 | seq_printf(m, "# Takes a snapshot of the main buffer.\n"); |
@@ -2420,6 +2676,35 @@ static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) | |||
2420 | seq_printf(m, "# (Doesn't have to be '2' works with any number that\n"); | 2676 | seq_printf(m, "# (Doesn't have to be '2' works with any number that\n"); |
2421 | seq_printf(m, "# is not a '0' or '1')\n"); | 2677 | seq_printf(m, "# is not a '0' or '1')\n"); |
2422 | } | 2678 | } |
2679 | |||
2680 | static void show_snapshot_percpu_help(struct seq_file *m) | ||
2681 | { | ||
2682 | seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n"); | ||
2683 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP | ||
2684 | seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); | ||
2685 | seq_printf(m, "# Takes a snapshot of the main buffer for this cpu.\n"); | ||
2686 | #else | ||
2687 | seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n"); | ||
2688 | seq_printf(m, "# Must use main snapshot file to allocate.\n"); | ||
2689 | #endif | ||
2690 | seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"); | ||
2691 | seq_printf(m, "# (Doesn't have to be '2' works with any number that\n"); | ||
2692 | seq_printf(m, "# is not a '0' or '1')\n"); | ||
2693 | } | ||
2694 | |||
2695 | static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) | ||
2696 | { | ||
2697 | if (iter->tr->allocated_snapshot) | ||
2698 | seq_printf(m, "#\n# * Snapshot is allocated *\n#\n"); | ||
2699 | else | ||
2700 | seq_printf(m, "#\n# * Snapshot is freed *\n#\n"); | ||
2701 | |||
2702 | seq_printf(m, "# Snapshot commands:\n"); | ||
2703 | if (iter->cpu_file == RING_BUFFER_ALL_CPUS) | ||
2704 | show_snapshot_main_help(m); | ||
2705 | else | ||
2706 | show_snapshot_percpu_help(m); | ||
2707 | } | ||
2423 | #else | 2708 | #else |
2424 | /* Should never be called */ | 2709 | /* Should never be called */ |
2425 | static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { } | 2710 | static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { } |
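Editor's note: the help text above now distinguishes the main snapshot file from the per-CPU ones. From inside the kernel the same facility is driven by tracing_snapshot(), which only captures anything once the spare buffer exists (for example after tracing_snapshot_alloc() has run from a context where GFP_KERNEL allocations are allowed). A hedged sketch of a debug hook; capture_on_anomaly is hypothetical:

        #include <linux/kernel.h>

        static void capture_on_anomaly(bool anomaly)
        {
                /* swaps the live buffer with the spare one, preserving the
                 * events that led up to this point; a no-op with a warning
                 * if the spare buffer has not been allocated yet */
                if (anomaly)
                        tracing_snapshot();
        }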
@@ -2479,7 +2764,8 @@ static const struct seq_operations tracer_seq_ops = { | |||
2479 | static struct trace_iterator * | 2764 | static struct trace_iterator * |
2480 | __tracing_open(struct inode *inode, struct file *file, bool snapshot) | 2765 | __tracing_open(struct inode *inode, struct file *file, bool snapshot) |
2481 | { | 2766 | { |
2482 | long cpu_file = (long) inode->i_private; | 2767 | struct trace_cpu *tc = inode->i_private; |
2768 | struct trace_array *tr = tc->tr; | ||
2483 | struct trace_iterator *iter; | 2769 | struct trace_iterator *iter; |
2484 | int cpu; | 2770 | int cpu; |
2485 | 2771 | ||
@@ -2504,26 +2790,31 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) | |||
2504 | if (!iter->trace) | 2790 | if (!iter->trace) |
2505 | goto fail; | 2791 | goto fail; |
2506 | 2792 | ||
2507 | *iter->trace = *current_trace; | 2793 | *iter->trace = *tr->current_trace; |
2508 | 2794 | ||
2509 | if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) | 2795 | if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) |
2510 | goto fail; | 2796 | goto fail; |
2511 | 2797 | ||
2512 | if (current_trace->print_max || snapshot) | 2798 | iter->tr = tr; |
2513 | iter->tr = &max_tr; | 2799 | |
2800 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
2801 | /* Currently only the top directory has a snapshot */ | ||
2802 | if (tr->current_trace->print_max || snapshot) | ||
2803 | iter->trace_buffer = &tr->max_buffer; | ||
2514 | else | 2804 | else |
2515 | iter->tr = &global_trace; | 2805 | #endif |
2806 | iter->trace_buffer = &tr->trace_buffer; | ||
2516 | iter->snapshot = snapshot; | 2807 | iter->snapshot = snapshot; |
2517 | iter->pos = -1; | 2808 | iter->pos = -1; |
2518 | mutex_init(&iter->mutex); | 2809 | mutex_init(&iter->mutex); |
2519 | iter->cpu_file = cpu_file; | 2810 | iter->cpu_file = tc->cpu; |
2520 | 2811 | ||
2521 | /* Notify the tracer early; before we stop tracing. */ | 2812 | /* Notify the tracer early; before we stop tracing. */ |
2522 | if (iter->trace && iter->trace->open) | 2813 | if (iter->trace && iter->trace->open) |
2523 | iter->trace->open(iter); | 2814 | iter->trace->open(iter); |
2524 | 2815 | ||
2525 | /* Annotate start of buffers if we had overruns */ | 2816 | /* Annotate start of buffers if we had overruns */ |
2526 | if (ring_buffer_overruns(iter->tr->buffer)) | 2817 | if (ring_buffer_overruns(iter->trace_buffer->buffer)) |
2527 | iter->iter_flags |= TRACE_FILE_ANNOTATE; | 2818 | iter->iter_flags |= TRACE_FILE_ANNOTATE; |
2528 | 2819 | ||
2529 | /* Output in nanoseconds only if we are using a clock in nanoseconds. */ | 2820 | /* Output in nanoseconds only if we are using a clock in nanoseconds. */ |
@@ -2532,12 +2823,12 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) | |||
2532 | 2823 | ||
2533 | /* stop the trace while dumping if we are not opening "snapshot" */ | 2824 | /* stop the trace while dumping if we are not opening "snapshot" */ |
2534 | if (!iter->snapshot) | 2825 | if (!iter->snapshot) |
2535 | tracing_stop(); | 2826 | tracing_stop_tr(tr); |
2536 | 2827 | ||
2537 | if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { | 2828 | if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { |
2538 | for_each_tracing_cpu(cpu) { | 2829 | for_each_tracing_cpu(cpu) { |
2539 | iter->buffer_iter[cpu] = | 2830 | iter->buffer_iter[cpu] = |
2540 | ring_buffer_read_prepare(iter->tr->buffer, cpu); | 2831 | ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu); |
2541 | } | 2832 | } |
2542 | ring_buffer_read_prepare_sync(); | 2833 | ring_buffer_read_prepare_sync(); |
2543 | for_each_tracing_cpu(cpu) { | 2834 | for_each_tracing_cpu(cpu) { |
@@ -2547,12 +2838,14 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) | |||
2547 | } else { | 2838 | } else { |
2548 | cpu = iter->cpu_file; | 2839 | cpu = iter->cpu_file; |
2549 | iter->buffer_iter[cpu] = | 2840 | iter->buffer_iter[cpu] = |
2550 | ring_buffer_read_prepare(iter->tr->buffer, cpu); | 2841 | ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu); |
2551 | ring_buffer_read_prepare_sync(); | 2842 | ring_buffer_read_prepare_sync(); |
2552 | ring_buffer_read_start(iter->buffer_iter[cpu]); | 2843 | ring_buffer_read_start(iter->buffer_iter[cpu]); |
2553 | tracing_iter_reset(iter, cpu); | 2844 | tracing_iter_reset(iter, cpu); |
2554 | } | 2845 | } |
2555 | 2846 | ||
2847 | tr->ref++; | ||
2848 | |||
2556 | mutex_unlock(&trace_types_lock); | 2849 | mutex_unlock(&trace_types_lock); |
2557 | 2850 | ||
2558 | return iter; | 2851 | return iter; |
@@ -2579,14 +2872,20 @@ static int tracing_release(struct inode *inode, struct file *file) | |||
2579 | { | 2872 | { |
2580 | struct seq_file *m = file->private_data; | 2873 | struct seq_file *m = file->private_data; |
2581 | struct trace_iterator *iter; | 2874 | struct trace_iterator *iter; |
2875 | struct trace_array *tr; | ||
2582 | int cpu; | 2876 | int cpu; |
2583 | 2877 | ||
2584 | if (!(file->f_mode & FMODE_READ)) | 2878 | if (!(file->f_mode & FMODE_READ)) |
2585 | return 0; | 2879 | return 0; |
2586 | 2880 | ||
2587 | iter = m->private; | 2881 | iter = m->private; |
2882 | tr = iter->tr; | ||
2588 | 2883 | ||
2589 | mutex_lock(&trace_types_lock); | 2884 | mutex_lock(&trace_types_lock); |
2885 | |||
2886 | WARN_ON(!tr->ref); | ||
2887 | tr->ref--; | ||
2888 | |||
2590 | for_each_tracing_cpu(cpu) { | 2889 | for_each_tracing_cpu(cpu) { |
2591 | if (iter->buffer_iter[cpu]) | 2890 | if (iter->buffer_iter[cpu]) |
2592 | ring_buffer_read_finish(iter->buffer_iter[cpu]); | 2891 | ring_buffer_read_finish(iter->buffer_iter[cpu]); |
@@ -2597,7 +2896,7 @@ static int tracing_release(struct inode *inode, struct file *file) | |||
2597 | 2896 | ||
2598 | if (!iter->snapshot) | 2897 | if (!iter->snapshot) |
2599 | /* reenable tracing if it was previously enabled */ | 2898 | /* reenable tracing if it was previously enabled */ |
2600 | tracing_start(); | 2899 | tracing_start_tr(tr); |
2601 | mutex_unlock(&trace_types_lock); | 2900 | mutex_unlock(&trace_types_lock); |
2602 | 2901 | ||
2603 | mutex_destroy(&iter->mutex); | 2902 | mutex_destroy(&iter->mutex); |
@@ -2616,12 +2915,13 @@ static int tracing_open(struct inode *inode, struct file *file) | |||
2616 | /* If this file was open for write, then erase contents */ | 2915 | /* If this file was open for write, then erase contents */ |
2617 | if ((file->f_mode & FMODE_WRITE) && | 2916 | if ((file->f_mode & FMODE_WRITE) && |
2618 | (file->f_flags & O_TRUNC)) { | 2917 | (file->f_flags & O_TRUNC)) { |
2619 | long cpu = (long) inode->i_private; | 2918 | struct trace_cpu *tc = inode->i_private; |
2919 | struct trace_array *tr = tc->tr; | ||
2620 | 2920 | ||
2621 | if (cpu == TRACE_PIPE_ALL_CPU) | 2921 | if (tc->cpu == RING_BUFFER_ALL_CPUS) |
2622 | tracing_reset_online_cpus(&global_trace); | 2922 | tracing_reset_online_cpus(&tr->trace_buffer); |
2623 | else | 2923 | else |
2624 | tracing_reset(&global_trace, cpu); | 2924 | tracing_reset(&tr->trace_buffer, tc->cpu); |
2625 | } | 2925 | } |
2626 | 2926 | ||
2627 | if (file->f_mode & FMODE_READ) { | 2927 | if (file->f_mode & FMODE_READ) { |
@@ -2768,8 +3068,9 @@ static ssize_t | |||
2768 | tracing_cpumask_write(struct file *filp, const char __user *ubuf, | 3068 | tracing_cpumask_write(struct file *filp, const char __user *ubuf, |
2769 | size_t count, loff_t *ppos) | 3069 | size_t count, loff_t *ppos) |
2770 | { | 3070 | { |
2771 | int err, cpu; | 3071 | struct trace_array *tr = filp->private_data; |
2772 | cpumask_var_t tracing_cpumask_new; | 3072 | cpumask_var_t tracing_cpumask_new; |
3073 | int err, cpu; | ||
2773 | 3074 | ||
2774 | if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) | 3075 | if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) |
2775 | return -ENOMEM; | 3076 | return -ENOMEM; |
@@ -2789,13 +3090,13 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, | |||
2789 | */ | 3090 | */ |
2790 | if (cpumask_test_cpu(cpu, tracing_cpumask) && | 3091 | if (cpumask_test_cpu(cpu, tracing_cpumask) && |
2791 | !cpumask_test_cpu(cpu, tracing_cpumask_new)) { | 3092 | !cpumask_test_cpu(cpu, tracing_cpumask_new)) { |
2792 | atomic_inc(&global_trace.data[cpu]->disabled); | 3093 | atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled); |
2793 | ring_buffer_record_disable_cpu(global_trace.buffer, cpu); | 3094 | ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu); |
2794 | } | 3095 | } |
2795 | if (!cpumask_test_cpu(cpu, tracing_cpumask) && | 3096 | if (!cpumask_test_cpu(cpu, tracing_cpumask) && |
2796 | cpumask_test_cpu(cpu, tracing_cpumask_new)) { | 3097 | cpumask_test_cpu(cpu, tracing_cpumask_new)) { |
2797 | atomic_dec(&global_trace.data[cpu]->disabled); | 3098 | atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled); |
2798 | ring_buffer_record_enable_cpu(global_trace.buffer, cpu); | 3099 | ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu); |
2799 | } | 3100 | } |
2800 | } | 3101 | } |
2801 | arch_spin_unlock(&ftrace_max_lock); | 3102 | arch_spin_unlock(&ftrace_max_lock); |
@@ -2824,12 +3125,13 @@ static const struct file_operations tracing_cpumask_fops = { | |||
2824 | static int tracing_trace_options_show(struct seq_file *m, void *v) | 3125 | static int tracing_trace_options_show(struct seq_file *m, void *v) |
2825 | { | 3126 | { |
2826 | struct tracer_opt *trace_opts; | 3127 | struct tracer_opt *trace_opts; |
3128 | struct trace_array *tr = m->private; | ||
2827 | u32 tracer_flags; | 3129 | u32 tracer_flags; |
2828 | int i; | 3130 | int i; |
2829 | 3131 | ||
2830 | mutex_lock(&trace_types_lock); | 3132 | mutex_lock(&trace_types_lock); |
2831 | tracer_flags = current_trace->flags->val; | 3133 | tracer_flags = tr->current_trace->flags->val; |
2832 | trace_opts = current_trace->flags->opts; | 3134 | trace_opts = tr->current_trace->flags->opts; |
2833 | 3135 | ||
2834 | for (i = 0; trace_options[i]; i++) { | 3136 | for (i = 0; trace_options[i]; i++) { |
2835 | if (trace_flags & (1 << i)) | 3137 | if (trace_flags & (1 << i)) |
@@ -2893,15 +3195,15 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set) | |||
2893 | return 0; | 3195 | return 0; |
2894 | } | 3196 | } |
2895 | 3197 | ||
2896 | int set_tracer_flag(unsigned int mask, int enabled) | 3198 | int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) |
2897 | { | 3199 | { |
2898 | /* do nothing if flag is already set */ | 3200 | /* do nothing if flag is already set */ |
2899 | if (!!(trace_flags & mask) == !!enabled) | 3201 | if (!!(trace_flags & mask) == !!enabled) |
2900 | return 0; | 3202 | return 0; |
2901 | 3203 | ||
2902 | /* Give the tracer a chance to approve the change */ | 3204 | /* Give the tracer a chance to approve the change */ |
2903 | if (current_trace->flag_changed) | 3205 | if (tr->current_trace->flag_changed) |
2904 | if (current_trace->flag_changed(current_trace, mask, !!enabled)) | 3206 | if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled)) |
2905 | return -EINVAL; | 3207 | return -EINVAL; |
2906 | 3208 | ||
2907 | if (enabled) | 3209 | if (enabled) |
@@ -2913,9 +3215,9 @@ int set_tracer_flag(unsigned int mask, int enabled) | |||
2913 | trace_event_enable_cmd_record(enabled); | 3215 | trace_event_enable_cmd_record(enabled); |
2914 | 3216 | ||
2915 | if (mask == TRACE_ITER_OVERWRITE) { | 3217 | if (mask == TRACE_ITER_OVERWRITE) { |
2916 | ring_buffer_change_overwrite(global_trace.buffer, enabled); | 3218 | ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled); |
2917 | #ifdef CONFIG_TRACER_MAX_TRACE | 3219 | #ifdef CONFIG_TRACER_MAX_TRACE |
2918 | ring_buffer_change_overwrite(max_tr.buffer, enabled); | 3220 | ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled); |
2919 | #endif | 3221 | #endif |
2920 | } | 3222 | } |
2921 | 3223 | ||
@@ -2925,7 +3227,7 @@ int set_tracer_flag(unsigned int mask, int enabled) | |||
2925 | return 0; | 3227 | return 0; |
2926 | } | 3228 | } |
2927 | 3229 | ||
2928 | static int trace_set_options(char *option) | 3230 | static int trace_set_options(struct trace_array *tr, char *option) |
2929 | { | 3231 | { |
2930 | char *cmp; | 3232 | char *cmp; |
2931 | int neg = 0; | 3233 | int neg = 0; |
@@ -2943,14 +3245,14 @@ static int trace_set_options(char *option) | |||
2943 | 3245 | ||
2944 | for (i = 0; trace_options[i]; i++) { | 3246 | for (i = 0; trace_options[i]; i++) { |
2945 | if (strcmp(cmp, trace_options[i]) == 0) { | 3247 | if (strcmp(cmp, trace_options[i]) == 0) { |
2946 | ret = set_tracer_flag(1 << i, !neg); | 3248 | ret = set_tracer_flag(tr, 1 << i, !neg); |
2947 | break; | 3249 | break; |
2948 | } | 3250 | } |
2949 | } | 3251 | } |
2950 | 3252 | ||
2951 | /* If no option could be set, test the specific tracer options */ | 3253 | /* If no option could be set, test the specific tracer options */ |
2952 | if (!trace_options[i]) | 3254 | if (!trace_options[i]) |
2953 | ret = set_tracer_option(current_trace, cmp, neg); | 3255 | ret = set_tracer_option(tr->current_trace, cmp, neg); |
2954 | 3256 | ||
2955 | mutex_unlock(&trace_types_lock); | 3257 | mutex_unlock(&trace_types_lock); |
2956 | 3258 | ||
@@ -2961,6 +3263,8 @@ static ssize_t | |||
2961 | tracing_trace_options_write(struct file *filp, const char __user *ubuf, | 3263 | tracing_trace_options_write(struct file *filp, const char __user *ubuf, |
2962 | size_t cnt, loff_t *ppos) | 3264 | size_t cnt, loff_t *ppos) |
2963 | { | 3265 | { |
3266 | struct seq_file *m = filp->private_data; | ||
3267 | struct trace_array *tr = m->private; | ||
2964 | char buf[64]; | 3268 | char buf[64]; |
2965 | int ret; | 3269 | int ret; |
2966 | 3270 | ||
@@ -2972,7 +3276,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, | |||
2972 | 3276 | ||
2973 | buf[cnt] = 0; | 3277 | buf[cnt] = 0; |
2974 | 3278 | ||
2975 | ret = trace_set_options(buf); | 3279 | ret = trace_set_options(tr, buf); |
2976 | if (ret < 0) | 3280 | if (ret < 0) |
2977 | return ret; | 3281 | return ret; |
2978 | 3282 | ||
@@ -2985,7 +3289,8 @@ static int tracing_trace_options_open(struct inode *inode, struct file *file) | |||
2985 | { | 3289 | { |
2986 | if (tracing_disabled) | 3290 | if (tracing_disabled) |
2987 | return -ENODEV; | 3291 | return -ENODEV; |
2988 | return single_open(file, tracing_trace_options_show, NULL); | 3292 | |
3293 | return single_open(file, tracing_trace_options_show, inode->i_private); | ||
2989 | } | 3294 | } |
2990 | 3295 | ||
2991 | static const struct file_operations tracing_iter_fops = { | 3296 | static const struct file_operations tracing_iter_fops = { |
@@ -2998,20 +3303,84 @@ static const struct file_operations tracing_iter_fops = { | |||
2998 | 3303 | ||
2999 | static const char readme_msg[] = | 3304 | static const char readme_msg[] = |
3000 | "tracing mini-HOWTO:\n\n" | 3305 | "tracing mini-HOWTO:\n\n" |
3001 | "# mount -t debugfs nodev /sys/kernel/debug\n\n" | 3306 | "# echo 0 > tracing_on : quick way to disable tracing\n" |
3002 | "# cat /sys/kernel/debug/tracing/available_tracers\n" | 3307 | "# echo 1 > tracing_on : quick way to re-enable tracing\n\n" |
3003 | "wakeup wakeup_rt preemptirqsoff preemptoff irqsoff function nop\n\n" | 3308 | " Important files:\n" |
3004 | "# cat /sys/kernel/debug/tracing/current_tracer\n" | 3309 | " trace\t\t\t- The static contents of the buffer\n" |
3005 | "nop\n" | 3310 | "\t\t\t To clear the buffer write into this file: echo > trace\n" |
3006 | "# echo wakeup > /sys/kernel/debug/tracing/current_tracer\n" | 3311 | " trace_pipe\t\t- A consuming read to see the contents of the buffer\n" |
3007 | "# cat /sys/kernel/debug/tracing/current_tracer\n" | 3312 | " current_tracer\t- function and latency tracers\n" |
3008 | "wakeup\n" | 3313 | " available_tracers\t- list of configured tracers for current_tracer\n" |
3009 | "# cat /sys/kernel/debug/tracing/trace_options\n" | 3314 | " buffer_size_kb\t- view and modify size of per cpu buffer\n" |
3010 | "noprint-parent nosym-offset nosym-addr noverbose\n" | 3315 | " buffer_total_size_kb - view total size of all cpu buffers\n\n" |
3011 | "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" | 3316 | " trace_clock\t\t-change the clock used to order events\n" |
3012 | "# echo 1 > /sys/kernel/debug/tracing/tracing_on\n" | 3317 | " local: Per cpu clock but may not be synced across CPUs\n" |
3013 | "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n" | 3318 | " global: Synced across CPUs but slows tracing down.\n" |
3014 | "# echo 0 > /sys/kernel/debug/tracing/tracing_on\n" | 3319 | " counter: Not a clock, but just an increment\n" |
3320 | " uptime: Jiffy counter from time of boot\n" | ||
3321 | " perf: Same clock that perf events use\n" | ||
3322 | #ifdef CONFIG_X86_64 | ||
3323 | " x86-tsc: TSC cycle counter\n" | ||
3324 | #endif | ||
3325 | "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" | ||
3326 | " tracing_cpumask\t- Limit which CPUs to trace\n" | ||
3327 | " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" | ||
3328 | "\t\t\t Remove sub-buffer with rmdir\n" | ||
3329 | " trace_options\t\t- Set format or modify how tracing happens\n" | ||
3330 | "\t\t\t Disable an option by adding a suffix 'no' to the option name\n" | ||
3331 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
3332 | "\n available_filter_functions - list of functions that can be filtered on\n" | ||
3333 | " set_ftrace_filter\t- echo function name in here to only trace these functions\n" | ||
3334 | " accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" | ||
3335 | " modules: Can select a group via module\n" | ||
3336 | " Format: :mod:<module-name>\n" | ||
3337 | " example: echo :mod:ext3 > set_ftrace_filter\n" | ||
3338 | " triggers: a command to perform when function is hit\n" | ||
3339 | " Format: <function>:<trigger>[:count]\n" | ||
3340 | " trigger: traceon, traceoff\n" | ||
3341 | " enable_event:<system>:<event>\n" | ||
3342 | " disable_event:<system>:<event>\n" | ||
3343 | #ifdef CONFIG_STACKTRACE | ||
3344 | " stacktrace\n" | ||
3345 | #endif | ||
3346 | #ifdef CONFIG_TRACER_SNAPSHOT | ||
3347 | " snapshot\n" | ||
3348 | #endif | ||
3349 | " example: echo do_fault:traceoff > set_ftrace_filter\n" | ||
3350 | " echo do_trap:traceoff:3 > set_ftrace_filter\n" | ||
3351 | " The first one will disable tracing every time do_fault is hit\n" | ||
3352 | " The second will disable tracing at most 3 times when do_trap is hit\n" | ||
3353 | " The first time do trap is hit and it disables tracing, the counter\n" | ||
3354 | " will decrement to 2. If tracing is already disabled, the counter\n" | ||
3355 | " will not decrement. It only decrements when the trigger did work\n" | ||
3356 | " To remove trigger without count:\n" | ||
3357 | " echo '!<function>:<trigger> > set_ftrace_filter\n" | ||
3358 | " To remove trigger with a count:\n" | ||
3359 | " echo '!<function>:<trigger>:0 > set_ftrace_filter\n" | ||
3360 | " set_ftrace_notrace\t- echo function name in here to never trace.\n" | ||
3361 | " accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" | ||
3362 | " modules: Can select a group via module command :mod:\n" | ||
3363 | " Does not accept triggers\n" | ||
3364 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
3365 | #ifdef CONFIG_FUNCTION_TRACER | ||
3366 | " set_ftrace_pid\t- Write pid(s) to only function trace those pids (function)\n" | ||
3367 | #endif | ||
3368 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
3369 | " set_graph_function\t- Trace the nested calls of a function (function_graph)\n" | ||
3370 | " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n" | ||
3371 | #endif | ||
3372 | #ifdef CONFIG_TRACER_SNAPSHOT | ||
3373 | "\n snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n" | ||
3374 | "\t\t\t Read the contents for more information\n" | ||
3375 | #endif | ||
3376 | #ifdef CONFIG_STACKTRACE | ||
3377 | " stack_trace\t\t- Shows the max stack trace when active\n" | ||
3378 | " stack_max_size\t- Shows current max stack size that was traced\n" | ||
3379 | "\t\t\t Write into this file to reset the max size (trigger a new trace)\n" | ||
3380 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
3381 | " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n" | ||
3382 | #endif | ||
3383 | #endif /* CONFIG_STACKTRACE */ | ||
3015 | ; | 3384 | ; |
3016 | 3385 | ||
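The rewritten mini-HOWTO above leans on tracing_on, trace and trace_pipe instead of spelling out full command sequences. A small C sketch of the "quick disable/enable, then dump" flow it describes, with the usual /sys/kernel/debug/tracing mount point assumed:

#include <stdio.h>
#include <unistd.h>

#define TRACE_DIR "/sys/kernel/debug/tracing/"

static void write_str(const char *file, const char *val)
{
        char path[256];
        FILE *f;

        snprintf(path, sizeof(path), TRACE_DIR "%s", file);
        f = fopen(path, "w");
        if (!f) {
                perror(path);
                return;
        }
        fputs(val, f);
        fclose(f);
}

int main(void)
{
        char line[1024];
        FILE *f;

        write_str("trace", "");        /* clear the buffer: echo > trace */
        write_str("tracing_on", "1");  /* quick way to re-enable tracing */
        sleep(1);
        write_str("tracing_on", "0");  /* quick way to disable tracing */

        f = fopen(TRACE_DIR "trace", "r");
        if (!f) {
                perror("trace");
                return 1;
        }
        while (fgets(line, sizeof(line), f))   /* static contents of the buffer */
                fputs(line, stdout);
        fclose(f);
        return 0;
}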
3017 | static ssize_t | 3386 | static ssize_t |
@@ -3083,11 +3452,12 @@ static ssize_t | |||
3083 | tracing_set_trace_read(struct file *filp, char __user *ubuf, | 3452 | tracing_set_trace_read(struct file *filp, char __user *ubuf, |
3084 | size_t cnt, loff_t *ppos) | 3453 | size_t cnt, loff_t *ppos) |
3085 | { | 3454 | { |
3455 | struct trace_array *tr = filp->private_data; | ||
3086 | char buf[MAX_TRACER_SIZE+2]; | 3456 | char buf[MAX_TRACER_SIZE+2]; |
3087 | int r; | 3457 | int r; |
3088 | 3458 | ||
3089 | mutex_lock(&trace_types_lock); | 3459 | mutex_lock(&trace_types_lock); |
3090 | r = sprintf(buf, "%s\n", current_trace->name); | 3460 | r = sprintf(buf, "%s\n", tr->current_trace->name); |
3091 | mutex_unlock(&trace_types_lock); | 3461 | mutex_unlock(&trace_types_lock); |
3092 | 3462 | ||
3093 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | 3463 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); |
@@ -3095,43 +3465,48 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, | |||
3095 | 3465 | ||
3096 | int tracer_init(struct tracer *t, struct trace_array *tr) | 3466 | int tracer_init(struct tracer *t, struct trace_array *tr) |
3097 | { | 3467 | { |
3098 | tracing_reset_online_cpus(tr); | 3468 | tracing_reset_online_cpus(&tr->trace_buffer); |
3099 | return t->init(tr); | 3469 | return t->init(tr); |
3100 | } | 3470 | } |
3101 | 3471 | ||
3102 | static void set_buffer_entries(struct trace_array *tr, unsigned long val) | 3472 | static void set_buffer_entries(struct trace_buffer *buf, unsigned long val) |
3103 | { | 3473 | { |
3104 | int cpu; | 3474 | int cpu; |
3475 | |||
3105 | for_each_tracing_cpu(cpu) | 3476 | for_each_tracing_cpu(cpu) |
3106 | tr->data[cpu]->entries = val; | 3477 | per_cpu_ptr(buf->data, cpu)->entries = val; |
3107 | } | 3478 | } |
3108 | 3479 | ||
3480 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
3109 | /* resize @tr's buffer to the size of @size_tr's entries */ | 3481 | /* resize @tr's buffer to the size of @size_tr's entries */ |
3110 | static int resize_buffer_duplicate_size(struct trace_array *tr, | 3482 | static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, |
3111 | struct trace_array *size_tr, int cpu_id) | 3483 | struct trace_buffer *size_buf, int cpu_id) |
3112 | { | 3484 | { |
3113 | int cpu, ret = 0; | 3485 | int cpu, ret = 0; |
3114 | 3486 | ||
3115 | if (cpu_id == RING_BUFFER_ALL_CPUS) { | 3487 | if (cpu_id == RING_BUFFER_ALL_CPUS) { |
3116 | for_each_tracing_cpu(cpu) { | 3488 | for_each_tracing_cpu(cpu) { |
3117 | ret = ring_buffer_resize(tr->buffer, | 3489 | ret = ring_buffer_resize(trace_buf->buffer, |
3118 | size_tr->data[cpu]->entries, cpu); | 3490 | per_cpu_ptr(size_buf->data, cpu)->entries, cpu); |
3119 | if (ret < 0) | 3491 | if (ret < 0) |
3120 | break; | 3492 | break; |
3121 | tr->data[cpu]->entries = size_tr->data[cpu]->entries; | 3493 | per_cpu_ptr(trace_buf->data, cpu)->entries = |
3494 | per_cpu_ptr(size_buf->data, cpu)->entries; | ||
3122 | } | 3495 | } |
3123 | } else { | 3496 | } else { |
3124 | ret = ring_buffer_resize(tr->buffer, | 3497 | ret = ring_buffer_resize(trace_buf->buffer, |
3125 | size_tr->data[cpu_id]->entries, cpu_id); | 3498 | per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id); |
3126 | if (ret == 0) | 3499 | if (ret == 0) |
3127 | tr->data[cpu_id]->entries = | 3500 | per_cpu_ptr(trace_buf->data, cpu_id)->entries = |
3128 | size_tr->data[cpu_id]->entries; | 3501 | per_cpu_ptr(size_buf->data, cpu_id)->entries; |
3129 | } | 3502 | } |
3130 | 3503 | ||
3131 | return ret; | 3504 | return ret; |
3132 | } | 3505 | } |
3506 | #endif /* CONFIG_TRACER_MAX_TRACE */ | ||
3133 | 3507 | ||
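resize_buffer_duplicate_size() copies per-CPU entry counts from one trace_buffer into another, stops at the first failed resize, and commits a count only after its resize succeeded. A toy userspace model of that pattern; demo_buffer, demo_resize and duplicate_size are made up for illustration and this is not kernel code:

#include <stdio.h>

#define NR_CPUS 4

struct demo_buffer {
        unsigned long entries[NR_CPUS];         /* stands in for per-CPU data->entries */
};

static int demo_resize(struct demo_buffer *buf, unsigned long size, int cpu)
{
        (void)buf;
        (void)cpu;
        return size > 1024 ? -1 : 0;            /* pretend resizes above 1024 fail */
}

static int duplicate_size(struct demo_buffer *dst, const struct demo_buffer *src)
{
        int cpu, ret = 0;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                ret = demo_resize(dst, src->entries[cpu], cpu);
                if (ret < 0)
                        break;                  /* later CPUs keep their old size */
                dst->entries[cpu] = src->entries[cpu];
        }
        return ret;
}

int main(void)
{
        struct demo_buffer a = { { 256, 256, 512, 512 } };
        struct demo_buffer b = { { 0 } };
        int ret = duplicate_size(&b, &a);

        printf("ret=%d cpu2=%lu\n", ret, b.entries[2]);
        return 0;
}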
3134 | static int __tracing_resize_ring_buffer(unsigned long size, int cpu) | 3508 | static int __tracing_resize_ring_buffer(struct trace_array *tr, |
3509 | unsigned long size, int cpu) | ||
3135 | { | 3510 | { |
3136 | int ret; | 3511 | int ret; |
3137 | 3512 | ||
@@ -3140,23 +3515,25 @@ static int __tracing_resize_ring_buffer(unsigned long size, int cpu) | |||
3140 | * we use the size that was given, and we can forget about | 3515 | * we use the size that was given, and we can forget about |
3141 | * expanding it later. | 3516 | * expanding it later. |
3142 | */ | 3517 | */ |
3143 | ring_buffer_expanded = 1; | 3518 | ring_buffer_expanded = true; |
3144 | 3519 | ||
3145 | /* May be called before buffers are initialized */ | 3520 | /* May be called before buffers are initialized */ |
3146 | if (!global_trace.buffer) | 3521 | if (!tr->trace_buffer.buffer) |
3147 | return 0; | 3522 | return 0; |
3148 | 3523 | ||
3149 | ret = ring_buffer_resize(global_trace.buffer, size, cpu); | 3524 | ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu); |
3150 | if (ret < 0) | 3525 | if (ret < 0) |
3151 | return ret; | 3526 | return ret; |
3152 | 3527 | ||
3153 | if (!current_trace->use_max_tr) | 3528 | #ifdef CONFIG_TRACER_MAX_TRACE |
3529 | if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) || | ||
3530 | !tr->current_trace->use_max_tr) | ||
3154 | goto out; | 3531 | goto out; |
3155 | 3532 | ||
3156 | ret = ring_buffer_resize(max_tr.buffer, size, cpu); | 3533 | ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); |
3157 | if (ret < 0) { | 3534 | if (ret < 0) { |
3158 | int r = resize_buffer_duplicate_size(&global_trace, | 3535 | int r = resize_buffer_duplicate_size(&tr->trace_buffer, |
3159 | &global_trace, cpu); | 3536 | &tr->trace_buffer, cpu); |
3160 | if (r < 0) { | 3537 | if (r < 0) { |
3161 | /* | 3538 | /* |
3162 | * AARGH! We are left with different | 3539 | * AARGH! We are left with different |
@@ -3179,20 +3556,23 @@ static int __tracing_resize_ring_buffer(unsigned long size, int cpu) | |||
3179 | } | 3556 | } |
3180 | 3557 | ||
3181 | if (cpu == RING_BUFFER_ALL_CPUS) | 3558 | if (cpu == RING_BUFFER_ALL_CPUS) |
3182 | set_buffer_entries(&max_tr, size); | 3559 | set_buffer_entries(&tr->max_buffer, size); |
3183 | else | 3560 | else |
3184 | max_tr.data[cpu]->entries = size; | 3561 | per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size; |
3185 | 3562 | ||
3186 | out: | 3563 | out: |
3564 | #endif /* CONFIG_TRACER_MAX_TRACE */ | ||
3565 | |||
3187 | if (cpu == RING_BUFFER_ALL_CPUS) | 3566 | if (cpu == RING_BUFFER_ALL_CPUS) |
3188 | set_buffer_entries(&global_trace, size); | 3567 | set_buffer_entries(&tr->trace_buffer, size); |
3189 | else | 3568 | else |
3190 | global_trace.data[cpu]->entries = size; | 3569 | per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size; |
3191 | 3570 | ||
3192 | return ret; | 3571 | return ret; |
3193 | } | 3572 | } |
3194 | 3573 | ||
3195 | static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id) | 3574 | static ssize_t tracing_resize_ring_buffer(struct trace_array *tr, |
3575 | unsigned long size, int cpu_id) | ||
3196 | { | 3576 | { |
3197 | int ret = size; | 3577 | int ret = size; |
3198 | 3578 | ||
@@ -3206,7 +3586,7 @@ static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id) | |||
3206 | } | 3586 | } |
3207 | } | 3587 | } |
3208 | 3588 | ||
3209 | ret = __tracing_resize_ring_buffer(size, cpu_id); | 3589 | ret = __tracing_resize_ring_buffer(tr, size, cpu_id); |
3210 | if (ret < 0) | 3590 | if (ret < 0) |
3211 | ret = -ENOMEM; | 3591 | ret = -ENOMEM; |
3212 | 3592 | ||
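From user space this resize path is reached through buffer_size_kb and reported by buffer_total_size_kb, both listed in the help text above. A hedged sketch, with the debugfs mount path assumed:

#include <stdio.h>

#define TRACE_DIR "/sys/kernel/debug/tracing/"

int main(void)
{
        unsigned long total_kb = 0;
        FILE *f;

        /* ask for 1408 KB per CPU; ends up in tracing_resize_ring_buffer() */
        f = fopen(TRACE_DIR "buffer_size_kb", "w");
        if (!f) {
                perror("buffer_size_kb");
                return 1;
        }
        fprintf(f, "1408\n");
        fclose(f);

        f = fopen(TRACE_DIR "buffer_total_size_kb", "r");
        if (!f) {
                perror("buffer_total_size_kb");
                return 1;
        }
        if (fscanf(f, "%lu", &total_kb) == 1)
                printf("total across CPUs: %lu KB\n", total_kb);
        fclose(f);
        return 0;
}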
@@ -3233,7 +3613,7 @@ int tracing_update_buffers(void) | |||
3233 | 3613 | ||
3234 | mutex_lock(&trace_types_lock); | 3614 | mutex_lock(&trace_types_lock); |
3235 | if (!ring_buffer_expanded) | 3615 | if (!ring_buffer_expanded) |
3236 | ret = __tracing_resize_ring_buffer(trace_buf_size, | 3616 | ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size, |
3237 | RING_BUFFER_ALL_CPUS); | 3617 | RING_BUFFER_ALL_CPUS); |
3238 | mutex_unlock(&trace_types_lock); | 3618 | mutex_unlock(&trace_types_lock); |
3239 | 3619 | ||
@@ -3243,7 +3623,7 @@ int tracing_update_buffers(void) | |||
3243 | struct trace_option_dentry; | 3623 | struct trace_option_dentry; |
3244 | 3624 | ||
3245 | static struct trace_option_dentry * | 3625 | static struct trace_option_dentry * |
3246 | create_trace_option_files(struct tracer *tracer); | 3626 | create_trace_option_files(struct trace_array *tr, struct tracer *tracer); |
3247 | 3627 | ||
3248 | static void | 3628 | static void |
3249 | destroy_trace_option_files(struct trace_option_dentry *topts); | 3629 | destroy_trace_option_files(struct trace_option_dentry *topts); |
@@ -3253,13 +3633,15 @@ static int tracing_set_tracer(const char *buf) | |||
3253 | static struct trace_option_dentry *topts; | 3633 | static struct trace_option_dentry *topts; |
3254 | struct trace_array *tr = &global_trace; | 3634 | struct trace_array *tr = &global_trace; |
3255 | struct tracer *t; | 3635 | struct tracer *t; |
3636 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
3256 | bool had_max_tr; | 3637 | bool had_max_tr; |
3638 | #endif | ||
3257 | int ret = 0; | 3639 | int ret = 0; |
3258 | 3640 | ||
3259 | mutex_lock(&trace_types_lock); | 3641 | mutex_lock(&trace_types_lock); |
3260 | 3642 | ||
3261 | if (!ring_buffer_expanded) { | 3643 | if (!ring_buffer_expanded) { |
3262 | ret = __tracing_resize_ring_buffer(trace_buf_size, | 3644 | ret = __tracing_resize_ring_buffer(tr, trace_buf_size, |
3263 | RING_BUFFER_ALL_CPUS); | 3645 | RING_BUFFER_ALL_CPUS); |
3264 | if (ret < 0) | 3646 | if (ret < 0) |
3265 | goto out; | 3647 | goto out; |
@@ -3274,18 +3656,21 @@ static int tracing_set_tracer(const char *buf) | |||
3274 | ret = -EINVAL; | 3656 | ret = -EINVAL; |
3275 | goto out; | 3657 | goto out; |
3276 | } | 3658 | } |
3277 | if (t == current_trace) | 3659 | if (t == tr->current_trace) |
3278 | goto out; | 3660 | goto out; |
3279 | 3661 | ||
3280 | trace_branch_disable(); | 3662 | trace_branch_disable(); |
3281 | 3663 | ||
3282 | current_trace->enabled = false; | 3664 | tr->current_trace->enabled = false; |
3283 | 3665 | ||
3284 | if (current_trace->reset) | 3666 | if (tr->current_trace->reset) |
3285 | current_trace->reset(tr); | 3667 | tr->current_trace->reset(tr); |
3286 | 3668 | ||
3287 | had_max_tr = current_trace->allocated_snapshot; | 3669 | /* Current trace needs to be nop_trace before synchronize_sched */ |
3288 | current_trace = &nop_trace; | 3670 | tr->current_trace = &nop_trace; |
3671 | |||
3672 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
3673 | had_max_tr = tr->allocated_snapshot; | ||
3289 | 3674 | ||
3290 | if (had_max_tr && !t->use_max_tr) { | 3675 | if (had_max_tr && !t->use_max_tr) { |
3291 | /* | 3676 | /* |
@@ -3296,27 +3681,20 @@ static int tracing_set_tracer(const char *buf) | |||
3296 | * so a synchronized_sched() is sufficient. | 3681 | * so a synchronized_sched() is sufficient. |
3297 | */ | 3682 | */ |
3298 | synchronize_sched(); | 3683 | synchronize_sched(); |
3299 | /* | 3684 | free_snapshot(tr); |
3300 | * We don't free the ring buffer. instead, resize it because | ||
3301 | * The max_tr ring buffer has some state (e.g. ring->clock) and | ||
3302 | * we want preserve it. | ||
3303 | */ | ||
3304 | ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS); | ||
3305 | set_buffer_entries(&max_tr, 1); | ||
3306 | tracing_reset_online_cpus(&max_tr); | ||
3307 | current_trace->allocated_snapshot = false; | ||
3308 | } | 3685 | } |
3686 | #endif | ||
3309 | destroy_trace_option_files(topts); | 3687 | destroy_trace_option_files(topts); |
3310 | 3688 | ||
3311 | topts = create_trace_option_files(t); | 3689 | topts = create_trace_option_files(tr, t); |
3690 | |||
3691 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
3312 | if (t->use_max_tr && !had_max_tr) { | 3692 | if (t->use_max_tr && !had_max_tr) { |
3313 | /* we need to make per cpu buffer sizes equivalent */ | 3693 | ret = alloc_snapshot(tr); |
3314 | ret = resize_buffer_duplicate_size(&max_tr, &global_trace, | ||
3315 | RING_BUFFER_ALL_CPUS); | ||
3316 | if (ret < 0) | 3694 | if (ret < 0) |
3317 | goto out; | 3695 | goto out; |
3318 | t->allocated_snapshot = true; | ||
3319 | } | 3696 | } |
3697 | #endif | ||
3320 | 3698 | ||
3321 | if (t->init) { | 3699 | if (t->init) { |
3322 | ret = tracer_init(t, tr); | 3700 | ret = tracer_init(t, tr); |
@@ -3324,8 +3702,8 @@ static int tracing_set_tracer(const char *buf) | |||
3324 | goto out; | 3702 | goto out; |
3325 | } | 3703 | } |
3326 | 3704 | ||
3327 | current_trace = t; | 3705 | tr->current_trace = t; |
3328 | current_trace->enabled = true; | 3706 | tr->current_trace->enabled = true; |
3329 | trace_branch_enable(tr); | 3707 | trace_branch_enable(tr); |
3330 | out: | 3708 | out: |
3331 | mutex_unlock(&trace_types_lock); | 3709 | mutex_unlock(&trace_types_lock); |
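tracing_set_tracer() services writes to current_tracer, now freeing or allocating the snapshot buffer as the chosen tracer requires. A minimal sketch of driving it from user space, using the files named in the removed HOWTO text above and assuming the usual mount point:

#include <stdio.h>

#define TRACE_DIR "/sys/kernel/debug/tracing/"

int main(void)
{
        char buf[256];
        FILE *f;

        f = fopen(TRACE_DIR "available_tracers", "r");
        if (f && fgets(buf, sizeof(buf), f))
                printf("available: %s", buf);
        if (f)
                fclose(f);

        f = fopen(TRACE_DIR "current_tracer", "w");
        if (!f) {
                perror("current_tracer");
                return 1;
        }
        fputs("nop\n", f);              /* handled by tracing_set_tracer() */
        fclose(f);

        f = fopen(TRACE_DIR "current_tracer", "r");
        if (f && fgets(buf, sizeof(buf), f))
                printf("current: %s", buf);
        if (f)
                fclose(f);
        return 0;
}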
@@ -3399,7 +3777,8 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, | |||
3399 | 3777 | ||
3400 | static int tracing_open_pipe(struct inode *inode, struct file *filp) | 3778 | static int tracing_open_pipe(struct inode *inode, struct file *filp) |
3401 | { | 3779 | { |
3402 | long cpu_file = (long) inode->i_private; | 3780 | struct trace_cpu *tc = inode->i_private; |
3781 | struct trace_array *tr = tc->tr; | ||
3403 | struct trace_iterator *iter; | 3782 | struct trace_iterator *iter; |
3404 | int ret = 0; | 3783 | int ret = 0; |
3405 | 3784 | ||
@@ -3424,7 +3803,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
3424 | ret = -ENOMEM; | 3803 | ret = -ENOMEM; |
3425 | goto fail; | 3804 | goto fail; |
3426 | } | 3805 | } |
3427 | *iter->trace = *current_trace; | 3806 | *iter->trace = *tr->current_trace; |
3428 | 3807 | ||
3429 | if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { | 3808 | if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { |
3430 | ret = -ENOMEM; | 3809 | ret = -ENOMEM; |
@@ -3441,8 +3820,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
3441 | if (trace_clocks[trace_clock_id].in_ns) | 3820 | if (trace_clocks[trace_clock_id].in_ns) |
3442 | iter->iter_flags |= TRACE_FILE_TIME_IN_NS; | 3821 | iter->iter_flags |= TRACE_FILE_TIME_IN_NS; |
3443 | 3822 | ||
3444 | iter->cpu_file = cpu_file; | 3823 | iter->cpu_file = tc->cpu; |
3445 | iter->tr = &global_trace; | 3824 | iter->tr = tc->tr; |
3825 | iter->trace_buffer = &tc->tr->trace_buffer; | ||
3446 | mutex_init(&iter->mutex); | 3826 | mutex_init(&iter->mutex); |
3447 | filp->private_data = iter; | 3827 | filp->private_data = iter; |
3448 | 3828 | ||
@@ -3481,24 +3861,28 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) | |||
3481 | } | 3861 | } |
3482 | 3862 | ||
3483 | static unsigned int | 3863 | static unsigned int |
3484 | tracing_poll_pipe(struct file *filp, poll_table *poll_table) | 3864 | trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table) |
3485 | { | 3865 | { |
3486 | struct trace_iterator *iter = filp->private_data; | 3866 | /* Iterators are static, they should be filled or empty */ |
3867 | if (trace_buffer_iter(iter, iter->cpu_file)) | ||
3868 | return POLLIN | POLLRDNORM; | ||
3487 | 3869 | ||
3488 | if (trace_flags & TRACE_ITER_BLOCK) { | 3870 | if (trace_flags & TRACE_ITER_BLOCK) |
3489 | /* | 3871 | /* |
3490 | * Always select as readable when in blocking mode | 3872 | * Always select as readable when in blocking mode |
3491 | */ | 3873 | */ |
3492 | return POLLIN | POLLRDNORM; | 3874 | return POLLIN | POLLRDNORM; |
3493 | } else { | 3875 | else |
3494 | if (!trace_empty(iter)) | 3876 | return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file, |
3495 | return POLLIN | POLLRDNORM; | 3877 | filp, poll_table); |
3496 | poll_wait(filp, &trace_wait, poll_table); | 3878 | } |
3497 | if (!trace_empty(iter)) | ||
3498 | return POLLIN | POLLRDNORM; | ||
3499 | 3879 | ||
3500 | return 0; | 3880 | static unsigned int |
3501 | } | 3881 | tracing_poll_pipe(struct file *filp, poll_table *poll_table) |
3882 | { | ||
3883 | struct trace_iterator *iter = filp->private_data; | ||
3884 | |||
3885 | return trace_poll(iter, filp, poll_table); | ||
3502 | } | 3886 | } |
3503 | 3887 | ||
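With trace_poll() backed by ring_buffer_poll_wait(), a poll() on trace_pipe now sleeps until the ring buffer has data rather than looping over trace_wait. A short userspace sketch, assuming the usual debugfs mount:

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        struct pollfd pfd;
        ssize_t n;

        pfd.fd = open("/sys/kernel/debug/tracing/trace_pipe",
                      O_RDONLY | O_NONBLOCK);
        if (pfd.fd < 0) {
                perror("trace_pipe");
                return 1;
        }
        pfd.events = POLLIN;

        /* sleep in poll() until events arrive, then drain one chunk */
        if (poll(&pfd, 1, 5000) > 0 && (pfd.revents & POLLIN)) {
                n = read(pfd.fd, buf, sizeof(buf));
                if (n > 0)
                        fwrite(buf, 1, n, stdout);
        }
        close(pfd.fd);
        return 0;
}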
3504 | /* | 3888 | /* |
@@ -3564,6 +3948,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, | |||
3564 | size_t cnt, loff_t *ppos) | 3948 | size_t cnt, loff_t *ppos) |
3565 | { | 3949 | { |
3566 | struct trace_iterator *iter = filp->private_data; | 3950 | struct trace_iterator *iter = filp->private_data; |
3951 | struct trace_array *tr = iter->tr; | ||
3567 | ssize_t sret; | 3952 | ssize_t sret; |
3568 | 3953 | ||
3569 | /* return any leftover data */ | 3954 | /* return any leftover data */ |
@@ -3575,8 +3960,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, | |||
3575 | 3960 | ||
3576 | /* copy the tracer to avoid using a global lock all around */ | 3961 | /* copy the tracer to avoid using a global lock all around */ |
3577 | mutex_lock(&trace_types_lock); | 3962 | mutex_lock(&trace_types_lock); |
3578 | if (unlikely(iter->trace->name != current_trace->name)) | 3963 | if (unlikely(iter->trace->name != tr->current_trace->name)) |
3579 | *iter->trace = *current_trace; | 3964 | *iter->trace = *tr->current_trace; |
3580 | mutex_unlock(&trace_types_lock); | 3965 | mutex_unlock(&trace_types_lock); |
3581 | 3966 | ||
3582 | /* | 3967 | /* |
@@ -3732,6 +4117,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
3732 | .ops = &tracing_pipe_buf_ops, | 4117 | .ops = &tracing_pipe_buf_ops, |
3733 | .spd_release = tracing_spd_release_pipe, | 4118 | .spd_release = tracing_spd_release_pipe, |
3734 | }; | 4119 | }; |
4120 | struct trace_array *tr = iter->tr; | ||
3735 | ssize_t ret; | 4121 | ssize_t ret; |
3736 | size_t rem; | 4122 | size_t rem; |
3737 | unsigned int i; | 4123 | unsigned int i; |
@@ -3741,8 +4127,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
3741 | 4127 | ||
3742 | /* copy the tracer to avoid using a global lock all around */ | 4128 | /* copy the tracer to avoid using a global lock all around */ |
3743 | mutex_lock(&trace_types_lock); | 4129 | mutex_lock(&trace_types_lock); |
3744 | if (unlikely(iter->trace->name != current_trace->name)) | 4130 | if (unlikely(iter->trace->name != tr->current_trace->name)) |
3745 | *iter->trace = *current_trace; | 4131 | *iter->trace = *tr->current_trace; |
3746 | mutex_unlock(&trace_types_lock); | 4132 | mutex_unlock(&trace_types_lock); |
3747 | 4133 | ||
3748 | mutex_lock(&iter->mutex); | 4134 | mutex_lock(&iter->mutex); |
@@ -3804,43 +4190,19 @@ out_err: | |||
3804 | goto out; | 4190 | goto out; |
3805 | } | 4191 | } |
3806 | 4192 | ||
3807 | struct ftrace_entries_info { | ||
3808 | struct trace_array *tr; | ||
3809 | int cpu; | ||
3810 | }; | ||
3811 | |||
3812 | static int tracing_entries_open(struct inode *inode, struct file *filp) | ||
3813 | { | ||
3814 | struct ftrace_entries_info *info; | ||
3815 | |||
3816 | if (tracing_disabled) | ||
3817 | return -ENODEV; | ||
3818 | |||
3819 | info = kzalloc(sizeof(*info), GFP_KERNEL); | ||
3820 | if (!info) | ||
3821 | return -ENOMEM; | ||
3822 | |||
3823 | info->tr = &global_trace; | ||
3824 | info->cpu = (unsigned long)inode->i_private; | ||
3825 | |||
3826 | filp->private_data = info; | ||
3827 | |||
3828 | return 0; | ||
3829 | } | ||
3830 | |||
3831 | static ssize_t | 4193 | static ssize_t |
3832 | tracing_entries_read(struct file *filp, char __user *ubuf, | 4194 | tracing_entries_read(struct file *filp, char __user *ubuf, |
3833 | size_t cnt, loff_t *ppos) | 4195 | size_t cnt, loff_t *ppos) |
3834 | { | 4196 | { |
3835 | struct ftrace_entries_info *info = filp->private_data; | 4197 | struct trace_cpu *tc = filp->private_data; |
3836 | struct trace_array *tr = info->tr; | 4198 | struct trace_array *tr = tc->tr; |
3837 | char buf[64]; | 4199 | char buf[64]; |
3838 | int r = 0; | 4200 | int r = 0; |
3839 | ssize_t ret; | 4201 | ssize_t ret; |
3840 | 4202 | ||
3841 | mutex_lock(&trace_types_lock); | 4203 | mutex_lock(&trace_types_lock); |
3842 | 4204 | ||
3843 | if (info->cpu == RING_BUFFER_ALL_CPUS) { | 4205 | if (tc->cpu == RING_BUFFER_ALL_CPUS) { |
3844 | int cpu, buf_size_same; | 4206 | int cpu, buf_size_same; |
3845 | unsigned long size; | 4207 | unsigned long size; |
3846 | 4208 | ||
@@ -3850,8 +4212,8 @@ tracing_entries_read(struct file *filp, char __user *ubuf, | |||
3850 | for_each_tracing_cpu(cpu) { | 4212 | for_each_tracing_cpu(cpu) { |
3851 | /* fill in the size from first enabled cpu */ | 4213 | /* fill in the size from first enabled cpu */ |
3852 | if (size == 0) | 4214 | if (size == 0) |
3853 | size = tr->data[cpu]->entries; | 4215 | size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries; |
3854 | if (size != tr->data[cpu]->entries) { | 4216 | if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) { |
3855 | buf_size_same = 0; | 4217 | buf_size_same = 0; |
3856 | break; | 4218 | break; |
3857 | } | 4219 | } |
@@ -3867,7 +4229,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, | |||
3867 | } else | 4229 | } else |
3868 | r = sprintf(buf, "X\n"); | 4230 | r = sprintf(buf, "X\n"); |
3869 | } else | 4231 | } else |
3870 | r = sprintf(buf, "%lu\n", tr->data[info->cpu]->entries >> 10); | 4232 | r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, tc->cpu)->entries >> 10); |
3871 | 4233 | ||
3872 | mutex_unlock(&trace_types_lock); | 4234 | mutex_unlock(&trace_types_lock); |
3873 | 4235 | ||
@@ -3879,7 +4241,7 @@ static ssize_t | |||
3879 | tracing_entries_write(struct file *filp, const char __user *ubuf, | 4241 | tracing_entries_write(struct file *filp, const char __user *ubuf, |
3880 | size_t cnt, loff_t *ppos) | 4242 | size_t cnt, loff_t *ppos) |
3881 | { | 4243 | { |
3882 | struct ftrace_entries_info *info = filp->private_data; | 4244 | struct trace_cpu *tc = filp->private_data; |
3883 | unsigned long val; | 4245 | unsigned long val; |
3884 | int ret; | 4246 | int ret; |
3885 | 4247 | ||
@@ -3894,7 +4256,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, | |||
3894 | /* value is in KB */ | 4256 | /* value is in KB */ |
3895 | val <<= 10; | 4257 | val <<= 10; |
3896 | 4258 | ||
3897 | ret = tracing_resize_ring_buffer(val, info->cpu); | 4259 | ret = tracing_resize_ring_buffer(tc->tr, val, tc->cpu); |
3898 | if (ret < 0) | 4260 | if (ret < 0) |
3899 | return ret; | 4261 | return ret; |
3900 | 4262 | ||
@@ -3903,16 +4265,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, | |||
3903 | return cnt; | 4265 | return cnt; |
3904 | } | 4266 | } |
3905 | 4267 | ||
3906 | static int | ||
3907 | tracing_entries_release(struct inode *inode, struct file *filp) | ||
3908 | { | ||
3909 | struct ftrace_entries_info *info = filp->private_data; | ||
3910 | |||
3911 | kfree(info); | ||
3912 | |||
3913 | return 0; | ||
3914 | } | ||
3915 | |||
3916 | static ssize_t | 4268 | static ssize_t |
3917 | tracing_total_entries_read(struct file *filp, char __user *ubuf, | 4269 | tracing_total_entries_read(struct file *filp, char __user *ubuf, |
3918 | size_t cnt, loff_t *ppos) | 4270 | size_t cnt, loff_t *ppos) |
@@ -3924,7 +4276,7 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf, | |||
3924 | 4276 | ||
3925 | mutex_lock(&trace_types_lock); | 4277 | mutex_lock(&trace_types_lock); |
3926 | for_each_tracing_cpu(cpu) { | 4278 | for_each_tracing_cpu(cpu) { |
3927 | size += tr->data[cpu]->entries >> 10; | 4279 | size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10; |
3928 | if (!ring_buffer_expanded) | 4280 | if (!ring_buffer_expanded) |
3929 | expanded_size += trace_buf_size >> 10; | 4281 | expanded_size += trace_buf_size >> 10; |
3930 | } | 4282 | } |
@@ -3954,11 +4306,13 @@ tracing_free_buffer_write(struct file *filp, const char __user *ubuf, | |||
3954 | static int | 4306 | static int |
3955 | tracing_free_buffer_release(struct inode *inode, struct file *filp) | 4307 | tracing_free_buffer_release(struct inode *inode, struct file *filp) |
3956 | { | 4308 | { |
4309 | struct trace_array *tr = inode->i_private; | ||
4310 | |||
3957 | /* disable tracing ? */ | 4311 | /* disable tracing ? */ |
3958 | if (trace_flags & TRACE_ITER_STOP_ON_FREE) | 4312 | if (trace_flags & TRACE_ITER_STOP_ON_FREE) |
3959 | tracing_off(); | 4313 | tracing_off(); |
3960 | /* resize the ring buffer to 0 */ | 4314 | /* resize the ring buffer to 0 */ |
3961 | tracing_resize_ring_buffer(0, RING_BUFFER_ALL_CPUS); | 4315 | tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); |
3962 | 4316 | ||
3963 | return 0; | 4317 | return 0; |
3964 | } | 4318 | } |
@@ -4027,7 +4381,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, | |||
4027 | 4381 | ||
4028 | local_save_flags(irq_flags); | 4382 | local_save_flags(irq_flags); |
4029 | size = sizeof(*entry) + cnt + 2; /* possible \n added */ | 4383 | size = sizeof(*entry) + cnt + 2; /* possible \n added */ |
4030 | buffer = global_trace.buffer; | 4384 | buffer = global_trace.trace_buffer.buffer; |
4031 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, | 4385 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, |
4032 | irq_flags, preempt_count()); | 4386 | irq_flags, preempt_count()); |
4033 | if (!event) { | 4387 | if (!event) { |
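tracing_mark_write() above now reserves its event in global_trace.trace_buffer.buffer; user space reaches it through the trace_marker file listed in the help text. A minimal sketch, with the mount path assumed:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *msg = "hello from user space\n";
        int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);

        if (fd < 0) {
                perror("trace_marker");
                return 1;
        }
        /* lands in the buffer reserved by tracing_mark_write() */
        if (write(fd, msg, strlen(msg)) < 0)
                perror("write");
        close(fd);
        return 0;
}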
@@ -4069,13 +4423,14 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, | |||
4069 | 4423 | ||
4070 | static int tracing_clock_show(struct seq_file *m, void *v) | 4424 | static int tracing_clock_show(struct seq_file *m, void *v) |
4071 | { | 4425 | { |
4426 | struct trace_array *tr = m->private; | ||
4072 | int i; | 4427 | int i; |
4073 | 4428 | ||
4074 | for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) | 4429 | for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) |
4075 | seq_printf(m, | 4430 | seq_printf(m, |
4076 | "%s%s%s%s", i ? " " : "", | 4431 | "%s%s%s%s", i ? " " : "", |
4077 | i == trace_clock_id ? "[" : "", trace_clocks[i].name, | 4432 | i == tr->clock_id ? "[" : "", trace_clocks[i].name, |
4078 | i == trace_clock_id ? "]" : ""); | 4433 | i == tr->clock_id ? "]" : ""); |
4079 | seq_putc(m, '\n'); | 4434 | seq_putc(m, '\n'); |
4080 | 4435 | ||
4081 | return 0; | 4436 | return 0; |
@@ -4084,6 +4439,8 @@ static int tracing_clock_show(struct seq_file *m, void *v) | |||
4084 | static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, | 4439 | static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, |
4085 | size_t cnt, loff_t *fpos) | 4440 | size_t cnt, loff_t *fpos) |
4086 | { | 4441 | { |
4442 | struct seq_file *m = filp->private_data; | ||
4443 | struct trace_array *tr = m->private; | ||
4087 | char buf[64]; | 4444 | char buf[64]; |
4088 | const char *clockstr; | 4445 | const char *clockstr; |
4089 | int i; | 4446 | int i; |
@@ -4105,20 +4462,23 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, | |||
4105 | if (i == ARRAY_SIZE(trace_clocks)) | 4462 | if (i == ARRAY_SIZE(trace_clocks)) |
4106 | return -EINVAL; | 4463 | return -EINVAL; |
4107 | 4464 | ||
4108 | trace_clock_id = i; | ||
4109 | |||
4110 | mutex_lock(&trace_types_lock); | 4465 | mutex_lock(&trace_types_lock); |
4111 | 4466 | ||
4112 | ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func); | 4467 | tr->clock_id = i; |
4113 | if (max_tr.buffer) | 4468 | |
4114 | ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func); | 4469 | ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func); |
4115 | 4470 | ||
4116 | /* | 4471 | /* |
4117 | * New clock may not be consistent with the previous clock. | 4472 | * New clock may not be consistent with the previous clock. |
4118 | * Reset the buffer so that it doesn't have incomparable timestamps. | 4473 | * Reset the buffer so that it doesn't have incomparable timestamps. |
4119 | */ | 4474 | */ |
4120 | tracing_reset_online_cpus(&global_trace); | 4475 | tracing_reset_online_cpus(&global_trace.trace_buffer); |
4121 | tracing_reset_online_cpus(&max_tr); | 4476 | |
4477 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
4478 | if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer) | ||
4479 | ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); | ||
4480 | tracing_reset_online_cpus(&global_trace.max_buffer); | ||
4481 | #endif | ||
4122 | 4482 | ||
4123 | mutex_unlock(&trace_types_lock); | 4483 | mutex_unlock(&trace_types_lock); |
4124 | 4484 | ||
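The clock is now tracked per trace_array (tr->clock_id), and switching it resets the buffers as the comment notes. A small sketch of reading and setting trace_clock from user space; the mount path is assumed, and "global" is one of the clocks listed in the help text above:

#include <stdio.h>

#define TRACE_DIR "/sys/kernel/debug/tracing/"

int main(void)
{
        char buf[256];
        FILE *f;

        f = fopen(TRACE_DIR "trace_clock", "r");
        if (f && fgets(buf, sizeof(buf), f))
                printf("clocks (current in brackets): %s", buf);
        if (f)
                fclose(f);

        f = fopen(TRACE_DIR "trace_clock", "w");
        if (!f) {
                perror("trace_clock");
                return 1;
        }
        fputs("global\n", f);   /* resets the buffers to avoid mixed timestamps */
        fclose(f);
        return 0;
}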
@@ -4131,20 +4491,45 @@ static int tracing_clock_open(struct inode *inode, struct file *file) | |||
4131 | { | 4491 | { |
4132 | if (tracing_disabled) | 4492 | if (tracing_disabled) |
4133 | return -ENODEV; | 4493 | return -ENODEV; |
4134 | return single_open(file, tracing_clock_show, NULL); | 4494 | |
4495 | return single_open(file, tracing_clock_show, inode->i_private); | ||
4135 | } | 4496 | } |
4136 | 4497 | ||
4498 | struct ftrace_buffer_info { | ||
4499 | struct trace_iterator iter; | ||
4500 | void *spare; | ||
4501 | unsigned int read; | ||
4502 | }; | ||
4503 | |||
4137 | #ifdef CONFIG_TRACER_SNAPSHOT | 4504 | #ifdef CONFIG_TRACER_SNAPSHOT |
4138 | static int tracing_snapshot_open(struct inode *inode, struct file *file) | 4505 | static int tracing_snapshot_open(struct inode *inode, struct file *file) |
4139 | { | 4506 | { |
4507 | struct trace_cpu *tc = inode->i_private; | ||
4140 | struct trace_iterator *iter; | 4508 | struct trace_iterator *iter; |
4509 | struct seq_file *m; | ||
4141 | int ret = 0; | 4510 | int ret = 0; |
4142 | 4511 | ||
4143 | if (file->f_mode & FMODE_READ) { | 4512 | if (file->f_mode & FMODE_READ) { |
4144 | iter = __tracing_open(inode, file, true); | 4513 | iter = __tracing_open(inode, file, true); |
4145 | if (IS_ERR(iter)) | 4514 | if (IS_ERR(iter)) |
4146 | ret = PTR_ERR(iter); | 4515 | ret = PTR_ERR(iter); |
4516 | } else { | ||
4517 | /* Writes still need the seq_file to hold the private data */ | ||
4518 | m = kzalloc(sizeof(*m), GFP_KERNEL); | ||
4519 | if (!m) | ||
4520 | return -ENOMEM; | ||
4521 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); | ||
4522 | if (!iter) { | ||
4523 | kfree(m); | ||
4524 | return -ENOMEM; | ||
4525 | } | ||
4526 | iter->tr = tc->tr; | ||
4527 | iter->trace_buffer = &tc->tr->max_buffer; | ||
4528 | iter->cpu_file = tc->cpu; | ||
4529 | m->private = iter; | ||
4530 | file->private_data = m; | ||
4147 | } | 4531 | } |
4532 | |||
4148 | return ret; | 4533 | return ret; |
4149 | } | 4534 | } |
4150 | 4535 | ||
@@ -4152,6 +4537,9 @@ static ssize_t | |||
4152 | tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, | 4537 | tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, |
4153 | loff_t *ppos) | 4538 | loff_t *ppos) |
4154 | { | 4539 | { |
4540 | struct seq_file *m = filp->private_data; | ||
4541 | struct trace_iterator *iter = m->private; | ||
4542 | struct trace_array *tr = iter->tr; | ||
4155 | unsigned long val; | 4543 | unsigned long val; |
4156 | int ret; | 4544 | int ret; |
4157 | 4545 | ||
@@ -4165,40 +4553,48 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
4165 | 4553 | ||
4166 | mutex_lock(&trace_types_lock); | 4554 | mutex_lock(&trace_types_lock); |
4167 | 4555 | ||
4168 | if (current_trace->use_max_tr) { | 4556 | if (tr->current_trace->use_max_tr) { |
4169 | ret = -EBUSY; | 4557 | ret = -EBUSY; |
4170 | goto out; | 4558 | goto out; |
4171 | } | 4559 | } |
4172 | 4560 | ||
4173 | switch (val) { | 4561 | switch (val) { |
4174 | case 0: | 4562 | case 0: |
4175 | if (current_trace->allocated_snapshot) { | 4563 | if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { |
4176 | /* free spare buffer */ | 4564 | ret = -EINVAL; |
4177 | ring_buffer_resize(max_tr.buffer, 1, | 4565 | break; |
4178 | RING_BUFFER_ALL_CPUS); | ||
4179 | set_buffer_entries(&max_tr, 1); | ||
4180 | tracing_reset_online_cpus(&max_tr); | ||
4181 | current_trace->allocated_snapshot = false; | ||
4182 | } | 4566 | } |
4567 | if (tr->allocated_snapshot) | ||
4568 | free_snapshot(tr); | ||
4183 | break; | 4569 | break; |
4184 | case 1: | 4570 | case 1: |
4185 | if (!current_trace->allocated_snapshot) { | 4571 | /* Only allow per-cpu swap if the ring buffer supports it */ |
4186 | /* allocate spare buffer */ | 4572 | #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP |
4187 | ret = resize_buffer_duplicate_size(&max_tr, | 4573 | if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { |
4188 | &global_trace, RING_BUFFER_ALL_CPUS); | 4574 | ret = -EINVAL; |
4575 | break; | ||
4576 | } | ||
4577 | #endif | ||
4578 | if (!tr->allocated_snapshot) { | ||
4579 | ret = alloc_snapshot(tr); | ||
4189 | if (ret < 0) | 4580 | if (ret < 0) |
4190 | break; | 4581 | break; |
4191 | current_trace->allocated_snapshot = true; | ||
4192 | } | 4582 | } |
4193 | |||
4194 | local_irq_disable(); | 4583 | local_irq_disable(); |
4195 | /* Now, we're going to swap */ | 4584 | /* Now, we're going to swap */ |
4196 | update_max_tr(&global_trace, current, smp_processor_id()); | 4585 | if (iter->cpu_file == RING_BUFFER_ALL_CPUS) |
4586 | update_max_tr(tr, current, smp_processor_id()); | ||
4587 | else | ||
4588 | update_max_tr_single(tr, current, iter->cpu_file); | ||
4197 | local_irq_enable(); | 4589 | local_irq_enable(); |
4198 | break; | 4590 | break; |
4199 | default: | 4591 | default: |
4200 | if (current_trace->allocated_snapshot) | 4592 | if (tr->allocated_snapshot) { |
4201 | tracing_reset_online_cpus(&max_tr); | 4593 | if (iter->cpu_file == RING_BUFFER_ALL_CPUS) |
4594 | tracing_reset_online_cpus(&tr->max_buffer); | ||
4595 | else | ||
4596 | tracing_reset(&tr->max_buffer, iter->cpu_file); | ||
4597 | } | ||
4202 | break; | 4598 | break; |
4203 | } | 4599 | } |
4204 | 4600 | ||
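tracing_snapshot_write() gives the snapshot file simple semantics: '0' frees the spare buffer, '1' allocates it if needed and swaps it with the live buffer, and any other value clears it. A userspace sketch of that cycle, with the mount path assumed:

#include <stdio.h>

#define SNAPSHOT "/sys/kernel/debug/tracing/snapshot"

static int snapshot_cmd(const char *val)
{
        FILE *f = fopen(SNAPSHOT, "w");

        if (!f) {
                perror(SNAPSHOT);
                return -1;
        }
        fputs(val, f);
        return fclose(f);
}

int main(void)
{
        char line[1024];
        FILE *f;

        snapshot_cmd("1");      /* allocate (if needed) and swap: take a snapshot */

        f = fopen(SNAPSHOT, "r");
        if (f) {
                while (fgets(line, sizeof(line), f))
                        fputs(line, stdout);
                fclose(f);
        }

        snapshot_cmd("2");      /* any other value: clear the snapshot buffer */
        snapshot_cmd("0");      /* free the snapshot buffer again */
        return 0;
}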
@@ -4210,6 +4606,51 @@ out: | |||
4210 | mutex_unlock(&trace_types_lock); | 4606 | mutex_unlock(&trace_types_lock); |
4211 | return ret; | 4607 | return ret; |
4212 | } | 4608 | } |
4609 | |||
4610 | static int tracing_snapshot_release(struct inode *inode, struct file *file) | ||
4611 | { | ||
4612 | struct seq_file *m = file->private_data; | ||
4613 | |||
4614 | if (file->f_mode & FMODE_READ) | ||
4615 | return tracing_release(inode, file); | ||
4616 | |||
4617 | /* If write only, the seq_file is just a stub */ | ||
4618 | if (m) | ||
4619 | kfree(m->private); | ||
4620 | kfree(m); | ||
4621 | |||
4622 | return 0; | ||
4623 | } | ||
4624 | |||
4625 | static int tracing_buffers_open(struct inode *inode, struct file *filp); | ||
4626 | static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf, | ||
4627 | size_t count, loff_t *ppos); | ||
4628 | static int tracing_buffers_release(struct inode *inode, struct file *file); | ||
4629 | static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos, | ||
4630 | struct pipe_inode_info *pipe, size_t len, unsigned int flags); | ||
4631 | |||
4632 | static int snapshot_raw_open(struct inode *inode, struct file *filp) | ||
4633 | { | ||
4634 | struct ftrace_buffer_info *info; | ||
4635 | int ret; | ||
4636 | |||
4637 | ret = tracing_buffers_open(inode, filp); | ||
4638 | if (ret < 0) | ||
4639 | return ret; | ||
4640 | |||
4641 | info = filp->private_data; | ||
4642 | |||
4643 | if (info->iter.trace->use_max_tr) { | ||
4644 | tracing_buffers_release(inode, filp); | ||
4645 | return -EBUSY; | ||
4646 | } | ||
4647 | |||
4648 | info->iter.snapshot = true; | ||
4649 | info->iter.trace_buffer = &info->iter.tr->max_buffer; | ||
4650 | |||
4651 | return ret; | ||
4652 | } | ||
4653 | |||
4213 | #endif /* CONFIG_TRACER_SNAPSHOT */ | 4654 | #endif /* CONFIG_TRACER_SNAPSHOT */ |
4214 | 4655 | ||
4215 | 4656 | ||
@@ -4237,10 +4678,9 @@ static const struct file_operations tracing_pipe_fops = { | |||
4237 | }; | 4678 | }; |
4238 | 4679 | ||
4239 | static const struct file_operations tracing_entries_fops = { | 4680 | static const struct file_operations tracing_entries_fops = { |
4240 | .open = tracing_entries_open, | 4681 | .open = tracing_open_generic, |
4241 | .read = tracing_entries_read, | 4682 | .read = tracing_entries_read, |
4242 | .write = tracing_entries_write, | 4683 | .write = tracing_entries_write, |
4243 | .release = tracing_entries_release, | ||
4244 | .llseek = generic_file_llseek, | 4684 | .llseek = generic_file_llseek, |
4245 | }; | 4685 | }; |
4246 | 4686 | ||
@@ -4275,20 +4715,23 @@ static const struct file_operations snapshot_fops = { | |||
4275 | .read = seq_read, | 4715 | .read = seq_read, |
4276 | .write = tracing_snapshot_write, | 4716 | .write = tracing_snapshot_write, |
4277 | .llseek = tracing_seek, | 4717 | .llseek = tracing_seek, |
4278 | .release = tracing_release, | 4718 | .release = tracing_snapshot_release, |
4279 | }; | 4719 | }; |
4280 | #endif /* CONFIG_TRACER_SNAPSHOT */ | ||
4281 | 4720 | ||
4282 | struct ftrace_buffer_info { | 4721 | static const struct file_operations snapshot_raw_fops = { |
4283 | struct trace_array *tr; | 4722 | .open = snapshot_raw_open, |
4284 | void *spare; | 4723 | .read = tracing_buffers_read, |
4285 | int cpu; | 4724 | .release = tracing_buffers_release, |
4286 | unsigned int read; | 4725 | .splice_read = tracing_buffers_splice_read, |
4726 | .llseek = no_llseek, | ||
4287 | }; | 4727 | }; |
4288 | 4728 | ||
4729 | #endif /* CONFIG_TRACER_SNAPSHOT */ | ||
4730 | |||
4289 | static int tracing_buffers_open(struct inode *inode, struct file *filp) | 4731 | static int tracing_buffers_open(struct inode *inode, struct file *filp) |
4290 | { | 4732 | { |
4291 | int cpu = (int)(long)inode->i_private; | 4733 | struct trace_cpu *tc = inode->i_private; |
4734 | struct trace_array *tr = tc->tr; | ||
4292 | struct ftrace_buffer_info *info; | 4735 | struct ftrace_buffer_info *info; |
4293 | 4736 | ||
4294 | if (tracing_disabled) | 4737 | if (tracing_disabled) |
@@ -4298,72 +4741,131 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) | |||
4298 | if (!info) | 4741 | if (!info) |
4299 | return -ENOMEM; | 4742 | return -ENOMEM; |
4300 | 4743 | ||
4301 | info->tr = &global_trace; | 4744 | mutex_lock(&trace_types_lock); |
4302 | info->cpu = cpu; | 4745 | |
4303 | info->spare = NULL; | 4746 | tr->ref++; |
4747 | |||
4748 | info->iter.tr = tr; | ||
4749 | info->iter.cpu_file = tc->cpu; | ||
4750 | info->iter.trace = tr->current_trace; | ||
4751 | info->iter.trace_buffer = &tr->trace_buffer; | ||
4752 | info->spare = NULL; | ||
4304 | /* Force reading ring buffer for first read */ | 4753 | /* Force reading ring buffer for first read */ |
4305 | info->read = (unsigned int)-1; | 4754 | info->read = (unsigned int)-1; |
4306 | 4755 | ||
4307 | filp->private_data = info; | 4756 | filp->private_data = info; |
4308 | 4757 | ||
4758 | mutex_unlock(&trace_types_lock); | ||
4759 | |||
4309 | return nonseekable_open(inode, filp); | 4760 | return nonseekable_open(inode, filp); |
4310 | } | 4761 | } |
4311 | 4762 | ||
4763 | static unsigned int | ||
4764 | tracing_buffers_poll(struct file *filp, poll_table *poll_table) | ||
4765 | { | ||
4766 | struct ftrace_buffer_info *info = filp->private_data; | ||
4767 | struct trace_iterator *iter = &info->iter; | ||
4768 | |||
4769 | return trace_poll(iter, filp, poll_table); | ||
4770 | } | ||
4771 | |||
4312 | static ssize_t | 4772 | static ssize_t |
4313 | tracing_buffers_read(struct file *filp, char __user *ubuf, | 4773 | tracing_buffers_read(struct file *filp, char __user *ubuf, |
4314 | size_t count, loff_t *ppos) | 4774 | size_t count, loff_t *ppos) |
4315 | { | 4775 | { |
4316 | struct ftrace_buffer_info *info = filp->private_data; | 4776 | struct ftrace_buffer_info *info = filp->private_data; |
4777 | struct trace_iterator *iter = &info->iter; | ||
4317 | ssize_t ret; | 4778 | ssize_t ret; |
4318 | size_t size; | 4779 | ssize_t size; |
4319 | 4780 | ||
4320 | if (!count) | 4781 | if (!count) |
4321 | return 0; | 4782 | return 0; |
4322 | 4783 | ||
4784 | mutex_lock(&trace_types_lock); | ||
4785 | |||
4786 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
4787 | if (iter->snapshot && iter->tr->current_trace->use_max_tr) { | ||
4788 | size = -EBUSY; | ||
4789 | goto out_unlock; | ||
4790 | } | ||
4791 | #endif | ||
4792 | |||
4323 | if (!info->spare) | 4793 | if (!info->spare) |
4324 | info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu); | 4794 | info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer, |
4795 | iter->cpu_file); | ||
4796 | size = -ENOMEM; | ||
4325 | if (!info->spare) | 4797 | if (!info->spare) |
4326 | return -ENOMEM; | 4798 | goto out_unlock; |
4327 | 4799 | ||
4328 | /* Do we have previous read data to read? */ | 4800 | /* Do we have previous read data to read? */ |
4329 | if (info->read < PAGE_SIZE) | 4801 | if (info->read < PAGE_SIZE) |
4330 | goto read; | 4802 | goto read; |
4331 | 4803 | ||
4332 | trace_access_lock(info->cpu); | 4804 | again: |
4333 | ret = ring_buffer_read_page(info->tr->buffer, | 4805 | trace_access_lock(iter->cpu_file); |
4806 | ret = ring_buffer_read_page(iter->trace_buffer->buffer, | ||
4334 | &info->spare, | 4807 | &info->spare, |
4335 | count, | 4808 | count, |
4336 | info->cpu, 0); | 4809 | iter->cpu_file, 0); |
4337 | trace_access_unlock(info->cpu); | 4810 | trace_access_unlock(iter->cpu_file); |
4338 | if (ret < 0) | ||
4339 | return 0; | ||
4340 | 4811 | ||
4341 | info->read = 0; | 4812 | if (ret < 0) { |
4813 | if (trace_empty(iter)) { | ||
4814 | if ((filp->f_flags & O_NONBLOCK)) { | ||
4815 | size = -EAGAIN; | ||
4816 | goto out_unlock; | ||
4817 | } | ||
4818 | mutex_unlock(&trace_types_lock); | ||
4819 | iter->trace->wait_pipe(iter); | ||
4820 | mutex_lock(&trace_types_lock); | ||
4821 | if (signal_pending(current)) { | ||
4822 | size = -EINTR; | ||
4823 | goto out_unlock; | ||
4824 | } | ||
4825 | goto again; | ||
4826 | } | ||
4827 | size = 0; | ||
4828 | goto out_unlock; | ||
4829 | } | ||
4342 | 4830 | ||
4343 | read: | 4831 | info->read = 0; |
4832 | read: | ||
4344 | size = PAGE_SIZE - info->read; | 4833 | size = PAGE_SIZE - info->read; |
4345 | if (size > count) | 4834 | if (size > count) |
4346 | size = count; | 4835 | size = count; |
4347 | 4836 | ||
4348 | ret = copy_to_user(ubuf, info->spare + info->read, size); | 4837 | ret = copy_to_user(ubuf, info->spare + info->read, size); |
4349 | if (ret == size) | 4838 | if (ret == size) { |
4350 | return -EFAULT; | 4839 | size = -EFAULT; |
4840 | goto out_unlock; | ||
4841 | } | ||
4351 | size -= ret; | 4842 | size -= ret; |
4352 | 4843 | ||
4353 | *ppos += size; | 4844 | *ppos += size; |
4354 | info->read += size; | 4845 | info->read += size; |
4355 | 4846 | ||
4847 | out_unlock: | ||
4848 | mutex_unlock(&trace_types_lock); | ||
4849 | |||
4356 | return size; | 4850 | return size; |
4357 | } | 4851 | } |
4358 | 4852 | ||
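tracing_buffers_read() now blocks in wait_pipe() when the buffer is empty and returns -EAGAIN to O_NONBLOCK readers instead of silently returning 0. A sketch of the nonblocking case; the per_cpu/cpu0/trace_pipe_raw path is the file these fops are conventionally bound to and is an assumption here, as is the mount point:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char page[4096];        /* reads return whole pages of raw buffer data */
        ssize_t n;
        int fd;

        fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
                  O_RDONLY | O_NONBLOCK);
        if (fd < 0) {
                perror("trace_pipe_raw");
                return 1;
        }

        n = read(fd, page, sizeof(page));
        if (n < 0 && errno == EAGAIN)
                puts("no data yet (the O_NONBLOCK path added above)");
        else if (n > 0)
                printf("read %zd bytes of raw buffer data\n", n);

        close(fd);
        return 0;
}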
4359 | static int tracing_buffers_release(struct inode *inode, struct file *file) | 4853 | static int tracing_buffers_release(struct inode *inode, struct file *file) |
4360 | { | 4854 | { |
4361 | struct ftrace_buffer_info *info = file->private_data; | 4855 | struct ftrace_buffer_info *info = file->private_data; |
4856 | struct trace_iterator *iter = &info->iter; | ||
4857 | |||
4858 | mutex_lock(&trace_types_lock); | ||
4859 | |||
4860 | WARN_ON(!iter->tr->ref); | ||
4861 | iter->tr->ref--; | ||
4362 | 4862 | ||
4363 | if (info->spare) | 4863 | if (info->spare) |
4364 | ring_buffer_free_read_page(info->tr->buffer, info->spare); | 4864 | ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare); |
4365 | kfree(info); | 4865 | kfree(info); |
4366 | 4866 | ||
4867 | mutex_unlock(&trace_types_lock); | ||
4868 | |||
4367 | return 0; | 4869 | return 0; |
4368 | } | 4870 | } |
4369 | 4871 | ||
@@ -4428,6 +4930,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
4428 | unsigned int flags) | 4930 | unsigned int flags) |
4429 | { | 4931 | { |
4430 | struct ftrace_buffer_info *info = file->private_data; | 4932 | struct ftrace_buffer_info *info = file->private_data; |
4933 | struct trace_iterator *iter = &info->iter; | ||
4431 | struct partial_page partial_def[PIPE_DEF_BUFFERS]; | 4934 | struct partial_page partial_def[PIPE_DEF_BUFFERS]; |
4432 | struct page *pages_def[PIPE_DEF_BUFFERS]; | 4935 | struct page *pages_def[PIPE_DEF_BUFFERS]; |
4433 | struct splice_pipe_desc spd = { | 4936 | struct splice_pipe_desc spd = { |
@@ -4440,10 +4943,21 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
4440 | }; | 4943 | }; |
4441 | struct buffer_ref *ref; | 4944 | struct buffer_ref *ref; |
4442 | int entries, size, i; | 4945 | int entries, size, i; |
4443 | size_t ret; | 4946 | ssize_t ret; |
4444 | 4947 | ||
4445 | if (splice_grow_spd(pipe, &spd)) | 4948 | mutex_lock(&trace_types_lock); |
4446 | return -ENOMEM; | 4949 | |
4950 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
4951 | if (iter->snapshot && iter->tr->current_trace->use_max_tr) { | ||
4952 | ret = -EBUSY; | ||
4953 | goto out; | ||
4954 | } | ||
4955 | #endif | ||
4956 | |||
4957 | if (splice_grow_spd(pipe, &spd)) { | ||
4958 | ret = -ENOMEM; | ||
4959 | goto out; | ||
4960 | } | ||
4447 | 4961 | ||
4448 | if (*ppos & (PAGE_SIZE - 1)) { | 4962 | if (*ppos & (PAGE_SIZE - 1)) { |
4449 | ret = -EINVAL; | 4963 | ret = -EINVAL; |
@@ -4458,8 +4972,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
4458 | len &= PAGE_MASK; | 4972 | len &= PAGE_MASK; |
4459 | } | 4973 | } |
4460 | 4974 | ||
4461 | trace_access_lock(info->cpu); | 4975 | again: |
4462 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); | 4976 | trace_access_lock(iter->cpu_file); |
4977 | entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); | ||
4463 | 4978 | ||
4464 | for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) { | 4979 | for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) { |
4465 | struct page *page; | 4980 | struct page *page; |
@@ -4470,15 +4985,15 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
4470 | break; | 4985 | break; |
4471 | 4986 | ||
4472 | ref->ref = 1; | 4987 | ref->ref = 1; |
4473 | ref->buffer = info->tr->buffer; | 4988 | ref->buffer = iter->trace_buffer->buffer; |
4474 | ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu); | 4989 | ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); |
4475 | if (!ref->page) { | 4990 | if (!ref->page) { |
4476 | kfree(ref); | 4991 | kfree(ref); |
4477 | break; | 4992 | break; |
4478 | } | 4993 | } |
4479 | 4994 | ||
4480 | r = ring_buffer_read_page(ref->buffer, &ref->page, | 4995 | r = ring_buffer_read_page(ref->buffer, &ref->page, |
4481 | len, info->cpu, 1); | 4996 | len, iter->cpu_file, 1); |
4482 | if (r < 0) { | 4997 | if (r < 0) { |
4483 | ring_buffer_free_read_page(ref->buffer, ref->page); | 4998 | ring_buffer_free_read_page(ref->buffer, ref->page); |
4484 | kfree(ref); | 4999 | kfree(ref); |
@@ -4502,31 +5017,40 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
4502 | spd.nr_pages++; | 5017 | spd.nr_pages++; |
4503 | *ppos += PAGE_SIZE; | 5018 | *ppos += PAGE_SIZE; |
4504 | 5019 | ||
4505 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); | 5020 | entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); |
4506 | } | 5021 | } |
4507 | 5022 | ||
4508 | trace_access_unlock(info->cpu); | 5023 | trace_access_unlock(iter->cpu_file); |
4509 | spd.nr_pages = i; | 5024 | spd.nr_pages = i; |
4510 | 5025 | ||
4511 | /* did we read anything? */ | 5026 | /* did we read anything? */ |
4512 | if (!spd.nr_pages) { | 5027 | if (!spd.nr_pages) { |
4513 | if (flags & SPLICE_F_NONBLOCK) | 5028 | if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) { |
4514 | ret = -EAGAIN; | 5029 | ret = -EAGAIN; |
4515 | else | 5030 | goto out; |
4516 | ret = 0; | 5031 | } |
4517 | /* TODO: block */ | 5032 | mutex_unlock(&trace_types_lock); |
4518 | goto out; | 5033 | iter->trace->wait_pipe(iter); |
5034 | mutex_lock(&trace_types_lock); | ||
5035 | if (signal_pending(current)) { | ||
5036 | ret = -EINTR; | ||
5037 | goto out; | ||
5038 | } | ||
5039 | goto again; | ||
4519 | } | 5040 | } |
4520 | 5041 | ||
4521 | ret = splice_to_pipe(pipe, &spd); | 5042 | ret = splice_to_pipe(pipe, &spd); |
4522 | splice_shrink_spd(&spd); | 5043 | splice_shrink_spd(&spd); |
4523 | out: | 5044 | out: |
5045 | mutex_unlock(&trace_types_lock); | ||
5046 | |||
4524 | return ret; | 5047 | return ret; |
4525 | } | 5048 | } |
4526 | 5049 | ||
4527 | static const struct file_operations tracing_buffers_fops = { | 5050 | static const struct file_operations tracing_buffers_fops = { |
4528 | .open = tracing_buffers_open, | 5051 | .open = tracing_buffers_open, |
4529 | .read = tracing_buffers_read, | 5052 | .read = tracing_buffers_read, |
5053 | .poll = tracing_buffers_poll, | ||
4530 | .release = tracing_buffers_release, | 5054 | .release = tracing_buffers_release, |
4531 | .splice_read = tracing_buffers_splice_read, | 5055 | .splice_read = tracing_buffers_splice_read, |
4532 | .llseek = no_llseek, | 5056 | .llseek = no_llseek, |
@@ -4536,12 +5060,14 @@ static ssize_t | |||
4536 | tracing_stats_read(struct file *filp, char __user *ubuf, | 5060 | tracing_stats_read(struct file *filp, char __user *ubuf, |
4537 | size_t count, loff_t *ppos) | 5061 | size_t count, loff_t *ppos) |
4538 | { | 5062 | { |
4539 | unsigned long cpu = (unsigned long)filp->private_data; | 5063 | struct trace_cpu *tc = filp->private_data; |
4540 | struct trace_array *tr = &global_trace; | 5064 | struct trace_array *tr = tc->tr; |
5065 | struct trace_buffer *trace_buf = &tr->trace_buffer; | ||
4541 | struct trace_seq *s; | 5066 | struct trace_seq *s; |
4542 | unsigned long cnt; | 5067 | unsigned long cnt; |
4543 | unsigned long long t; | 5068 | unsigned long long t; |
4544 | unsigned long usec_rem; | 5069 | unsigned long usec_rem; |
5070 | int cpu = tc->cpu; | ||
4545 | 5071 | ||
4546 | s = kmalloc(sizeof(*s), GFP_KERNEL); | 5072 | s = kmalloc(sizeof(*s), GFP_KERNEL); |
4547 | if (!s) | 5073 | if (!s) |
@@ -4549,41 +5075,41 @@ tracing_stats_read(struct file *filp, char __user *ubuf, | |||
4549 | 5075 | ||
4550 | trace_seq_init(s); | 5076 | trace_seq_init(s); |
4551 | 5077 | ||
4552 | cnt = ring_buffer_entries_cpu(tr->buffer, cpu); | 5078 | cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu); |
4553 | trace_seq_printf(s, "entries: %ld\n", cnt); | 5079 | trace_seq_printf(s, "entries: %ld\n", cnt); |
4554 | 5080 | ||
4555 | cnt = ring_buffer_overrun_cpu(tr->buffer, cpu); | 5081 | cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu); |
4556 | trace_seq_printf(s, "overrun: %ld\n", cnt); | 5082 | trace_seq_printf(s, "overrun: %ld\n", cnt); |
4557 | 5083 | ||
4558 | cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); | 5084 | cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu); |
4559 | trace_seq_printf(s, "commit overrun: %ld\n", cnt); | 5085 | trace_seq_printf(s, "commit overrun: %ld\n", cnt); |
4560 | 5086 | ||
4561 | cnt = ring_buffer_bytes_cpu(tr->buffer, cpu); | 5087 | cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); |
4562 | trace_seq_printf(s, "bytes: %ld\n", cnt); | 5088 | trace_seq_printf(s, "bytes: %ld\n", cnt); |
4563 | 5089 | ||
4564 | if (trace_clocks[trace_clock_id].in_ns) { | 5090 | if (trace_clocks[trace_clock_id].in_ns) { |
4565 | /* local or global for trace_clock */ | 5091 | /* local or global for trace_clock */ |
4566 | t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu)); | 5092 | t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); |
4567 | usec_rem = do_div(t, USEC_PER_SEC); | 5093 | usec_rem = do_div(t, USEC_PER_SEC); |
4568 | trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", | 5094 | trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", |
4569 | t, usec_rem); | 5095 | t, usec_rem); |
4570 | 5096 | ||
4571 | t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu)); | 5097 | t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu)); |
4572 | usec_rem = do_div(t, USEC_PER_SEC); | 5098 | usec_rem = do_div(t, USEC_PER_SEC); |
4573 | trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); | 5099 | trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); |
4574 | } else { | 5100 | } else { |
4575 | /* counter or tsc mode for trace_clock */ | 5101 | /* counter or tsc mode for trace_clock */ |
4576 | trace_seq_printf(s, "oldest event ts: %llu\n", | 5102 | trace_seq_printf(s, "oldest event ts: %llu\n", |
4577 | ring_buffer_oldest_event_ts(tr->buffer, cpu)); | 5103 | ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); |
4578 | 5104 | ||
4579 | trace_seq_printf(s, "now ts: %llu\n", | 5105 | trace_seq_printf(s, "now ts: %llu\n", |
4580 | ring_buffer_time_stamp(tr->buffer, cpu)); | 5106 | ring_buffer_time_stamp(trace_buf->buffer, cpu)); |
4581 | } | 5107 | } |
4582 | 5108 | ||
4583 | cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu); | 5109 | cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu); |
4584 | trace_seq_printf(s, "dropped events: %ld\n", cnt); | 5110 | trace_seq_printf(s, "dropped events: %ld\n", cnt); |
4585 | 5111 | ||
4586 | cnt = ring_buffer_read_events_cpu(tr->buffer, cpu); | 5112 | cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu); |
4587 | trace_seq_printf(s, "read events: %ld\n", cnt); | 5113 | trace_seq_printf(s, "read events: %ld\n", cnt); |
4588 | 5114 | ||
4589 | count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); | 5115 | count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); |
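The hunk above switches these per-cpu files from stashing a raw CPU number in file->private_data to stashing a struct trace_cpu, which carries both the owning trace_array and the CPU (the structure itself is added in trace.h later in this patch). A minimal sketch of how a file operation recovers that context under the new scheme, written as if it sat inside trace.c; the function name and its output are illustrative, not part of the patch:

	static ssize_t example_cpu_read(struct file *filp, char __user *ubuf,
					size_t count, loff_t *ppos)
	{
		struct trace_cpu *tc = filp->private_data;
		struct trace_array *tr = tc->tr;	/* owning instance */
		int cpu = tc->cpu;	/* a CPU id, or RING_BUFFER_ALL_CPUS */
		char buf[64];
		int len;

		len = scnprintf(buf, sizeof(buf), "instance %s cpu %d\n",
				tr->name ? tr->name : "top", cpu);
		return simple_read_from_buffer(ubuf, count, ppos, buf, len);
	}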
@@ -4635,60 +5161,161 @@ static const struct file_operations tracing_dyn_info_fops = { | |||
4635 | .read = tracing_read_dyn_info, | 5161 | .read = tracing_read_dyn_info, |
4636 | .llseek = generic_file_llseek, | 5162 | .llseek = generic_file_llseek, |
4637 | }; | 5163 | }; |
4638 | #endif | 5164 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
4639 | 5165 | ||
4640 | static struct dentry *d_tracer; | 5166 | #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) |
5167 | static void | ||
5168 | ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data) | ||
5169 | { | ||
5170 | tracing_snapshot(); | ||
5171 | } | ||
4641 | 5172 | ||
4642 | struct dentry *tracing_init_dentry(void) | 5173 | static void |
5174 | ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data) | ||
5175 | { | ||
5176 | unsigned long *count = (long *)data; | ||
5177 | |||
5178 | if (!*count) | ||
5179 | return; | ||
5180 | |||
5181 | if (*count != -1) | ||
5182 | (*count)--; | ||
5183 | |||
5184 | tracing_snapshot(); | ||
5185 | } | ||
5186 | |||
5187 | static int | ||
5188 | ftrace_snapshot_print(struct seq_file *m, unsigned long ip, | ||
5189 | struct ftrace_probe_ops *ops, void *data) | ||
5190 | { | ||
5191 | long count = (long)data; | ||
5192 | |||
5193 | seq_printf(m, "%ps:", (void *)ip); | ||
5194 | |||
5195 | seq_printf(m, "snapshot"); | ||
5196 | |||
5197 | if (count == -1) | ||
5198 | seq_printf(m, ":unlimited\n"); | ||
5199 | else | ||
5200 | seq_printf(m, ":count=%ld\n", count); | ||
5201 | |||
5202 | return 0; | ||
5203 | } | ||
5204 | |||
5205 | static struct ftrace_probe_ops snapshot_probe_ops = { | ||
5206 | .func = ftrace_snapshot, | ||
5207 | .print = ftrace_snapshot_print, | ||
5208 | }; | ||
5209 | |||
5210 | static struct ftrace_probe_ops snapshot_count_probe_ops = { | ||
5211 | .func = ftrace_count_snapshot, | ||
5212 | .print = ftrace_snapshot_print, | ||
5213 | }; | ||
5214 | |||
5215 | static int | ||
5216 | ftrace_trace_snapshot_callback(struct ftrace_hash *hash, | ||
5217 | char *glob, char *cmd, char *param, int enable) | ||
4643 | { | 5218 | { |
4644 | static int once; | 5219 | struct ftrace_probe_ops *ops; |
5220 | void *count = (void *)-1; | ||
5221 | char *number; | ||
5222 | int ret; | ||
4645 | 5223 | ||
4646 | if (d_tracer) | 5224 | /* hash funcs only work with set_ftrace_filter */ |
4647 | return d_tracer; | 5225 | if (!enable) |
5226 | return -EINVAL; | ||
5227 | |||
5228 | ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops; | ||
5229 | |||
5230 | if (glob[0] == '!') { | ||
5231 | unregister_ftrace_function_probe_func(glob+1, ops); | ||
5232 | return 0; | ||
5233 | } | ||
5234 | |||
5235 | if (!param) | ||
5236 | goto out_reg; | ||
5237 | |||
5238 | number = strsep(¶m, ":"); | ||
5239 | |||
5240 | if (!strlen(number)) | ||
5241 | goto out_reg; | ||
5242 | |||
5243 | /* | ||
5244 | * We use the callback data field (which is a pointer) | ||
5245 | * as our counter. | ||
5246 | */ | ||
5247 | ret = kstrtoul(number, 0, (unsigned long *)&count); | ||
5248 | if (ret) | ||
5249 | return ret; | ||
5250 | |||
5251 | out_reg: | ||
5252 | ret = register_ftrace_function_probe(glob, ops, count); | ||
5253 | |||
5254 | if (ret >= 0) | ||
5255 | alloc_snapshot(&global_trace); | ||
5256 | |||
5257 | return ret < 0 ? ret : 0; | ||
5258 | } | ||
5259 | |||
5260 | static struct ftrace_func_command ftrace_snapshot_cmd = { | ||
5261 | .name = "snapshot", | ||
5262 | .func = ftrace_trace_snapshot_callback, | ||
5263 | }; | ||
5264 | |||
5265 | static int register_snapshot_cmd(void) | ||
5266 | { | ||
5267 | return register_ftrace_command(&ftrace_snapshot_cmd); | ||
5268 | } | ||
5269 | #else | ||
5270 | static inline int register_snapshot_cmd(void) { return 0; } | ||
5271 | #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */ | ||
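The "snapshot" trigger above plugs into the generic set_ftrace_filter command interface, so the same pattern serves for any new command. A minimal sketch, assuming CONFIG_DYNAMIC_FTRACE; the "noop" command and its callback are hypothetical, while struct ftrace_func_command and register_ftrace_command() are the existing ftrace API that the hunk itself uses:

	#include <linux/ftrace.h>
	#include <linux/kernel.h>
	#include <linux/errno.h>
	#include <linux/init.h>

	/* Invoked when "<pattern>:noop[:param]" is written to set_ftrace_filter. */
	static int noop_cmd_callback(struct ftrace_hash *hash, char *glob,
				     char *cmd, char *param, int enable)
	{
		/* Like the snapshot command, only honor set_ftrace_filter. */
		if (!enable)
			return -EINVAL;

		pr_info("noop command: pattern=%s param=%s\n",
			glob, param ? param : "(none)");
		return 0;
	}

	static struct ftrace_func_command noop_cmd = {
		.name	= "noop",
		.func	= noop_cmd_callback,
	};

	static __init int noop_cmd_init(void)
	{
		/* Register from an initcall, like register_snapshot_cmd() is
		 * called from the tracing init path. */
		return register_ftrace_command(&noop_cmd);
	}
	core_initcall(noop_cmd_init);

With that registered, writing e.g. "schedule:noop" to set_ftrace_filter would invoke the callback at write time, just as "<func>:snapshot[:count]" reaches ftrace_trace_snapshot_callback() above.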
5272 | |||
5273 | struct dentry *tracing_init_dentry_tr(struct trace_array *tr) | ||
5274 | { | ||
5275 | if (tr->dir) | ||
5276 | return tr->dir; | ||
4648 | 5277 | ||
4649 | if (!debugfs_initialized()) | 5278 | if (!debugfs_initialized()) |
4650 | return NULL; | 5279 | return NULL; |
4651 | 5280 | ||
4652 | d_tracer = debugfs_create_dir("tracing", NULL); | 5281 | if (tr->flags & TRACE_ARRAY_FL_GLOBAL) |
5282 | tr->dir = debugfs_create_dir("tracing", NULL); | ||
4653 | 5283 | ||
4654 | if (!d_tracer && !once) { | 5284 | if (!tr->dir) |
4655 | once = 1; | 5285 | pr_warn_once("Could not create debugfs directory 'tracing'\n"); |
4656 | pr_warning("Could not create debugfs directory 'tracing'\n"); | ||
4657 | return NULL; | ||
4658 | } | ||
4659 | 5286 | ||
4660 | return d_tracer; | 5287 | return tr->dir; |
4661 | } | 5288 | } |
4662 | 5289 | ||
4663 | static struct dentry *d_percpu; | 5290 | struct dentry *tracing_init_dentry(void) |
5291 | { | ||
5292 | return tracing_init_dentry_tr(&global_trace); | ||
5293 | } | ||
4664 | 5294 | ||
4665 | static struct dentry *tracing_dentry_percpu(void) | 5295 | static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) |
4666 | { | 5296 | { |
4667 | static int once; | ||
4668 | struct dentry *d_tracer; | 5297 | struct dentry *d_tracer; |
4669 | 5298 | ||
4670 | if (d_percpu) | 5299 | if (tr->percpu_dir) |
4671 | return d_percpu; | 5300 | return tr->percpu_dir; |
4672 | |||
4673 | d_tracer = tracing_init_dentry(); | ||
4674 | 5301 | ||
5302 | d_tracer = tracing_init_dentry_tr(tr); | ||
4675 | if (!d_tracer) | 5303 | if (!d_tracer) |
4676 | return NULL; | 5304 | return NULL; |
4677 | 5305 | ||
4678 | d_percpu = debugfs_create_dir("per_cpu", d_tracer); | 5306 | tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer); |
4679 | 5307 | ||
4680 | if (!d_percpu && !once) { | 5308 | WARN_ONCE(!tr->percpu_dir, |
4681 | once = 1; | 5309 | "Could not create debugfs directory 'per_cpu/%d'\n", cpu); |
4682 | pr_warning("Could not create debugfs directory 'per_cpu'\n"); | ||
4683 | return NULL; | ||
4684 | } | ||
4685 | 5310 | ||
4686 | return d_percpu; | 5311 | return tr->percpu_dir; |
4687 | } | 5312 | } |
4688 | 5313 | ||
4689 | static void tracing_init_debugfs_percpu(long cpu) | 5314 | static void |
5315 | tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) | ||
4690 | { | 5316 | { |
4691 | struct dentry *d_percpu = tracing_dentry_percpu(); | 5317 | struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, cpu); |
5318 | struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); | ||
4692 | struct dentry *d_cpu; | 5319 | struct dentry *d_cpu; |
4693 | char cpu_dir[30]; /* 30 characters should be more than enough */ | 5320 | char cpu_dir[30]; /* 30 characters should be more than enough */ |
4694 | 5321 | ||
@@ -4704,20 +5331,28 @@ static void tracing_init_debugfs_percpu(long cpu) | |||
4704 | 5331 | ||
4705 | /* per cpu trace_pipe */ | 5332 | /* per cpu trace_pipe */ |
4706 | trace_create_file("trace_pipe", 0444, d_cpu, | 5333 | trace_create_file("trace_pipe", 0444, d_cpu, |
4707 | (void *) cpu, &tracing_pipe_fops); | 5334 | (void *)&data->trace_cpu, &tracing_pipe_fops); |
4708 | 5335 | ||
4709 | /* per cpu trace */ | 5336 | /* per cpu trace */ |
4710 | trace_create_file("trace", 0644, d_cpu, | 5337 | trace_create_file("trace", 0644, d_cpu, |
4711 | (void *) cpu, &tracing_fops); | 5338 | (void *)&data->trace_cpu, &tracing_fops); |
4712 | 5339 | ||
4713 | trace_create_file("trace_pipe_raw", 0444, d_cpu, | 5340 | trace_create_file("trace_pipe_raw", 0444, d_cpu, |
4714 | (void *) cpu, &tracing_buffers_fops); | 5341 | (void *)&data->trace_cpu, &tracing_buffers_fops); |
4715 | 5342 | ||
4716 | trace_create_file("stats", 0444, d_cpu, | 5343 | trace_create_file("stats", 0444, d_cpu, |
4717 | (void *) cpu, &tracing_stats_fops); | 5344 | (void *)&data->trace_cpu, &tracing_stats_fops); |
4718 | 5345 | ||
4719 | trace_create_file("buffer_size_kb", 0444, d_cpu, | 5346 | trace_create_file("buffer_size_kb", 0444, d_cpu, |
4720 | (void *) cpu, &tracing_entries_fops); | 5347 | (void *)&data->trace_cpu, &tracing_entries_fops); |
5348 | |||
5349 | #ifdef CONFIG_TRACER_SNAPSHOT | ||
5350 | trace_create_file("snapshot", 0644, d_cpu, | ||
5351 | (void *)&data->trace_cpu, &snapshot_fops); | ||
5352 | |||
5353 | trace_create_file("snapshot_raw", 0444, d_cpu, | ||
5354 | (void *)&data->trace_cpu, &snapshot_raw_fops); | ||
5355 | #endif | ||
4721 | } | 5356 | } |
4722 | 5357 | ||
4723 | #ifdef CONFIG_FTRACE_SELFTEST | 5358 | #ifdef CONFIG_FTRACE_SELFTEST |
@@ -4728,6 +5363,7 @@ static void tracing_init_debugfs_percpu(long cpu) | |||
4728 | struct trace_option_dentry { | 5363 | struct trace_option_dentry { |
4729 | struct tracer_opt *opt; | 5364 | struct tracer_opt *opt; |
4730 | struct tracer_flags *flags; | 5365 | struct tracer_flags *flags; |
5366 | struct trace_array *tr; | ||
4731 | struct dentry *entry; | 5367 | struct dentry *entry; |
4732 | }; | 5368 | }; |
4733 | 5369 | ||
@@ -4763,7 +5399,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
4763 | 5399 | ||
4764 | if (!!(topt->flags->val & topt->opt->bit) != val) { | 5400 | if (!!(topt->flags->val & topt->opt->bit) != val) { |
4765 | mutex_lock(&trace_types_lock); | 5401 | mutex_lock(&trace_types_lock); |
4766 | ret = __set_tracer_option(current_trace, topt->flags, | 5402 | ret = __set_tracer_option(topt->tr->current_trace, topt->flags, |
4767 | topt->opt, !val); | 5403 | topt->opt, !val); |
4768 | mutex_unlock(&trace_types_lock); | 5404 | mutex_unlock(&trace_types_lock); |
4769 | if (ret) | 5405 | if (ret) |
@@ -4802,6 +5438,7 @@ static ssize_t | |||
4802 | trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, | 5438 | trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, |
4803 | loff_t *ppos) | 5439 | loff_t *ppos) |
4804 | { | 5440 | { |
5441 | struct trace_array *tr = &global_trace; | ||
4805 | long index = (long)filp->private_data; | 5442 | long index = (long)filp->private_data; |
4806 | unsigned long val; | 5443 | unsigned long val; |
4807 | int ret; | 5444 | int ret; |
@@ -4814,7 +5451,7 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
4814 | return -EINVAL; | 5451 | return -EINVAL; |
4815 | 5452 | ||
4816 | mutex_lock(&trace_types_lock); | 5453 | mutex_lock(&trace_types_lock); |
4817 | ret = set_tracer_flag(1 << index, val); | 5454 | ret = set_tracer_flag(tr, 1 << index, val); |
4818 | mutex_unlock(&trace_types_lock); | 5455 | mutex_unlock(&trace_types_lock); |
4819 | 5456 | ||
4820 | if (ret < 0) | 5457 | if (ret < 0) |
@@ -4848,40 +5485,41 @@ struct dentry *trace_create_file(const char *name, | |||
4848 | } | 5485 | } |
4849 | 5486 | ||
4850 | 5487 | ||
4851 | static struct dentry *trace_options_init_dentry(void) | 5488 | static struct dentry *trace_options_init_dentry(struct trace_array *tr) |
4852 | { | 5489 | { |
4853 | struct dentry *d_tracer; | 5490 | struct dentry *d_tracer; |
4854 | static struct dentry *t_options; | ||
4855 | 5491 | ||
4856 | if (t_options) | 5492 | if (tr->options) |
4857 | return t_options; | 5493 | return tr->options; |
4858 | 5494 | ||
4859 | d_tracer = tracing_init_dentry(); | 5495 | d_tracer = tracing_init_dentry_tr(tr); |
4860 | if (!d_tracer) | 5496 | if (!d_tracer) |
4861 | return NULL; | 5497 | return NULL; |
4862 | 5498 | ||
4863 | t_options = debugfs_create_dir("options", d_tracer); | 5499 | tr->options = debugfs_create_dir("options", d_tracer); |
4864 | if (!t_options) { | 5500 | if (!tr->options) { |
4865 | pr_warning("Could not create debugfs directory 'options'\n"); | 5501 | pr_warning("Could not create debugfs directory 'options'\n"); |
4866 | return NULL; | 5502 | return NULL; |
4867 | } | 5503 | } |
4868 | 5504 | ||
4869 | return t_options; | 5505 | return tr->options; |
4870 | } | 5506 | } |
4871 | 5507 | ||
4872 | static void | 5508 | static void |
4873 | create_trace_option_file(struct trace_option_dentry *topt, | 5509 | create_trace_option_file(struct trace_array *tr, |
5510 | struct trace_option_dentry *topt, | ||
4874 | struct tracer_flags *flags, | 5511 | struct tracer_flags *flags, |
4875 | struct tracer_opt *opt) | 5512 | struct tracer_opt *opt) |
4876 | { | 5513 | { |
4877 | struct dentry *t_options; | 5514 | struct dentry *t_options; |
4878 | 5515 | ||
4879 | t_options = trace_options_init_dentry(); | 5516 | t_options = trace_options_init_dentry(tr); |
4880 | if (!t_options) | 5517 | if (!t_options) |
4881 | return; | 5518 | return; |
4882 | 5519 | ||
4883 | topt->flags = flags; | 5520 | topt->flags = flags; |
4884 | topt->opt = opt; | 5521 | topt->opt = opt; |
5522 | topt->tr = tr; | ||
4885 | 5523 | ||
4886 | topt->entry = trace_create_file(opt->name, 0644, t_options, topt, | 5524 | topt->entry = trace_create_file(opt->name, 0644, t_options, topt, |
4887 | &trace_options_fops); | 5525 | &trace_options_fops); |
@@ -4889,7 +5527,7 @@ create_trace_option_file(struct trace_option_dentry *topt, | |||
4889 | } | 5527 | } |
4890 | 5528 | ||
4891 | static struct trace_option_dentry * | 5529 | static struct trace_option_dentry * |
4892 | create_trace_option_files(struct tracer *tracer) | 5530 | create_trace_option_files(struct trace_array *tr, struct tracer *tracer) |
4893 | { | 5531 | { |
4894 | struct trace_option_dentry *topts; | 5532 | struct trace_option_dentry *topts; |
4895 | struct tracer_flags *flags; | 5533 | struct tracer_flags *flags; |
@@ -4914,7 +5552,7 @@ create_trace_option_files(struct tracer *tracer) | |||
4914 | return NULL; | 5552 | return NULL; |
4915 | 5553 | ||
4916 | for (cnt = 0; opts[cnt].name; cnt++) | 5554 | for (cnt = 0; opts[cnt].name; cnt++) |
4917 | create_trace_option_file(&topts[cnt], flags, | 5555 | create_trace_option_file(tr, &topts[cnt], flags, |
4918 | &opts[cnt]); | 5556 | &opts[cnt]); |
4919 | 5557 | ||
4920 | return topts; | 5558 | return topts; |
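For context, the opts/flags that create_trace_option_files() walks come from the tracer being set up: it iterates tracer->flags->opts and creates one options/ file per entry. A sketch of the declarations a tracer supplies (the names are made up; TRACER_OPT(), struct tracer_opt and struct tracer_flags are the existing definitions in trace.h):

	static struct tracer_opt my_tracer_opts[] = {
		/* Shows up as the file options/verbose for this tracer. */
		{ TRACER_OPT(verbose, 0x1) },
		{ } /* terminator; create_trace_option_files() stops here */
	};

	static struct tracer_flags my_tracer_flags = {
		.val	= 0,		/* all options start off */
		.opts	= my_tracer_opts,
	};

A tracer then points its ->flags at my_tracer_flags and handles toggles in its set_flag() callback, which is what trace_options_write() above ends up reaching through __set_tracer_option().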
@@ -4937,11 +5575,12 @@ destroy_trace_option_files(struct trace_option_dentry *topts) | |||
4937 | } | 5575 | } |
4938 | 5576 | ||
4939 | static struct dentry * | 5577 | static struct dentry * |
4940 | create_trace_option_core_file(const char *option, long index) | 5578 | create_trace_option_core_file(struct trace_array *tr, |
5579 | const char *option, long index) | ||
4941 | { | 5580 | { |
4942 | struct dentry *t_options; | 5581 | struct dentry *t_options; |
4943 | 5582 | ||
4944 | t_options = trace_options_init_dentry(); | 5583 | t_options = trace_options_init_dentry(tr); |
4945 | if (!t_options) | 5584 | if (!t_options) |
4946 | return NULL; | 5585 | return NULL; |
4947 | 5586 | ||
@@ -4949,17 +5588,17 @@ create_trace_option_core_file(const char *option, long index) | |||
4949 | &trace_options_core_fops); | 5588 | &trace_options_core_fops); |
4950 | } | 5589 | } |
4951 | 5590 | ||
4952 | static __init void create_trace_options_dir(void) | 5591 | static __init void create_trace_options_dir(struct trace_array *tr) |
4953 | { | 5592 | { |
4954 | struct dentry *t_options; | 5593 | struct dentry *t_options; |
4955 | int i; | 5594 | int i; |
4956 | 5595 | ||
4957 | t_options = trace_options_init_dentry(); | 5596 | t_options = trace_options_init_dentry(tr); |
4958 | if (!t_options) | 5597 | if (!t_options) |
4959 | return; | 5598 | return; |
4960 | 5599 | ||
4961 | for (i = 0; trace_options[i]; i++) | 5600 | for (i = 0; trace_options[i]; i++) |
4962 | create_trace_option_core_file(trace_options[i], i); | 5601 | create_trace_option_core_file(tr, trace_options[i], i); |
4963 | } | 5602 | } |
4964 | 5603 | ||
4965 | static ssize_t | 5604 | static ssize_t |
@@ -4967,7 +5606,7 @@ rb_simple_read(struct file *filp, char __user *ubuf, | |||
4967 | size_t cnt, loff_t *ppos) | 5606 | size_t cnt, loff_t *ppos) |
4968 | { | 5607 | { |
4969 | struct trace_array *tr = filp->private_data; | 5608 | struct trace_array *tr = filp->private_data; |
4970 | struct ring_buffer *buffer = tr->buffer; | 5609 | struct ring_buffer *buffer = tr->trace_buffer.buffer; |
4971 | char buf[64]; | 5610 | char buf[64]; |
4972 | int r; | 5611 | int r; |
4973 | 5612 | ||
@@ -4986,7 +5625,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf, | |||
4986 | size_t cnt, loff_t *ppos) | 5625 | size_t cnt, loff_t *ppos) |
4987 | { | 5626 | { |
4988 | struct trace_array *tr = filp->private_data; | 5627 | struct trace_array *tr = filp->private_data; |
4989 | struct ring_buffer *buffer = tr->buffer; | 5628 | struct ring_buffer *buffer = tr->trace_buffer.buffer; |
4990 | unsigned long val; | 5629 | unsigned long val; |
4991 | int ret; | 5630 | int ret; |
4992 | 5631 | ||
@@ -4998,12 +5637,12 @@ rb_simple_write(struct file *filp, const char __user *ubuf, | |||
4998 | mutex_lock(&trace_types_lock); | 5637 | mutex_lock(&trace_types_lock); |
4999 | if (val) { | 5638 | if (val) { |
5000 | ring_buffer_record_on(buffer); | 5639 | ring_buffer_record_on(buffer); |
5001 | if (current_trace->start) | 5640 | if (tr->current_trace->start) |
5002 | current_trace->start(tr); | 5641 | tr->current_trace->start(tr); |
5003 | } else { | 5642 | } else { |
5004 | ring_buffer_record_off(buffer); | 5643 | ring_buffer_record_off(buffer); |
5005 | if (current_trace->stop) | 5644 | if (tr->current_trace->stop) |
5006 | current_trace->stop(tr); | 5645 | tr->current_trace->stop(tr); |
5007 | } | 5646 | } |
5008 | mutex_unlock(&trace_types_lock); | 5647 | mutex_unlock(&trace_types_lock); |
5009 | } | 5648 | } |
@@ -5020,23 +5659,310 @@ static const struct file_operations rb_simple_fops = { | |||
5020 | .llseek = default_llseek, | 5659 | .llseek = default_llseek, |
5021 | }; | 5660 | }; |
5022 | 5661 | ||
5662 | struct dentry *trace_instance_dir; | ||
5663 | |||
5664 | static void | ||
5665 | init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer); | ||
5666 | |||
5667 | static void init_trace_buffers(struct trace_array *tr, struct trace_buffer *buf) | ||
5668 | { | ||
5669 | int cpu; | ||
5670 | |||
5671 | for_each_tracing_cpu(cpu) { | ||
5672 | memset(per_cpu_ptr(buf->data, cpu), 0, sizeof(struct trace_array_cpu)); | ||
5673 | per_cpu_ptr(buf->data, cpu)->trace_cpu.cpu = cpu; | ||
5674 | per_cpu_ptr(buf->data, cpu)->trace_cpu.tr = tr; | ||
5675 | } | ||
5676 | } | ||
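init_trace_buffers() above is the standard per-cpu allocator pairing: allocate_trace_buffer() below obtains the storage with alloc_percpu() and this helper walks it with per_cpu_ptr(). A stand-alone illustration of the idiom, with a hypothetical structure in place of trace_array_cpu:

	#include <linux/percpu.h>
	#include <linux/cpumask.h>

	struct pkt_stats {
		int		cpu;
		unsigned long	packets;
	};

	/* One pkt_stats per possible CPU; record each CPU's id the same way
	 * init_trace_buffers() records it in trace_cpu.cpu. */
	static struct pkt_stats __percpu *pkt_stats_alloc(void)
	{
		struct pkt_stats __percpu *stats;
		int cpu;

		stats = alloc_percpu(struct pkt_stats);
		if (!stats)
			return NULL;

		for_each_possible_cpu(cpu)
			per_cpu_ptr(stats, cpu)->cpu = cpu;

		return stats;
	}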
5677 | |||
5678 | static int | ||
5679 | allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size) | ||
5680 | { | ||
5681 | enum ring_buffer_flags rb_flags; | ||
5682 | |||
5683 | rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0; | ||
5684 | |||
5685 | buf->buffer = ring_buffer_alloc(size, rb_flags); | ||
5686 | if (!buf->buffer) | ||
5687 | return -ENOMEM; | ||
5688 | |||
5689 | buf->data = alloc_percpu(struct trace_array_cpu); | ||
5690 | if (!buf->data) { | ||
5691 | ring_buffer_free(buf->buffer); | ||
5692 | return -ENOMEM; | ||
5693 | } | ||
5694 | |||
5695 | init_trace_buffers(tr, buf); | ||
5696 | |||
5697 | /* Allocate the first page for all buffers */ | ||
5698 | set_buffer_entries(&tr->trace_buffer, | ||
5699 | ring_buffer_size(tr->trace_buffer.buffer, 0)); | ||
5700 | |||
5701 | return 0; | ||
5702 | } | ||
5703 | |||
5704 | static int allocate_trace_buffers(struct trace_array *tr, int size) | ||
5705 | { | ||
5706 | int ret; | ||
5707 | |||
5708 | ret = allocate_trace_buffer(tr, &tr->trace_buffer, size); | ||
5709 | if (ret) | ||
5710 | return ret; | ||
5711 | |||
5712 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
5713 | ret = allocate_trace_buffer(tr, &tr->max_buffer, | ||
5714 | allocate_snapshot ? size : 1); | ||
5715 | if (WARN_ON(ret)) { | ||
5716 | ring_buffer_free(tr->trace_buffer.buffer); | ||
5717 | free_percpu(tr->trace_buffer.data); | ||
5718 | return -ENOMEM; | ||
5719 | } | ||
5720 | tr->allocated_snapshot = allocate_snapshot; | ||
5721 | |||
5722 | /* | ||
5723 | * Only the top level trace array gets its snapshot allocated | ||
5724 | * from the kernel command line. | ||
5725 | */ | ||
5726 | allocate_snapshot = false; | ||
5727 | #endif | ||
5728 | return 0; | ||
5729 | } | ||
5730 | |||
5731 | static int new_instance_create(const char *name) | ||
5732 | { | ||
5733 | struct trace_array *tr; | ||
5734 | int ret; | ||
5735 | |||
5736 | mutex_lock(&trace_types_lock); | ||
5737 | |||
5738 | ret = -EEXIST; | ||
5739 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { | ||
5740 | if (tr->name && strcmp(tr->name, name) == 0) | ||
5741 | goto out_unlock; | ||
5742 | } | ||
5743 | |||
5744 | ret = -ENOMEM; | ||
5745 | tr = kzalloc(sizeof(*tr), GFP_KERNEL); | ||
5746 | if (!tr) | ||
5747 | goto out_unlock; | ||
5748 | |||
5749 | tr->name = kstrdup(name, GFP_KERNEL); | ||
5750 | if (!tr->name) | ||
5751 | goto out_free_tr; | ||
5752 | |||
5753 | raw_spin_lock_init(&tr->start_lock); | ||
5754 | |||
5755 | tr->current_trace = &nop_trace; | ||
5756 | |||
5757 | INIT_LIST_HEAD(&tr->systems); | ||
5758 | INIT_LIST_HEAD(&tr->events); | ||
5759 | |||
5760 | if (allocate_trace_buffers(tr, trace_buf_size) < 0) | ||
5761 | goto out_free_tr; | ||
5762 | |||
5763 | /* Holder for file callbacks */ | ||
5764 | tr->trace_cpu.cpu = RING_BUFFER_ALL_CPUS; | ||
5765 | tr->trace_cpu.tr = tr; | ||
5766 | |||
5767 | tr->dir = debugfs_create_dir(name, trace_instance_dir); | ||
5768 | if (!tr->dir) | ||
5769 | goto out_free_tr; | ||
5770 | |||
5771 | ret = event_trace_add_tracer(tr->dir, tr); | ||
5772 | if (ret) | ||
5773 | goto out_free_tr; | ||
5774 | |||
5775 | init_tracer_debugfs(tr, tr->dir); | ||
5776 | |||
5777 | list_add(&tr->list, &ftrace_trace_arrays); | ||
5778 | |||
5779 | mutex_unlock(&trace_types_lock); | ||
5780 | |||
5781 | return 0; | ||
5782 | |||
5783 | out_free_tr: | ||
5784 | if (tr->trace_buffer.buffer) | ||
5785 | ring_buffer_free(tr->trace_buffer.buffer); | ||
5786 | kfree(tr->name); | ||
5787 | kfree(tr); | ||
5788 | |||
5789 | out_unlock: | ||
5790 | mutex_unlock(&trace_types_lock); | ||
5791 | |||
5792 | return ret; | ||
5793 | |||
5794 | } | ||
5795 | |||
5796 | static int instance_delete(const char *name) | ||
5797 | { | ||
5798 | struct trace_array *tr; | ||
5799 | int found = 0; | ||
5800 | int ret; | ||
5801 | |||
5802 | mutex_lock(&trace_types_lock); | ||
5803 | |||
5804 | ret = -ENODEV; | ||
5805 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { | ||
5806 | if (tr->name && strcmp(tr->name, name) == 0) { | ||
5807 | found = 1; | ||
5808 | break; | ||
5809 | } | ||
5810 | } | ||
5811 | if (!found) | ||
5812 | goto out_unlock; | ||
5813 | |||
5814 | ret = -EBUSY; | ||
5815 | if (tr->ref) | ||
5816 | goto out_unlock; | ||
5817 | |||
5818 | list_del(&tr->list); | ||
5819 | |||
5820 | event_trace_del_tracer(tr); | ||
5821 | debugfs_remove_recursive(tr->dir); | ||
5822 | free_percpu(tr->trace_buffer.data); | ||
5823 | ring_buffer_free(tr->trace_buffer.buffer); | ||
5824 | |||
5825 | kfree(tr->name); | ||
5826 | kfree(tr); | ||
5827 | |||
5828 | ret = 0; | ||
5829 | |||
5830 | out_unlock: | ||
5831 | mutex_unlock(&trace_types_lock); | ||
5832 | |||
5833 | return ret; | ||
5834 | } | ||
5835 | |||
5836 | static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t mode) | ||
5837 | { | ||
5838 | struct dentry *parent; | ||
5839 | int ret; | ||
5840 | |||
5841 | /* Paranoid: Make sure the parent is the "instances" directory */ | ||
5842 | parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); | ||
5843 | if (WARN_ON_ONCE(parent != trace_instance_dir)) | ||
5844 | return -ENOENT; | ||
5845 | |||
5846 | /* | ||
5847 | * The inode mutex is locked, but debugfs_create_dir() will also | ||
5848 | * take the mutex. As the instances directory cannot be destroyed | ||
5849 | * or changed in any other way, it is safe to unlock it, and | ||
5850 | * let the dentry try. If two users try to make the same dir at | ||
5851 | * the same time, then new_instance_create() will determine the | ||
5852 | * winner. | ||
5853 | */ | ||
5854 | mutex_unlock(&inode->i_mutex); | ||
5855 | |||
5856 | ret = new_instance_create(dentry->d_iname); | ||
5857 | |||
5858 | mutex_lock(&inode->i_mutex); | ||
5859 | |||
5860 | return ret; | ||
5861 | } | ||
5862 | |||
5863 | static int instance_rmdir(struct inode *inode, struct dentry *dentry) | ||
5864 | { | ||
5865 | struct dentry *parent; | ||
5866 | int ret; | ||
5867 | |||
5868 | /* Paranoid: Make sure the parent is the "instances" directory */ | ||
5869 | parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); | ||
5870 | if (WARN_ON_ONCE(parent != trace_instance_dir)) | ||
5871 | return -ENOENT; | ||
5872 | |||
5873 | /* The caller did a dget() on dentry */ | ||
5874 | mutex_unlock(&dentry->d_inode->i_mutex); | ||
5875 | |||
5876 | /* | ||
5877 | * The inode mutex is locked, but debugfs_remove_recursive() will | ||
5878 | * also take the mutex. As the instances directory cannot be | ||
5879 | * destroyed or changed in any other way, it is safe to unlock it | ||
5880 | * and let the removal proceed. If two users try to remove the same | ||
5881 | * dir at the same time, then instance_delete() will determine the | ||
5882 | * winner. | ||
5883 | */ | ||
5884 | mutex_unlock(&inode->i_mutex); | ||
5885 | |||
5886 | ret = instance_delete(dentry->d_iname); | ||
5887 | |||
5888 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | ||
5889 | mutex_lock(&dentry->d_inode->i_mutex); | ||
5890 | |||
5891 | return ret; | ||
5892 | } | ||
5893 | |||
5894 | static const struct inode_operations instance_dir_inode_operations = { | ||
5895 | .lookup = simple_lookup, | ||
5896 | .mkdir = instance_mkdir, | ||
5897 | .rmdir = instance_rmdir, | ||
5898 | }; | ||
5899 | |||
5900 | static __init void create_trace_instances(struct dentry *d_tracer) | ||
5901 | { | ||
5902 | trace_instance_dir = debugfs_create_dir("instances", d_tracer); | ||
5903 | if (WARN_ON(!trace_instance_dir)) | ||
5904 | return; | ||
5905 | |||
5906 | /* Hijack the dir inode operations, to allow mkdir */ | ||
5907 | trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations; | ||
5908 | } | ||
5909 | |||
5910 | static void | ||
5911 | init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) | ||
5912 | { | ||
5913 | int cpu; | ||
5914 | |||
5915 | trace_create_file("trace_options", 0644, d_tracer, | ||
5916 | tr, &tracing_iter_fops); | ||
5917 | |||
5918 | trace_create_file("trace", 0644, d_tracer, | ||
5919 | (void *)&tr->trace_cpu, &tracing_fops); | ||
5920 | |||
5921 | trace_create_file("trace_pipe", 0444, d_tracer, | ||
5922 | (void *)&tr->trace_cpu, &tracing_pipe_fops); | ||
5923 | |||
5924 | trace_create_file("buffer_size_kb", 0644, d_tracer, | ||
5925 | (void *)&tr->trace_cpu, &tracing_entries_fops); | ||
5926 | |||
5927 | trace_create_file("buffer_total_size_kb", 0444, d_tracer, | ||
5928 | tr, &tracing_total_entries_fops); | ||
5929 | |||
5930 | trace_create_file("free_buffer", 0644, d_tracer, | ||
5931 | tr, &tracing_free_buffer_fops); | ||
5932 | |||
5933 | trace_create_file("trace_marker", 0220, d_tracer, | ||
5934 | tr, &tracing_mark_fops); | ||
5935 | |||
5936 | trace_create_file("trace_clock", 0644, d_tracer, tr, | ||
5937 | &trace_clock_fops); | ||
5938 | |||
5939 | trace_create_file("tracing_on", 0644, d_tracer, | ||
5940 | tr, &rb_simple_fops); | ||
5941 | |||
5942 | #ifdef CONFIG_TRACER_SNAPSHOT | ||
5943 | trace_create_file("snapshot", 0644, d_tracer, | ||
5944 | (void *)&tr->trace_cpu, &snapshot_fops); | ||
5945 | #endif | ||
5946 | |||
5947 | for_each_tracing_cpu(cpu) | ||
5948 | tracing_init_debugfs_percpu(tr, cpu); | ||
5949 | |||
5950 | } | ||
5951 | |||
5023 | static __init int tracer_init_debugfs(void) | 5952 | static __init int tracer_init_debugfs(void) |
5024 | { | 5953 | { |
5025 | struct dentry *d_tracer; | 5954 | struct dentry *d_tracer; |
5026 | int cpu; | ||
5027 | 5955 | ||
5028 | trace_access_lock_init(); | 5956 | trace_access_lock_init(); |
5029 | 5957 | ||
5030 | d_tracer = tracing_init_dentry(); | 5958 | d_tracer = tracing_init_dentry(); |
5959 | if (!d_tracer) | ||
5960 | return 0; | ||
5031 | 5961 | ||
5032 | trace_create_file("trace_options", 0644, d_tracer, | 5962 | init_tracer_debugfs(&global_trace, d_tracer); |
5033 | NULL, &tracing_iter_fops); | ||
5034 | 5963 | ||
5035 | trace_create_file("tracing_cpumask", 0644, d_tracer, | 5964 | trace_create_file("tracing_cpumask", 0644, d_tracer, |
5036 | NULL, &tracing_cpumask_fops); | 5965 | &global_trace, &tracing_cpumask_fops); |
5037 | |||
5038 | trace_create_file("trace", 0644, d_tracer, | ||
5039 | (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); | ||
5040 | 5966 | ||
5041 | trace_create_file("available_tracers", 0444, d_tracer, | 5967 | trace_create_file("available_tracers", 0444, d_tracer, |
5042 | &global_trace, &show_traces_fops); | 5968 | &global_trace, &show_traces_fops); |
@@ -5055,44 +5981,17 @@ static __init int tracer_init_debugfs(void) | |||
5055 | trace_create_file("README", 0444, d_tracer, | 5981 | trace_create_file("README", 0444, d_tracer, |
5056 | NULL, &tracing_readme_fops); | 5982 | NULL, &tracing_readme_fops); |
5057 | 5983 | ||
5058 | trace_create_file("trace_pipe", 0444, d_tracer, | ||
5059 | (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops); | ||
5060 | |||
5061 | trace_create_file("buffer_size_kb", 0644, d_tracer, | ||
5062 | (void *) RING_BUFFER_ALL_CPUS, &tracing_entries_fops); | ||
5063 | |||
5064 | trace_create_file("buffer_total_size_kb", 0444, d_tracer, | ||
5065 | &global_trace, &tracing_total_entries_fops); | ||
5066 | |||
5067 | trace_create_file("free_buffer", 0644, d_tracer, | ||
5068 | &global_trace, &tracing_free_buffer_fops); | ||
5069 | |||
5070 | trace_create_file("trace_marker", 0220, d_tracer, | ||
5071 | NULL, &tracing_mark_fops); | ||
5072 | |||
5073 | trace_create_file("saved_cmdlines", 0444, d_tracer, | 5984 | trace_create_file("saved_cmdlines", 0444, d_tracer, |
5074 | NULL, &tracing_saved_cmdlines_fops); | 5985 | NULL, &tracing_saved_cmdlines_fops); |
5075 | 5986 | ||
5076 | trace_create_file("trace_clock", 0644, d_tracer, NULL, | ||
5077 | &trace_clock_fops); | ||
5078 | |||
5079 | trace_create_file("tracing_on", 0644, d_tracer, | ||
5080 | &global_trace, &rb_simple_fops); | ||
5081 | |||
5082 | #ifdef CONFIG_DYNAMIC_FTRACE | 5987 | #ifdef CONFIG_DYNAMIC_FTRACE |
5083 | trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, | 5988 | trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, |
5084 | &ftrace_update_tot_cnt, &tracing_dyn_info_fops); | 5989 | &ftrace_update_tot_cnt, &tracing_dyn_info_fops); |
5085 | #endif | 5990 | #endif |
5086 | 5991 | ||
5087 | #ifdef CONFIG_TRACER_SNAPSHOT | 5992 | create_trace_instances(d_tracer); |
5088 | trace_create_file("snapshot", 0644, d_tracer, | ||
5089 | (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops); | ||
5090 | #endif | ||
5091 | 5993 | ||
5092 | create_trace_options_dir(); | 5994 | create_trace_options_dir(&global_trace); |
5093 | |||
5094 | for_each_tracing_cpu(cpu) | ||
5095 | tracing_init_debugfs_percpu(cpu); | ||
5096 | 5995 | ||
5097 | return 0; | 5996 | return 0; |
5098 | } | 5997 | } |
@@ -5148,8 +6047,8 @@ void | |||
5148 | trace_printk_seq(struct trace_seq *s) | 6047 | trace_printk_seq(struct trace_seq *s) |
5149 | { | 6048 | { |
5150 | /* Probably should print a warning here. */ | 6049 | /* Probably should print a warning here. */ |
5151 | if (s->len >= 1000) | 6050 | if (s->len >= TRACE_MAX_PRINT) |
5152 | s->len = 1000; | 6051 | s->len = TRACE_MAX_PRINT; |
5153 | 6052 | ||
5154 | /* should be zero ended, but we are paranoid. */ | 6053 | /* should be zero ended, but we are paranoid. */ |
5155 | s->buffer[s->len] = 0; | 6054 | s->buffer[s->len] = 0; |
@@ -5162,46 +6061,43 @@ trace_printk_seq(struct trace_seq *s) | |||
5162 | void trace_init_global_iter(struct trace_iterator *iter) | 6061 | void trace_init_global_iter(struct trace_iterator *iter) |
5163 | { | 6062 | { |
5164 | iter->tr = &global_trace; | 6063 | iter->tr = &global_trace; |
5165 | iter->trace = current_trace; | 6064 | iter->trace = iter->tr->current_trace; |
5166 | iter->cpu_file = TRACE_PIPE_ALL_CPU; | 6065 | iter->cpu_file = RING_BUFFER_ALL_CPUS; |
6066 | iter->trace_buffer = &global_trace.trace_buffer; | ||
5167 | } | 6067 | } |
5168 | 6068 | ||
5169 | static void | 6069 | void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) |
5170 | __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) | ||
5171 | { | 6070 | { |
5172 | static arch_spinlock_t ftrace_dump_lock = | ||
5173 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; | ||
5174 | /* use static because iter can be a bit big for the stack */ | 6071 | /* use static because iter can be a bit big for the stack */ |
5175 | static struct trace_iterator iter; | 6072 | static struct trace_iterator iter; |
6073 | static atomic_t dump_running; | ||
5176 | unsigned int old_userobj; | 6074 | unsigned int old_userobj; |
5177 | static int dump_ran; | ||
5178 | unsigned long flags; | 6075 | unsigned long flags; |
5179 | int cnt = 0, cpu; | 6076 | int cnt = 0, cpu; |
5180 | 6077 | ||
5181 | /* only one dump */ | 6078 | /* Only allow one dump user at a time. */ |
5182 | local_irq_save(flags); | 6079 | if (atomic_inc_return(&dump_running) != 1) { |
5183 | arch_spin_lock(&ftrace_dump_lock); | 6080 | atomic_dec(&dump_running); |
5184 | if (dump_ran) | 6081 | return; |
5185 | goto out; | 6082 | } |
5186 | |||
5187 | dump_ran = 1; | ||
5188 | 6083 | ||
6084 | /* | ||
6085 | * Always turn off tracing when we dump. | ||
6086 | * We don't need to show trace output of what happens | ||
6087 | * between multiple crashes. | ||
6088 | * | ||
6089 | * If the user does a sysrq-z, then they can re-enable | ||
6090 | * tracing with echo 1 > tracing_on. | ||
6091 | */ | ||
5189 | tracing_off(); | 6092 | tracing_off(); |
5190 | 6093 | ||
5191 | /* Did function tracer already get disabled? */ | 6094 | local_irq_save(flags); |
5192 | if (ftrace_is_dead()) { | ||
5193 | printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n"); | ||
5194 | printk("# MAY BE MISSING FUNCTION EVENTS\n"); | ||
5195 | } | ||
5196 | |||
5197 | if (disable_tracing) | ||
5198 | ftrace_kill(); | ||
5199 | 6095 | ||
5200 | /* Simulate the iterator */ | 6096 | /* Simulate the iterator */ |
5201 | trace_init_global_iter(&iter); | 6097 | trace_init_global_iter(&iter); |
5202 | 6098 | ||
5203 | for_each_tracing_cpu(cpu) { | 6099 | for_each_tracing_cpu(cpu) { |
5204 | atomic_inc(&iter.tr->data[cpu]->disabled); | 6100 | atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled); |
5205 | } | 6101 | } |
5206 | 6102 | ||
5207 | old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ; | 6103 | old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ; |
@@ -5211,7 +6107,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) | |||
5211 | 6107 | ||
5212 | switch (oops_dump_mode) { | 6108 | switch (oops_dump_mode) { |
5213 | case DUMP_ALL: | 6109 | case DUMP_ALL: |
5214 | iter.cpu_file = TRACE_PIPE_ALL_CPU; | 6110 | iter.cpu_file = RING_BUFFER_ALL_CPUS; |
5215 | break; | 6111 | break; |
5216 | case DUMP_ORIG: | 6112 | case DUMP_ORIG: |
5217 | iter.cpu_file = raw_smp_processor_id(); | 6113 | iter.cpu_file = raw_smp_processor_id(); |
@@ -5220,11 +6116,17 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) | |||
5220 | goto out_enable; | 6116 | goto out_enable; |
5221 | default: | 6117 | default: |
5222 | printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n"); | 6118 | printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n"); |
5223 | iter.cpu_file = TRACE_PIPE_ALL_CPU; | 6119 | iter.cpu_file = RING_BUFFER_ALL_CPUS; |
5224 | } | 6120 | } |
5225 | 6121 | ||
5226 | printk(KERN_TRACE "Dumping ftrace buffer:\n"); | 6122 | printk(KERN_TRACE "Dumping ftrace buffer:\n"); |
5227 | 6123 | ||
6124 | /* Did function tracer already get disabled? */ | ||
6125 | if (ftrace_is_dead()) { | ||
6126 | printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n"); | ||
6127 | printk("# MAY BE MISSING FUNCTION EVENTS\n"); | ||
6128 | } | ||
6129 | |||
5228 | /* | 6130 | /* |
5229 | * We need to stop all tracing on all CPUS to read the | 6131 | * We need to stop all tracing on all CPUS to read the |
5230 | * the next buffer. This is a bit expensive, but is | 6132 | * the next buffer. This is a bit expensive, but is |
@@ -5264,33 +6166,19 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) | |||
5264 | printk(KERN_TRACE "---------------------------------\n"); | 6166 | printk(KERN_TRACE "---------------------------------\n"); |
5265 | 6167 | ||
5266 | out_enable: | 6168 | out_enable: |
5267 | /* Re-enable tracing if requested */ | 6169 | trace_flags |= old_userobj; |
5268 | if (!disable_tracing) { | ||
5269 | trace_flags |= old_userobj; | ||
5270 | 6170 | ||
5271 | for_each_tracing_cpu(cpu) { | 6171 | for_each_tracing_cpu(cpu) { |
5272 | atomic_dec(&iter.tr->data[cpu]->disabled); | 6172 | atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); |
5273 | } | ||
5274 | tracing_on(); | ||
5275 | } | 6173 | } |
5276 | 6174 | atomic_dec(&dump_running); | |
5277 | out: | ||
5278 | arch_spin_unlock(&ftrace_dump_lock); | ||
5279 | local_irq_restore(flags); | 6175 | local_irq_restore(flags); |
5280 | } | 6176 | } |
5281 | |||
5282 | /* By default: disable tracing after the dump */ | ||
5283 | void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) | ||
5284 | { | ||
5285 | __ftrace_dump(true, oops_dump_mode); | ||
5286 | } | ||
5287 | EXPORT_SYMBOL_GPL(ftrace_dump); | 6177 | EXPORT_SYMBOL_GPL(ftrace_dump); |
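The rework above drops the arch spinlock plus dump_ran flag in favor of a single atomic counter, which also lifts the old one-dump-per-boot limit. The locking idiom is worth isolating; a distilled sketch of it (stand-alone, not the kernel function itself):

	#include <linux/atomic.h>

	static atomic_t dump_running;

	static void one_shot_dump(void)
	{
		/*
		 * The first caller sees the counter go to 1 and proceeds;
		 * concurrent callers see a larger value, undo their
		 * increment and back off without blocking.
		 */
		if (atomic_inc_return(&dump_running) != 1) {
			atomic_dec(&dump_running);
			return;
		}

		/* ... the expensive dump work goes here ... */

		/* Allow the next oops/sysrq to dump again. */
		atomic_dec(&dump_running);
	}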
5288 | 6178 | ||
5289 | __init static int tracer_alloc_buffers(void) | 6179 | __init static int tracer_alloc_buffers(void) |
5290 | { | 6180 | { |
5291 | int ring_buf_size; | 6181 | int ring_buf_size; |
5292 | enum ring_buffer_flags rb_flags; | ||
5293 | int i; | ||
5294 | int ret = -ENOMEM; | 6182 | int ret = -ENOMEM; |
5295 | 6183 | ||
5296 | 6184 | ||
@@ -5311,49 +6199,27 @@ __init static int tracer_alloc_buffers(void) | |||
5311 | else | 6199 | else |
5312 | ring_buf_size = 1; | 6200 | ring_buf_size = 1; |
5313 | 6201 | ||
5314 | rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0; | ||
5315 | |||
5316 | cpumask_copy(tracing_buffer_mask, cpu_possible_mask); | 6202 | cpumask_copy(tracing_buffer_mask, cpu_possible_mask); |
5317 | cpumask_copy(tracing_cpumask, cpu_all_mask); | 6203 | cpumask_copy(tracing_cpumask, cpu_all_mask); |
5318 | 6204 | ||
6205 | raw_spin_lock_init(&global_trace.start_lock); | ||
6206 | |||
5319 | /* TODO: make the number of buffers hot pluggable with CPUS */ | 6207 | /* TODO: make the number of buffers hot pluggable with CPUS */ |
5320 | global_trace.buffer = ring_buffer_alloc(ring_buf_size, rb_flags); | 6208 | if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { |
5321 | if (!global_trace.buffer) { | ||
5322 | printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); | 6209 | printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); |
5323 | WARN_ON(1); | 6210 | WARN_ON(1); |
5324 | goto out_free_cpumask; | 6211 | goto out_free_cpumask; |
5325 | } | 6212 | } |
6213 | |||
5326 | if (global_trace.buffer_disabled) | 6214 | if (global_trace.buffer_disabled) |
5327 | tracing_off(); | 6215 | tracing_off(); |
5328 | 6216 | ||
5329 | |||
5330 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
5331 | max_tr.buffer = ring_buffer_alloc(1, rb_flags); | ||
5332 | if (!max_tr.buffer) { | ||
5333 | printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); | ||
5334 | WARN_ON(1); | ||
5335 | ring_buffer_free(global_trace.buffer); | ||
5336 | goto out_free_cpumask; | ||
5337 | } | ||
5338 | #endif | ||
5339 | |||
5340 | /* Allocate the first page for all buffers */ | ||
5341 | for_each_tracing_cpu(i) { | ||
5342 | global_trace.data[i] = &per_cpu(global_trace_cpu, i); | ||
5343 | max_tr.data[i] = &per_cpu(max_tr_data, i); | ||
5344 | } | ||
5345 | |||
5346 | set_buffer_entries(&global_trace, | ||
5347 | ring_buffer_size(global_trace.buffer, 0)); | ||
5348 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
5349 | set_buffer_entries(&max_tr, 1); | ||
5350 | #endif | ||
5351 | |||
5352 | trace_init_cmdlines(); | 6217 | trace_init_cmdlines(); |
5353 | init_irq_work(&trace_work_wakeup, trace_wake_up); | ||
5354 | 6218 | ||
5355 | register_tracer(&nop_trace); | 6219 | register_tracer(&nop_trace); |
5356 | 6220 | ||
6221 | global_trace.current_trace = &nop_trace; | ||
6222 | |||
5357 | /* All seems OK, enable tracing */ | 6223 | /* All seems OK, enable tracing */ |
5358 | tracing_disabled = 0; | 6224 | tracing_disabled = 0; |
5359 | 6225 | ||
@@ -5362,16 +6228,32 @@ __init static int tracer_alloc_buffers(void) | |||
5362 | 6228 | ||
5363 | register_die_notifier(&trace_die_notifier); | 6229 | register_die_notifier(&trace_die_notifier); |
5364 | 6230 | ||
6231 | global_trace.flags = TRACE_ARRAY_FL_GLOBAL; | ||
6232 | |||
6233 | /* Holder for file callbacks */ | ||
6234 | global_trace.trace_cpu.cpu = RING_BUFFER_ALL_CPUS; | ||
6235 | global_trace.trace_cpu.tr = &global_trace; | ||
6236 | |||
6237 | INIT_LIST_HEAD(&global_trace.systems); | ||
6238 | INIT_LIST_HEAD(&global_trace.events); | ||
6239 | list_add(&global_trace.list, &ftrace_trace_arrays); | ||
6240 | |||
5365 | while (trace_boot_options) { | 6241 | while (trace_boot_options) { |
5366 | char *option; | 6242 | char *option; |
5367 | 6243 | ||
5368 | option = strsep(&trace_boot_options, ","); | 6244 | option = strsep(&trace_boot_options, ","); |
5369 | trace_set_options(option); | 6245 | trace_set_options(&global_trace, option); |
5370 | } | 6246 | } |
5371 | 6247 | ||
6248 | register_snapshot_cmd(); | ||
6249 | |||
5372 | return 0; | 6250 | return 0; |
5373 | 6251 | ||
5374 | out_free_cpumask: | 6252 | out_free_cpumask: |
6253 | free_percpu(global_trace.trace_buffer.data); | ||
6254 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
6255 | free_percpu(global_trace.max_buffer.data); | ||
6256 | #endif | ||
5375 | free_cpumask_var(tracing_cpumask); | 6257 | free_cpumask_var(tracing_cpumask); |
5376 | out_free_buffer_mask: | 6258 | out_free_buffer_mask: |
5377 | free_cpumask_var(tracing_buffer_mask); | 6259 | free_cpumask_var(tracing_buffer_mask); |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2081971367ea..9e014582e763 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -13,6 +13,11 @@ | |||
13 | #include <linux/trace_seq.h> | 13 | #include <linux/trace_seq.h> |
14 | #include <linux/ftrace_event.h> | 14 | #include <linux/ftrace_event.h> |
15 | 15 | ||
16 | #ifdef CONFIG_FTRACE_SYSCALLS | ||
17 | #include <asm/unistd.h> /* For NR_SYSCALLS */ | ||
18 | #include <asm/syscall.h> /* some archs define it here */ | ||
19 | #endif | ||
20 | |||
16 | enum trace_type { | 21 | enum trace_type { |
17 | __TRACE_FIRST_TYPE = 0, | 22 | __TRACE_FIRST_TYPE = 0, |
18 | 23 | ||
@@ -29,6 +34,7 @@ enum trace_type { | |||
29 | TRACE_GRAPH_ENT, | 34 | TRACE_GRAPH_ENT, |
30 | TRACE_USER_STACK, | 35 | TRACE_USER_STACK, |
31 | TRACE_BLK, | 36 | TRACE_BLK, |
37 | TRACE_BPUTS, | ||
32 | 38 | ||
33 | __TRACE_LAST_TYPE, | 39 | __TRACE_LAST_TYPE, |
34 | }; | 40 | }; |
@@ -127,12 +133,21 @@ enum trace_flag_type { | |||
127 | 133 | ||
128 | #define TRACE_BUF_SIZE 1024 | 134 | #define TRACE_BUF_SIZE 1024 |
129 | 135 | ||
136 | struct trace_array; | ||
137 | |||
138 | struct trace_cpu { | ||
139 | struct trace_array *tr; | ||
140 | struct dentry *dir; | ||
141 | int cpu; | ||
142 | }; | ||
143 | |||
130 | /* | 144 | /* |
131 | * The CPU trace array - it consists of thousands of trace entries | 145 | * The CPU trace array - it consists of thousands of trace entries |
132 | * plus some other descriptor data: (for example which task started | 146 | * plus some other descriptor data: (for example which task started |
133 | * the trace, etc.) | 147 | * the trace, etc.) |
134 | */ | 148 | */ |
135 | struct trace_array_cpu { | 149 | struct trace_array_cpu { |
150 | struct trace_cpu trace_cpu; | ||
136 | atomic_t disabled; | 151 | atomic_t disabled; |
137 | void *buffer_page; /* ring buffer spare */ | 152 | void *buffer_page; /* ring buffer spare */ |
138 | 153 | ||
@@ -151,20 +166,83 @@ struct trace_array_cpu { | |||
151 | char comm[TASK_COMM_LEN]; | 166 | char comm[TASK_COMM_LEN]; |
152 | }; | 167 | }; |
153 | 168 | ||
169 | struct tracer; | ||
170 | |||
171 | struct trace_buffer { | ||
172 | struct trace_array *tr; | ||
173 | struct ring_buffer *buffer; | ||
174 | struct trace_array_cpu __percpu *data; | ||
175 | cycle_t time_start; | ||
176 | int cpu; | ||
177 | }; | ||
178 | |||
154 | /* | 179 | /* |
155 | * The trace array - an array of per-CPU trace arrays. This is the | 180 | * The trace array - an array of per-CPU trace arrays. This is the |
156 | * highest level data structure that individual tracers deal with. | 181 | * highest level data structure that individual tracers deal with. |
157 | * They have on/off state as well: | 182 | * They have on/off state as well: |
158 | */ | 183 | */ |
159 | struct trace_array { | 184 | struct trace_array { |
160 | struct ring_buffer *buffer; | 185 | struct list_head list; |
161 | int cpu; | 186 | char *name; |
187 | struct trace_buffer trace_buffer; | ||
188 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
189 | /* | ||
190 | * The max_buffer is used to snapshot the trace when a maximum | ||
191 | * latency is reached, or when the user initiates a snapshot. | ||
192 | * Some tracers will use this to store a maximum trace while | ||
193 | * they continue examining live traces. | ||
194 | * | ||
195 | * The buffers for the max_buffer are set up the same as the trace_buffer. | ||
196 | * When a snapshot is taken, the buffer of the max_buffer is swapped | ||
197 | * with the buffer of the trace_buffer and the buffers are reset for | ||
198 | * the trace_buffer so the tracing can continue. | ||
199 | */ | ||
200 | struct trace_buffer max_buffer; | ||
201 | bool allocated_snapshot; | ||
202 | #endif | ||
162 | int buffer_disabled; | 203 | int buffer_disabled; |
163 | cycle_t time_start; | 204 | struct trace_cpu trace_cpu; /* place holder */ |
205 | #ifdef CONFIG_FTRACE_SYSCALLS | ||
206 | int sys_refcount_enter; | ||
207 | int sys_refcount_exit; | ||
208 | DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); | ||
209 | DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); | ||
210 | #endif | ||
211 | int stop_count; | ||
212 | int clock_id; | ||
213 | struct tracer *current_trace; | ||
214 | unsigned int flags; | ||
215 | raw_spinlock_t start_lock; | ||
216 | struct dentry *dir; | ||
217 | struct dentry *options; | ||
218 | struct dentry *percpu_dir; | ||
219 | struct dentry *event_dir; | ||
220 | struct list_head systems; | ||
221 | struct list_head events; | ||
164 | struct task_struct *waiter; | 222 | struct task_struct *waiter; |
165 | struct trace_array_cpu *data[NR_CPUS]; | 223 | int ref; |
166 | }; | 224 | }; |
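The max_buffer comment above describes a pointer swap rather than a copy. Stripped of the bookkeeping the real update path performs (timestamps, buffer resets, cpumask handling), the core of the swap is no more than this hypothetical helper, sketched as if it lived inside trace.c:

	static void snapshot_swap_buffers(struct trace_array *tr)
	{
		struct ring_buffer *tmp = tr->trace_buffer.buffer;

		/* The live data is preserved in max_buffer; tracing continues
		 * into what used to be the max buffer (the real code resets
		 * it first). */
		tr->trace_buffer.buffer = tr->max_buffer.buffer;
		tr->max_buffer.buffer = tmp;
	}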
167 | 225 | ||
226 | enum { | ||
227 | TRACE_ARRAY_FL_GLOBAL = (1 << 0) | ||
228 | }; | ||
229 | |||
230 | extern struct list_head ftrace_trace_arrays; | ||
231 | |||
232 | /* | ||
233 | * The global tracer (top) should be the first trace array added, | ||
234 | * but we check the flag anyway. | ||
235 | */ | ||
236 | static inline struct trace_array *top_trace_array(void) | ||
237 | { | ||
238 | struct trace_array *tr; | ||
239 | |||
240 | tr = list_entry(ftrace_trace_arrays.prev, | ||
241 | typeof(*tr), list); | ||
242 | WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL)); | ||
243 | return tr; | ||
244 | } | ||
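With instances there can now be several trace_array structures, all linked on ftrace_trace_arrays. A sketch of how code in trace.c walks them, mirroring the lookup loops in new_instance_create() and instance_delete(); the helper name is made up and trace_types_lock is the existing mutex in trace.c:

	/* Caller must hold trace_types_lock, as the instance code does. */
	static struct trace_array *find_trace_array(const char *name)
	{
		struct trace_array *tr;

		list_for_each_entry(tr, &ftrace_trace_arrays, list) {
			if (tr->name && strcmp(tr->name, name) == 0)
				return tr;
		}
		return NULL;
	}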
245 | |||
168 | #define FTRACE_CMP_TYPE(var, type) \ | 246 | #define FTRACE_CMP_TYPE(var, type) \ |
169 | __builtin_types_compatible_p(typeof(var), type *) | 247 | __builtin_types_compatible_p(typeof(var), type *) |
170 | 248 | ||
@@ -200,6 +278,7 @@ extern void __ftrace_bad_type(void); | |||
200 | IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ | 278 | IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ |
201 | IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ | 279 | IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ |
202 | IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ | 280 | IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ |
281 | IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS); \ | ||
203 | IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ | 282 | IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ |
204 | TRACE_MMIO_RW); \ | 283 | TRACE_MMIO_RW); \ |
205 | IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ | 284 | IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ |
@@ -289,9 +368,10 @@ struct tracer { | |||
289 | struct tracer *next; | 368 | struct tracer *next; |
290 | struct tracer_flags *flags; | 369 | struct tracer_flags *flags; |
291 | bool print_max; | 370 | bool print_max; |
292 | bool use_max_tr; | ||
293 | bool allocated_snapshot; | ||
294 | bool enabled; | 371 | bool enabled; |
372 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
373 | bool use_max_tr; | ||
374 | #endif | ||
295 | }; | 375 | }; |
296 | 376 | ||
297 | 377 | ||
@@ -427,8 +507,6 @@ static __always_inline void trace_clear_recursion(int bit) | |||
427 | current->trace_recursion = val; | 507 | current->trace_recursion = val; |
428 | } | 508 | } |
429 | 509 | ||
430 | #define TRACE_PIPE_ALL_CPU -1 | ||
431 | |||
432 | static inline struct ring_buffer_iter * | 510 | static inline struct ring_buffer_iter * |
433 | trace_buffer_iter(struct trace_iterator *iter, int cpu) | 511 | trace_buffer_iter(struct trace_iterator *iter, int cpu) |
434 | { | 512 | { |
@@ -439,10 +517,10 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) | |||
439 | 517 | ||
440 | int tracer_init(struct tracer *t, struct trace_array *tr); | 518 | int tracer_init(struct tracer *t, struct trace_array *tr); |
441 | int tracing_is_enabled(void); | 519 | int tracing_is_enabled(void); |
442 | void tracing_reset(struct trace_array *tr, int cpu); | 520 | void tracing_reset(struct trace_buffer *buf, int cpu); |
443 | void tracing_reset_online_cpus(struct trace_array *tr); | 521 | void tracing_reset_online_cpus(struct trace_buffer *buf); |
444 | void tracing_reset_current(int cpu); | 522 | void tracing_reset_current(int cpu); |
445 | void tracing_reset_current_online_cpus(void); | 523 | void tracing_reset_all_online_cpus(void); |
446 | int tracing_open_generic(struct inode *inode, struct file *filp); | 524 | int tracing_open_generic(struct inode *inode, struct file *filp); |
447 | struct dentry *trace_create_file(const char *name, | 525 | struct dentry *trace_create_file(const char *name, |
448 | umode_t mode, | 526 | umode_t mode, |
@@ -450,6 +528,7 @@ struct dentry *trace_create_file(const char *name, | |||
450 | void *data, | 528 | void *data, |
451 | const struct file_operations *fops); | 529 | const struct file_operations *fops); |
452 | 530 | ||
531 | struct dentry *tracing_init_dentry_tr(struct trace_array *tr); | ||
453 | struct dentry *tracing_init_dentry(void); | 532 | struct dentry *tracing_init_dentry(void); |
454 | 533 | ||
455 | struct ring_buffer_event; | 534 | struct ring_buffer_event; |
@@ -583,7 +662,7 @@ extern int DYN_FTRACE_TEST_NAME(void); | |||
583 | #define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2 | 662 | #define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2 |
584 | extern int DYN_FTRACE_TEST_NAME2(void); | 663 | extern int DYN_FTRACE_TEST_NAME2(void); |
585 | 664 | ||
586 | extern int ring_buffer_expanded; | 665 | extern bool ring_buffer_expanded; |
587 | extern bool tracing_selftest_disabled; | 666 | extern bool tracing_selftest_disabled; |
588 | DECLARE_PER_CPU(int, ftrace_cpu_disabled); | 667 | DECLARE_PER_CPU(int, ftrace_cpu_disabled); |
589 | 668 | ||
@@ -619,6 +698,8 @@ trace_array_vprintk(struct trace_array *tr, | |||
619 | unsigned long ip, const char *fmt, va_list args); | 698 | unsigned long ip, const char *fmt, va_list args); |
620 | int trace_array_printk(struct trace_array *tr, | 699 | int trace_array_printk(struct trace_array *tr, |
621 | unsigned long ip, const char *fmt, ...); | 700 | unsigned long ip, const char *fmt, ...); |
701 | int trace_array_printk_buf(struct ring_buffer *buffer, | ||
702 | unsigned long ip, const char *fmt, ...); | ||
622 | void trace_printk_seq(struct trace_seq *s); | 703 | void trace_printk_seq(struct trace_seq *s); |
623 | enum print_line_t print_trace_line(struct trace_iterator *iter); | 704 | enum print_line_t print_trace_line(struct trace_iterator *iter); |
624 | 705 | ||
@@ -786,6 +867,7 @@ enum trace_iterator_flags { | |||
786 | TRACE_ITER_STOP_ON_FREE = 0x400000, | 867 | TRACE_ITER_STOP_ON_FREE = 0x400000, |
787 | TRACE_ITER_IRQ_INFO = 0x800000, | 868 | TRACE_ITER_IRQ_INFO = 0x800000, |
788 | TRACE_ITER_MARKERS = 0x1000000, | 869 | TRACE_ITER_MARKERS = 0x1000000, |
870 | TRACE_ITER_FUNCTION = 0x2000000, | ||
789 | }; | 871 | }; |
790 | 872 | ||
791 | /* | 873 | /* |
@@ -832,8 +914,8 @@ enum { | |||
832 | 914 | ||
833 | struct ftrace_event_field { | 915 | struct ftrace_event_field { |
834 | struct list_head link; | 916 | struct list_head link; |
835 | char *name; | 917 | const char *name; |
836 | char *type; | 918 | const char *type; |
837 | int filter_type; | 919 | int filter_type; |
838 | int offset; | 920 | int offset; |
839 | int size; | 921 | int size; |
@@ -851,12 +933,19 @@ struct event_filter { | |||
851 | struct event_subsystem { | 933 | struct event_subsystem { |
852 | struct list_head list; | 934 | struct list_head list; |
853 | const char *name; | 935 | const char *name; |
854 | struct dentry *entry; | ||
855 | struct event_filter *filter; | 936 | struct event_filter *filter; |
856 | int nr_events; | ||
857 | int ref_count; | 937 | int ref_count; |
858 | }; | 938 | }; |
859 | 939 | ||
940 | struct ftrace_subsystem_dir { | ||
941 | struct list_head list; | ||
942 | struct event_subsystem *subsystem; | ||
943 | struct trace_array *tr; | ||
944 | struct dentry *entry; | ||
945 | int ref_count; | ||
946 | int nr_events; | ||
947 | }; | ||
948 | |||
860 | #define FILTER_PRED_INVALID ((unsigned short)-1) | 949 | #define FILTER_PRED_INVALID ((unsigned short)-1) |
861 | #define FILTER_PRED_IS_RIGHT (1 << 15) | 950 | #define FILTER_PRED_IS_RIGHT (1 << 15) |
862 | #define FILTER_PRED_FOLD (1 << 15) | 951 | #define FILTER_PRED_FOLD (1 << 15) |
@@ -906,22 +995,20 @@ struct filter_pred { | |||
906 | unsigned short right; | 995 | unsigned short right; |
907 | }; | 996 | }; |
908 | 997 | ||
909 | extern struct list_head ftrace_common_fields; | ||
910 | |||
911 | extern enum regex_type | 998 | extern enum regex_type |
912 | filter_parse_regex(char *buff, int len, char **search, int *not); | 999 | filter_parse_regex(char *buff, int len, char **search, int *not); |
913 | extern void print_event_filter(struct ftrace_event_call *call, | 1000 | extern void print_event_filter(struct ftrace_event_call *call, |
914 | struct trace_seq *s); | 1001 | struct trace_seq *s); |
915 | extern int apply_event_filter(struct ftrace_event_call *call, | 1002 | extern int apply_event_filter(struct ftrace_event_call *call, |
916 | char *filter_string); | 1003 | char *filter_string); |
917 | extern int apply_subsystem_event_filter(struct event_subsystem *system, | 1004 | extern int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir, |
918 | char *filter_string); | 1005 | char *filter_string); |
919 | extern void print_subsystem_event_filter(struct event_subsystem *system, | 1006 | extern void print_subsystem_event_filter(struct event_subsystem *system, |
920 | struct trace_seq *s); | 1007 | struct trace_seq *s); |
921 | extern int filter_assign_type(const char *type); | 1008 | extern int filter_assign_type(const char *type); |
922 | 1009 | ||
923 | struct list_head * | 1010 | struct ftrace_event_field * |
924 | trace_get_fields(struct ftrace_event_call *event_call); | 1011 | trace_find_event_field(struct ftrace_event_call *call, char *name); |
925 | 1012 | ||
926 | static inline int | 1013 | static inline int |
927 | filter_check_discard(struct ftrace_event_call *call, void *rec, | 1014 | filter_check_discard(struct ftrace_event_call *call, void *rec, |
@@ -938,6 +1025,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, | |||
938 | } | 1025 | } |
939 | 1026 | ||
940 | extern void trace_event_enable_cmd_record(bool enable); | 1027 | extern void trace_event_enable_cmd_record(bool enable); |
1028 | extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); | ||
1029 | extern int event_trace_del_tracer(struct trace_array *tr); | ||
941 | 1030 | ||
942 | extern struct mutex event_mutex; | 1031 | extern struct mutex event_mutex; |
943 | extern struct list_head ftrace_events; | 1032 | extern struct list_head ftrace_events; |
@@ -948,7 +1037,18 @@ extern const char *__stop___trace_bprintk_fmt[]; | |||
948 | void trace_printk_init_buffers(void); | 1037 | void trace_printk_init_buffers(void); |
949 | void trace_printk_start_comm(void); | 1038 | void trace_printk_start_comm(void); |
950 | int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); | 1039 | int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); |
951 | int set_tracer_flag(unsigned int mask, int enabled); | 1040 | int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); |
1041 | |||
1042 | /* | ||
1043 | * Normal trace_printk() and friends allocate special buffers | ||
1044 | * to do the manipulation, as well as save the print formats | ||
1045 | * into sections to display. But the trace infrastructure wants | ||
1046 | * to use these without the added overhead at the price of being | ||
1047 | * a bit slower (used mainly for warnings, where we don't care | ||
1048 | * about performance). The internal_trace_puts() is for such | ||
1049 | * a purpose. | ||
1050 | */ | ||
1051 | #define internal_trace_puts(str) __trace_puts(_THIS_IP_, str, strlen(str)) | ||
952 | 1052 | ||
953 | #undef FTRACE_ENTRY | 1053 | #undef FTRACE_ENTRY |
954 | #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ | 1054 | #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ |
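A quick userspace sketch of the idea behind internal_trace_puts() above (illustrative only: my_trace_puts() is a made-up stand-in for __trace_puts(), not a kernel API). For string literals, strlen() folds to a compile-time constant, so internal callers pay none of trace_printk()'s format-handling cost:

#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in for __trace_puts(ip, str, len); a real ring-buffer
 * write would simply copy 'len' bytes of 'str'. */
static int my_trace_puts(unsigned long ip, const char *str, int len)
{
	return printf("[%#lx] %.*s", ip, len, str);
}

/* Mirrors internal_trace_puts(): no format string to parse, and strlen()
 * on a literal is evaluated at compile time. */
#define internal_puts(str) my_trace_puts(0x1000UL, str, (int)strlen(str))

int main(void)
{
	internal_puts("tracer warning: something odd happened\n");
	return 0;
}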
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 95e96842ed29..d594da0dc03c 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c | |||
@@ -32,6 +32,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) | |||
32 | { | 32 | { |
33 | struct ftrace_event_call *call = &event_branch; | 33 | struct ftrace_event_call *call = &event_branch; |
34 | struct trace_array *tr = branch_tracer; | 34 | struct trace_array *tr = branch_tracer; |
35 | struct trace_array_cpu *data; | ||
35 | struct ring_buffer_event *event; | 36 | struct ring_buffer_event *event; |
36 | struct trace_branch *entry; | 37 | struct trace_branch *entry; |
37 | struct ring_buffer *buffer; | 38 | struct ring_buffer *buffer; |
@@ -51,11 +52,12 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) | |||
51 | 52 | ||
52 | local_irq_save(flags); | 53 | local_irq_save(flags); |
53 | cpu = raw_smp_processor_id(); | 54 | cpu = raw_smp_processor_id(); |
54 | if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) | 55 | data = per_cpu_ptr(tr->trace_buffer.data, cpu); |
56 | if (atomic_inc_return(&data->disabled) != 1) | ||
55 | goto out; | 57 | goto out; |
56 | 58 | ||
57 | pc = preempt_count(); | 59 | pc = preempt_count(); |
58 | buffer = tr->buffer; | 60 | buffer = tr->trace_buffer.buffer; |
59 | event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH, | 61 | event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH, |
60 | sizeof(*entry), flags, pc); | 62 | sizeof(*entry), flags, pc); |
61 | if (!event) | 63 | if (!event) |
@@ -80,7 +82,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) | |||
80 | __buffer_unlock_commit(buffer, event); | 82 | __buffer_unlock_commit(buffer, event); |
81 | 83 | ||
82 | out: | 84 | out: |
83 | atomic_dec(&tr->data[cpu]->disabled); | 85 | atomic_dec(&data->disabled); |
84 | local_irq_restore(flags); | 86 | local_irq_restore(flags); |
85 | } | 87 | } |
86 | 88 | ||
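The per-cpu disabled counter used above is a recursion guard: the probe bails out unless it is the only active user on that CPU. A tiny userspace model of the pattern (a plain int stands in for the per-cpu atomic; this is not kernel code):

#include <stdio.h>

static int disabled;	/* models per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled */

static void probe(int depth)
{
	if (++disabled != 1)
		goto out;	/* already inside the probe: drop the nested hit */

	printf("recording branch event (depth %d)\n", depth);
	if (depth < 1)
		probe(depth + 1);	/* simulated recursion is silently skipped */
out:
	disabled--;
}

int main(void)
{
	probe(0);
	probe(0);	/* works again once the counter is back to zero */
	return 0;
}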
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index aa8f5f48dae6..26dc348332b7 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c | |||
@@ -57,6 +57,16 @@ u64 notrace trace_clock(void) | |||
57 | return local_clock(); | 57 | return local_clock(); |
58 | } | 58 | } |
59 | 59 | ||
60 | /* | ||
61 | * trace_clock_jiffies(): Simply use jiffies as a clock counter. | ||
62 | */ | ||
63 | u64 notrace trace_clock_jiffies(void) | ||
64 | { | ||
65 | u64 jiffy = jiffies - INITIAL_JIFFIES; | ||
66 | |||
67 | /* Return nsecs */ | ||
68 | return (u64)jiffies_to_usecs(jiffy) * 1000ULL; | ||
69 | } | ||
60 | 70 | ||
61 | /* | 71 | /* |
62 | * trace_clock_global(): special globally coherent trace clock | 72 | * trace_clock_global(): special globally coherent trace clock |
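The new trace_clock_jiffies() trades resolution for cost: timestamps only advance once per jiffy, but reading the clock is as cheap as reading a counter. A userspace sketch of the conversion it performs (HZ = 250 is an assumption here; the real jiffies_to_usecs() also copes with HZ values that do not divide 1000000 evenly):

#include <stdio.h>
#include <stdint.h>

#define HZ 250U		/* assumed tick rate: 4 ms per jiffy */

/* Same shape as (u64)jiffies_to_usecs(jiffy) * 1000ULL in the patch. */
static uint64_t jiffies_to_nsecs(uint64_t jiffy)
{
	return jiffy * (1000000U / HZ) * 1000ULL;
}

int main(void)
{
	for (uint64_t j = 0; j < 3; j++)
		printf("jiffy %llu -> %llu ns\n",
		       (unsigned long long)j,
		       (unsigned long long)jiffies_to_nsecs(j));
	return 0;
}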
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 4108e1250ca2..e2d027ac66a2 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h | |||
@@ -223,8 +223,8 @@ FTRACE_ENTRY(bprint, bprint_entry, | |||
223 | __dynamic_array( u32, buf ) | 223 | __dynamic_array( u32, buf ) |
224 | ), | 224 | ), |
225 | 225 | ||
226 | F_printk("%08lx fmt:%p", | 226 | F_printk("%pf: %s", |
227 | __entry->ip, __entry->fmt), | 227 | (void *)__entry->ip, __entry->fmt), |
228 | 228 | ||
229 | FILTER_OTHER | 229 | FILTER_OTHER |
230 | ); | 230 | ); |
@@ -238,8 +238,23 @@ FTRACE_ENTRY(print, print_entry, | |||
238 | __dynamic_array( char, buf ) | 238 | __dynamic_array( char, buf ) |
239 | ), | 239 | ), |
240 | 240 | ||
241 | F_printk("%08lx %s", | 241 | F_printk("%pf: %s", |
242 | __entry->ip, __entry->buf), | 242 | (void *)__entry->ip, __entry->buf), |
243 | |||
244 | FILTER_OTHER | ||
245 | ); | ||
246 | |||
247 | FTRACE_ENTRY(bputs, bputs_entry, | ||
248 | |||
249 | TRACE_BPUTS, | ||
250 | |||
251 | F_STRUCT( | ||
252 | __field( unsigned long, ip ) | ||
253 | __field( const char *, str ) | ||
254 | ), | ||
255 | |||
256 | F_printk("%pf: %s", | ||
257 | (void *)__entry->ip, __entry->str), | ||
243 | 258 | ||
244 | FILTER_OTHER | 259 | FILTER_OTHER |
245 | ); | 260 | ); |
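The bputs entry above differs from print_entry in what lands in the ring buffer: print_entry copies the text (a dynamic array), while bputs_entry records only a pointer, which works because trace_puts()-style callers pass strings that never go away. A userspace sketch of the two layouts (struct names are illustrative, not the kernel's definitions):

#include <stdio.h>
#include <string.h>

struct print_rec {		/* ~ print_entry: text copied into the record */
	unsigned long ip;
	char buf[64];
};

struct bputs_rec {		/* ~ bputs_entry: only a pointer is recorded */
	unsigned long ip;
	const char *str;
};

int main(void)
{
	struct print_rec p = { .ip = 0x1000 };
	struct bputs_rec b = { .ip = 0x1000, .str = "constant message\n" };

	strncpy(p.buf, "formatted message\n", sizeof(p.buf) - 1);

	printf("print_rec payload: %zu bytes copied\n", strlen(p.buf));
	printf("bputs_rec payload: %zu bytes (one pointer)\n", sizeof(b.str));
	return 0;
}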
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 57e9b284250c..53582e982e51 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -34,9 +34,27 @@ char event_storage[EVENT_STORAGE_SIZE]; | |||
34 | EXPORT_SYMBOL_GPL(event_storage); | 34 | EXPORT_SYMBOL_GPL(event_storage); |
35 | 35 | ||
36 | LIST_HEAD(ftrace_events); | 36 | LIST_HEAD(ftrace_events); |
37 | LIST_HEAD(ftrace_common_fields); | 37 | static LIST_HEAD(ftrace_common_fields); |
38 | 38 | ||
39 | struct list_head * | 39 | #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO) |
40 | |||
41 | static struct kmem_cache *field_cachep; | ||
42 | static struct kmem_cache *file_cachep; | ||
43 | |||
44 | /* Double loops, do not use break, only gotos work */ | ||
45 | #define do_for_each_event_file(tr, file) \ | ||
46 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ | ||
47 | list_for_each_entry(file, &tr->events, list) | ||
48 | |||
49 | #define do_for_each_event_file_safe(tr, file) \ | ||
50 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ | ||
51 | struct ftrace_event_file *___n; \ | ||
52 | list_for_each_entry_safe(file, ___n, &tr->events, list) | ||
53 | |||
54 | #define while_for_each_event_file() \ | ||
55 | } | ||
56 | |||
57 | static struct list_head * | ||
40 | trace_get_fields(struct ftrace_event_call *event_call) | 58 | trace_get_fields(struct ftrace_event_call *event_call) |
41 | { | 59 | { |
42 | if (!event_call->class->get_fields) | 60 | if (!event_call->class->get_fields) |
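The do_for_each_event_file() helpers above expand to two nested loops, which is what the comment warns about: a break only leaves the inner loop, while bailing out of both needs a goto. The later event_remove() hunks rely on exactly that to skip to the next trace_array. A userspace model of the pattern (integer indices stand in for the list walks):

#include <stdio.h>

#define NTR	2
#define NFILES	3

#define do_for_each_file(t, f)			\
	for (t = 0; t < NTR; t++) {		\
		for (f = 0; f < NFILES; f++)

#define while_for_each_file()			\
	}

int main(void)
{
	int tr, file;

	do_for_each_file(tr, file) {
		if (file == 1) {
			printf("tr %d: match at file %d, move to next tr\n", tr, file);
			break;		/* leaves only the inner loop */
		}
		printf("tr %d file %d\n", tr, file);
	} while_for_each_file();

	return 0;
}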
@@ -44,23 +62,45 @@ trace_get_fields(struct ftrace_event_call *event_call) | |||
44 | return event_call->class->get_fields(event_call); | 62 | return event_call->class->get_fields(event_call); |
45 | } | 63 | } |
46 | 64 | ||
65 | static struct ftrace_event_field * | ||
66 | __find_event_field(struct list_head *head, char *name) | ||
67 | { | ||
68 | struct ftrace_event_field *field; | ||
69 | |||
70 | list_for_each_entry(field, head, link) { | ||
71 | if (!strcmp(field->name, name)) | ||
72 | return field; | ||
73 | } | ||
74 | |||
75 | return NULL; | ||
76 | } | ||
77 | |||
78 | struct ftrace_event_field * | ||
79 | trace_find_event_field(struct ftrace_event_call *call, char *name) | ||
80 | { | ||
81 | struct ftrace_event_field *field; | ||
82 | struct list_head *head; | ||
83 | |||
84 | field = __find_event_field(&ftrace_common_fields, name); | ||
85 | if (field) | ||
86 | return field; | ||
87 | |||
88 | head = trace_get_fields(call); | ||
89 | return __find_event_field(head, name); | ||
90 | } | ||
91 | |||
47 | static int __trace_define_field(struct list_head *head, const char *type, | 92 | static int __trace_define_field(struct list_head *head, const char *type, |
48 | const char *name, int offset, int size, | 93 | const char *name, int offset, int size, |
49 | int is_signed, int filter_type) | 94 | int is_signed, int filter_type) |
50 | { | 95 | { |
51 | struct ftrace_event_field *field; | 96 | struct ftrace_event_field *field; |
52 | 97 | ||
53 | field = kzalloc(sizeof(*field), GFP_KERNEL); | 98 | field = kmem_cache_alloc(field_cachep, GFP_TRACE); |
54 | if (!field) | 99 | if (!field) |
55 | goto err; | 100 | goto err; |
56 | 101 | ||
57 | field->name = kstrdup(name, GFP_KERNEL); | 102 | field->name = name; |
58 | if (!field->name) | 103 | field->type = type; |
59 | goto err; | ||
60 | |||
61 | field->type = kstrdup(type, GFP_KERNEL); | ||
62 | if (!field->type) | ||
63 | goto err; | ||
64 | 104 | ||
65 | if (filter_type == FILTER_OTHER) | 105 | if (filter_type == FILTER_OTHER) |
66 | field->filter_type = filter_assign_type(type); | 106 | field->filter_type = filter_assign_type(type); |
@@ -76,9 +116,7 @@ static int __trace_define_field(struct list_head *head, const char *type, | |||
76 | return 0; | 116 | return 0; |
77 | 117 | ||
78 | err: | 118 | err: |
79 | if (field) | 119 | kmem_cache_free(field_cachep, field); |
80 | kfree(field->name); | ||
81 | kfree(field); | ||
82 | 120 | ||
83 | return -ENOMEM; | 121 | return -ENOMEM; |
84 | } | 122 | } |
@@ -120,7 +158,7 @@ static int trace_define_common_fields(void) | |||
120 | return ret; | 158 | return ret; |
121 | } | 159 | } |
122 | 160 | ||
123 | void trace_destroy_fields(struct ftrace_event_call *call) | 161 | static void trace_destroy_fields(struct ftrace_event_call *call) |
124 | { | 162 | { |
125 | struct ftrace_event_field *field, *next; | 163 | struct ftrace_event_field *field, *next; |
126 | struct list_head *head; | 164 | struct list_head *head; |
@@ -128,9 +166,7 @@ void trace_destroy_fields(struct ftrace_event_call *call) | |||
128 | head = trace_get_fields(call); | 166 | head = trace_get_fields(call); |
129 | list_for_each_entry_safe(field, next, head, link) { | 167 | list_for_each_entry_safe(field, next, head, link) { |
130 | list_del(&field->link); | 168 | list_del(&field->link); |
131 | kfree(field->type); | 169 | kmem_cache_free(field_cachep, field); |
132 | kfree(field->name); | ||
133 | kfree(field); | ||
134 | } | 170 | } |
135 | } | 171 | } |
136 | 172 | ||
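The two hunks above change who owns the field strings: name and type used to be kstrdup()ed copies freed one by one, and are now const pointers into strings owned by the event definitions, so teardown frees only the field object itself. A userspace sketch of that ownership model (plain calloc/free stand in for the kmem_cache):

#include <stdio.h>
#include <stdlib.h>

struct field {
	const char *name;	/* borrowed: never freed here */
	const char *type;	/* borrowed: never freed here */
};

static struct field *define_field(const char *type, const char *name)
{
	struct field *f = calloc(1, sizeof(*f));

	if (!f)
		return NULL;
	f->name = name;		/* no strdup(): caller guarantees lifetime */
	f->type = type;
	return f;
}

int main(void)
{
	struct field *f = define_field("unsigned long", "ip");

	if (!f)
		return 1;
	printf("%s %s\n", f->type, f->name);
	free(f);		/* only the struct, like kmem_cache_free() */
	return 0;
}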
@@ -149,15 +185,17 @@ EXPORT_SYMBOL_GPL(trace_event_raw_init); | |||
149 | int ftrace_event_reg(struct ftrace_event_call *call, | 185 | int ftrace_event_reg(struct ftrace_event_call *call, |
150 | enum trace_reg type, void *data) | 186 | enum trace_reg type, void *data) |
151 | { | 187 | { |
188 | struct ftrace_event_file *file = data; | ||
189 | |||
152 | switch (type) { | 190 | switch (type) { |
153 | case TRACE_REG_REGISTER: | 191 | case TRACE_REG_REGISTER: |
154 | return tracepoint_probe_register(call->name, | 192 | return tracepoint_probe_register(call->name, |
155 | call->class->probe, | 193 | call->class->probe, |
156 | call); | 194 | file); |
157 | case TRACE_REG_UNREGISTER: | 195 | case TRACE_REG_UNREGISTER: |
158 | tracepoint_probe_unregister(call->name, | 196 | tracepoint_probe_unregister(call->name, |
159 | call->class->probe, | 197 | call->class->probe, |
160 | call); | 198 | file); |
161 | return 0; | 199 | return 0; |
162 | 200 | ||
163 | #ifdef CONFIG_PERF_EVENTS | 201 | #ifdef CONFIG_PERF_EVENTS |
@@ -183,54 +221,100 @@ EXPORT_SYMBOL_GPL(ftrace_event_reg); | |||
183 | 221 | ||
184 | void trace_event_enable_cmd_record(bool enable) | 222 | void trace_event_enable_cmd_record(bool enable) |
185 | { | 223 | { |
186 | struct ftrace_event_call *call; | 224 | struct ftrace_event_file *file; |
225 | struct trace_array *tr; | ||
187 | 226 | ||
188 | mutex_lock(&event_mutex); | 227 | mutex_lock(&event_mutex); |
189 | list_for_each_entry(call, &ftrace_events, list) { | 228 | do_for_each_event_file(tr, file) { |
190 | if (!(call->flags & TRACE_EVENT_FL_ENABLED)) | 229 | |
230 | if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) | ||
191 | continue; | 231 | continue; |
192 | 232 | ||
193 | if (enable) { | 233 | if (enable) { |
194 | tracing_start_cmdline_record(); | 234 | tracing_start_cmdline_record(); |
195 | call->flags |= TRACE_EVENT_FL_RECORDED_CMD; | 235 | set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); |
196 | } else { | 236 | } else { |
197 | tracing_stop_cmdline_record(); | 237 | tracing_stop_cmdline_record(); |
198 | call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; | 238 | clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); |
199 | } | 239 | } |
200 | } | 240 | } while_for_each_event_file(); |
201 | mutex_unlock(&event_mutex); | 241 | mutex_unlock(&event_mutex); |
202 | } | 242 | } |
203 | 243 | ||
204 | static int ftrace_event_enable_disable(struct ftrace_event_call *call, | 244 | static int __ftrace_event_enable_disable(struct ftrace_event_file *file, |
205 | int enable) | 245 | int enable, int soft_disable) |
206 | { | 246 | { |
247 | struct ftrace_event_call *call = file->event_call; | ||
207 | int ret = 0; | 248 | int ret = 0; |
249 | int disable; | ||
208 | 250 | ||
209 | switch (enable) { | 251 | switch (enable) { |
210 | case 0: | 252 | case 0: |
211 | if (call->flags & TRACE_EVENT_FL_ENABLED) { | 253 | /* |
212 | call->flags &= ~TRACE_EVENT_FL_ENABLED; | 254 | * When soft_disable is set and enable is cleared, we want |
213 | if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) { | 255 | * to clear the SOFT_DISABLED flag but leave the event in the |
256 | * state that it was. That is, if the event was enabled and | ||
257 | * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED | ||
258 | * is set we do not want the event to be enabled before we | ||
259 | * clear the bit. | ||
260 | * | ||
261 | * When soft_disable is not set but the SOFT_MODE flag is, | ||
262 | * we do nothing. Do not disable the tracepoint, otherwise | ||
263 | * "soft enable"s (clearing the SOFT_DISABLED bit) won't work. | ||
264 | */ | ||
265 | if (soft_disable) { | ||
266 | disable = file->flags & FTRACE_EVENT_FL_SOFT_DISABLED; | ||
267 | clear_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags); | ||
268 | } else | ||
269 | disable = !(file->flags & FTRACE_EVENT_FL_SOFT_MODE); | ||
270 | |||
271 | if (disable && (file->flags & FTRACE_EVENT_FL_ENABLED)) { | ||
272 | clear_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags); | ||
273 | if (file->flags & FTRACE_EVENT_FL_RECORDED_CMD) { | ||
214 | tracing_stop_cmdline_record(); | 274 | tracing_stop_cmdline_record(); |
215 | call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; | 275 | clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); |
216 | } | 276 | } |
217 | call->class->reg(call, TRACE_REG_UNREGISTER, NULL); | 277 | call->class->reg(call, TRACE_REG_UNREGISTER, file); |
218 | } | 278 | } |
279 | /* If in SOFT_MODE, just set the SOFT_DISABLED bit */ | ||
280 | if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) | ||
281 | set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); | ||
219 | break; | 282 | break; |
220 | case 1: | 283 | case 1: |
221 | if (!(call->flags & TRACE_EVENT_FL_ENABLED)) { | 284 | /* |
285 | * When soft_disable is set and enable is set, we want to | ||
286 | * register the tracepoint for the event, but leave the event | ||
287 | * as is. That means, if the event was already enabled, we do | ||
288 | * nothing (but set SOFT_MODE). If the event is disabled, we | ||
289 | * set SOFT_DISABLED before enabling the event tracepoint, so | ||
290 | * it still seems to be disabled. | ||
291 | */ | ||
292 | if (!soft_disable) | ||
293 | clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); | ||
294 | else | ||
295 | set_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags); | ||
296 | |||
297 | if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) { | ||
298 | |||
299 | /* Keep the event disabled, when going to SOFT_MODE. */ | ||
300 | if (soft_disable) | ||
301 | set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); | ||
302 | |||
222 | if (trace_flags & TRACE_ITER_RECORD_CMD) { | 303 | if (trace_flags & TRACE_ITER_RECORD_CMD) { |
223 | tracing_start_cmdline_record(); | 304 | tracing_start_cmdline_record(); |
224 | call->flags |= TRACE_EVENT_FL_RECORDED_CMD; | 305 | set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); |
225 | } | 306 | } |
226 | ret = call->class->reg(call, TRACE_REG_REGISTER, NULL); | 307 | ret = call->class->reg(call, TRACE_REG_REGISTER, file); |
227 | if (ret) { | 308 | if (ret) { |
228 | tracing_stop_cmdline_record(); | 309 | tracing_stop_cmdline_record(); |
229 | pr_info("event trace: Could not enable event " | 310 | pr_info("event trace: Could not enable event " |
230 | "%s\n", call->name); | 311 | "%s\n", call->name); |
231 | break; | 312 | break; |
232 | } | 313 | } |
233 | call->flags |= TRACE_EVENT_FL_ENABLED; | 314 | set_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags); |
315 | |||
316 | /* WAS_ENABLED gets set but never cleared. */ | ||
317 | call->flags |= TRACE_EVENT_FL_WAS_ENABLED; | ||
234 | } | 318 | } |
235 | break; | 319 | break; |
236 | } | 320 | } |
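The soft-enable comments above describe a small state machine over three flag bits. A userspace model of just that bookkeeping (bit names mirror the FTRACE_EVENT_FL_* flags, but this is a simplification, not the kernel code; "ENABLED" stands in for the tracepoint being registered):

#include <stdio.h>

enum {
	FL_ENABLED	 = 1 << 0,	/* tracepoint registered */
	FL_SOFT_MODE	 = 1 << 1,	/* a soft user exists */
	FL_SOFT_DISABLED = 1 << 2,	/* registered but hidden from users */
};

static unsigned long flags;

static void event_enable_disable(int enable, int soft_disable)
{
	if (enable) {
		if (soft_disable)
			flags |= FL_SOFT_MODE;
		else
			flags &= ~FL_SOFT_DISABLED;

		if (!(flags & FL_ENABLED)) {
			if (soft_disable)
				flags |= FL_SOFT_DISABLED;
			flags |= FL_ENABLED;
		}
	} else {
		int disable = soft_disable ?
			(flags & FL_SOFT_DISABLED) : !(flags & FL_SOFT_MODE);

		if (soft_disable)
			flags &= ~FL_SOFT_MODE;
		if (disable && (flags & FL_ENABLED))
			flags &= ~FL_ENABLED;
		if (flags & FL_SOFT_MODE)
			flags |= FL_SOFT_DISABLED;
	}
}

int main(void)
{
	event_enable_disable(1, 1);	/* soft enable: registered, reads as "0*" */
	printf("after soft enable:  %#lx\n", flags);
	event_enable_disable(1, 0);	/* user enable clears SOFT_DISABLED */
	printf("after user enable:  %#lx\n", flags);
	event_enable_disable(0, 1);	/* soft user goes away, event stays on */
	printf("after soft disable: %#lx\n", flags);
	return 0;
}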
@@ -238,13 +322,19 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call, | |||
238 | return ret; | 322 | return ret; |
239 | } | 323 | } |
240 | 324 | ||
241 | static void ftrace_clear_events(void) | 325 | static int ftrace_event_enable_disable(struct ftrace_event_file *file, |
326 | int enable) | ||
242 | { | 327 | { |
243 | struct ftrace_event_call *call; | 328 | return __ftrace_event_enable_disable(file, enable, 0); |
329 | } | ||
330 | |||
331 | static void ftrace_clear_events(struct trace_array *tr) | ||
332 | { | ||
333 | struct ftrace_event_file *file; | ||
244 | 334 | ||
245 | mutex_lock(&event_mutex); | 335 | mutex_lock(&event_mutex); |
246 | list_for_each_entry(call, &ftrace_events, list) { | 336 | list_for_each_entry(file, &tr->events, list) { |
247 | ftrace_event_enable_disable(call, 0); | 337 | ftrace_event_enable_disable(file, 0); |
248 | } | 338 | } |
249 | mutex_unlock(&event_mutex); | 339 | mutex_unlock(&event_mutex); |
250 | } | 340 | } |
@@ -257,11 +347,12 @@ static void __put_system(struct event_subsystem *system) | |||
257 | if (--system->ref_count) | 347 | if (--system->ref_count) |
258 | return; | 348 | return; |
259 | 349 | ||
350 | list_del(&system->list); | ||
351 | |||
260 | if (filter) { | 352 | if (filter) { |
261 | kfree(filter->filter_string); | 353 | kfree(filter->filter_string); |
262 | kfree(filter); | 354 | kfree(filter); |
263 | } | 355 | } |
264 | kfree(system->name); | ||
265 | kfree(system); | 356 | kfree(system); |
266 | } | 357 | } |
267 | 358 | ||
@@ -271,24 +362,45 @@ static void __get_system(struct event_subsystem *system) | |||
271 | system->ref_count++; | 362 | system->ref_count++; |
272 | } | 363 | } |
273 | 364 | ||
274 | static void put_system(struct event_subsystem *system) | 365 | static void __get_system_dir(struct ftrace_subsystem_dir *dir) |
366 | { | ||
367 | WARN_ON_ONCE(dir->ref_count == 0); | ||
368 | dir->ref_count++; | ||
369 | __get_system(dir->subsystem); | ||
370 | } | ||
371 | |||
372 | static void __put_system_dir(struct ftrace_subsystem_dir *dir) | ||
373 | { | ||
374 | WARN_ON_ONCE(dir->ref_count == 0); | ||
375 | /* If the subsystem is about to be freed, the dir must be too */ | ||
376 | WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1); | ||
377 | |||
378 | __put_system(dir->subsystem); | ||
379 | if (!--dir->ref_count) | ||
380 | kfree(dir); | ||
381 | } | ||
382 | |||
383 | static void put_system(struct ftrace_subsystem_dir *dir) | ||
275 | { | 384 | { |
276 | mutex_lock(&event_mutex); | 385 | mutex_lock(&event_mutex); |
277 | __put_system(system); | 386 | __put_system_dir(dir); |
278 | mutex_unlock(&event_mutex); | 387 | mutex_unlock(&event_mutex); |
279 | } | 388 | } |
280 | 389 | ||
281 | /* | 390 | /* |
282 | * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. | 391 | * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. |
283 | */ | 392 | */ |
284 | static int __ftrace_set_clr_event(const char *match, const char *sub, | 393 | static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, |
285 | const char *event, int set) | 394 | const char *sub, const char *event, int set) |
286 | { | 395 | { |
396 | struct ftrace_event_file *file; | ||
287 | struct ftrace_event_call *call; | 397 | struct ftrace_event_call *call; |
288 | int ret = -EINVAL; | 398 | int ret = -EINVAL; |
289 | 399 | ||
290 | mutex_lock(&event_mutex); | 400 | mutex_lock(&event_mutex); |
291 | list_for_each_entry(call, &ftrace_events, list) { | 401 | list_for_each_entry(file, &tr->events, list) { |
402 | |||
403 | call = file->event_call; | ||
292 | 404 | ||
293 | if (!call->name || !call->class || !call->class->reg) | 405 | if (!call->name || !call->class || !call->class->reg) |
294 | continue; | 406 | continue; |
@@ -307,7 +419,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub, | |||
307 | if (event && strcmp(event, call->name) != 0) | 419 | if (event && strcmp(event, call->name) != 0) |
308 | continue; | 420 | continue; |
309 | 421 | ||
310 | ftrace_event_enable_disable(call, set); | 422 | ftrace_event_enable_disable(file, set); |
311 | 423 | ||
312 | ret = 0; | 424 | ret = 0; |
313 | } | 425 | } |
@@ -316,7 +428,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub, | |||
316 | return ret; | 428 | return ret; |
317 | } | 429 | } |
318 | 430 | ||
319 | static int ftrace_set_clr_event(char *buf, int set) | 431 | static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) |
320 | { | 432 | { |
321 | char *event = NULL, *sub = NULL, *match; | 433 | char *event = NULL, *sub = NULL, *match; |
322 | 434 | ||
@@ -344,7 +456,7 @@ static int ftrace_set_clr_event(char *buf, int set) | |||
344 | event = NULL; | 456 | event = NULL; |
345 | } | 457 | } |
346 | 458 | ||
347 | return __ftrace_set_clr_event(match, sub, event, set); | 459 | return __ftrace_set_clr_event(tr, match, sub, event, set); |
348 | } | 460 | } |
349 | 461 | ||
350 | /** | 462 | /** |
@@ -361,7 +473,9 @@ static int ftrace_set_clr_event(char *buf, int set) | |||
361 | */ | 473 | */ |
362 | int trace_set_clr_event(const char *system, const char *event, int set) | 474 | int trace_set_clr_event(const char *system, const char *event, int set) |
363 | { | 475 | { |
364 | return __ftrace_set_clr_event(NULL, system, event, set); | 476 | struct trace_array *tr = top_trace_array(); |
477 | |||
478 | return __ftrace_set_clr_event(tr, NULL, system, event, set); | ||
365 | } | 479 | } |
366 | EXPORT_SYMBOL_GPL(trace_set_clr_event); | 480 | EXPORT_SYMBOL_GPL(trace_set_clr_event); |
367 | 481 | ||
@@ -373,6 +487,8 @@ ftrace_event_write(struct file *file, const char __user *ubuf, | |||
373 | size_t cnt, loff_t *ppos) | 487 | size_t cnt, loff_t *ppos) |
374 | { | 488 | { |
375 | struct trace_parser parser; | 489 | struct trace_parser parser; |
490 | struct seq_file *m = file->private_data; | ||
491 | struct trace_array *tr = m->private; | ||
376 | ssize_t read, ret; | 492 | ssize_t read, ret; |
377 | 493 | ||
378 | if (!cnt) | 494 | if (!cnt) |
@@ -395,7 +511,7 @@ ftrace_event_write(struct file *file, const char __user *ubuf, | |||
395 | 511 | ||
396 | parser.buffer[parser.idx] = 0; | 512 | parser.buffer[parser.idx] = 0; |
397 | 513 | ||
398 | ret = ftrace_set_clr_event(parser.buffer + !set, set); | 514 | ret = ftrace_set_clr_event(tr, parser.buffer + !set, set); |
399 | if (ret) | 515 | if (ret) |
400 | goto out_put; | 516 | goto out_put; |
401 | } | 517 | } |
@@ -411,17 +527,20 @@ ftrace_event_write(struct file *file, const char __user *ubuf, | |||
411 | static void * | 527 | static void * |
412 | t_next(struct seq_file *m, void *v, loff_t *pos) | 528 | t_next(struct seq_file *m, void *v, loff_t *pos) |
413 | { | 529 | { |
414 | struct ftrace_event_call *call = v; | 530 | struct ftrace_event_file *file = v; |
531 | struct ftrace_event_call *call; | ||
532 | struct trace_array *tr = m->private; | ||
415 | 533 | ||
416 | (*pos)++; | 534 | (*pos)++; |
417 | 535 | ||
418 | list_for_each_entry_continue(call, &ftrace_events, list) { | 536 | list_for_each_entry_continue(file, &tr->events, list) { |
537 | call = file->event_call; | ||
419 | /* | 538 | /* |
420 | * The ftrace subsystem is for showing formats only. | 539 | * The ftrace subsystem is for showing formats only. |
421 | * They can not be enabled or disabled via the event files. | 540 | * They can not be enabled or disabled via the event files. |
422 | */ | 541 | */ |
423 | if (call->class && call->class->reg) | 542 | if (call->class && call->class->reg) |
424 | return call; | 543 | return file; |
425 | } | 544 | } |
426 | 545 | ||
427 | return NULL; | 546 | return NULL; |
@@ -429,30 +548,32 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
429 | 548 | ||
430 | static void *t_start(struct seq_file *m, loff_t *pos) | 549 | static void *t_start(struct seq_file *m, loff_t *pos) |
431 | { | 550 | { |
432 | struct ftrace_event_call *call; | 551 | struct ftrace_event_file *file; |
552 | struct trace_array *tr = m->private; | ||
433 | loff_t l; | 553 | loff_t l; |
434 | 554 | ||
435 | mutex_lock(&event_mutex); | 555 | mutex_lock(&event_mutex); |
436 | 556 | ||
437 | call = list_entry(&ftrace_events, struct ftrace_event_call, list); | 557 | file = list_entry(&tr->events, struct ftrace_event_file, list); |
438 | for (l = 0; l <= *pos; ) { | 558 | for (l = 0; l <= *pos; ) { |
439 | call = t_next(m, call, &l); | 559 | file = t_next(m, file, &l); |
440 | if (!call) | 560 | if (!file) |
441 | break; | 561 | break; |
442 | } | 562 | } |
443 | return call; | 563 | return file; |
444 | } | 564 | } |
445 | 565 | ||
446 | static void * | 566 | static void * |
447 | s_next(struct seq_file *m, void *v, loff_t *pos) | 567 | s_next(struct seq_file *m, void *v, loff_t *pos) |
448 | { | 568 | { |
449 | struct ftrace_event_call *call = v; | 569 | struct ftrace_event_file *file = v; |
570 | struct trace_array *tr = m->private; | ||
450 | 571 | ||
451 | (*pos)++; | 572 | (*pos)++; |
452 | 573 | ||
453 | list_for_each_entry_continue(call, &ftrace_events, list) { | 574 | list_for_each_entry_continue(file, &tr->events, list) { |
454 | if (call->flags & TRACE_EVENT_FL_ENABLED) | 575 | if (file->flags & FTRACE_EVENT_FL_ENABLED) |
455 | return call; | 576 | return file; |
456 | } | 577 | } |
457 | 578 | ||
458 | return NULL; | 579 | return NULL; |
@@ -460,23 +581,25 @@ s_next(struct seq_file *m, void *v, loff_t *pos) | |||
460 | 581 | ||
461 | static void *s_start(struct seq_file *m, loff_t *pos) | 582 | static void *s_start(struct seq_file *m, loff_t *pos) |
462 | { | 583 | { |
463 | struct ftrace_event_call *call; | 584 | struct ftrace_event_file *file; |
585 | struct trace_array *tr = m->private; | ||
464 | loff_t l; | 586 | loff_t l; |
465 | 587 | ||
466 | mutex_lock(&event_mutex); | 588 | mutex_lock(&event_mutex); |
467 | 589 | ||
468 | call = list_entry(&ftrace_events, struct ftrace_event_call, list); | 590 | file = list_entry(&tr->events, struct ftrace_event_file, list); |
469 | for (l = 0; l <= *pos; ) { | 591 | for (l = 0; l <= *pos; ) { |
470 | call = s_next(m, call, &l); | 592 | file = s_next(m, file, &l); |
471 | if (!call) | 593 | if (!file) |
472 | break; | 594 | break; |
473 | } | 595 | } |
474 | return call; | 596 | return file; |
475 | } | 597 | } |
476 | 598 | ||
477 | static int t_show(struct seq_file *m, void *v) | 599 | static int t_show(struct seq_file *m, void *v) |
478 | { | 600 | { |
479 | struct ftrace_event_call *call = v; | 601 | struct ftrace_event_file *file = v; |
602 | struct ftrace_event_call *call = file->event_call; | ||
480 | 603 | ||
481 | if (strcmp(call->class->system, TRACE_SYSTEM) != 0) | 604 | if (strcmp(call->class->system, TRACE_SYSTEM) != 0) |
482 | seq_printf(m, "%s:", call->class->system); | 605 | seq_printf(m, "%s:", call->class->system); |
@@ -494,25 +617,31 @@ static ssize_t | |||
494 | event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, | 617 | event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, |
495 | loff_t *ppos) | 618 | loff_t *ppos) |
496 | { | 619 | { |
497 | struct ftrace_event_call *call = filp->private_data; | 620 | struct ftrace_event_file *file = filp->private_data; |
498 | char *buf; | 621 | char *buf; |
499 | 622 | ||
500 | if (call->flags & TRACE_EVENT_FL_ENABLED) | 623 | if (file->flags & FTRACE_EVENT_FL_ENABLED) { |
501 | buf = "1\n"; | 624 | if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED) |
502 | else | 625 | buf = "0*\n"; |
626 | else | ||
627 | buf = "1\n"; | ||
628 | } else | ||
503 | buf = "0\n"; | 629 | buf = "0\n"; |
504 | 630 | ||
505 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); | 631 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf)); |
506 | } | 632 | } |
507 | 633 | ||
508 | static ssize_t | 634 | static ssize_t |
509 | event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, | 635 | event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, |
510 | loff_t *ppos) | 636 | loff_t *ppos) |
511 | { | 637 | { |
512 | struct ftrace_event_call *call = filp->private_data; | 638 | struct ftrace_event_file *file = filp->private_data; |
513 | unsigned long val; | 639 | unsigned long val; |
514 | int ret; | 640 | int ret; |
515 | 641 | ||
642 | if (!file) | ||
643 | return -EINVAL; | ||
644 | |||
516 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); | 645 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); |
517 | if (ret) | 646 | if (ret) |
518 | return ret; | 647 | return ret; |
@@ -525,7 +654,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
525 | case 0: | 654 | case 0: |
526 | case 1: | 655 | case 1: |
527 | mutex_lock(&event_mutex); | 656 | mutex_lock(&event_mutex); |
528 | ret = ftrace_event_enable_disable(call, val); | 657 | ret = ftrace_event_enable_disable(file, val); |
529 | mutex_unlock(&event_mutex); | 658 | mutex_unlock(&event_mutex); |
530 | break; | 659 | break; |
531 | 660 | ||
@@ -543,14 +672,18 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, | |||
543 | loff_t *ppos) | 672 | loff_t *ppos) |
544 | { | 673 | { |
545 | const char set_to_char[4] = { '?', '0', '1', 'X' }; | 674 | const char set_to_char[4] = { '?', '0', '1', 'X' }; |
546 | struct event_subsystem *system = filp->private_data; | 675 | struct ftrace_subsystem_dir *dir = filp->private_data; |
676 | struct event_subsystem *system = dir->subsystem; | ||
547 | struct ftrace_event_call *call; | 677 | struct ftrace_event_call *call; |
678 | struct ftrace_event_file *file; | ||
679 | struct trace_array *tr = dir->tr; | ||
548 | char buf[2]; | 680 | char buf[2]; |
549 | int set = 0; | 681 | int set = 0; |
550 | int ret; | 682 | int ret; |
551 | 683 | ||
552 | mutex_lock(&event_mutex); | 684 | mutex_lock(&event_mutex); |
553 | list_for_each_entry(call, &ftrace_events, list) { | 685 | list_for_each_entry(file, &tr->events, list) { |
686 | call = file->event_call; | ||
554 | if (!call->name || !call->class || !call->class->reg) | 687 | if (!call->name || !call->class || !call->class->reg) |
555 | continue; | 688 | continue; |
556 | 689 | ||
@@ -562,7 +695,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, | |||
562 | * or if all events are cleared, or if we have | 695 | * or if all events are cleared, or if we have |
563 | * a mixture. | 696 | * a mixture. |
564 | */ | 697 | */ |
565 | set |= (1 << !!(call->flags & TRACE_EVENT_FL_ENABLED)); | 698 | set |= (1 << !!(file->flags & FTRACE_EVENT_FL_ENABLED)); |
566 | 699 | ||
567 | /* | 700 | /* |
568 | * If we have a mixture, no need to look further. | 701 | * If we have a mixture, no need to look further. |
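The 'set' variable above folds per-event state into a two-bit summary: an enabled event sets bit 1, a disabled one sets bit 0, and the set_to_char[] table turns the combination into '0', '1' or 'X' for a mixture. A userspace sketch of the same trick:

#include <stdio.h>

int main(void)
{
	const char set_to_char[4] = { '?', '0', '1', 'X' };
	int enabled[] = { 1, 1, 0 };	/* enabled state of three events */
	int set = 0;

	for (unsigned int i = 0; i < sizeof(enabled) / sizeof(enabled[0]); i++) {
		set |= 1 << !!enabled[i];	/* enabled -> bit 1, disabled -> bit 0 */
		if (set == 3)
			break;			/* mixture: no need to look further */
	}

	printf("summary: %c\n", set_to_char[set]);	/* prints 'X' here */
	return 0;
}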
@@ -584,7 +717,8 @@ static ssize_t | |||
584 | system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, | 717 | system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, |
585 | loff_t *ppos) | 718 | loff_t *ppos) |
586 | { | 719 | { |
587 | struct event_subsystem *system = filp->private_data; | 720 | struct ftrace_subsystem_dir *dir = filp->private_data; |
721 | struct event_subsystem *system = dir->subsystem; | ||
588 | const char *name = NULL; | 722 | const char *name = NULL; |
589 | unsigned long val; | 723 | unsigned long val; |
590 | ssize_t ret; | 724 | ssize_t ret; |
@@ -607,7 +741,7 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
607 | if (system) | 741 | if (system) |
608 | name = system->name; | 742 | name = system->name; |
609 | 743 | ||
610 | ret = __ftrace_set_clr_event(NULL, name, NULL, val); | 744 | ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val); |
611 | if (ret) | 745 | if (ret) |
612 | goto out; | 746 | goto out; |
613 | 747 | ||
@@ -845,43 +979,75 @@ static LIST_HEAD(event_subsystems); | |||
845 | static int subsystem_open(struct inode *inode, struct file *filp) | 979 | static int subsystem_open(struct inode *inode, struct file *filp) |
846 | { | 980 | { |
847 | struct event_subsystem *system = NULL; | 981 | struct event_subsystem *system = NULL; |
982 | struct ftrace_subsystem_dir *dir = NULL; /* Initialize for gcc */ | ||
983 | struct trace_array *tr; | ||
848 | int ret; | 984 | int ret; |
849 | 985 | ||
850 | if (!inode->i_private) | ||
851 | goto skip_search; | ||
852 | |||
853 | /* Make sure the system still exists */ | 986 | /* Make sure the system still exists */ |
854 | mutex_lock(&event_mutex); | 987 | mutex_lock(&event_mutex); |
855 | list_for_each_entry(system, &event_subsystems, list) { | 988 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { |
856 | if (system == inode->i_private) { | 989 | list_for_each_entry(dir, &tr->systems, list) { |
857 | /* Don't open systems with no events */ | 990 | if (dir == inode->i_private) { |
858 | if (!system->nr_events) { | 991 | /* Don't open systems with no events */ |
859 | system = NULL; | 992 | if (dir->nr_events) { |
860 | break; | 993 | __get_system_dir(dir); |
994 | system = dir->subsystem; | ||
995 | } | ||
996 | goto exit_loop; | ||
861 | } | 997 | } |
862 | __get_system(system); | ||
863 | break; | ||
864 | } | 998 | } |
865 | } | 999 | } |
1000 | exit_loop: | ||
866 | mutex_unlock(&event_mutex); | 1001 | mutex_unlock(&event_mutex); |
867 | 1002 | ||
868 | if (system != inode->i_private) | 1003 | if (!system) |
869 | return -ENODEV; | 1004 | return -ENODEV; |
870 | 1005 | ||
871 | skip_search: | 1006 | /* Some versions of gcc think dir can be uninitialized here */ |
1007 | WARN_ON(!dir); | ||
1008 | |||
872 | ret = tracing_open_generic(inode, filp); | 1009 | ret = tracing_open_generic(inode, filp); |
873 | if (ret < 0 && system) | 1010 | if (ret < 0) |
874 | put_system(system); | 1011 | put_system(dir); |
1012 | |||
1013 | return ret; | ||
1014 | } | ||
1015 | |||
1016 | static int system_tr_open(struct inode *inode, struct file *filp) | ||
1017 | { | ||
1018 | struct ftrace_subsystem_dir *dir; | ||
1019 | struct trace_array *tr = inode->i_private; | ||
1020 | int ret; | ||
1021 | |||
1022 | /* Make a temporary dir that has no system but points to tr */ | ||
1023 | dir = kzalloc(sizeof(*dir), GFP_KERNEL); | ||
1024 | if (!dir) | ||
1025 | return -ENOMEM; | ||
1026 | |||
1027 | dir->tr = tr; | ||
1028 | |||
1029 | ret = tracing_open_generic(inode, filp); | ||
1030 | if (ret < 0) | ||
1031 | kfree(dir); | ||
1032 | |||
1033 | filp->private_data = dir; | ||
875 | 1034 | ||
876 | return ret; | 1035 | return ret; |
877 | } | 1036 | } |
878 | 1037 | ||
879 | static int subsystem_release(struct inode *inode, struct file *file) | 1038 | static int subsystem_release(struct inode *inode, struct file *file) |
880 | { | 1039 | { |
881 | struct event_subsystem *system = inode->i_private; | 1040 | struct ftrace_subsystem_dir *dir = file->private_data; |
882 | 1041 | ||
883 | if (system) | 1042 | /* |
884 | put_system(system); | 1043 | * If dir->subsystem is NULL, then this is a temporary |
1044 | * descriptor that was made for a trace_array to enable | ||
1045 | * all subsystems. | ||
1046 | */ | ||
1047 | if (dir->subsystem) | ||
1048 | put_system(dir); | ||
1049 | else | ||
1050 | kfree(dir); | ||
885 | 1051 | ||
886 | return 0; | 1052 | return 0; |
887 | } | 1053 | } |
@@ -890,7 +1056,8 @@ static ssize_t | |||
890 | subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, | 1056 | subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, |
891 | loff_t *ppos) | 1057 | loff_t *ppos) |
892 | { | 1058 | { |
893 | struct event_subsystem *system = filp->private_data; | 1059 | struct ftrace_subsystem_dir *dir = filp->private_data; |
1060 | struct event_subsystem *system = dir->subsystem; | ||
894 | struct trace_seq *s; | 1061 | struct trace_seq *s; |
895 | int r; | 1062 | int r; |
896 | 1063 | ||
@@ -915,7 +1082,7 @@ static ssize_t | |||
915 | subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, | 1082 | subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, |
916 | loff_t *ppos) | 1083 | loff_t *ppos) |
917 | { | 1084 | { |
918 | struct event_subsystem *system = filp->private_data; | 1085 | struct ftrace_subsystem_dir *dir = filp->private_data; |
919 | char *buf; | 1086 | char *buf; |
920 | int err; | 1087 | int err; |
921 | 1088 | ||
@@ -932,7 +1099,7 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
932 | } | 1099 | } |
933 | buf[cnt] = '\0'; | 1100 | buf[cnt] = '\0'; |
934 | 1101 | ||
935 | err = apply_subsystem_event_filter(system, buf); | 1102 | err = apply_subsystem_event_filter(dir, buf); |
936 | free_page((unsigned long) buf); | 1103 | free_page((unsigned long) buf); |
937 | if (err < 0) | 1104 | if (err < 0) |
938 | return err; | 1105 | return err; |
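subsystem_release() above now has to cope with two kinds of private data: a ref-counted subsystem dir handed out by subsystem_open(), and a temporary dir from system_tr_open() that carries only the trace_array. A userspace sketch of that release pattern (names and types simplified, not the kernel structures):

#include <stdio.h>
#include <stdlib.h>

struct subsystem { int ref_count; };

struct sub_dir {
	struct subsystem *subsystem;	/* NULL for the temporary case */
	int ref_count;
};

static void put_dir(struct sub_dir *dir)
{
	if (!--dir->subsystem->ref_count)
		free(dir->subsystem);
	if (!--dir->ref_count)
		free(dir);
}

static void release(struct sub_dir *dir)
{
	if (dir->subsystem)
		put_dir(dir);	/* real subsystem dir: drop both references */
	else
		free(dir);	/* temporary dir: nothing else to put */
}

int main(void)
{
	struct subsystem *sys = calloc(1, sizeof(*sys));
	struct sub_dir *dir = calloc(1, sizeof(*dir));
	struct sub_dir *tmp = calloc(1, sizeof(*tmp));	/* ~ system_tr_open() */

	if (!sys || !dir || !tmp)
		return 1;

	sys->ref_count = 1;
	dir->subsystem = sys;
	dir->ref_count = 1;

	release(dir);	/* frees sys and dir */
	release(tmp);	/* frees only tmp */
	printf("both release paths handled\n");
	return 0;
}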
@@ -1041,30 +1208,35 @@ static const struct file_operations ftrace_system_enable_fops = { | |||
1041 | .release = subsystem_release, | 1208 | .release = subsystem_release, |
1042 | }; | 1209 | }; |
1043 | 1210 | ||
1211 | static const struct file_operations ftrace_tr_enable_fops = { | ||
1212 | .open = system_tr_open, | ||
1213 | .read = system_enable_read, | ||
1214 | .write = system_enable_write, | ||
1215 | .llseek = default_llseek, | ||
1216 | .release = subsystem_release, | ||
1217 | }; | ||
1218 | |||
1044 | static const struct file_operations ftrace_show_header_fops = { | 1219 | static const struct file_operations ftrace_show_header_fops = { |
1045 | .open = tracing_open_generic, | 1220 | .open = tracing_open_generic, |
1046 | .read = show_header, | 1221 | .read = show_header, |
1047 | .llseek = default_llseek, | 1222 | .llseek = default_llseek, |
1048 | }; | 1223 | }; |
1049 | 1224 | ||
1050 | static struct dentry *event_trace_events_dir(void) | 1225 | static int |
1226 | ftrace_event_open(struct inode *inode, struct file *file, | ||
1227 | const struct seq_operations *seq_ops) | ||
1051 | { | 1228 | { |
1052 | static struct dentry *d_tracer; | 1229 | struct seq_file *m; |
1053 | static struct dentry *d_events; | 1230 | int ret; |
1054 | |||
1055 | if (d_events) | ||
1056 | return d_events; | ||
1057 | |||
1058 | d_tracer = tracing_init_dentry(); | ||
1059 | if (!d_tracer) | ||
1060 | return NULL; | ||
1061 | 1231 | ||
1062 | d_events = debugfs_create_dir("events", d_tracer); | 1232 | ret = seq_open(file, seq_ops); |
1063 | if (!d_events) | 1233 | if (ret < 0) |
1064 | pr_warning("Could not create debugfs " | 1234 | return ret; |
1065 | "'events' directory\n"); | 1235 | m = file->private_data; |
1236 | /* copy tr over to seq ops */ | ||
1237 | m->private = inode->i_private; | ||
1066 | 1238 | ||
1067 | return d_events; | 1239 | return ret; |
1068 | } | 1240 | } |
1069 | 1241 | ||
1070 | static int | 1242 | static int |
@@ -1072,117 +1244,165 @@ ftrace_event_avail_open(struct inode *inode, struct file *file) | |||
1072 | { | 1244 | { |
1073 | const struct seq_operations *seq_ops = &show_event_seq_ops; | 1245 | const struct seq_operations *seq_ops = &show_event_seq_ops; |
1074 | 1246 | ||
1075 | return seq_open(file, seq_ops); | 1247 | return ftrace_event_open(inode, file, seq_ops); |
1076 | } | 1248 | } |
1077 | 1249 | ||
1078 | static int | 1250 | static int |
1079 | ftrace_event_set_open(struct inode *inode, struct file *file) | 1251 | ftrace_event_set_open(struct inode *inode, struct file *file) |
1080 | { | 1252 | { |
1081 | const struct seq_operations *seq_ops = &show_set_event_seq_ops; | 1253 | const struct seq_operations *seq_ops = &show_set_event_seq_ops; |
1254 | struct trace_array *tr = inode->i_private; | ||
1082 | 1255 | ||
1083 | if ((file->f_mode & FMODE_WRITE) && | 1256 | if ((file->f_mode & FMODE_WRITE) && |
1084 | (file->f_flags & O_TRUNC)) | 1257 | (file->f_flags & O_TRUNC)) |
1085 | ftrace_clear_events(); | 1258 | ftrace_clear_events(tr); |
1086 | 1259 | ||
1087 | return seq_open(file, seq_ops); | 1260 | return ftrace_event_open(inode, file, seq_ops); |
1261 | } | ||
1262 | |||
1263 | static struct event_subsystem * | ||
1264 | create_new_subsystem(const char *name) | ||
1265 | { | ||
1266 | struct event_subsystem *system; | ||
1267 | |||
1268 | /* need to create new entry */ | ||
1269 | system = kmalloc(sizeof(*system), GFP_KERNEL); | ||
1270 | if (!system) | ||
1271 | return NULL; | ||
1272 | |||
1273 | system->ref_count = 1; | ||
1274 | system->name = name; | ||
1275 | |||
1276 | system->filter = NULL; | ||
1277 | |||
1278 | system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL); | ||
1279 | if (!system->filter) | ||
1280 | goto out_free; | ||
1281 | |||
1282 | list_add(&system->list, &event_subsystems); | ||
1283 | |||
1284 | return system; | ||
1285 | |||
1286 | out_free: | ||
1287 | kfree(system); | ||
1288 | return NULL; | ||
1088 | } | 1289 | } |
1089 | 1290 | ||
1090 | static struct dentry * | 1291 | static struct dentry * |
1091 | event_subsystem_dir(const char *name, struct dentry *d_events) | 1292 | event_subsystem_dir(struct trace_array *tr, const char *name, |
1293 | struct ftrace_event_file *file, struct dentry *parent) | ||
1092 | { | 1294 | { |
1295 | struct ftrace_subsystem_dir *dir; | ||
1093 | struct event_subsystem *system; | 1296 | struct event_subsystem *system; |
1094 | struct dentry *entry; | 1297 | struct dentry *entry; |
1095 | 1298 | ||
1096 | /* First see if we did not already create this dir */ | 1299 | /* First see if we did not already create this dir */ |
1097 | list_for_each_entry(system, &event_subsystems, list) { | 1300 | list_for_each_entry(dir, &tr->systems, list) { |
1301 | system = dir->subsystem; | ||
1098 | if (strcmp(system->name, name) == 0) { | 1302 | if (strcmp(system->name, name) == 0) { |
1099 | system->nr_events++; | 1303 | dir->nr_events++; |
1100 | return system->entry; | 1304 | file->system = dir; |
1305 | return dir->entry; | ||
1101 | } | 1306 | } |
1102 | } | 1307 | } |
1103 | 1308 | ||
1104 | /* need to create new entry */ | 1309 | /* Now see if the system itself exists. */ |
1105 | system = kmalloc(sizeof(*system), GFP_KERNEL); | 1310 | list_for_each_entry(system, &event_subsystems, list) { |
1106 | if (!system) { | 1311 | if (strcmp(system->name, name) == 0) |
1107 | pr_warning("No memory to create event subsystem %s\n", | 1312 | break; |
1108 | name); | ||
1109 | return d_events; | ||
1110 | } | 1313 | } |
1314 | /* Reset system variable when not found */ | ||
1315 | if (&system->list == &event_subsystems) | ||
1316 | system = NULL; | ||
1111 | 1317 | ||
1112 | system->entry = debugfs_create_dir(name, d_events); | 1318 | dir = kmalloc(sizeof(*dir), GFP_KERNEL); |
1113 | if (!system->entry) { | 1319 | if (!dir) |
1114 | pr_warning("Could not create event subsystem %s\n", | 1320 | goto out_fail; |
1115 | name); | ||
1116 | kfree(system); | ||
1117 | return d_events; | ||
1118 | } | ||
1119 | 1321 | ||
1120 | system->nr_events = 1; | 1322 | if (!system) { |
1121 | system->ref_count = 1; | 1323 | system = create_new_subsystem(name); |
1122 | system->name = kstrdup(name, GFP_KERNEL); | 1324 | if (!system) |
1123 | if (!system->name) { | 1325 | goto out_free; |
1124 | debugfs_remove(system->entry); | 1326 | } else |
1125 | kfree(system); | 1327 | __get_system(system); |
1126 | return d_events; | 1328 | |
1329 | dir->entry = debugfs_create_dir(name, parent); | ||
1330 | if (!dir->entry) { | ||
1331 | pr_warning("Failed to create system directory %s\n", name); | ||
1332 | __put_system(system); | ||
1333 | goto out_free; | ||
1127 | } | 1334 | } |
1128 | 1335 | ||
1129 | list_add(&system->list, &event_subsystems); | 1336 | dir->tr = tr; |
1130 | 1337 | dir->ref_count = 1; | |
1131 | system->filter = NULL; | 1338 | dir->nr_events = 1; |
1132 | 1339 | dir->subsystem = system; | |
1133 | system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL); | 1340 | file->system = dir; |
1134 | if (!system->filter) { | ||
1135 | pr_warning("Could not allocate filter for subsystem " | ||
1136 | "'%s'\n", name); | ||
1137 | return system->entry; | ||
1138 | } | ||
1139 | 1341 | ||
1140 | entry = debugfs_create_file("filter", 0644, system->entry, system, | 1342 | entry = debugfs_create_file("filter", 0644, dir->entry, dir, |
1141 | &ftrace_subsystem_filter_fops); | 1343 | &ftrace_subsystem_filter_fops); |
1142 | if (!entry) { | 1344 | if (!entry) { |
1143 | kfree(system->filter); | 1345 | kfree(system->filter); |
1144 | system->filter = NULL; | 1346 | system->filter = NULL; |
1145 | pr_warning("Could not create debugfs " | 1347 | pr_warning("Could not create debugfs '%s/filter' entry\n", name); |
1146 | "'%s/filter' entry\n", name); | ||
1147 | } | 1348 | } |
1148 | 1349 | ||
1149 | trace_create_file("enable", 0644, system->entry, system, | 1350 | trace_create_file("enable", 0644, dir->entry, dir, |
1150 | &ftrace_system_enable_fops); | 1351 | &ftrace_system_enable_fops); |
1151 | 1352 | ||
1152 | return system->entry; | 1353 | list_add(&dir->list, &tr->systems); |
1354 | |||
1355 | return dir->entry; | ||
1356 | |||
1357 | out_free: | ||
1358 | kfree(dir); | ||
1359 | out_fail: | ||
1360 | /* Only print this message if failed on memory allocation */ | ||
1361 | if (!dir || !system) | ||
1362 | pr_warning("No memory to create event subsystem %s\n", | ||
1363 | name); | ||
1364 | return NULL; | ||
1153 | } | 1365 | } |
1154 | 1366 | ||
1155 | static int | 1367 | static int |
1156 | event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, | 1368 | event_create_dir(struct dentry *parent, |
1369 | struct ftrace_event_file *file, | ||
1157 | const struct file_operations *id, | 1370 | const struct file_operations *id, |
1158 | const struct file_operations *enable, | 1371 | const struct file_operations *enable, |
1159 | const struct file_operations *filter, | 1372 | const struct file_operations *filter, |
1160 | const struct file_operations *format) | 1373 | const struct file_operations *format) |
1161 | { | 1374 | { |
1375 | struct ftrace_event_call *call = file->event_call; | ||
1376 | struct trace_array *tr = file->tr; | ||
1162 | struct list_head *head; | 1377 | struct list_head *head; |
1378 | struct dentry *d_events; | ||
1163 | int ret; | 1379 | int ret; |
1164 | 1380 | ||
1165 | /* | 1381 | /* |
1166 | * If the trace point header did not define TRACE_SYSTEM | 1382 | * If the trace point header did not define TRACE_SYSTEM |
1167 | * then the system would be called "TRACE_SYSTEM". | 1383 | * then the system would be called "TRACE_SYSTEM". |
1168 | */ | 1384 | */ |
1169 | if (strcmp(call->class->system, TRACE_SYSTEM) != 0) | 1385 | if (strcmp(call->class->system, TRACE_SYSTEM) != 0) { |
1170 | d_events = event_subsystem_dir(call->class->system, d_events); | 1386 | d_events = event_subsystem_dir(tr, call->class->system, file, parent); |
1171 | 1387 | if (!d_events) | |
1172 | call->dir = debugfs_create_dir(call->name, d_events); | 1388 | return -ENOMEM; |
1173 | if (!call->dir) { | 1389 | } else |
1174 | pr_warning("Could not create debugfs " | 1390 | d_events = parent; |
1175 | "'%s' directory\n", call->name); | 1391 | |
1392 | file->dir = debugfs_create_dir(call->name, d_events); | ||
1393 | if (!file->dir) { | ||
1394 | pr_warning("Could not create debugfs '%s' directory\n", | ||
1395 | call->name); | ||
1176 | return -1; | 1396 | return -1; |
1177 | } | 1397 | } |
1178 | 1398 | ||
1179 | if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) | 1399 | if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) |
1180 | trace_create_file("enable", 0644, call->dir, call, | 1400 | trace_create_file("enable", 0644, file->dir, file, |
1181 | enable); | 1401 | enable); |
1182 | 1402 | ||
1183 | #ifdef CONFIG_PERF_EVENTS | 1403 | #ifdef CONFIG_PERF_EVENTS |
1184 | if (call->event.type && call->class->reg) | 1404 | if (call->event.type && call->class->reg) |
1185 | trace_create_file("id", 0444, call->dir, call, | 1405 | trace_create_file("id", 0444, file->dir, call, |
1186 | id); | 1406 | id); |
1187 | #endif | 1407 | #endif |
1188 | 1408 | ||
@@ -1196,23 +1416,76 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, | |||
1196 | if (ret < 0) { | 1416 | if (ret < 0) { |
1197 | pr_warning("Could not initialize trace point" | 1417 | pr_warning("Could not initialize trace point" |
1198 | " events/%s\n", call->name); | 1418 | " events/%s\n", call->name); |
1199 | return ret; | 1419 | return -1; |
1200 | } | 1420 | } |
1201 | } | 1421 | } |
1202 | trace_create_file("filter", 0644, call->dir, call, | 1422 | trace_create_file("filter", 0644, file->dir, call, |
1203 | filter); | 1423 | filter); |
1204 | 1424 | ||
1205 | trace_create_file("format", 0444, call->dir, call, | 1425 | trace_create_file("format", 0444, file->dir, call, |
1206 | format); | 1426 | format); |
1207 | 1427 | ||
1208 | return 0; | 1428 | return 0; |
1209 | } | 1429 | } |
1210 | 1430 | ||
1431 | static void remove_subsystem(struct ftrace_subsystem_dir *dir) | ||
1432 | { | ||
1433 | if (!dir) | ||
1434 | return; | ||
1435 | |||
1436 | if (!--dir->nr_events) { | ||
1437 | debugfs_remove_recursive(dir->entry); | ||
1438 | list_del(&dir->list); | ||
1439 | __put_system_dir(dir); | ||
1440 | } | ||
1441 | } | ||
1442 | |||
1443 | static void remove_event_from_tracers(struct ftrace_event_call *call) | ||
1444 | { | ||
1445 | struct ftrace_event_file *file; | ||
1446 | struct trace_array *tr; | ||
1447 | |||
1448 | do_for_each_event_file_safe(tr, file) { | ||
1449 | |||
1450 | if (file->event_call != call) | ||
1451 | continue; | ||
1452 | |||
1453 | list_del(&file->list); | ||
1454 | debugfs_remove_recursive(file->dir); | ||
1455 | remove_subsystem(file->system); | ||
1456 | kmem_cache_free(file_cachep, file); | ||
1457 | |||
1458 | /* | ||
1459 | * The do_for_each_event_file_safe() is | ||
1460 | * a double loop. After finding the call for this | ||
1461 | * trace_array, we use break to jump to the next | ||
1462 | * trace_array. | ||
1463 | */ | ||
1464 | break; | ||
1465 | } while_for_each_event_file(); | ||
1466 | } | ||
1467 | |||
1211 | static void event_remove(struct ftrace_event_call *call) | 1468 | static void event_remove(struct ftrace_event_call *call) |
1212 | { | 1469 | { |
1213 | ftrace_event_enable_disable(call, 0); | 1470 | struct trace_array *tr; |
1471 | struct ftrace_event_file *file; | ||
1472 | |||
1473 | do_for_each_event_file(tr, file) { | ||
1474 | if (file->event_call != call) | ||
1475 | continue; | ||
1476 | ftrace_event_enable_disable(file, 0); | ||
1477 | /* | ||
1478 | * The do_for_each_event_file() is | ||
1479 | * a double loop. After finding the call for this | ||
1480 | * trace_array, we use break to jump to the next | ||
1481 | * trace_array. | ||
1482 | */ | ||
1483 | break; | ||
1484 | } while_for_each_event_file(); | ||
1485 | |||
1214 | if (call->event.funcs) | 1486 | if (call->event.funcs) |
1215 | __unregister_ftrace_event(&call->event); | 1487 | __unregister_ftrace_event(&call->event); |
1488 | remove_event_from_tracers(call); | ||
1216 | list_del(&call->list); | 1489 | list_del(&call->list); |
1217 | } | 1490 | } |
1218 | 1491 | ||
@@ -1234,82 +1507,99 @@ static int event_init(struct ftrace_event_call *call) | |||
1234 | } | 1507 | } |
1235 | 1508 | ||
1236 | static int | 1509 | static int |
1237 | __trace_add_event_call(struct ftrace_event_call *call, struct module *mod, | 1510 | __register_event(struct ftrace_event_call *call, struct module *mod) |
1238 | const struct file_operations *id, | ||
1239 | const struct file_operations *enable, | ||
1240 | const struct file_operations *filter, | ||
1241 | const struct file_operations *format) | ||
1242 | { | 1511 | { |
1243 | struct dentry *d_events; | ||
1244 | int ret; | 1512 | int ret; |
1245 | 1513 | ||
1246 | ret = event_init(call); | 1514 | ret = event_init(call); |
1247 | if (ret < 0) | 1515 | if (ret < 0) |
1248 | return ret; | 1516 | return ret; |
1249 | 1517 | ||
1250 | d_events = event_trace_events_dir(); | 1518 | list_add(&call->list, &ftrace_events); |
1251 | if (!d_events) | ||
1252 | return -ENOENT; | ||
1253 | |||
1254 | ret = event_create_dir(call, d_events, id, enable, filter, format); | ||
1255 | if (!ret) | ||
1256 | list_add(&call->list, &ftrace_events); | ||
1257 | call->mod = mod; | 1519 | call->mod = mod; |
1258 | 1520 | ||
1259 | return ret; | 1521 | return 0; |
1522 | } | ||
1523 | |||
1524 | /* Add an event to a trace directory */ | ||
1525 | static int | ||
1526 | __trace_add_new_event(struct ftrace_event_call *call, | ||
1527 | struct trace_array *tr, | ||
1528 | const struct file_operations *id, | ||
1529 | const struct file_operations *enable, | ||
1530 | const struct file_operations *filter, | ||
1531 | const struct file_operations *format) | ||
1532 | { | ||
1533 | struct ftrace_event_file *file; | ||
1534 | |||
1535 | file = kmem_cache_alloc(file_cachep, GFP_TRACE); | ||
1536 | if (!file) | ||
1537 | return -ENOMEM; | ||
1538 | |||
1539 | file->event_call = call; | ||
1540 | file->tr = tr; | ||
1541 | list_add(&file->list, &tr->events); | ||
1542 | |||
1543 | return event_create_dir(tr->event_dir, file, id, enable, filter, format); | ||
1260 | } | 1544 | } |
1261 | 1545 | ||
1546 | /* | ||
1547 | * Just create a descriptor for early init. A descriptor is required | ||
1548 | * for enabling events at boot. We want to enable events before | ||
1549 | * the filesystem is initialized. | ||
1550 | */ | ||
1551 | static __init int | ||
1552 | __trace_early_add_new_event(struct ftrace_event_call *call, | ||
1553 | struct trace_array *tr) | ||
1554 | { | ||
1555 | struct ftrace_event_file *file; | ||
1556 | |||
1557 | file = kmem_cache_alloc(file_cachep, GFP_TRACE); | ||
1558 | if (!file) | ||
1559 | return -ENOMEM; | ||
1560 | |||
1561 | file->event_call = call; | ||
1562 | file->tr = tr; | ||
1563 | list_add(&file->list, &tr->events); | ||
1564 | |||
1565 | return 0; | ||
1566 | } | ||
1567 | |||
1568 | struct ftrace_module_file_ops; | ||
1569 | static void __add_event_to_tracers(struct ftrace_event_call *call, | ||
1570 | struct ftrace_module_file_ops *file_ops); | ||
1571 | |||
1262 | /* Add an additional event_call dynamically */ | 1572 | /* Add an additional event_call dynamically */ |
1263 | int trace_add_event_call(struct ftrace_event_call *call) | 1573 | int trace_add_event_call(struct ftrace_event_call *call) |
1264 | { | 1574 | { |
1265 | int ret; | 1575 | int ret; |
1266 | mutex_lock(&event_mutex); | 1576 | mutex_lock(&event_mutex); |
1267 | ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops, | ||
1268 | &ftrace_enable_fops, | ||
1269 | &ftrace_event_filter_fops, | ||
1270 | &ftrace_event_format_fops); | ||
1271 | mutex_unlock(&event_mutex); | ||
1272 | return ret; | ||
1273 | } | ||
1274 | 1577 | ||
1275 | static void remove_subsystem_dir(const char *name) | 1578 | ret = __register_event(call, NULL); |
1276 | { | 1579 | if (ret >= 0) |
1277 | struct event_subsystem *system; | 1580 | __add_event_to_tracers(call, NULL); |
1278 | |||
1279 | if (strcmp(name, TRACE_SYSTEM) == 0) | ||
1280 | return; | ||
1281 | 1581 | ||
1282 | list_for_each_entry(system, &event_subsystems, list) { | 1582 | mutex_unlock(&event_mutex); |
1283 | if (strcmp(system->name, name) == 0) { | 1583 | return ret; |
1284 | if (!--system->nr_events) { | ||
1285 | debugfs_remove_recursive(system->entry); | ||
1286 | list_del(&system->list); | ||
1287 | __put_system(system); | ||
1288 | } | ||
1289 | break; | ||
1290 | } | ||
1291 | } | ||
1292 | } | 1584 | } |
1293 | 1585 | ||
1294 | /* | 1586 | /* |
1295 | * Must be called under locking both of event_mutex and trace_event_mutex. | 1587 | * Must be called under locking both of event_mutex and trace_event_sem. |
1296 | */ | 1588 | */ |
1297 | static void __trace_remove_event_call(struct ftrace_event_call *call) | 1589 | static void __trace_remove_event_call(struct ftrace_event_call *call) |
1298 | { | 1590 | { |
1299 | event_remove(call); | 1591 | event_remove(call); |
1300 | trace_destroy_fields(call); | 1592 | trace_destroy_fields(call); |
1301 | destroy_preds(call); | 1593 | destroy_preds(call); |
1302 | debugfs_remove_recursive(call->dir); | ||
1303 | remove_subsystem_dir(call->class->system); | ||
1304 | } | 1594 | } |
1305 | 1595 | ||
1306 | /* Remove an event_call */ | 1596 | /* Remove an event_call */ |
1307 | void trace_remove_event_call(struct ftrace_event_call *call) | 1597 | void trace_remove_event_call(struct ftrace_event_call *call) |
1308 | { | 1598 | { |
1309 | mutex_lock(&event_mutex); | 1599 | mutex_lock(&event_mutex); |
1310 | down_write(&trace_event_mutex); | 1600 | down_write(&trace_event_sem); |
1311 | __trace_remove_event_call(call); | 1601 | __trace_remove_event_call(call); |
1312 | up_write(&trace_event_mutex); | 1602 | up_write(&trace_event_sem); |
1313 | mutex_unlock(&event_mutex); | 1603 | mutex_unlock(&event_mutex); |
1314 | } | 1604 | } |
1315 | 1605 | ||
@@ -1336,6 +1626,26 @@ struct ftrace_module_file_ops { | |||
1336 | }; | 1626 | }; |
1337 | 1627 | ||
1338 | static struct ftrace_module_file_ops * | 1628 | static struct ftrace_module_file_ops * |
1629 | find_ftrace_file_ops(struct ftrace_module_file_ops *file_ops, struct module *mod) | ||
1630 | { | ||
1631 | /* | ||
1632 | * As event_calls are added in groups by module, | ||
1633 | * when we find one file_ops, we don't need to search for | ||
1634 | * each call in that module, as the rest should be the | ||
1635 | * same. Only search for a new one if the last one did | ||
1636 | * not match. | ||
1637 | */ | ||
1638 | if (file_ops && mod == file_ops->mod) | ||
1639 | return file_ops; | ||
1640 | |||
1641 | list_for_each_entry(file_ops, &ftrace_module_file_list, list) { | ||
1642 | if (file_ops->mod == mod) | ||
1643 | return file_ops; | ||
1644 | } | ||
1645 | return NULL; | ||
1646 | } | ||
1647 | |||
1648 | static struct ftrace_module_file_ops * | ||
1339 | trace_create_file_ops(struct module *mod) | 1649 | trace_create_file_ops(struct module *mod) |
1340 | { | 1650 | { |
1341 | struct ftrace_module_file_ops *file_ops; | 1651 | struct ftrace_module_file_ops *file_ops; |
@@ -1386,9 +1696,8 @@ static void trace_module_add_events(struct module *mod) | |||
1386 | return; | 1696 | return; |
1387 | 1697 | ||
1388 | for_each_event(call, start, end) { | 1698 | for_each_event(call, start, end) { |
1389 | __trace_add_event_call(*call, mod, | 1699 | __register_event(*call, mod); |
1390 | &file_ops->id, &file_ops->enable, | 1700 | __add_event_to_tracers(*call, file_ops); |
1391 | &file_ops->filter, &file_ops->format); | ||
1392 | } | 1701 | } |
1393 | } | 1702 | } |
1394 | 1703 | ||
@@ -1396,12 +1705,13 @@ static void trace_module_remove_events(struct module *mod) | |||
1396 | { | 1705 | { |
1397 | struct ftrace_module_file_ops *file_ops; | 1706 | struct ftrace_module_file_ops *file_ops; |
1398 | struct ftrace_event_call *call, *p; | 1707 | struct ftrace_event_call *call, *p; |
1399 | bool found = false; | 1708 | bool clear_trace = false; |
1400 | 1709 | ||
1401 | down_write(&trace_event_mutex); | 1710 | down_write(&trace_event_sem); |
1402 | list_for_each_entry_safe(call, p, &ftrace_events, list) { | 1711 | list_for_each_entry_safe(call, p, &ftrace_events, list) { |
1403 | if (call->mod == mod) { | 1712 | if (call->mod == mod) { |
1404 | found = true; | 1713 | if (call->flags & TRACE_EVENT_FL_WAS_ENABLED) |
1714 | clear_trace = true; | ||
1405 | __trace_remove_event_call(call); | 1715 | __trace_remove_event_call(call); |
1406 | } | 1716 | } |
1407 | } | 1717 | } |
@@ -1415,14 +1725,18 @@ static void trace_module_remove_events(struct module *mod) | |||
1415 | list_del(&file_ops->list); | 1725 | list_del(&file_ops->list); |
1416 | kfree(file_ops); | 1726 | kfree(file_ops); |
1417 | } | 1727 | } |
1728 | up_write(&trace_event_sem); | ||
1418 | 1729 | ||
1419 | /* | 1730 | /* |
1420 | * It is safest to reset the ring buffer if the module being unloaded | 1731 | * It is safest to reset the ring buffer if the module being unloaded |
1421 | * registered any events. | 1732 | * registered any events that were used. The only worry is if |
1733 | * a new module gets loaded, and takes on the same id as the events | ||
1734 | * of this module. When printing out the buffer, traced events left | ||
1735 | over from this module may be passed to the new module's events and | ||
1736 | * unexpected results may occur. | ||
1422 | */ | 1737 | */ |
1423 | if (found) | 1738 | if (clear_trace) |
1424 | tracing_reset_current_online_cpus(); | 1739 | tracing_reset_all_online_cpus(); |
1425 | up_write(&trace_event_mutex); | ||
1426 | } | 1740 | } |
1427 | 1741 | ||
1428 | static int trace_module_notify(struct notifier_block *self, | 1742 | static int trace_module_notify(struct notifier_block *self, |
@@ -1443,14 +1757,433 @@ static int trace_module_notify(struct notifier_block *self, | |||
1443 | 1757 | ||
1444 | return 0; | 1758 | return 0; |
1445 | } | 1759 | } |
1760 | |||
1761 | static int | ||
1762 | __trace_add_new_mod_event(struct ftrace_event_call *call, | ||
1763 | struct trace_array *tr, | ||
1764 | struct ftrace_module_file_ops *file_ops) | ||
1765 | { | ||
1766 | return __trace_add_new_event(call, tr, | ||
1767 | &file_ops->id, &file_ops->enable, | ||
1768 | &file_ops->filter, &file_ops->format); | ||
1769 | } | ||
1770 | |||
1446 | #else | 1771 | #else |
1447 | static int trace_module_notify(struct notifier_block *self, | 1772 | static inline struct ftrace_module_file_ops * |
1448 | unsigned long val, void *data) | 1773 | find_ftrace_file_ops(struct ftrace_module_file_ops *file_ops, struct module *mod) |
1774 | { | ||
1775 | return NULL; | ||
1776 | } | ||
1777 | static inline int trace_module_notify(struct notifier_block *self, | ||
1778 | unsigned long val, void *data) | ||
1449 | { | 1779 | { |
1450 | return 0; | 1780 | return 0; |
1451 | } | 1781 | } |
1782 | static inline int | ||
1783 | __trace_add_new_mod_event(struct ftrace_event_call *call, | ||
1784 | struct trace_array *tr, | ||
1785 | struct ftrace_module_file_ops *file_ops) | ||
1786 | { | ||
1787 | return -ENODEV; | ||
1788 | } | ||
1452 | #endif /* CONFIG_MODULES */ | 1789 | #endif /* CONFIG_MODULES */ |
1453 | 1790 | ||
1791 | /* Create a new event directory structure for a trace directory. */ | ||
1792 | static void | ||
1793 | __trace_add_event_dirs(struct trace_array *tr) | ||
1794 | { | ||
1795 | struct ftrace_module_file_ops *file_ops = NULL; | ||
1796 | struct ftrace_event_call *call; | ||
1797 | int ret; | ||
1798 | |||
1799 | list_for_each_entry(call, &ftrace_events, list) { | ||
1800 | if (call->mod) { | ||
1801 | /* | ||
1802 | * Directories for events by modules need to | ||
1803 | * keep module ref counts when opened (as we don't | ||
1804 | * want the module to disappear when reading one | ||
1805 | * of these files). The file_ops keep account of | ||
1806 | * the module ref count. | ||
1807 | */ | ||
1808 | file_ops = find_ftrace_file_ops(file_ops, call->mod); | ||
1809 | if (!file_ops) | ||
1810 | continue; /* Warn? */ | ||
1811 | ret = __trace_add_new_mod_event(call, tr, file_ops); | ||
1812 | if (ret < 0) | ||
1813 | pr_warning("Could not create directory for event %s\n", | ||
1814 | call->name); | ||
1815 | continue; | ||
1816 | } | ||
1817 | ret = __trace_add_new_event(call, tr, | ||
1818 | &ftrace_event_id_fops, | ||
1819 | &ftrace_enable_fops, | ||
1820 | &ftrace_event_filter_fops, | ||
1821 | &ftrace_event_format_fops); | ||
1822 | if (ret < 0) | ||
1823 | pr_warning("Could not create directory for event %s\n", | ||
1824 | call->name); | ||
1825 | } | ||
1826 | } | ||
1827 | |||
1828 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
1829 | |||
1830 | /* Avoid typos */ | ||
1831 | #define ENABLE_EVENT_STR "enable_event" | ||
1832 | #define DISABLE_EVENT_STR "disable_event" | ||
1833 | |||
1834 | struct event_probe_data { | ||
1835 | struct ftrace_event_file *file; | ||
1836 | unsigned long count; | ||
1837 | int ref; | ||
1838 | bool enable; | ||
1839 | }; | ||
1840 | |||
1841 | static struct ftrace_event_file * | ||
1842 | find_event_file(struct trace_array *tr, const char *system, const char *event) | ||
1843 | { | ||
1844 | struct ftrace_event_file *file; | ||
1845 | struct ftrace_event_call *call; | ||
1846 | |||
1847 | list_for_each_entry(file, &tr->events, list) { | ||
1848 | |||
1849 | call = file->event_call; | ||
1850 | |||
1851 | if (!call->name || !call->class || !call->class->reg) | ||
1852 | continue; | ||
1853 | |||
1854 | if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) | ||
1855 | continue; | ||
1856 | |||
1857 | if (strcmp(event, call->name) == 0 && | ||
1858 | strcmp(system, call->class->system) == 0) | ||
1859 | return file; | ||
1860 | } | ||
1861 | return NULL; | ||
1862 | } | ||
1863 | |||
1864 | static void | ||
1865 | event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data) | ||
1866 | { | ||
1867 | struct event_probe_data **pdata = (struct event_probe_data **)_data; | ||
1868 | struct event_probe_data *data = *pdata; | ||
1869 | |||
1870 | if (!data) | ||
1871 | return; | ||
1872 | |||
1873 | if (data->enable) | ||
1874 | clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags); | ||
1875 | else | ||
1876 | set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags); | ||
1877 | } | ||
1878 | |||
1879 | static void | ||
1880 | event_enable_count_probe(unsigned long ip, unsigned long parent_ip, void **_data) | ||
1881 | { | ||
1882 | struct event_probe_data **pdata = (struct event_probe_data **)_data; | ||
1883 | struct event_probe_data *data = *pdata; | ||
1884 | |||
1885 | if (!data) | ||
1886 | return; | ||
1887 | |||
1888 | if (!data->count) | ||
1889 | return; | ||
1890 | |||
1891 | /* Skip if the event is in a state we want to switch to */ | ||
1892 | if (data->enable == !(data->file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) | ||
1893 | return; | ||
1894 | |||
1895 | if (data->count != -1) | ||
1896 | (data->count)--; | ||
1897 | |||
1898 | event_enable_probe(ip, parent_ip, _data); | ||
1899 | } | ||
1900 | |||
1901 | static int | ||
1902 | event_enable_print(struct seq_file *m, unsigned long ip, | ||
1903 | struct ftrace_probe_ops *ops, void *_data) | ||
1904 | { | ||
1905 | struct event_probe_data *data = _data; | ||
1906 | |||
1907 | seq_printf(m, "%ps:", (void *)ip); | ||
1908 | |||
1909 | seq_printf(m, "%s:%s:%s", | ||
1910 | data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, | ||
1911 | data->file->event_call->class->system, | ||
1912 | data->file->event_call->name); | ||
1913 | |||
1914 | if (data->count == -1) | ||
1915 | seq_printf(m, ":unlimited\n"); | ||
1916 | else | ||
1917 | seq_printf(m, ":count=%ld\n", data->count); | ||
1918 | |||
1919 | return 0; | ||
1920 | } | ||
1921 | |||
1922 | static int | ||
1923 | event_enable_init(struct ftrace_probe_ops *ops, unsigned long ip, | ||
1924 | void **_data) | ||
1925 | { | ||
1926 | struct event_probe_data **pdata = (struct event_probe_data **)_data; | ||
1927 | struct event_probe_data *data = *pdata; | ||
1928 | |||
1929 | data->ref++; | ||
1930 | return 0; | ||
1931 | } | ||
1932 | |||
1933 | static void | ||
1934 | event_enable_free(struct ftrace_probe_ops *ops, unsigned long ip, | ||
1935 | void **_data) | ||
1936 | { | ||
1937 | struct event_probe_data **pdata = (struct event_probe_data **)_data; | ||
1938 | struct event_probe_data *data = *pdata; | ||
1939 | |||
1940 | if (WARN_ON_ONCE(data->ref <= 0)) | ||
1941 | return; | ||
1942 | |||
1943 | data->ref--; | ||
1944 | if (!data->ref) { | ||
1945 | /* Remove the SOFT_MODE flag */ | ||
1946 | __ftrace_event_enable_disable(data->file, 0, 1); | ||
1947 | module_put(data->file->event_call->mod); | ||
1948 | kfree(data); | ||
1949 | } | ||
1950 | *pdata = NULL; | ||
1951 | } | ||
1952 | |||
1953 | static struct ftrace_probe_ops event_enable_probe_ops = { | ||
1954 | .func = event_enable_probe, | ||
1955 | .print = event_enable_print, | ||
1956 | .init = event_enable_init, | ||
1957 | .free = event_enable_free, | ||
1958 | }; | ||
1959 | |||
1960 | static struct ftrace_probe_ops event_enable_count_probe_ops = { | ||
1961 | .func = event_enable_count_probe, | ||
1962 | .print = event_enable_print, | ||
1963 | .init = event_enable_init, | ||
1964 | .free = event_enable_free, | ||
1965 | }; | ||
1966 | |||
1967 | static struct ftrace_probe_ops event_disable_probe_ops = { | ||
1968 | .func = event_enable_probe, | ||
1969 | .print = event_enable_print, | ||
1970 | .init = event_enable_init, | ||
1971 | .free = event_enable_free, | ||
1972 | }; | ||
1973 | |||
1974 | static struct ftrace_probe_ops event_disable_count_probe_ops = { | ||
1975 | .func = event_enable_count_probe, | ||
1976 | .print = event_enable_print, | ||
1977 | .init = event_enable_init, | ||
1978 | .free = event_enable_free, | ||
1979 | }; | ||
1980 | |||
1981 | static int | ||
1982 | event_enable_func(struct ftrace_hash *hash, | ||
1983 | char *glob, char *cmd, char *param, int enabled) | ||
1984 | { | ||
1985 | struct trace_array *tr = top_trace_array(); | ||
1986 | struct ftrace_event_file *file; | ||
1987 | struct ftrace_probe_ops *ops; | ||
1988 | struct event_probe_data *data; | ||
1989 | const char *system; | ||
1990 | const char *event; | ||
1991 | char *number; | ||
1992 | bool enable; | ||
1993 | int ret; | ||
1994 | |||
1995 | /* hash funcs only work with set_ftrace_filter */ | ||
1996 | if (!enabled) | ||
1997 | return -EINVAL; | ||
1998 | |||
1999 | if (!param) | ||
2000 | return -EINVAL; | ||
2001 | |||
2002 | system = strsep(¶m, ":"); | ||
2003 | if (!param) | ||
2004 | return -EINVAL; | ||
2005 | |||
2006 | event = strsep(¶m, ":"); | ||
2007 | |||
2008 | mutex_lock(&event_mutex); | ||
2009 | |||
2010 | ret = -EINVAL; | ||
2011 | file = find_event_file(tr, system, event); | ||
2012 | if (!file) | ||
2013 | goto out; | ||
2014 | |||
2015 | enable = strcmp(cmd, ENABLE_EVENT_STR) == 0; | ||
2016 | |||
2017 | if (enable) | ||
2018 | ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops; | ||
2019 | else | ||
2020 | ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops; | ||
2021 | |||
2022 | if (glob[0] == '!') { | ||
2023 | unregister_ftrace_function_probe_func(glob+1, ops); | ||
2024 | ret = 0; | ||
2025 | goto out; | ||
2026 | } | ||
2027 | |||
2028 | ret = -ENOMEM; | ||
2029 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
2030 | if (!data) | ||
2031 | goto out; | ||
2032 | |||
2033 | data->enable = enable; | ||
2034 | data->count = -1; | ||
2035 | data->file = file; | ||
2036 | |||
2037 | if (!param) | ||
2038 | goto out_reg; | ||
2039 | |||
2040 | number = strsep(¶m, ":"); | ||
2041 | |||
2042 | ret = -EINVAL; | ||
2043 | if (!strlen(number)) | ||
2044 | goto out_free; | ||
2045 | |||
2046 | /* | ||
2047 | * We use the callback data field (which is a pointer) | ||
2048 | * as our counter. | ||
2049 | */ | ||
2050 | ret = kstrtoul(number, 0, &data->count); | ||
2051 | if (ret) | ||
2052 | goto out_free; | ||
2053 | |||
2054 | out_reg: | ||
2055 | /* Don't let event modules unload while probe registered */ | ||
2056 | ret = try_module_get(file->event_call->mod); | ||
2057 | if (!ret) | ||
2058 | goto out_free; | ||
2059 | |||
2060 | ret = __ftrace_event_enable_disable(file, 1, 1); | ||
2061 | if (ret < 0) | ||
2062 | goto out_put; | ||
2063 | ret = register_ftrace_function_probe(glob, ops, data); | ||
2064 | if (!ret) | ||
2065 | goto out_disable; | ||
2066 | out: | ||
2067 | mutex_unlock(&event_mutex); | ||
2068 | return ret; | ||
2069 | |||
2070 | out_disable: | ||
2071 | __ftrace_event_enable_disable(file, 0, 1); | ||
2072 | out_put: | ||
2073 | module_put(file->event_call->mod); | ||
2074 | out_free: | ||
2075 | kfree(data); | ||
2076 | goto out; | ||
2077 | } | ||
2078 | |||
2079 | static struct ftrace_func_command event_enable_cmd = { | ||
2080 | .name = ENABLE_EVENT_STR, | ||
2081 | .func = event_enable_func, | ||
2082 | }; | ||
2083 | |||
2084 | static struct ftrace_func_command event_disable_cmd = { | ||
2085 | .name = DISABLE_EVENT_STR, | ||
2086 | .func = event_enable_func, | ||
2087 | }; | ||
2088 | |||
2089 | static __init int register_event_cmds(void) | ||
2090 | { | ||
2091 | int ret; | ||
2092 | |||
2093 | ret = register_ftrace_command(&event_enable_cmd); | ||
2094 | if (WARN_ON(ret < 0)) | ||
2095 | return ret; | ||
2096 | ret = register_ftrace_command(&event_disable_cmd); | ||
2097 | if (WARN_ON(ret < 0)) | ||
2098 | unregister_ftrace_command(&event_enable_cmd); | ||
2099 | return ret; | ||
2100 | } | ||
2101 | #else | ||
2102 | static inline int register_event_cmds(void) { return 0; } | ||
2103 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
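
The enable_event/disable_event commands registered above are consumed through the function tracer's set_ftrace_filter file in the form <function>:enable_event:<system>:<event>[:count]. The sketch below is a minimal user-space way of driving that interface; the debugfs mount point, the function name "schedule" and the event "sched:sched_switch" are assumptions about the running system, not values taken from this patch.

    /*
     * Sketch: arm the enable_event function probe from user space.
     * Assumes debugfs is mounted at /sys/kernel/debug and that the
     * chosen function and event exist on the running kernel.
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        /* <function>:enable_event:<system>:<event>[:count] */
        const char *cmd = "schedule:enable_event:sched:sched_switch:1";
        int fd = open("/sys/kernel/debug/tracing/set_ftrace_filter", O_WRONLY);

        if (fd < 0) {
            perror("open set_ftrace_filter");
            return 1;
        }
        if (write(fd, cmd, strlen(cmd)) < 0)
            perror("write");
        close(fd);
        return 0;
    }

With a trailing count the count variant of the probe ops is selected, so the event is flipped at most that many times; prefixing the written string with '!' takes the unregister_ftrace_function_probe_func() path in event_enable_func().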
2104 | |||
2105 | /* | ||
2106 | * The top level array has already had its ftrace_event_file | ||
2107 | * descriptors created in order to allow for early events to | ||
2108 | * be recorded. This function is called after the debugfs has been | ||
2109 | * initialized, and we now have to create the files associated | ||
2110 | * with the events. | ||
2111 | */ | ||
2112 | static __init void | ||
2113 | __trace_early_add_event_dirs(struct trace_array *tr) | ||
2114 | { | ||
2115 | struct ftrace_event_file *file; | ||
2116 | int ret; | ||
2117 | |||
2118 | |||
2119 | list_for_each_entry(file, &tr->events, list) { | ||
2120 | ret = event_create_dir(tr->event_dir, file, | ||
2121 | &ftrace_event_id_fops, | ||
2122 | &ftrace_enable_fops, | ||
2123 | &ftrace_event_filter_fops, | ||
2124 | &ftrace_event_format_fops); | ||
2125 | if (ret < 0) | ||
2126 | pr_warning("Could not create directory for event %s\n", | ||
2127 | file->event_call->name); | ||
2128 | } | ||
2129 | } | ||
2130 | |||
2131 | /* | ||
2132 | * For early boot up, the top trace array needs to have | ||
2133 | * a list of events that can be enabled. This must be done before | ||
2134 | * the filesystem is set up in order to allow events to be traced | ||
2135 | * early. | ||
2136 | */ | ||
2137 | static __init void | ||
2138 | __trace_early_add_events(struct trace_array *tr) | ||
2139 | { | ||
2140 | struct ftrace_event_call *call; | ||
2141 | int ret; | ||
2142 | |||
2143 | list_for_each_entry(call, &ftrace_events, list) { | ||
2144 | /* Early boot up should not have any modules loaded */ | ||
2145 | if (WARN_ON_ONCE(call->mod)) | ||
2146 | continue; | ||
2147 | |||
2148 | ret = __trace_early_add_new_event(call, tr); | ||
2149 | if (ret < 0) | ||
2150 | pr_warning("Could not create early event %s\n", | ||
2151 | call->name); | ||
2152 | } | ||
2153 | } | ||
2154 | |||
2155 | /* Remove the event directory structure for a trace directory. */ | ||
2156 | static void | ||
2157 | __trace_remove_event_dirs(struct trace_array *tr) | ||
2158 | { | ||
2159 | struct ftrace_event_file *file, *next; | ||
2160 | |||
2161 | list_for_each_entry_safe(file, next, &tr->events, list) { | ||
2162 | list_del(&file->list); | ||
2163 | debugfs_remove_recursive(file->dir); | ||
2164 | remove_subsystem(file->system); | ||
2165 | kmem_cache_free(file_cachep, file); | ||
2166 | } | ||
2167 | } | ||
2168 | |||
2169 | static void | ||
2170 | __add_event_to_tracers(struct ftrace_event_call *call, | ||
2171 | struct ftrace_module_file_ops *file_ops) | ||
2172 | { | ||
2173 | struct trace_array *tr; | ||
2174 | |||
2175 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { | ||
2176 | if (file_ops) | ||
2177 | __trace_add_new_mod_event(call, tr, file_ops); | ||
2178 | else | ||
2179 | __trace_add_new_event(call, tr, | ||
2180 | &ftrace_event_id_fops, | ||
2181 | &ftrace_enable_fops, | ||
2182 | &ftrace_event_filter_fops, | ||
2183 | &ftrace_event_format_fops); | ||
2184 | } | ||
2185 | } | ||
2186 | |||
1454 | static struct notifier_block trace_module_nb = { | 2187 | static struct notifier_block trace_module_nb = { |
1455 | .notifier_call = trace_module_notify, | 2188 | .notifier_call = trace_module_notify, |
1456 | .priority = 0, | 2189 | .priority = 0, |
@@ -1464,15 +2197,135 @@ static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; | |||
1464 | static __init int setup_trace_event(char *str) | 2197 | static __init int setup_trace_event(char *str) |
1465 | { | 2198 | { |
1466 | strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE); | 2199 | strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE); |
1467 | ring_buffer_expanded = 1; | 2200 | ring_buffer_expanded = true; |
1468 | tracing_selftest_disabled = 1; | 2201 | tracing_selftest_disabled = true; |
1469 | 2202 | ||
1470 | return 1; | 2203 | return 1; |
1471 | } | 2204 | } |
1472 | __setup("trace_event=", setup_trace_event); | 2205 | __setup("trace_event=", setup_trace_event); |
1473 | 2206 | ||
2207 | /* Expects to have event_mutex held when called */ | ||
2208 | static int | ||
2209 | create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) | ||
2210 | { | ||
2211 | struct dentry *d_events; | ||
2212 | struct dentry *entry; | ||
2213 | |||
2214 | entry = debugfs_create_file("set_event", 0644, parent, | ||
2215 | tr, &ftrace_set_event_fops); | ||
2216 | if (!entry) { | ||
2217 | pr_warning("Could not create debugfs 'set_event' entry\n"); | ||
2218 | return -ENOMEM; | ||
2219 | } | ||
2220 | |||
2221 | d_events = debugfs_create_dir("events", parent); | ||
2222 | if (!d_events) { | ||
2223 | pr_warning("Could not create debugfs 'events' directory\n"); | ||
2224 | return -ENOMEM; | ||
2225 | } | ||
2226 | |||
2227 | /* ring buffer internal formats */ | ||
2228 | trace_create_file("header_page", 0444, d_events, | ||
2229 | ring_buffer_print_page_header, | ||
2230 | &ftrace_show_header_fops); | ||
2231 | |||
2232 | trace_create_file("header_event", 0444, d_events, | ||
2233 | ring_buffer_print_entry_header, | ||
2234 | &ftrace_show_header_fops); | ||
2235 | |||
2236 | trace_create_file("enable", 0644, d_events, | ||
2237 | tr, &ftrace_tr_enable_fops); | ||
2238 | |||
2239 | tr->event_dir = d_events; | ||
2240 | |||
2241 | return 0; | ||
2242 | } | ||
2243 | |||
2244 | /** | ||
2245 | * event_trace_add_tracer - add an instance of a trace_array to events | ||
2246 | * @parent: The parent dentry to place the files/directories for events in | ||
2247 | * @tr: The trace array associated with these events | ||
2248 | * | ||
2249 | * When a new instance is created, it needs to set up its events | ||
2250 | * directory, as well as other files associated with events. It also | ||
2251 | * creates the event hierarchy in the @parent/events directory. | ||
2252 | * | ||
2253 | * Returns 0 on success. | ||
2254 | */ | ||
2255 | int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr) | ||
2256 | { | ||
2257 | int ret; | ||
2258 | |||
2259 | mutex_lock(&event_mutex); | ||
2260 | |||
2261 | ret = create_event_toplevel_files(parent, tr); | ||
2262 | if (ret) | ||
2263 | goto out_unlock; | ||
2264 | |||
2265 | down_write(&trace_event_sem); | ||
2266 | __trace_add_event_dirs(tr); | ||
2267 | up_write(&trace_event_sem); | ||
2268 | |||
2269 | out_unlock: | ||
2270 | mutex_unlock(&event_mutex); | ||
2271 | |||
2272 | return ret; | ||
2273 | } | ||
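
For a concrete picture of what event_trace_add_tracer() serves, the sketch below creates a trace instance from user space and enables one event inside it. The instances path, the instance name "demo" and the event "sched:sched_switch" are assumptions; creating the directory is expected to reach this function through the instance-creation path, which this hunk does not show.

    /*
     * Sketch: create a tracing instance and enable an event in it.
     * Paths, the instance name and the event name are assumptions.
     */
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/stat.h>
    #include <sys/types.h>
    #include <unistd.h>

    int main(void)
    {
        const char *inst = "/sys/kernel/debug/tracing/instances/demo";
        char path[256];
        int fd;

        /* the mkdir should end up creating a trace_array with its own events dir */
        if (mkdir(inst, 0755) < 0 && errno != EEXIST) {
            perror("mkdir instance");
            return 1;
        }

        /* set_event here is the per-instance file from create_event_toplevel_files() */
        snprintf(path, sizeof(path), "%s/set_event", inst);
        fd = open(path, O_WRONLY);
        if (fd < 0) {
            perror("open set_event");
            return 1;
        }
        if (write(fd, "sched:sched_switch", strlen("sched:sched_switch")) < 0)
            perror("write");
        close(fd);
        return 0;
    }

Removing the instance directory again should exercise event_trace_del_tracer() below together with __trace_remove_event_dirs().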
2274 | |||
2275 | /* | ||
2276 | * The top trace array already had its file descriptors created. | ||
2277 | * Now the files themselves need to be created. | ||
2278 | */ | ||
2279 | static __init int | ||
2280 | early_event_add_tracer(struct dentry *parent, struct trace_array *tr) | ||
2281 | { | ||
2282 | int ret; | ||
2283 | |||
2284 | mutex_lock(&event_mutex); | ||
2285 | |||
2286 | ret = create_event_toplevel_files(parent, tr); | ||
2287 | if (ret) | ||
2288 | goto out_unlock; | ||
2289 | |||
2290 | down_write(&trace_event_sem); | ||
2291 | __trace_early_add_event_dirs(tr); | ||
2292 | up_write(&trace_event_sem); | ||
2293 | |||
2294 | out_unlock: | ||
2295 | mutex_unlock(&event_mutex); | ||
2296 | |||
2297 | return ret; | ||
2298 | } | ||
2299 | |||
2300 | int event_trace_del_tracer(struct trace_array *tr) | ||
2301 | { | ||
2302 | /* Disable any running events */ | ||
2303 | __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); | ||
2304 | |||
2305 | mutex_lock(&event_mutex); | ||
2306 | |||
2307 | down_write(&trace_event_sem); | ||
2308 | __trace_remove_event_dirs(tr); | ||
2309 | debugfs_remove_recursive(tr->event_dir); | ||
2310 | up_write(&trace_event_sem); | ||
2311 | |||
2312 | tr->event_dir = NULL; | ||
2313 | |||
2314 | mutex_unlock(&event_mutex); | ||
2315 | |||
2316 | return 0; | ||
2317 | } | ||
2318 | |||
2319 | static __init int event_trace_memsetup(void) | ||
2320 | { | ||
2321 | field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC); | ||
2322 | file_cachep = KMEM_CACHE(ftrace_event_file, SLAB_PANIC); | ||
2323 | return 0; | ||
2324 | } | ||
2325 | |||
1474 | static __init int event_trace_enable(void) | 2326 | static __init int event_trace_enable(void) |
1475 | { | 2327 | { |
2328 | struct trace_array *tr = top_trace_array(); | ||
1476 | struct ftrace_event_call **iter, *call; | 2329 | struct ftrace_event_call **iter, *call; |
1477 | char *buf = bootup_event_buf; | 2330 | char *buf = bootup_event_buf; |
1478 | char *token; | 2331 | char *token; |
@@ -1486,6 +2339,14 @@ static __init int event_trace_enable(void) | |||
1486 | list_add(&call->list, &ftrace_events); | 2339 | list_add(&call->list, &ftrace_events); |
1487 | } | 2340 | } |
1488 | 2341 | ||
2342 | /* | ||
2343 | * We need the top trace array to have a working set of trace | ||
2344 | * points at early init, before the debug files and directories | ||
2345 | * are created. Create the file entries now, and attach them | ||
2346 | * to the actual file dentries later. | ||
2347 | */ | ||
2348 | __trace_early_add_events(tr); | ||
2349 | |||
1489 | while (true) { | 2350 | while (true) { |
1490 | token = strsep(&buf, ","); | 2351 | token = strsep(&buf, ","); |
1491 | 2352 | ||
@@ -1494,73 +2355,43 @@ static __init int event_trace_enable(void) | |||
1494 | if (!*token) | 2355 | if (!*token) |
1495 | continue; | 2356 | continue; |
1496 | 2357 | ||
1497 | ret = ftrace_set_clr_event(token, 1); | 2358 | ret = ftrace_set_clr_event(tr, token, 1); |
1498 | if (ret) | 2359 | if (ret) |
1499 | pr_warn("Failed to enable trace event: %s\n", token); | 2360 | pr_warn("Failed to enable trace event: %s\n", token); |
1500 | } | 2361 | } |
1501 | 2362 | ||
1502 | trace_printk_start_comm(); | 2363 | trace_printk_start_comm(); |
1503 | 2364 | ||
2365 | register_event_cmds(); | ||
2366 | |||
1504 | return 0; | 2367 | return 0; |
1505 | } | 2368 | } |
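
The while loop above splits bootup_event_buf on commas and hands each non-empty token to ftrace_set_clr_event() for the top trace array. The user-space sketch below restates just that tokenization step; the event list in the buffer is invented for illustration.

    /*
     * Sketch of the comma-separated splitting done on a trace_event=
     * style buffer; the contents of buf are made up.
     */
    #define _DEFAULT_SOURCE   /* for strsep() with glibc */
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char buf[] = "sched:sched_switch,irq:irq_handler_entry,timer";
        char *rest = buf;
        char *token;

        while ((token = strsep(&rest, ",")) != NULL) {
            if (!*token)    /* skip empty items, as the kernel loop does */
                continue;
            printf("would enable: %s\n", token);
        }
        return 0;
    }

In the kernel loop each surviving token is then parsed as [system:]event by ftrace_set_clr_event(), which is also where the "Failed to enable trace event" warning above comes from.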
1506 | 2369 | ||
1507 | static __init int event_trace_init(void) | 2370 | static __init int event_trace_init(void) |
1508 | { | 2371 | { |
1509 | struct ftrace_event_call *call; | 2372 | struct trace_array *tr; |
1510 | struct dentry *d_tracer; | 2373 | struct dentry *d_tracer; |
1511 | struct dentry *entry; | 2374 | struct dentry *entry; |
1512 | struct dentry *d_events; | ||
1513 | int ret; | 2375 | int ret; |
1514 | 2376 | ||
2377 | tr = top_trace_array(); | ||
2378 | |||
1515 | d_tracer = tracing_init_dentry(); | 2379 | d_tracer = tracing_init_dentry(); |
1516 | if (!d_tracer) | 2380 | if (!d_tracer) |
1517 | return 0; | 2381 | return 0; |
1518 | 2382 | ||
1519 | entry = debugfs_create_file("available_events", 0444, d_tracer, | 2383 | entry = debugfs_create_file("available_events", 0444, d_tracer, |
1520 | NULL, &ftrace_avail_fops); | 2384 | tr, &ftrace_avail_fops); |
1521 | if (!entry) | 2385 | if (!entry) |
1522 | pr_warning("Could not create debugfs " | 2386 | pr_warning("Could not create debugfs " |
1523 | "'available_events' entry\n"); | 2387 | "'available_events' entry\n"); |
1524 | 2388 | ||
1525 | entry = debugfs_create_file("set_event", 0644, d_tracer, | ||
1526 | NULL, &ftrace_set_event_fops); | ||
1527 | if (!entry) | ||
1528 | pr_warning("Could not create debugfs " | ||
1529 | "'set_event' entry\n"); | ||
1530 | |||
1531 | d_events = event_trace_events_dir(); | ||
1532 | if (!d_events) | ||
1533 | return 0; | ||
1534 | |||
1535 | /* ring buffer internal formats */ | ||
1536 | trace_create_file("header_page", 0444, d_events, | ||
1537 | ring_buffer_print_page_header, | ||
1538 | &ftrace_show_header_fops); | ||
1539 | |||
1540 | trace_create_file("header_event", 0444, d_events, | ||
1541 | ring_buffer_print_entry_header, | ||
1542 | &ftrace_show_header_fops); | ||
1543 | |||
1544 | trace_create_file("enable", 0644, d_events, | ||
1545 | NULL, &ftrace_system_enable_fops); | ||
1546 | |||
1547 | if (trace_define_common_fields()) | 2389 | if (trace_define_common_fields()) |
1548 | pr_warning("tracing: Failed to allocate common fields"); | 2390 | pr_warning("tracing: Failed to allocate common fields"); |
1549 | 2391 | ||
1550 | /* | 2392 | ret = early_event_add_tracer(d_tracer, tr); |
1551 | * Early initialization already enabled ftrace event. | 2393 | if (ret) |
1552 | * Now it's only necessary to create the event directory. | 2394 | return ret; |
1553 | */ | ||
1554 | list_for_each_entry(call, &ftrace_events, list) { | ||
1555 | |||
1556 | ret = event_create_dir(call, d_events, | ||
1557 | &ftrace_event_id_fops, | ||
1558 | &ftrace_enable_fops, | ||
1559 | &ftrace_event_filter_fops, | ||
1560 | &ftrace_event_format_fops); | ||
1561 | if (ret < 0) | ||
1562 | event_remove(call); | ||
1563 | } | ||
1564 | 2395 | ||
1565 | ret = register_module_notifier(&trace_module_nb); | 2396 | ret = register_module_notifier(&trace_module_nb); |
1566 | if (ret) | 2397 | if (ret) |
@@ -1568,6 +2399,7 @@ static __init int event_trace_init(void) | |||
1568 | 2399 | ||
1569 | return 0; | 2400 | return 0; |
1570 | } | 2401 | } |
2402 | early_initcall(event_trace_memsetup); | ||
1571 | core_initcall(event_trace_enable); | 2403 | core_initcall(event_trace_enable); |
1572 | fs_initcall(event_trace_init); | 2404 | fs_initcall(event_trace_init); |
1573 | 2405 | ||
@@ -1627,13 +2459,20 @@ static __init void event_test_stuff(void) | |||
1627 | */ | 2459 | */ |
1628 | static __init void event_trace_self_tests(void) | 2460 | static __init void event_trace_self_tests(void) |
1629 | { | 2461 | { |
2462 | struct ftrace_subsystem_dir *dir; | ||
2463 | struct ftrace_event_file *file; | ||
1630 | struct ftrace_event_call *call; | 2464 | struct ftrace_event_call *call; |
1631 | struct event_subsystem *system; | 2465 | struct event_subsystem *system; |
2466 | struct trace_array *tr; | ||
1632 | int ret; | 2467 | int ret; |
1633 | 2468 | ||
2469 | tr = top_trace_array(); | ||
2470 | |||
1634 | pr_info("Running tests on trace events:\n"); | 2471 | pr_info("Running tests on trace events:\n"); |
1635 | 2472 | ||
1636 | list_for_each_entry(call, &ftrace_events, list) { | 2473 | list_for_each_entry(file, &tr->events, list) { |
2474 | |||
2475 | call = file->event_call; | ||
1637 | 2476 | ||
1638 | /* Only test those that have a probe */ | 2477 | /* Only test those that have a probe */ |
1639 | if (!call->class || !call->class->probe) | 2478 | if (!call->class || !call->class->probe) |
@@ -1657,15 +2496,15 @@ static __init void event_trace_self_tests(void) | |||
1657 | * If an event is already enabled, someone is using | 2496 | * If an event is already enabled, someone is using |
1658 | * it and the self test should not be on. | 2497 | * it and the self test should not be on. |
1659 | */ | 2498 | */ |
1660 | if (call->flags & TRACE_EVENT_FL_ENABLED) { | 2499 | if (file->flags & FTRACE_EVENT_FL_ENABLED) { |
1661 | pr_warning("Enabled event during self test!\n"); | 2500 | pr_warning("Enabled event during self test!\n"); |
1662 | WARN_ON_ONCE(1); | 2501 | WARN_ON_ONCE(1); |
1663 | continue; | 2502 | continue; |
1664 | } | 2503 | } |
1665 | 2504 | ||
1666 | ftrace_event_enable_disable(call, 1); | 2505 | ftrace_event_enable_disable(file, 1); |
1667 | event_test_stuff(); | 2506 | event_test_stuff(); |
1668 | ftrace_event_enable_disable(call, 0); | 2507 | ftrace_event_enable_disable(file, 0); |
1669 | 2508 | ||
1670 | pr_cont("OK\n"); | 2509 | pr_cont("OK\n"); |
1671 | } | 2510 | } |
@@ -1674,7 +2513,9 @@ static __init void event_trace_self_tests(void) | |||
1674 | 2513 | ||
1675 | pr_info("Running tests on trace event systems:\n"); | 2514 | pr_info("Running tests on trace event systems:\n"); |
1676 | 2515 | ||
1677 | list_for_each_entry(system, &event_subsystems, list) { | 2516 | list_for_each_entry(dir, &tr->systems, list) { |
2517 | |||
2518 | system = dir->subsystem; | ||
1678 | 2519 | ||
1679 | /* the ftrace system is special, skip it */ | 2520 | /* the ftrace system is special, skip it */ |
1680 | if (strcmp(system->name, "ftrace") == 0) | 2521 | if (strcmp(system->name, "ftrace") == 0) |
@@ -1682,7 +2523,7 @@ static __init void event_trace_self_tests(void) | |||
1682 | 2523 | ||
1683 | pr_info("Testing event system %s: ", system->name); | 2524 | pr_info("Testing event system %s: ", system->name); |
1684 | 2525 | ||
1685 | ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1); | 2526 | ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1); |
1686 | if (WARN_ON_ONCE(ret)) { | 2527 | if (WARN_ON_ONCE(ret)) { |
1687 | pr_warning("error enabling system %s\n", | 2528 | pr_warning("error enabling system %s\n", |
1688 | system->name); | 2529 | system->name); |
@@ -1691,7 +2532,7 @@ static __init void event_trace_self_tests(void) | |||
1691 | 2532 | ||
1692 | event_test_stuff(); | 2533 | event_test_stuff(); |
1693 | 2534 | ||
1694 | ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0); | 2535 | ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0); |
1695 | if (WARN_ON_ONCE(ret)) { | 2536 | if (WARN_ON_ONCE(ret)) { |
1696 | pr_warning("error disabling system %s\n", | 2537 | pr_warning("error disabling system %s\n", |
1697 | system->name); | 2538 | system->name); |
@@ -1706,7 +2547,7 @@ static __init void event_trace_self_tests(void) | |||
1706 | pr_info("Running tests on all trace events:\n"); | 2547 | pr_info("Running tests on all trace events:\n"); |
1707 | pr_info("Testing all events: "); | 2548 | pr_info("Testing all events: "); |
1708 | 2549 | ||
1709 | ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1); | 2550 | ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1); |
1710 | if (WARN_ON_ONCE(ret)) { | 2551 | if (WARN_ON_ONCE(ret)) { |
1711 | pr_warning("error enabling all events\n"); | 2552 | pr_warning("error enabling all events\n"); |
1712 | return; | 2553 | return; |
@@ -1715,7 +2556,7 @@ static __init void event_trace_self_tests(void) | |||
1715 | event_test_stuff(); | 2556 | event_test_stuff(); |
1716 | 2557 | ||
1717 | /* reset sysname */ | 2558 | /* reset sysname */ |
1718 | ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0); | 2559 | ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); |
1719 | if (WARN_ON_ONCE(ret)) { | 2560 | if (WARN_ON_ONCE(ret)) { |
1720 | pr_warning("error disabling all events\n"); | 2561 | pr_warning("error disabling all events\n"); |
1721 | return; | 2562 | return; |
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e5b0ca8b8d4d..a6361178de5a 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -658,33 +658,6 @@ void print_subsystem_event_filter(struct event_subsystem *system, | |||
658 | mutex_unlock(&event_mutex); | 658 | mutex_unlock(&event_mutex); |
659 | } | 659 | } |
660 | 660 | ||
661 | static struct ftrace_event_field * | ||
662 | __find_event_field(struct list_head *head, char *name) | ||
663 | { | ||
664 | struct ftrace_event_field *field; | ||
665 | |||
666 | list_for_each_entry(field, head, link) { | ||
667 | if (!strcmp(field->name, name)) | ||
668 | return field; | ||
669 | } | ||
670 | |||
671 | return NULL; | ||
672 | } | ||
673 | |||
674 | static struct ftrace_event_field * | ||
675 | find_event_field(struct ftrace_event_call *call, char *name) | ||
676 | { | ||
677 | struct ftrace_event_field *field; | ||
678 | struct list_head *head; | ||
679 | |||
680 | field = __find_event_field(&ftrace_common_fields, name); | ||
681 | if (field) | ||
682 | return field; | ||
683 | |||
684 | head = trace_get_fields(call); | ||
685 | return __find_event_field(head, name); | ||
686 | } | ||
687 | |||
688 | static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) | 661 | static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) |
689 | { | 662 | { |
690 | stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL); | 663 | stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL); |
@@ -1337,7 +1310,7 @@ static struct filter_pred *create_pred(struct filter_parse_state *ps, | |||
1337 | return NULL; | 1310 | return NULL; |
1338 | } | 1311 | } |
1339 | 1312 | ||
1340 | field = find_event_field(call, operand1); | 1313 | field = trace_find_event_field(call, operand1); |
1341 | if (!field) { | 1314 | if (!field) { |
1342 | parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0); | 1315 | parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0); |
1343 | return NULL; | 1316 | return NULL; |
@@ -1907,16 +1880,17 @@ out_unlock: | |||
1907 | return err; | 1880 | return err; |
1908 | } | 1881 | } |
1909 | 1882 | ||
1910 | int apply_subsystem_event_filter(struct event_subsystem *system, | 1883 | int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir, |
1911 | char *filter_string) | 1884 | char *filter_string) |
1912 | { | 1885 | { |
1886 | struct event_subsystem *system = dir->subsystem; | ||
1913 | struct event_filter *filter; | 1887 | struct event_filter *filter; |
1914 | int err = 0; | 1888 | int err = 0; |
1915 | 1889 | ||
1916 | mutex_lock(&event_mutex); | 1890 | mutex_lock(&event_mutex); |
1917 | 1891 | ||
1918 | /* Make sure the system still has events */ | 1892 | /* Make sure the system still has events */ |
1919 | if (!system->nr_events) { | 1893 | if (!dir->nr_events) { |
1920 | err = -ENODEV; | 1894 | err = -ENODEV; |
1921 | goto out_unlock; | 1895 | goto out_unlock; |
1922 | } | 1896 | } |
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index e039906b037d..d21a74670088 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
@@ -129,7 +129,7 @@ static void __always_unused ____ftrace_check_##name(void) \ | |||
129 | 129 | ||
130 | #undef FTRACE_ENTRY | 130 | #undef FTRACE_ENTRY |
131 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ | 131 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ |
132 | int \ | 132 | static int __init \ |
133 | ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ | 133 | ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ |
134 | { \ | 134 | { \ |
135 | struct struct_name field; \ | 135 | struct struct_name field; \ |
@@ -168,7 +168,7 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ | |||
168 | #define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ | 168 | #define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ |
169 | regfn) \ | 169 | regfn) \ |
170 | \ | 170 | \ |
171 | struct ftrace_event_class event_class_ftrace_##call = { \ | 171 | struct ftrace_event_class __refdata event_class_ftrace_##call = { \ |
172 | .system = __stringify(TRACE_SYSTEM), \ | 172 | .system = __stringify(TRACE_SYSTEM), \ |
173 | .define_fields = ftrace_define_fields_##call, \ | 173 | .define_fields = ftrace_define_fields_##call, \ |
174 | .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ | 174 | .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ |
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 601152523326..c4d6d7191988 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c | |||
@@ -28,7 +28,7 @@ static void tracing_stop_function_trace(void); | |||
28 | static int function_trace_init(struct trace_array *tr) | 28 | static int function_trace_init(struct trace_array *tr) |
29 | { | 29 | { |
30 | func_trace = tr; | 30 | func_trace = tr; |
31 | tr->cpu = get_cpu(); | 31 | tr->trace_buffer.cpu = get_cpu(); |
32 | put_cpu(); | 32 | put_cpu(); |
33 | 33 | ||
34 | tracing_start_cmdline_record(); | 34 | tracing_start_cmdline_record(); |
@@ -44,7 +44,7 @@ static void function_trace_reset(struct trace_array *tr) | |||
44 | 44 | ||
45 | static void function_trace_start(struct trace_array *tr) | 45 | static void function_trace_start(struct trace_array *tr) |
46 | { | 46 | { |
47 | tracing_reset_online_cpus(tr); | 47 | tracing_reset_online_cpus(&tr->trace_buffer); |
48 | } | 48 | } |
49 | 49 | ||
50 | /* Our option */ | 50 | /* Our option */ |
@@ -76,7 +76,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, | |||
76 | goto out; | 76 | goto out; |
77 | 77 | ||
78 | cpu = smp_processor_id(); | 78 | cpu = smp_processor_id(); |
79 | data = tr->data[cpu]; | 79 | data = per_cpu_ptr(tr->trace_buffer.data, cpu); |
80 | if (!atomic_read(&data->disabled)) { | 80 | if (!atomic_read(&data->disabled)) { |
81 | local_save_flags(flags); | 81 | local_save_flags(flags); |
82 | trace_function(tr, ip, parent_ip, flags, pc); | 82 | trace_function(tr, ip, parent_ip, flags, pc); |
@@ -107,7 +107,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, | |||
107 | */ | 107 | */ |
108 | local_irq_save(flags); | 108 | local_irq_save(flags); |
109 | cpu = raw_smp_processor_id(); | 109 | cpu = raw_smp_processor_id(); |
110 | data = tr->data[cpu]; | 110 | data = per_cpu_ptr(tr->trace_buffer.data, cpu); |
111 | disabled = atomic_inc_return(&data->disabled); | 111 | disabled = atomic_inc_return(&data->disabled); |
112 | 112 | ||
113 | if (likely(disabled == 1)) { | 113 | if (likely(disabled == 1)) { |
@@ -214,66 +214,89 @@ static struct tracer function_trace __read_mostly = | |||
214 | }; | 214 | }; |
215 | 215 | ||
216 | #ifdef CONFIG_DYNAMIC_FTRACE | 216 | #ifdef CONFIG_DYNAMIC_FTRACE |
217 | static void | 217 | static int update_count(void **data) |
218 | ftrace_traceon(unsigned long ip, unsigned long parent_ip, void **data) | ||
219 | { | 218 | { |
220 | long *count = (long *)data; | 219 | unsigned long *count = (long *)data; |
221 | |||
222 | if (tracing_is_on()) | ||
223 | return; | ||
224 | 220 | ||
225 | if (!*count) | 221 | if (!*count) |
226 | return; | 222 | return 0; |
227 | 223 | ||
228 | if (*count != -1) | 224 | if (*count != -1) |
229 | (*count)--; | 225 | (*count)--; |
230 | 226 | ||
231 | tracing_on(); | 227 | return 1; |
232 | } | 228 | } |
233 | 229 | ||
234 | static void | 230 | static void |
235 | ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data) | 231 | ftrace_traceon_count(unsigned long ip, unsigned long parent_ip, void **data) |
236 | { | 232 | { |
237 | long *count = (long *)data; | 233 | if (tracing_is_on()) |
234 | return; | ||
235 | |||
236 | if (update_count(data)) | ||
237 | tracing_on(); | ||
238 | } | ||
238 | 239 | ||
240 | static void | ||
241 | ftrace_traceoff_count(unsigned long ip, unsigned long parent_ip, void **data) | ||
242 | { | ||
239 | if (!tracing_is_on()) | 243 | if (!tracing_is_on()) |
240 | return; | 244 | return; |
241 | 245 | ||
242 | if (!*count) | 246 | if (update_count(data)) |
247 | tracing_off(); | ||
248 | } | ||
249 | |||
250 | static void | ||
251 | ftrace_traceon(unsigned long ip, unsigned long parent_ip, void **data) | ||
252 | { | ||
253 | if (tracing_is_on()) | ||
243 | return; | 254 | return; |
244 | 255 | ||
245 | if (*count != -1) | 256 | tracing_on(); |
246 | (*count)--; | 257 | } |
258 | |||
259 | static void | ||
260 | ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data) | ||
261 | { | ||
262 | if (!tracing_is_on()) | ||
263 | return; | ||
247 | 264 | ||
248 | tracing_off(); | 265 | tracing_off(); |
249 | } | 266 | } |
250 | 267 | ||
251 | static int | 268 | /* |
252 | ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, | 269 | * Skip 4: |
253 | struct ftrace_probe_ops *ops, void *data); | 270 | * ftrace_stacktrace() |
271 | * function_trace_probe_call() | ||
272 | * ftrace_ops_list_func() | ||
273 | * ftrace_call() | ||
274 | */ | ||
275 | #define STACK_SKIP 4 | ||
254 | 276 | ||
255 | static struct ftrace_probe_ops traceon_probe_ops = { | 277 | static void |
256 | .func = ftrace_traceon, | 278 | ftrace_stacktrace(unsigned long ip, unsigned long parent_ip, void **data) |
257 | .print = ftrace_trace_onoff_print, | 279 | { |
258 | }; | 280 | trace_dump_stack(STACK_SKIP); |
281 | } | ||
259 | 282 | ||
260 | static struct ftrace_probe_ops traceoff_probe_ops = { | 283 | static void |
261 | .func = ftrace_traceoff, | 284 | ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data) |
262 | .print = ftrace_trace_onoff_print, | 285 | { |
263 | }; | 286 | if (!tracing_is_on()) |
287 | return; | ||
288 | |||
289 | if (update_count(data)) | ||
290 | trace_dump_stack(STACK_SKIP); | ||
291 | } | ||
264 | 292 | ||
265 | static int | 293 | static int |
266 | ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, | 294 | ftrace_probe_print(const char *name, struct seq_file *m, |
267 | struct ftrace_probe_ops *ops, void *data) | 295 | unsigned long ip, void *data) |
268 | { | 296 | { |
269 | long count = (long)data; | 297 | long count = (long)data; |
270 | 298 | ||
271 | seq_printf(m, "%ps:", (void *)ip); | 299 | seq_printf(m, "%ps:%s", (void *)ip, name); |
272 | |||
273 | if (ops == &traceon_probe_ops) | ||
274 | seq_printf(m, "traceon"); | ||
275 | else | ||
276 | seq_printf(m, "traceoff"); | ||
277 | 300 | ||
278 | if (count == -1) | 301 | if (count == -1) |
279 | seq_printf(m, ":unlimited\n"); | 302 | seq_printf(m, ":unlimited\n"); |
@@ -284,26 +307,61 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, | |||
284 | } | 307 | } |
285 | 308 | ||
286 | static int | 309 | static int |
287 | ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param) | 310 | ftrace_traceon_print(struct seq_file *m, unsigned long ip, |
311 | struct ftrace_probe_ops *ops, void *data) | ||
288 | { | 312 | { |
289 | struct ftrace_probe_ops *ops; | 313 | return ftrace_probe_print("traceon", m, ip, data); |
290 | 314 | } | |
291 | /* we register both traceon and traceoff to this callback */ | ||
292 | if (strcmp(cmd, "traceon") == 0) | ||
293 | ops = &traceon_probe_ops; | ||
294 | else | ||
295 | ops = &traceoff_probe_ops; | ||
296 | 315 | ||
297 | unregister_ftrace_function_probe_func(glob, ops); | 316 | static int |
317 | ftrace_traceoff_print(struct seq_file *m, unsigned long ip, | ||
318 | struct ftrace_probe_ops *ops, void *data) | ||
319 | { | ||
320 | return ftrace_probe_print("traceoff", m, ip, data); | ||
321 | } | ||
298 | 322 | ||
299 | return 0; | 323 | static int |
324 | ftrace_stacktrace_print(struct seq_file *m, unsigned long ip, | ||
325 | struct ftrace_probe_ops *ops, void *data) | ||
326 | { | ||
327 | return ftrace_probe_print("stacktrace", m, ip, data); | ||
300 | } | 328 | } |
301 | 329 | ||
330 | static struct ftrace_probe_ops traceon_count_probe_ops = { | ||
331 | .func = ftrace_traceon_count, | ||
332 | .print = ftrace_traceon_print, | ||
333 | }; | ||
334 | |||
335 | static struct ftrace_probe_ops traceoff_count_probe_ops = { | ||
336 | .func = ftrace_traceoff_count, | ||
337 | .print = ftrace_traceoff_print, | ||
338 | }; | ||
339 | |||
340 | static struct ftrace_probe_ops stacktrace_count_probe_ops = { | ||
341 | .func = ftrace_stacktrace_count, | ||
342 | .print = ftrace_stacktrace_print, | ||
343 | }; | ||
344 | |||
345 | static struct ftrace_probe_ops traceon_probe_ops = { | ||
346 | .func = ftrace_traceon, | ||
347 | .print = ftrace_traceon_print, | ||
348 | }; | ||
349 | |||
350 | static struct ftrace_probe_ops traceoff_probe_ops = { | ||
351 | .func = ftrace_traceoff, | ||
352 | .print = ftrace_traceoff_print, | ||
353 | }; | ||
354 | |||
355 | static struct ftrace_probe_ops stacktrace_probe_ops = { | ||
356 | .func = ftrace_stacktrace, | ||
357 | .print = ftrace_stacktrace_print, | ||
358 | }; | ||
359 | |||
302 | static int | 360 | static int |
303 | ftrace_trace_onoff_callback(struct ftrace_hash *hash, | 361 | ftrace_trace_probe_callback(struct ftrace_probe_ops *ops, |
304 | char *glob, char *cmd, char *param, int enable) | 362 | struct ftrace_hash *hash, char *glob, |
363 | char *cmd, char *param, int enable) | ||
305 | { | 364 | { |
306 | struct ftrace_probe_ops *ops; | ||
307 | void *count = (void *)-1; | 365 | void *count = (void *)-1; |
308 | char *number; | 366 | char *number; |
309 | int ret; | 367 | int ret; |
@@ -312,14 +370,10 @@ ftrace_trace_onoff_callback(struct ftrace_hash *hash, | |||
312 | if (!enable) | 370 | if (!enable) |
313 | return -EINVAL; | 371 | return -EINVAL; |
314 | 372 | ||
315 | if (glob[0] == '!') | 373 | if (glob[0] == '!') { |
316 | return ftrace_trace_onoff_unreg(glob+1, cmd, param); | 374 | unregister_ftrace_function_probe_func(glob+1, ops); |
317 | 375 | return 0; | |
318 | /* we register both traceon and traceoff to this callback */ | 376 | } |
319 | if (strcmp(cmd, "traceon") == 0) | ||
320 | ops = &traceon_probe_ops; | ||
321 | else | ||
322 | ops = &traceoff_probe_ops; | ||
323 | 377 | ||
324 | if (!param) | 378 | if (!param) |
325 | goto out_reg; | 379 | goto out_reg; |
@@ -343,6 +397,34 @@ ftrace_trace_onoff_callback(struct ftrace_hash *hash, | |||
343 | return ret < 0 ? ret : 0; | 397 | return ret < 0 ? ret : 0; |
344 | } | 398 | } |
345 | 399 | ||
400 | static int | ||
401 | ftrace_trace_onoff_callback(struct ftrace_hash *hash, | ||
402 | char *glob, char *cmd, char *param, int enable) | ||
403 | { | ||
404 | struct ftrace_probe_ops *ops; | ||
405 | |||
406 | /* we register both traceon and traceoff to this callback */ | ||
407 | if (strcmp(cmd, "traceon") == 0) | ||
408 | ops = param ? &traceon_count_probe_ops : &traceon_probe_ops; | ||
409 | else | ||
410 | ops = param ? &traceoff_count_probe_ops : &traceoff_probe_ops; | ||
411 | |||
412 | return ftrace_trace_probe_callback(ops, hash, glob, cmd, | ||
413 | param, enable); | ||
414 | } | ||
415 | |||
416 | static int | ||
417 | ftrace_stacktrace_callback(struct ftrace_hash *hash, | ||
418 | char *glob, char *cmd, char *param, int enable) | ||
419 | { | ||
420 | struct ftrace_probe_ops *ops; | ||
421 | |||
422 | ops = param ? &stacktrace_count_probe_ops : &stacktrace_probe_ops; | ||
423 | |||
424 | return ftrace_trace_probe_callback(ops, hash, glob, cmd, | ||
425 | param, enable); | ||
426 | } | ||
427 | |||
346 | static struct ftrace_func_command ftrace_traceon_cmd = { | 428 | static struct ftrace_func_command ftrace_traceon_cmd = { |
347 | .name = "traceon", | 429 | .name = "traceon", |
348 | .func = ftrace_trace_onoff_callback, | 430 | .func = ftrace_trace_onoff_callback, |
@@ -353,6 +435,11 @@ static struct ftrace_func_command ftrace_traceoff_cmd = { | |||
353 | .func = ftrace_trace_onoff_callback, | 435 | .func = ftrace_trace_onoff_callback, |
354 | }; | 436 | }; |
355 | 437 | ||
438 | static struct ftrace_func_command ftrace_stacktrace_cmd = { | ||
439 | .name = "stacktrace", | ||
440 | .func = ftrace_stacktrace_callback, | ||
441 | }; | ||
442 | |||
356 | static int __init init_func_cmd_traceon(void) | 443 | static int __init init_func_cmd_traceon(void) |
357 | { | 444 | { |
358 | int ret; | 445 | int ret; |
@@ -364,6 +451,12 @@ static int __init init_func_cmd_traceon(void) | |||
364 | ret = register_ftrace_command(&ftrace_traceon_cmd); | 451 | ret = register_ftrace_command(&ftrace_traceon_cmd); |
365 | if (ret) | 452 | if (ret) |
366 | unregister_ftrace_command(&ftrace_traceoff_cmd); | 453 | unregister_ftrace_command(&ftrace_traceoff_cmd); |
454 | |||
455 | ret = register_ftrace_command(&ftrace_stacktrace_cmd); | ||
456 | if (ret) { | ||
457 | unregister_ftrace_command(&ftrace_traceoff_cmd); | ||
458 | unregister_ftrace_command(&ftrace_traceon_cmd); | ||
459 | } | ||
367 | return ret; | 460 | return ret; |
368 | } | 461 | } |
369 | #else | 462 | #else |
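
The stacktrace command registered here rides on the same <function>:<command>[:count] syntax as traceon and traceoff. A hedged user-space sketch follows; the debugfs mount point and the function name "kfree" are assumptions, and the trailing count is optional.

    /*
     * Sketch: attach the stacktrace probe to a function for three hits,
     * then remove it again. Paths and the function name are assumptions.
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    static int write_filter(const char *cmd)
    {
        int fd = open("/sys/kernel/debug/tracing/set_ftrace_filter", O_WRONLY);

        if (fd < 0) {
            perror("open set_ftrace_filter");
            return -1;
        }
        if (write(fd, cmd, strlen(cmd)) < 0)
            perror("write");
        close(fd);
        return 0;
    }

    int main(void)
    {
        /* a count selects stacktrace_count_probe_ops, no count the plain ops */
        write_filter("kfree:stacktrace:3");
        /* a leading '!' goes through unregister_ftrace_function_probe_func() */
        write_filter("!kfree:stacktrace");
        return 0;
    }

The same helper path also backs traceon and traceoff: a count makes ftrace_trace_onoff_callback() pick the corresponding *_count_probe_ops before handing off to ftrace_trace_probe_callback().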
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 39ada66389cc..8388bc99f2ee 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -218,7 +218,7 @@ int __trace_graph_entry(struct trace_array *tr, | |||
218 | { | 218 | { |
219 | struct ftrace_event_call *call = &event_funcgraph_entry; | 219 | struct ftrace_event_call *call = &event_funcgraph_entry; |
220 | struct ring_buffer_event *event; | 220 | struct ring_buffer_event *event; |
221 | struct ring_buffer *buffer = tr->buffer; | 221 | struct ring_buffer *buffer = tr->trace_buffer.buffer; |
222 | struct ftrace_graph_ent_entry *entry; | 222 | struct ftrace_graph_ent_entry *entry; |
223 | 223 | ||
224 | if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) | 224 | if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) |
@@ -265,7 +265,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) | |||
265 | 265 | ||
266 | local_irq_save(flags); | 266 | local_irq_save(flags); |
267 | cpu = raw_smp_processor_id(); | 267 | cpu = raw_smp_processor_id(); |
268 | data = tr->data[cpu]; | 268 | data = per_cpu_ptr(tr->trace_buffer.data, cpu); |
269 | disabled = atomic_inc_return(&data->disabled); | 269 | disabled = atomic_inc_return(&data->disabled); |
270 | if (likely(disabled == 1)) { | 270 | if (likely(disabled == 1)) { |
271 | pc = preempt_count(); | 271 | pc = preempt_count(); |
@@ -323,7 +323,7 @@ void __trace_graph_return(struct trace_array *tr, | |||
323 | { | 323 | { |
324 | struct ftrace_event_call *call = &event_funcgraph_exit; | 324 | struct ftrace_event_call *call = &event_funcgraph_exit; |
325 | struct ring_buffer_event *event; | 325 | struct ring_buffer_event *event; |
326 | struct ring_buffer *buffer = tr->buffer; | 326 | struct ring_buffer *buffer = tr->trace_buffer.buffer; |
327 | struct ftrace_graph_ret_entry *entry; | 327 | struct ftrace_graph_ret_entry *entry; |
328 | 328 | ||
329 | if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) | 329 | if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) |
@@ -350,7 +350,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) | |||
350 | 350 | ||
351 | local_irq_save(flags); | 351 | local_irq_save(flags); |
352 | cpu = raw_smp_processor_id(); | 352 | cpu = raw_smp_processor_id(); |
353 | data = tr->data[cpu]; | 353 | data = per_cpu_ptr(tr->trace_buffer.data, cpu); |
354 | disabled = atomic_inc_return(&data->disabled); | 354 | disabled = atomic_inc_return(&data->disabled); |
355 | if (likely(disabled == 1)) { | 355 | if (likely(disabled == 1)) { |
356 | pc = preempt_count(); | 356 | pc = preempt_count(); |
@@ -560,9 +560,9 @@ get_return_for_leaf(struct trace_iterator *iter, | |||
560 | * We need to consume the current entry to see | 560 | * We need to consume the current entry to see |
561 | * the next one. | 561 | * the next one. |
562 | */ | 562 | */ |
563 | ring_buffer_consume(iter->tr->buffer, iter->cpu, | 563 | ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, |
564 | NULL, NULL); | 564 | NULL, NULL); |
565 | event = ring_buffer_peek(iter->tr->buffer, iter->cpu, | 565 | event = ring_buffer_peek(iter->trace_buffer->buffer, iter->cpu, |
566 | NULL, NULL); | 566 | NULL, NULL); |
567 | } | 567 | } |
568 | 568 | ||
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 443b25b43b4f..b19d065a28cb 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
@@ -33,6 +33,7 @@ enum { | |||
33 | static int trace_type __read_mostly; | 33 | static int trace_type __read_mostly; |
34 | 34 | ||
35 | static int save_flags; | 35 | static int save_flags; |
36 | static bool function_enabled; | ||
36 | 37 | ||
37 | static void stop_irqsoff_tracer(struct trace_array *tr, int graph); | 38 | static void stop_irqsoff_tracer(struct trace_array *tr, int graph); |
38 | static int start_irqsoff_tracer(struct trace_array *tr, int graph); | 39 | static int start_irqsoff_tracer(struct trace_array *tr, int graph); |
@@ -121,7 +122,7 @@ static int func_prolog_dec(struct trace_array *tr, | |||
121 | if (!irqs_disabled_flags(*flags)) | 122 | if (!irqs_disabled_flags(*flags)) |
122 | return 0; | 123 | return 0; |
123 | 124 | ||
124 | *data = tr->data[cpu]; | 125 | *data = per_cpu_ptr(tr->trace_buffer.data, cpu); |
125 | disabled = atomic_inc_return(&(*data)->disabled); | 126 | disabled = atomic_inc_return(&(*data)->disabled); |
126 | 127 | ||
127 | if (likely(disabled == 1)) | 128 | if (likely(disabled == 1)) |
@@ -175,7 +176,7 @@ static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) | |||
175 | per_cpu(tracing_cpu, cpu) = 0; | 176 | per_cpu(tracing_cpu, cpu) = 0; |
176 | 177 | ||
177 | tracing_max_latency = 0; | 178 | tracing_max_latency = 0; |
178 | tracing_reset_online_cpus(irqsoff_trace); | 179 | tracing_reset_online_cpus(&irqsoff_trace->trace_buffer); |
179 | 180 | ||
180 | return start_irqsoff_tracer(irqsoff_trace, set); | 181 | return start_irqsoff_tracer(irqsoff_trace, set); |
181 | } | 182 | } |
@@ -380,7 +381,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) | |||
380 | if (per_cpu(tracing_cpu, cpu)) | 381 | if (per_cpu(tracing_cpu, cpu)) |
381 | return; | 382 | return; |
382 | 383 | ||
383 | data = tr->data[cpu]; | 384 | data = per_cpu_ptr(tr->trace_buffer.data, cpu); |
384 | 385 | ||
385 | if (unlikely(!data) || atomic_read(&data->disabled)) | 386 | if (unlikely(!data) || atomic_read(&data->disabled)) |
386 | return; | 387 | return; |
@@ -418,7 +419,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) | |||
418 | if (!tracer_enabled) | 419 | if (!tracer_enabled) |
419 | return; | 420 | return; |
420 | 421 | ||
421 | data = tr->data[cpu]; | 422 | data = per_cpu_ptr(tr->trace_buffer.data, cpu); |
422 | 423 | ||
423 | if (unlikely(!data) || | 424 | if (unlikely(!data) || |
424 | !data->critical_start || atomic_read(&data->disabled)) | 425 | !data->critical_start || atomic_read(&data->disabled)) |
@@ -528,15 +529,60 @@ void trace_preempt_off(unsigned long a0, unsigned long a1) | |||
528 | } | 529 | } |
529 | #endif /* CONFIG_PREEMPT_TRACER */ | 530 | #endif /* CONFIG_PREEMPT_TRACER */ |
530 | 531 | ||
531 | static int start_irqsoff_tracer(struct trace_array *tr, int graph) | 532 | static int register_irqsoff_function(int graph, int set) |
532 | { | 533 | { |
533 | int ret = 0; | 534 | int ret; |
534 | 535 | ||
535 | if (!graph) | 536 | /* 'set' is set if TRACE_ITER_FUNCTION is about to be set */ |
536 | ret = register_ftrace_function(&trace_ops); | 537 | if (function_enabled || (!set && !(trace_flags & TRACE_ITER_FUNCTION))) |
537 | else | 538 | return 0; |
539 | |||
540 | if (graph) | ||
538 | ret = register_ftrace_graph(&irqsoff_graph_return, | 541 | ret = register_ftrace_graph(&irqsoff_graph_return, |
539 | &irqsoff_graph_entry); | 542 | &irqsoff_graph_entry); |
543 | else | ||
544 | ret = register_ftrace_function(&trace_ops); | ||
545 | |||
546 | if (!ret) | ||
547 | function_enabled = true; | ||
548 | |||
549 | return ret; | ||
550 | } | ||
551 | |||
552 | static void unregister_irqsoff_function(int graph) | ||
553 | { | ||
554 | if (!function_enabled) | ||
555 | return; | ||
556 | |||
557 | if (graph) | ||
558 | unregister_ftrace_graph(); | ||
559 | else | ||
560 | unregister_ftrace_function(&trace_ops); | ||
561 | |||
562 | function_enabled = false; | ||
563 | } | ||
564 | |||
565 | static void irqsoff_function_set(int set) | ||
566 | { | ||
567 | if (set) | ||
568 | register_irqsoff_function(is_graph(), 1); | ||
569 | else | ||
570 | unregister_irqsoff_function(is_graph()); | ||
571 | } | ||
572 | |||
573 | static int irqsoff_flag_changed(struct tracer *tracer, u32 mask, int set) | ||
574 | { | ||
575 | if (mask & TRACE_ITER_FUNCTION) | ||
576 | irqsoff_function_set(set); | ||
577 | |||
578 | return trace_keep_overwrite(tracer, mask, set); | ||
579 | } | ||
580 | |||
581 | static int start_irqsoff_tracer(struct trace_array *tr, int graph) | ||
582 | { | ||
583 | int ret; | ||
584 | |||
585 | ret = register_irqsoff_function(graph, 0); | ||
540 | 586 | ||
541 | if (!ret && tracing_is_enabled()) | 587 | if (!ret && tracing_is_enabled()) |
542 | tracer_enabled = 1; | 588 | tracer_enabled = 1; |
@@ -550,10 +596,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr, int graph) | |||
550 | { | 596 | { |
551 | tracer_enabled = 0; | 597 | tracer_enabled = 0; |
552 | 598 | ||
553 | if (!graph) | 599 | unregister_irqsoff_function(graph); |
554 | unregister_ftrace_function(&trace_ops); | ||
555 | else | ||
556 | unregister_ftrace_graph(); | ||
557 | } | 600 | } |
558 | 601 | ||
559 | static void __irqsoff_tracer_init(struct trace_array *tr) | 602 | static void __irqsoff_tracer_init(struct trace_array *tr) |
@@ -561,14 +604,14 @@ static void __irqsoff_tracer_init(struct trace_array *tr) | |||
561 | save_flags = trace_flags; | 604 | save_flags = trace_flags; |
562 | 605 | ||
563 | /* non overwrite screws up the latency tracers */ | 606 | /* non overwrite screws up the latency tracers */ |
564 | set_tracer_flag(TRACE_ITER_OVERWRITE, 1); | 607 | set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1); |
565 | set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1); | 608 | set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1); |
566 | 609 | ||
567 | tracing_max_latency = 0; | 610 | tracing_max_latency = 0; |
568 | irqsoff_trace = tr; | 611 | irqsoff_trace = tr; |
569 | /* make sure that the tracer is visible */ | 612 | /* make sure that the tracer is visible */ |
570 | smp_wmb(); | 613 | smp_wmb(); |
571 | tracing_reset_online_cpus(tr); | 614 | tracing_reset_online_cpus(&tr->trace_buffer); |
572 | 615 | ||
573 | if (start_irqsoff_tracer(tr, is_graph())) | 616 | if (start_irqsoff_tracer(tr, is_graph())) |
574 | printk(KERN_ERR "failed to start irqsoff tracer\n"); | 617 | printk(KERN_ERR "failed to start irqsoff tracer\n"); |
@@ -581,8 +624,8 @@ static void irqsoff_tracer_reset(struct trace_array *tr) | |||
581 | 624 | ||
582 | stop_irqsoff_tracer(tr, is_graph()); | 625 | stop_irqsoff_tracer(tr, is_graph()); |
583 | 626 | ||
584 | set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag); | 627 | set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag); |
585 | set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag); | 628 | set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag); |
586 | } | 629 | } |
587 | 630 | ||
588 | static void irqsoff_tracer_start(struct trace_array *tr) | 631 | static void irqsoff_tracer_start(struct trace_array *tr) |
@@ -615,7 +658,7 @@ static struct tracer irqsoff_tracer __read_mostly = | |||
615 | .print_line = irqsoff_print_line, | 658 | .print_line = irqsoff_print_line, |
616 | .flags = &tracer_flags, | 659 | .flags = &tracer_flags, |
617 | .set_flag = irqsoff_set_flag, | 660 | .set_flag = irqsoff_set_flag, |
618 | .flag_changed = trace_keep_overwrite, | 661 | .flag_changed = irqsoff_flag_changed, |
619 | #ifdef CONFIG_FTRACE_SELFTEST | 662 | #ifdef CONFIG_FTRACE_SELFTEST |
620 | .selftest = trace_selftest_startup_irqsoff, | 663 | .selftest = trace_selftest_startup_irqsoff, |
621 | #endif | 664 | #endif |
@@ -649,7 +692,7 @@ static struct tracer preemptoff_tracer __read_mostly = | |||
649 | .print_line = irqsoff_print_line, | 692 | .print_line = irqsoff_print_line, |
650 | .flags = &tracer_flags, | 693 | .flags = &tracer_flags, |
651 | .set_flag = irqsoff_set_flag, | 694 | .set_flag = irqsoff_set_flag, |
652 | .flag_changed = trace_keep_overwrite, | 695 | .flag_changed = irqsoff_flag_changed, |
653 | #ifdef CONFIG_FTRACE_SELFTEST | 696 | #ifdef CONFIG_FTRACE_SELFTEST |
654 | .selftest = trace_selftest_startup_preemptoff, | 697 | .selftest = trace_selftest_startup_preemptoff, |
655 | #endif | 698 | #endif |
@@ -685,7 +728,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly = | |||
685 | .print_line = irqsoff_print_line, | 728 | .print_line = irqsoff_print_line, |
686 | .flags = &tracer_flags, | 729 | .flags = &tracer_flags, |
687 | .set_flag = irqsoff_set_flag, | 730 | .set_flag = irqsoff_set_flag, |
688 | .flag_changed = trace_keep_overwrite, | 731 | .flag_changed = irqsoff_flag_changed, |
689 | #ifdef CONFIG_FTRACE_SELFTEST | 732 | #ifdef CONFIG_FTRACE_SELFTEST |
690 | .selftest = trace_selftest_startup_preemptirqsoff, | 733 | .selftest = trace_selftest_startup_preemptirqsoff, |
691 | #endif | 734 | #endif |
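With these changes the irqsoff family no longer ties the function (or function-graph) callback registration to the lifetime of the tracer itself: register_irqsoff_function()/unregister_irqsoff_function() remember the state in function_enabled, and the new .flag_changed hook lets the TRACE_ITER_FUNCTION option attach or detach the callbacks while the tracer stays selected (from user space that is the function-trace bit under trace_options, an option added elsewhere in this series). The wakeup tracer below grows the identical pair of helpers. A condensed sketch of the toggle path; set_tracer_flag(), the caller, lives in kernel/trace/trace.c and is only visible here through its new tr argument:

        /* sketch: what happens when the TRACE_ITER_FUNCTION bit is flipped
         * while the irqsoff tracer is current */
        static int sketch_irqsoff_flag_changed(struct tracer *tracer, u32 mask, int set)
        {
                if (mask & TRACE_ITER_FUNCTION) {
                        if (set)
                                register_irqsoff_function(is_graph(), 1);  /* attach ftrace/graph callback */
                        else
                                unregister_irqsoff_function(is_graph());   /* detach it again              */
                }
                /* keep the overwrite flag pinned for the latency tracer, as before */
                return trace_keep_overwrite(tracer, mask, set);
        }

register_irqsoff_function() refuses to double-register: it bails out if function_enabled is already true, or if the option is neither being set right now (set == 0) nor already present in trace_flags, which is what allows start_irqsoff_tracer() to call it unconditionally.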
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 3c5c5dfea0b3..bd90e1b06088 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c | |||
@@ -26,7 +26,7 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file) | |||
26 | trace_init_global_iter(&iter); | 26 | trace_init_global_iter(&iter); |
27 | 27 | ||
28 | for_each_tracing_cpu(cpu) { | 28 | for_each_tracing_cpu(cpu) { |
29 | atomic_inc(&iter.tr->data[cpu]->disabled); | 29 | atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); |
30 | } | 30 | } |
31 | 31 | ||
32 | old_userobj = trace_flags; | 32 | old_userobj = trace_flags; |
@@ -43,17 +43,17 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file) | |||
43 | iter.iter_flags |= TRACE_FILE_LAT_FMT; | 43 | iter.iter_flags |= TRACE_FILE_LAT_FMT; |
44 | iter.pos = -1; | 44 | iter.pos = -1; |
45 | 45 | ||
46 | if (cpu_file == TRACE_PIPE_ALL_CPU) { | 46 | if (cpu_file == RING_BUFFER_ALL_CPUS) { |
47 | for_each_tracing_cpu(cpu) { | 47 | for_each_tracing_cpu(cpu) { |
48 | iter.buffer_iter[cpu] = | 48 | iter.buffer_iter[cpu] = |
49 | ring_buffer_read_prepare(iter.tr->buffer, cpu); | 49 | ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu); |
50 | ring_buffer_read_start(iter.buffer_iter[cpu]); | 50 | ring_buffer_read_start(iter.buffer_iter[cpu]); |
51 | tracing_iter_reset(&iter, cpu); | 51 | tracing_iter_reset(&iter, cpu); |
52 | } | 52 | } |
53 | } else { | 53 | } else { |
54 | iter.cpu_file = cpu_file; | 54 | iter.cpu_file = cpu_file; |
55 | iter.buffer_iter[cpu_file] = | 55 | iter.buffer_iter[cpu_file] = |
56 | ring_buffer_read_prepare(iter.tr->buffer, cpu_file); | 56 | ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu_file); |
57 | ring_buffer_read_start(iter.buffer_iter[cpu_file]); | 57 | ring_buffer_read_start(iter.buffer_iter[cpu_file]); |
58 | tracing_iter_reset(&iter, cpu_file); | 58 | tracing_iter_reset(&iter, cpu_file); |
59 | } | 59 | } |
@@ -83,7 +83,7 @@ out: | |||
83 | trace_flags = old_userobj; | 83 | trace_flags = old_userobj; |
84 | 84 | ||
85 | for_each_tracing_cpu(cpu) { | 85 | for_each_tracing_cpu(cpu) { |
86 | atomic_dec(&iter.tr->data[cpu]->disabled); | 86 | atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); |
87 | } | 87 | } |
88 | 88 | ||
89 | for_each_tracing_cpu(cpu) | 89 | for_each_tracing_cpu(cpu) |
@@ -115,7 +115,7 @@ static int kdb_ftdump(int argc, const char **argv) | |||
115 | !cpu_online(cpu_file)) | 115 | !cpu_online(cpu_file)) |
116 | return KDB_BADINT; | 116 | return KDB_BADINT; |
117 | } else { | 117 | } else { |
118 | cpu_file = TRACE_PIPE_ALL_CPU; | 118 | cpu_file = RING_BUFFER_ALL_CPUS; |
119 | } | 119 | } |
120 | 120 | ||
121 | kdb_trap_printk++; | 121 | kdb_trap_printk++; |
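The kdb dumper now walks iter.trace_buffer->buffer and uses the ring buffer's own RING_BUFFER_ALL_CPUS sentinel rather than the tracing-local TRACE_PIPE_ALL_CPU. The prepare/start pairing above is the standard read-iterator idiom; a minimal sketch for one CPU, with the matching teardown that falls outside these hunks:

        /* sketch: per-CPU read iterator, names as in the 3.10 ring buffer API */
        struct ring_buffer_iter *bi;

        bi = ring_buffer_read_prepare(tr->trace_buffer.buffer, cpu);  /* allocate iter, pause recording on this CPU */
        ring_buffer_read_start(bi);                                   /* synchronize and reset the iterator         */
        /* ... consume events through the trace_iterator ... */
        ring_buffer_read_finish(bi);                                  /* re-enable recording, free the iterator     */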
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index fd3c8aae55e5..a5e8f4878bfa 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c | |||
@@ -31,7 +31,7 @@ static void mmio_reset_data(struct trace_array *tr) | |||
31 | overrun_detected = false; | 31 | overrun_detected = false; |
32 | prev_overruns = 0; | 32 | prev_overruns = 0; |
33 | 33 | ||
34 | tracing_reset_online_cpus(tr); | 34 | tracing_reset_online_cpus(&tr->trace_buffer); |
35 | } | 35 | } |
36 | 36 | ||
37 | static int mmio_trace_init(struct trace_array *tr) | 37 | static int mmio_trace_init(struct trace_array *tr) |
@@ -128,7 +128,7 @@ static void mmio_close(struct trace_iterator *iter) | |||
128 | static unsigned long count_overruns(struct trace_iterator *iter) | 128 | static unsigned long count_overruns(struct trace_iterator *iter) |
129 | { | 129 | { |
130 | unsigned long cnt = atomic_xchg(&dropped_count, 0); | 130 | unsigned long cnt = atomic_xchg(&dropped_count, 0); |
131 | unsigned long over = ring_buffer_overruns(iter->tr->buffer); | 131 | unsigned long over = ring_buffer_overruns(iter->trace_buffer->buffer); |
132 | 132 | ||
133 | if (over > prev_overruns) | 133 | if (over > prev_overruns) |
134 | cnt += over - prev_overruns; | 134 | cnt += over - prev_overruns; |
@@ -309,7 +309,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, | |||
309 | struct mmiotrace_rw *rw) | 309 | struct mmiotrace_rw *rw) |
310 | { | 310 | { |
311 | struct ftrace_event_call *call = &event_mmiotrace_rw; | 311 | struct ftrace_event_call *call = &event_mmiotrace_rw; |
312 | struct ring_buffer *buffer = tr->buffer; | 312 | struct ring_buffer *buffer = tr->trace_buffer.buffer; |
313 | struct ring_buffer_event *event; | 313 | struct ring_buffer_event *event; |
314 | struct trace_mmiotrace_rw *entry; | 314 | struct trace_mmiotrace_rw *entry; |
315 | int pc = preempt_count(); | 315 | int pc = preempt_count(); |
@@ -330,7 +330,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, | |||
330 | void mmio_trace_rw(struct mmiotrace_rw *rw) | 330 | void mmio_trace_rw(struct mmiotrace_rw *rw) |
331 | { | 331 | { |
332 | struct trace_array *tr = mmio_trace_array; | 332 | struct trace_array *tr = mmio_trace_array; |
333 | struct trace_array_cpu *data = tr->data[smp_processor_id()]; | 333 | struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id()); |
334 | __trace_mmiotrace_rw(tr, data, rw); | 334 | __trace_mmiotrace_rw(tr, data, rw); |
335 | } | 335 | } |
336 | 336 | ||
@@ -339,7 +339,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, | |||
339 | struct mmiotrace_map *map) | 339 | struct mmiotrace_map *map) |
340 | { | 340 | { |
341 | struct ftrace_event_call *call = &event_mmiotrace_map; | 341 | struct ftrace_event_call *call = &event_mmiotrace_map; |
342 | struct ring_buffer *buffer = tr->buffer; | 342 | struct ring_buffer *buffer = tr->trace_buffer.buffer; |
343 | struct ring_buffer_event *event; | 343 | struct ring_buffer_event *event; |
344 | struct trace_mmiotrace_map *entry; | 344 | struct trace_mmiotrace_map *entry; |
345 | int pc = preempt_count(); | 345 | int pc = preempt_count(); |
@@ -363,7 +363,7 @@ void mmio_trace_mapping(struct mmiotrace_map *map) | |||
363 | struct trace_array_cpu *data; | 363 | struct trace_array_cpu *data; |
364 | 364 | ||
365 | preempt_disable(); | 365 | preempt_disable(); |
366 | data = tr->data[smp_processor_id()]; | 366 | data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id()); |
367 | __trace_mmiotrace_map(tr, data, map); | 367 | __trace_mmiotrace_map(tr, data, map); |
368 | preempt_enable(); | 368 | preempt_enable(); |
369 | } | 369 | } |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 697e88d13907..bb922d9ee51b 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
@@ -14,7 +14,7 @@ | |||
14 | /* must be a power of 2 */ | 14 | /* must be a power of 2 */ |
15 | #define EVENT_HASHSIZE 128 | 15 | #define EVENT_HASHSIZE 128 |
16 | 16 | ||
17 | DECLARE_RWSEM(trace_event_mutex); | 17 | DECLARE_RWSEM(trace_event_sem); |
18 | 18 | ||
19 | static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; | 19 | static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; |
20 | 20 | ||
@@ -37,6 +37,22 @@ int trace_print_seq(struct seq_file *m, struct trace_seq *s) | |||
37 | return ret; | 37 | return ret; |
38 | } | 38 | } |
39 | 39 | ||
40 | enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter) | ||
41 | { | ||
42 | struct trace_seq *s = &iter->seq; | ||
43 | struct trace_entry *entry = iter->ent; | ||
44 | struct bputs_entry *field; | ||
45 | int ret; | ||
46 | |||
47 | trace_assign_type(field, entry); | ||
48 | |||
49 | ret = trace_seq_puts(s, field->str); | ||
50 | if (!ret) | ||
51 | return TRACE_TYPE_PARTIAL_LINE; | ||
52 | |||
53 | return TRACE_TYPE_HANDLED; | ||
54 | } | ||
55 | |||
40 | enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) | 56 | enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) |
41 | { | 57 | { |
42 | struct trace_seq *s = &iter->seq; | 58 | struct trace_seq *s = &iter->seq; |
@@ -397,6 +413,32 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) | |||
397 | } | 413 | } |
398 | EXPORT_SYMBOL(ftrace_print_hex_seq); | 414 | EXPORT_SYMBOL(ftrace_print_hex_seq); |
399 | 415 | ||
416 | int ftrace_raw_output_prep(struct trace_iterator *iter, | ||
417 | struct trace_event *trace_event) | ||
418 | { | ||
419 | struct ftrace_event_call *event; | ||
420 | struct trace_seq *s = &iter->seq; | ||
421 | struct trace_seq *p = &iter->tmp_seq; | ||
422 | struct trace_entry *entry; | ||
423 | int ret; | ||
424 | |||
425 | event = container_of(trace_event, struct ftrace_event_call, event); | ||
426 | entry = iter->ent; | ||
427 | |||
428 | if (entry->type != event->event.type) { | ||
429 | WARN_ON_ONCE(1); | ||
430 | return TRACE_TYPE_UNHANDLED; | ||
431 | } | ||
432 | |||
433 | trace_seq_init(p); | ||
434 | ret = trace_seq_printf(s, "%s: ", event->name); | ||
435 | if (!ret) | ||
436 | return TRACE_TYPE_PARTIAL_LINE; | ||
437 | |||
438 | return 0; | ||
439 | } | ||
440 | EXPORT_SYMBOL(ftrace_raw_output_prep); | ||
441 | |||
400 | #ifdef CONFIG_KRETPROBES | 442 | #ifdef CONFIG_KRETPROBES |
401 | static inline const char *kretprobed(const char *name) | 443 | static inline const char *kretprobed(const char *name) |
402 | { | 444 | { |
@@ -617,7 +659,7 @@ lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) | |||
617 | { | 659 | { |
618 | unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE; | 660 | unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE; |
619 | unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS; | 661 | unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS; |
620 | unsigned long long abs_ts = iter->ts - iter->tr->time_start; | 662 | unsigned long long abs_ts = iter->ts - iter->trace_buffer->time_start; |
621 | unsigned long long rel_ts = next_ts - iter->ts; | 663 | unsigned long long rel_ts = next_ts - iter->ts; |
622 | struct trace_seq *s = &iter->seq; | 664 | struct trace_seq *s = &iter->seq; |
623 | 665 | ||
@@ -783,12 +825,12 @@ static int trace_search_list(struct list_head **list) | |||
783 | 825 | ||
784 | void trace_event_read_lock(void) | 826 | void trace_event_read_lock(void) |
785 | { | 827 | { |
786 | down_read(&trace_event_mutex); | 828 | down_read(&trace_event_sem); |
787 | } | 829 | } |
788 | 830 | ||
789 | void trace_event_read_unlock(void) | 831 | void trace_event_read_unlock(void) |
790 | { | 832 | { |
791 | up_read(&trace_event_mutex); | 833 | up_read(&trace_event_sem); |
792 | } | 834 | } |
793 | 835 | ||
794 | /** | 836 | /** |
@@ -811,7 +853,7 @@ int register_ftrace_event(struct trace_event *event) | |||
811 | unsigned key; | 853 | unsigned key; |
812 | int ret = 0; | 854 | int ret = 0; |
813 | 855 | ||
814 | down_write(&trace_event_mutex); | 856 | down_write(&trace_event_sem); |
815 | 857 | ||
816 | if (WARN_ON(!event)) | 858 | if (WARN_ON(!event)) |
817 | goto out; | 859 | goto out; |
@@ -866,14 +908,14 @@ int register_ftrace_event(struct trace_event *event) | |||
866 | 908 | ||
867 | ret = event->type; | 909 | ret = event->type; |
868 | out: | 910 | out: |
869 | up_write(&trace_event_mutex); | 911 | up_write(&trace_event_sem); |
870 | 912 | ||
871 | return ret; | 913 | return ret; |
872 | } | 914 | } |
873 | EXPORT_SYMBOL_GPL(register_ftrace_event); | 915 | EXPORT_SYMBOL_GPL(register_ftrace_event); |
874 | 916 | ||
875 | /* | 917 | /* |
876 | * Used by module code with the trace_event_mutex held for write. | 918 | * Used by module code with the trace_event_sem held for write. |
877 | */ | 919 | */ |
878 | int __unregister_ftrace_event(struct trace_event *event) | 920 | int __unregister_ftrace_event(struct trace_event *event) |
879 | { | 921 | { |
@@ -888,9 +930,9 @@ int __unregister_ftrace_event(struct trace_event *event) | |||
888 | */ | 930 | */ |
889 | int unregister_ftrace_event(struct trace_event *event) | 931 | int unregister_ftrace_event(struct trace_event *event) |
890 | { | 932 | { |
891 | down_write(&trace_event_mutex); | 933 | down_write(&trace_event_sem); |
892 | __unregister_ftrace_event(event); | 934 | __unregister_ftrace_event(event); |
893 | up_write(&trace_event_mutex); | 935 | up_write(&trace_event_sem); |
894 | 936 | ||
895 | return 0; | 937 | return 0; |
896 | } | 938 | } |
@@ -1217,6 +1259,64 @@ static struct trace_event trace_user_stack_event = { | |||
1217 | .funcs = &trace_user_stack_funcs, | 1259 | .funcs = &trace_user_stack_funcs, |
1218 | }; | 1260 | }; |
1219 | 1261 | ||
1262 | /* TRACE_BPUTS */ | ||
1263 | static enum print_line_t | ||
1264 | trace_bputs_print(struct trace_iterator *iter, int flags, | ||
1265 | struct trace_event *event) | ||
1266 | { | ||
1267 | struct trace_entry *entry = iter->ent; | ||
1268 | struct trace_seq *s = &iter->seq; | ||
1269 | struct bputs_entry *field; | ||
1270 | |||
1271 | trace_assign_type(field, entry); | ||
1272 | |||
1273 | if (!seq_print_ip_sym(s, field->ip, flags)) | ||
1274 | goto partial; | ||
1275 | |||
1276 | if (!trace_seq_puts(s, ": ")) | ||
1277 | goto partial; | ||
1278 | |||
1279 | if (!trace_seq_puts(s, field->str)) | ||
1280 | goto partial; | ||
1281 | |||
1282 | return TRACE_TYPE_HANDLED; | ||
1283 | |||
1284 | partial: | ||
1285 | return TRACE_TYPE_PARTIAL_LINE; | ||
1286 | } | ||
1287 | |||
1288 | |||
1289 | static enum print_line_t | ||
1290 | trace_bputs_raw(struct trace_iterator *iter, int flags, | ||
1291 | struct trace_event *event) | ||
1292 | { | ||
1293 | struct bputs_entry *field; | ||
1294 | struct trace_seq *s = &iter->seq; | ||
1295 | |||
1296 | trace_assign_type(field, iter->ent); | ||
1297 | |||
1298 | if (!trace_seq_printf(s, ": %lx : ", field->ip)) | ||
1299 | goto partial; | ||
1300 | |||
1301 | if (!trace_seq_puts(s, field->str)) | ||
1302 | goto partial; | ||
1303 | |||
1304 | return TRACE_TYPE_HANDLED; | ||
1305 | |||
1306 | partial: | ||
1307 | return TRACE_TYPE_PARTIAL_LINE; | ||
1308 | } | ||
1309 | |||
1310 | static struct trace_event_functions trace_bputs_funcs = { | ||
1311 | .trace = trace_bputs_print, | ||
1312 | .raw = trace_bputs_raw, | ||
1313 | }; | ||
1314 | |||
1315 | static struct trace_event trace_bputs_event = { | ||
1316 | .type = TRACE_BPUTS, | ||
1317 | .funcs = &trace_bputs_funcs, | ||
1318 | }; | ||
1319 | |||
1220 | /* TRACE_BPRINT */ | 1320 | /* TRACE_BPRINT */ |
1221 | static enum print_line_t | 1321 | static enum print_line_t |
1222 | trace_bprint_print(struct trace_iterator *iter, int flags, | 1322 | trace_bprint_print(struct trace_iterator *iter, int flags, |
@@ -1329,6 +1429,7 @@ static struct trace_event *events[] __initdata = { | |||
1329 | &trace_wake_event, | 1429 | &trace_wake_event, |
1330 | &trace_stack_event, | 1430 | &trace_stack_event, |
1331 | &trace_user_stack_event, | 1431 | &trace_user_stack_event, |
1432 | &trace_bputs_event, | ||
1332 | &trace_bprint_event, | 1433 | &trace_bprint_event, |
1333 | &trace_print_event, | 1434 | &trace_print_event, |
1334 | NULL | 1435 | NULL |
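trace_output.c gains the reader side of the new TRACE_BPUTS entry type, used by the constant-string fast path of trace_printk()/trace_puts() added elsewhere in this series: trace_print_bputs_msg_only() for the message-only output mode, plus .trace and .raw handlers wired into the events[] table. The entry definition itself is not in these hunks; judging from the accesses above it carries an instruction pointer and a pointer to the (uncopied) constant string, roughly:

        /* sketch of the entry these handlers decode; the real definition is
         * added to kernel/trace/trace_entries.h by this series */
        struct bputs_entry {
                struct trace_entry      ent;
                unsigned long           ip;     /* call site                          */
                const char              *str;   /* constant string, stored by pointer */
        };

        /* .trace output (trace_bputs_print):  <symbol of ip>: <str>  */
        /* .raw   output (trace_bputs_raw):    : <ip in hex> : <str>  */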
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index c038eba0492b..127a9d8c8357 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h | |||
@@ -5,6 +5,8 @@ | |||
5 | #include "trace.h" | 5 | #include "trace.h" |
6 | 6 | ||
7 | extern enum print_line_t | 7 | extern enum print_line_t |
8 | trace_print_bputs_msg_only(struct trace_iterator *iter); | ||
9 | extern enum print_line_t | ||
8 | trace_print_bprintk_msg_only(struct trace_iterator *iter); | 10 | trace_print_bprintk_msg_only(struct trace_iterator *iter); |
9 | extern enum print_line_t | 11 | extern enum print_line_t |
10 | trace_print_printk_msg_only(struct trace_iterator *iter); | 12 | trace_print_printk_msg_only(struct trace_iterator *iter); |
@@ -31,7 +33,7 @@ trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); | |||
31 | 33 | ||
32 | /* used by module unregistering */ | 34 | /* used by module unregistering */ |
33 | extern int __unregister_ftrace_event(struct trace_event *event); | 35 | extern int __unregister_ftrace_event(struct trace_event *event); |
34 | extern struct rw_semaphore trace_event_mutex; | 36 | extern struct rw_semaphore trace_event_sem; |
35 | 37 | ||
36 | #define MAX_MEMHEX_BYTES 8 | 38 | #define MAX_MEMHEX_BYTES 8 |
37 | #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) | 39 | #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) |
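The renamed lock was always an rw_semaphore (DECLARE_RWSEM), so the old _mutex suffix was misleading; trace_event_sem is purely a naming fix. The read/write pairing it implies, exactly as used above:

        down_read(&trace_event_sem);    /* many concurrent readers of the event hash */
        /* ... lookup ... */
        up_read(&trace_event_sem);

        down_write(&trace_event_sem);   /* exclusive, for register/unregister */
        /* ... modify the hash ... */
        up_write(&trace_event_sem);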
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 3374c792ccd8..4e98e3b257a3 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c | |||
@@ -28,7 +28,7 @@ tracing_sched_switch_trace(struct trace_array *tr, | |||
28 | unsigned long flags, int pc) | 28 | unsigned long flags, int pc) |
29 | { | 29 | { |
30 | struct ftrace_event_call *call = &event_context_switch; | 30 | struct ftrace_event_call *call = &event_context_switch; |
31 | struct ring_buffer *buffer = tr->buffer; | 31 | struct ring_buffer *buffer = tr->trace_buffer.buffer; |
32 | struct ring_buffer_event *event; | 32 | struct ring_buffer_event *event; |
33 | struct ctx_switch_entry *entry; | 33 | struct ctx_switch_entry *entry; |
34 | 34 | ||
@@ -69,7 +69,7 @@ probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *n | |||
69 | pc = preempt_count(); | 69 | pc = preempt_count(); |
70 | local_irq_save(flags); | 70 | local_irq_save(flags); |
71 | cpu = raw_smp_processor_id(); | 71 | cpu = raw_smp_processor_id(); |
72 | data = ctx_trace->data[cpu]; | 72 | data = per_cpu_ptr(ctx_trace->trace_buffer.data, cpu); |
73 | 73 | ||
74 | if (likely(!atomic_read(&data->disabled))) | 74 | if (likely(!atomic_read(&data->disabled))) |
75 | tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc); | 75 | tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc); |
@@ -86,7 +86,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, | |||
86 | struct ftrace_event_call *call = &event_wakeup; | 86 | struct ftrace_event_call *call = &event_wakeup; |
87 | struct ring_buffer_event *event; | 87 | struct ring_buffer_event *event; |
88 | struct ctx_switch_entry *entry; | 88 | struct ctx_switch_entry *entry; |
89 | struct ring_buffer *buffer = tr->buffer; | 89 | struct ring_buffer *buffer = tr->trace_buffer.buffer; |
90 | 90 | ||
91 | event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, | 91 | event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, |
92 | sizeof(*entry), flags, pc); | 92 | sizeof(*entry), flags, pc); |
@@ -123,7 +123,7 @@ probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success) | |||
123 | pc = preempt_count(); | 123 | pc = preempt_count(); |
124 | local_irq_save(flags); | 124 | local_irq_save(flags); |
125 | cpu = raw_smp_processor_id(); | 125 | cpu = raw_smp_processor_id(); |
126 | data = ctx_trace->data[cpu]; | 126 | data = per_cpu_ptr(ctx_trace->trace_buffer.data, cpu); |
127 | 127 | ||
128 | if (likely(!atomic_read(&data->disabled))) | 128 | if (likely(!atomic_read(&data->disabled))) |
129 | tracing_sched_wakeup_trace(ctx_trace, wakee, current, | 129 | tracing_sched_wakeup_trace(ctx_trace, wakee, current, |
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index fde652c9a511..fee77e15d815 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c | |||
@@ -37,6 +37,7 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace); | |||
37 | static void wakeup_graph_return(struct ftrace_graph_ret *trace); | 37 | static void wakeup_graph_return(struct ftrace_graph_ret *trace); |
38 | 38 | ||
39 | static int save_flags; | 39 | static int save_flags; |
40 | static bool function_enabled; | ||
40 | 41 | ||
41 | #define TRACE_DISPLAY_GRAPH 1 | 42 | #define TRACE_DISPLAY_GRAPH 1 |
42 | 43 | ||
@@ -89,7 +90,7 @@ func_prolog_preempt_disable(struct trace_array *tr, | |||
89 | if (cpu != wakeup_current_cpu) | 90 | if (cpu != wakeup_current_cpu) |
90 | goto out_enable; | 91 | goto out_enable; |
91 | 92 | ||
92 | *data = tr->data[cpu]; | 93 | *data = per_cpu_ptr(tr->trace_buffer.data, cpu); |
93 | disabled = atomic_inc_return(&(*data)->disabled); | 94 | disabled = atomic_inc_return(&(*data)->disabled); |
94 | if (unlikely(disabled != 1)) | 95 | if (unlikely(disabled != 1)) |
95 | goto out; | 96 | goto out; |
@@ -134,15 +135,60 @@ static struct ftrace_ops trace_ops __read_mostly = | |||
134 | }; | 135 | }; |
135 | #endif /* CONFIG_FUNCTION_TRACER */ | 136 | #endif /* CONFIG_FUNCTION_TRACER */ |
136 | 137 | ||
137 | static int start_func_tracer(int graph) | 138 | static int register_wakeup_function(int graph, int set) |
138 | { | 139 | { |
139 | int ret; | 140 | int ret; |
140 | 141 | ||
141 | if (!graph) | 142 | /* 'set' is set if TRACE_ITER_FUNCTION is about to be set */ |
142 | ret = register_ftrace_function(&trace_ops); | 143 | if (function_enabled || (!set && !(trace_flags & TRACE_ITER_FUNCTION))) |
143 | else | 144 | return 0; |
145 | |||
146 | if (graph) | ||
144 | ret = register_ftrace_graph(&wakeup_graph_return, | 147 | ret = register_ftrace_graph(&wakeup_graph_return, |
145 | &wakeup_graph_entry); | 148 | &wakeup_graph_entry); |
149 | else | ||
150 | ret = register_ftrace_function(&trace_ops); | ||
151 | |||
152 | if (!ret) | ||
153 | function_enabled = true; | ||
154 | |||
155 | return ret; | ||
156 | } | ||
157 | |||
158 | static void unregister_wakeup_function(int graph) | ||
159 | { | ||
160 | if (!function_enabled) | ||
161 | return; | ||
162 | |||
163 | if (graph) | ||
164 | unregister_ftrace_graph(); | ||
165 | else | ||
166 | unregister_ftrace_function(&trace_ops); | ||
167 | |||
168 | function_enabled = false; | ||
169 | } | ||
170 | |||
171 | static void wakeup_function_set(int set) | ||
172 | { | ||
173 | if (set) | ||
174 | register_wakeup_function(is_graph(), 1); | ||
175 | else | ||
176 | unregister_wakeup_function(is_graph()); | ||
177 | } | ||
178 | |||
179 | static int wakeup_flag_changed(struct tracer *tracer, u32 mask, int set) | ||
180 | { | ||
181 | if (mask & TRACE_ITER_FUNCTION) | ||
182 | wakeup_function_set(set); | ||
183 | |||
184 | return trace_keep_overwrite(tracer, mask, set); | ||
185 | } | ||
186 | |||
187 | static int start_func_tracer(int graph) | ||
188 | { | ||
189 | int ret; | ||
190 | |||
191 | ret = register_wakeup_function(graph, 0); | ||
146 | 192 | ||
147 | if (!ret && tracing_is_enabled()) | 193 | if (!ret && tracing_is_enabled()) |
148 | tracer_enabled = 1; | 194 | tracer_enabled = 1; |
@@ -156,10 +202,7 @@ static void stop_func_tracer(int graph) | |||
156 | { | 202 | { |
157 | tracer_enabled = 0; | 203 | tracer_enabled = 0; |
158 | 204 | ||
159 | if (!graph) | 205 | unregister_wakeup_function(graph); |
160 | unregister_ftrace_function(&trace_ops); | ||
161 | else | ||
162 | unregister_ftrace_graph(); | ||
163 | } | 206 | } |
164 | 207 | ||
165 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 208 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
@@ -353,7 +396,7 @@ probe_wakeup_sched_switch(void *ignore, | |||
353 | 396 | ||
354 | /* disable local data, not wakeup_cpu data */ | 397 | /* disable local data, not wakeup_cpu data */ |
355 | cpu = raw_smp_processor_id(); | 398 | cpu = raw_smp_processor_id(); |
356 | disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled); | 399 | disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); |
357 | if (likely(disabled != 1)) | 400 | if (likely(disabled != 1)) |
358 | goto out; | 401 | goto out; |
359 | 402 | ||
@@ -365,7 +408,7 @@ probe_wakeup_sched_switch(void *ignore, | |||
365 | goto out_unlock; | 408 | goto out_unlock; |
366 | 409 | ||
367 | /* The task we are waiting for is waking up */ | 410 | /* The task we are waiting for is waking up */ |
368 | data = wakeup_trace->data[wakeup_cpu]; | 411 | data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu); |
369 | 412 | ||
370 | __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); | 413 | __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); |
371 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); | 414 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); |
@@ -387,7 +430,7 @@ out_unlock: | |||
387 | arch_spin_unlock(&wakeup_lock); | 430 | arch_spin_unlock(&wakeup_lock); |
388 | local_irq_restore(flags); | 431 | local_irq_restore(flags); |
389 | out: | 432 | out: |
390 | atomic_dec(&wakeup_trace->data[cpu]->disabled); | 433 | atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); |
391 | } | 434 | } |
392 | 435 | ||
393 | static void __wakeup_reset(struct trace_array *tr) | 436 | static void __wakeup_reset(struct trace_array *tr) |
@@ -405,7 +448,7 @@ static void wakeup_reset(struct trace_array *tr) | |||
405 | { | 448 | { |
406 | unsigned long flags; | 449 | unsigned long flags; |
407 | 450 | ||
408 | tracing_reset_online_cpus(tr); | 451 | tracing_reset_online_cpus(&tr->trace_buffer); |
409 | 452 | ||
410 | local_irq_save(flags); | 453 | local_irq_save(flags); |
411 | arch_spin_lock(&wakeup_lock); | 454 | arch_spin_lock(&wakeup_lock); |
@@ -435,7 +478,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success) | |||
435 | return; | 478 | return; |
436 | 479 | ||
437 | pc = preempt_count(); | 480 | pc = preempt_count(); |
438 | disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled); | 481 | disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); |
439 | if (unlikely(disabled != 1)) | 482 | if (unlikely(disabled != 1)) |
440 | goto out; | 483 | goto out; |
441 | 484 | ||
@@ -458,7 +501,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success) | |||
458 | 501 | ||
459 | local_save_flags(flags); | 502 | local_save_flags(flags); |
460 | 503 | ||
461 | data = wakeup_trace->data[wakeup_cpu]; | 504 | data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu); |
462 | data->preempt_timestamp = ftrace_now(cpu); | 505 | data->preempt_timestamp = ftrace_now(cpu); |
463 | tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); | 506 | tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); |
464 | 507 | ||
@@ -472,7 +515,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success) | |||
472 | out_locked: | 515 | out_locked: |
473 | arch_spin_unlock(&wakeup_lock); | 516 | arch_spin_unlock(&wakeup_lock); |
474 | out: | 517 | out: |
475 | atomic_dec(&wakeup_trace->data[cpu]->disabled); | 518 | atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled); |
476 | } | 519 | } |
477 | 520 | ||
478 | static void start_wakeup_tracer(struct trace_array *tr) | 521 | static void start_wakeup_tracer(struct trace_array *tr) |
@@ -543,8 +586,8 @@ static int __wakeup_tracer_init(struct trace_array *tr) | |||
543 | save_flags = trace_flags; | 586 | save_flags = trace_flags; |
544 | 587 | ||
545 | /* non overwrite screws up the latency tracers */ | 588 | /* non overwrite screws up the latency tracers */ |
546 | set_tracer_flag(TRACE_ITER_OVERWRITE, 1); | 589 | set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1); |
547 | set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1); | 590 | set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1); |
548 | 591 | ||
549 | tracing_max_latency = 0; | 592 | tracing_max_latency = 0; |
550 | wakeup_trace = tr; | 593 | wakeup_trace = tr; |
@@ -573,8 +616,8 @@ static void wakeup_tracer_reset(struct trace_array *tr) | |||
573 | /* make sure we put back any tasks we are tracing */ | 616 | /* make sure we put back any tasks we are tracing */ |
574 | wakeup_reset(tr); | 617 | wakeup_reset(tr); |
575 | 618 | ||
576 | set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag); | 619 | set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag); |
577 | set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag); | 620 | set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag); |
578 | } | 621 | } |
579 | 622 | ||
580 | static void wakeup_tracer_start(struct trace_array *tr) | 623 | static void wakeup_tracer_start(struct trace_array *tr) |
@@ -600,7 +643,7 @@ static struct tracer wakeup_tracer __read_mostly = | |||
600 | .print_line = wakeup_print_line, | 643 | .print_line = wakeup_print_line, |
601 | .flags = &tracer_flags, | 644 | .flags = &tracer_flags, |
602 | .set_flag = wakeup_set_flag, | 645 | .set_flag = wakeup_set_flag, |
603 | .flag_changed = trace_keep_overwrite, | 646 | .flag_changed = wakeup_flag_changed, |
604 | #ifdef CONFIG_FTRACE_SELFTEST | 647 | #ifdef CONFIG_FTRACE_SELFTEST |
605 | .selftest = trace_selftest_startup_wakeup, | 648 | .selftest = trace_selftest_startup_wakeup, |
606 | #endif | 649 | #endif |
@@ -622,7 +665,7 @@ static struct tracer wakeup_rt_tracer __read_mostly = | |||
622 | .print_line = wakeup_print_line, | 665 | .print_line = wakeup_print_line, |
623 | .flags = &tracer_flags, | 666 | .flags = &tracer_flags, |
624 | .set_flag = wakeup_set_flag, | 667 | .set_flag = wakeup_set_flag, |
625 | .flag_changed = trace_keep_overwrite, | 668 | .flag_changed = wakeup_flag_changed, |
626 | #ifdef CONFIG_FTRACE_SELFTEST | 669 | #ifdef CONFIG_FTRACE_SELFTEST |
627 | .selftest = trace_selftest_startup_wakeup, | 670 | .selftest = trace_selftest_startup_wakeup, |
628 | #endif | 671 | #endif |
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 51c819c12c29..55e2cf66967b 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -21,13 +21,13 @@ static inline int trace_valid_entry(struct trace_entry *entry) | |||
21 | return 0; | 21 | return 0; |
22 | } | 22 | } |
23 | 23 | ||
24 | static int trace_test_buffer_cpu(struct trace_array *tr, int cpu) | 24 | static int trace_test_buffer_cpu(struct trace_buffer *buf, int cpu) |
25 | { | 25 | { |
26 | struct ring_buffer_event *event; | 26 | struct ring_buffer_event *event; |
27 | struct trace_entry *entry; | 27 | struct trace_entry *entry; |
28 | unsigned int loops = 0; | 28 | unsigned int loops = 0; |
29 | 29 | ||
30 | while ((event = ring_buffer_consume(tr->buffer, cpu, NULL, NULL))) { | 30 | while ((event = ring_buffer_consume(buf->buffer, cpu, NULL, NULL))) { |
31 | entry = ring_buffer_event_data(event); | 31 | entry = ring_buffer_event_data(event); |
32 | 32 | ||
33 | /* | 33 | /* |
@@ -58,7 +58,7 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu) | |||
58 | * Test the trace buffer to see if all the elements | 58 | * Test the trace buffer to see if all the elements |
59 | * are still sane. | 59 | * are still sane. |
60 | */ | 60 | */ |
61 | static int trace_test_buffer(struct trace_array *tr, unsigned long *count) | 61 | static int trace_test_buffer(struct trace_buffer *buf, unsigned long *count) |
62 | { | 62 | { |
63 | unsigned long flags, cnt = 0; | 63 | unsigned long flags, cnt = 0; |
64 | int cpu, ret = 0; | 64 | int cpu, ret = 0; |
@@ -67,7 +67,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) | |||
67 | local_irq_save(flags); | 67 | local_irq_save(flags); |
68 | arch_spin_lock(&ftrace_max_lock); | 68 | arch_spin_lock(&ftrace_max_lock); |
69 | 69 | ||
70 | cnt = ring_buffer_entries(tr->buffer); | 70 | cnt = ring_buffer_entries(buf->buffer); |
71 | 71 | ||
72 | /* | 72 | /* |
73 | * The trace_test_buffer_cpu runs a while loop to consume all data. | 73 | * The trace_test_buffer_cpu runs a while loop to consume all data. |
@@ -78,7 +78,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) | |||
78 | */ | 78 | */ |
79 | tracing_off(); | 79 | tracing_off(); |
80 | for_each_possible_cpu(cpu) { | 80 | for_each_possible_cpu(cpu) { |
81 | ret = trace_test_buffer_cpu(tr, cpu); | 81 | ret = trace_test_buffer_cpu(buf, cpu); |
82 | if (ret) | 82 | if (ret) |
83 | break; | 83 | break; |
84 | } | 84 | } |
@@ -355,7 +355,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, | |||
355 | msleep(100); | 355 | msleep(100); |
356 | 356 | ||
357 | /* we should have nothing in the buffer */ | 357 | /* we should have nothing in the buffer */ |
358 | ret = trace_test_buffer(tr, &count); | 358 | ret = trace_test_buffer(&tr->trace_buffer, &count); |
359 | if (ret) | 359 | if (ret) |
360 | goto out; | 360 | goto out; |
361 | 361 | ||
@@ -376,7 +376,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, | |||
376 | ftrace_enabled = 0; | 376 | ftrace_enabled = 0; |
377 | 377 | ||
378 | /* check the trace buffer */ | 378 | /* check the trace buffer */ |
379 | ret = trace_test_buffer(tr, &count); | 379 | ret = trace_test_buffer(&tr->trace_buffer, &count); |
380 | tracing_start(); | 380 | tracing_start(); |
381 | 381 | ||
382 | /* we should only have one item */ | 382 | /* we should only have one item */ |
@@ -666,7 +666,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) | |||
666 | ftrace_enabled = 0; | 666 | ftrace_enabled = 0; |
667 | 667 | ||
668 | /* check the trace buffer */ | 668 | /* check the trace buffer */ |
669 | ret = trace_test_buffer(tr, &count); | 669 | ret = trace_test_buffer(&tr->trace_buffer, &count); |
670 | trace->reset(tr); | 670 | trace->reset(tr); |
671 | tracing_start(); | 671 | tracing_start(); |
672 | 672 | ||
@@ -703,8 +703,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) | |||
703 | /* Maximum number of functions to trace before diagnosing a hang */ | 703 | /* Maximum number of functions to trace before diagnosing a hang */ |
704 | #define GRAPH_MAX_FUNC_TEST 100000000 | 704 | #define GRAPH_MAX_FUNC_TEST 100000000 |
705 | 705 | ||
706 | static void | ||
707 | __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode); | ||
708 | static unsigned int graph_hang_thresh; | 706 | static unsigned int graph_hang_thresh; |
709 | 707 | ||
710 | /* Wrap the real function entry probe to avoid possible hanging */ | 708 | /* Wrap the real function entry probe to avoid possible hanging */ |
@@ -714,8 +712,11 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) | |||
714 | if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) { | 712 | if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) { |
715 | ftrace_graph_stop(); | 713 | ftrace_graph_stop(); |
716 | printk(KERN_WARNING "BUG: Function graph tracer hang!\n"); | 714 | printk(KERN_WARNING "BUG: Function graph tracer hang!\n"); |
717 | if (ftrace_dump_on_oops) | 715 | if (ftrace_dump_on_oops) { |
718 | __ftrace_dump(false, DUMP_ALL); | 716 | ftrace_dump(DUMP_ALL); |
717 | /* ftrace_dump() disables tracing */ | ||
718 | tracing_on(); | ||
719 | } | ||
719 | return 0; | 720 | return 0; |
720 | } | 721 | } |
721 | 722 | ||
@@ -737,7 +738,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, | |||
737 | * Simulate the init() callback but we attach a watchdog callback | 738 | * Simulate the init() callback but we attach a watchdog callback |
738 | * to detect and recover from possible hangs | 739 | * to detect and recover from possible hangs |
739 | */ | 740 | */ |
740 | tracing_reset_online_cpus(tr); | 741 | tracing_reset_online_cpus(&tr->trace_buffer); |
741 | set_graph_array(tr); | 742 | set_graph_array(tr); |
742 | ret = register_ftrace_graph(&trace_graph_return, | 743 | ret = register_ftrace_graph(&trace_graph_return, |
743 | &trace_graph_entry_watchdog); | 744 | &trace_graph_entry_watchdog); |
@@ -760,7 +761,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, | |||
760 | tracing_stop(); | 761 | tracing_stop(); |
761 | 762 | ||
762 | /* check the trace buffer */ | 763 | /* check the trace buffer */ |
763 | ret = trace_test_buffer(tr, &count); | 764 | ret = trace_test_buffer(&tr->trace_buffer, &count); |
764 | 765 | ||
765 | trace->reset(tr); | 766 | trace->reset(tr); |
766 | tracing_start(); | 767 | tracing_start(); |
@@ -815,9 +816,9 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) | |||
815 | /* stop the tracing. */ | 816 | /* stop the tracing. */ |
816 | tracing_stop(); | 817 | tracing_stop(); |
817 | /* check both trace buffers */ | 818 | /* check both trace buffers */ |
818 | ret = trace_test_buffer(tr, NULL); | 819 | ret = trace_test_buffer(&tr->trace_buffer, NULL); |
819 | if (!ret) | 820 | if (!ret) |
820 | ret = trace_test_buffer(&max_tr, &count); | 821 | ret = trace_test_buffer(&tr->max_buffer, &count); |
821 | trace->reset(tr); | 822 | trace->reset(tr); |
822 | tracing_start(); | 823 | tracing_start(); |
823 | 824 | ||
@@ -877,9 +878,9 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) | |||
877 | /* stop the tracing. */ | 878 | /* stop the tracing. */ |
878 | tracing_stop(); | 879 | tracing_stop(); |
879 | /* check both trace buffers */ | 880 | /* check both trace buffers */ |
880 | ret = trace_test_buffer(tr, NULL); | 881 | ret = trace_test_buffer(&tr->trace_buffer, NULL); |
881 | if (!ret) | 882 | if (!ret) |
882 | ret = trace_test_buffer(&max_tr, &count); | 883 | ret = trace_test_buffer(&tr->max_buffer, &count); |
883 | trace->reset(tr); | 884 | trace->reset(tr); |
884 | tracing_start(); | 885 | tracing_start(); |
885 | 886 | ||
@@ -943,11 +944,11 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * | |||
943 | /* stop the tracing. */ | 944 | /* stop the tracing. */ |
944 | tracing_stop(); | 945 | tracing_stop(); |
945 | /* check both trace buffers */ | 946 | /* check both trace buffers */ |
946 | ret = trace_test_buffer(tr, NULL); | 947 | ret = trace_test_buffer(&tr->trace_buffer, NULL); |
947 | if (ret) | 948 | if (ret) |
948 | goto out; | 949 | goto out; |
949 | 950 | ||
950 | ret = trace_test_buffer(&max_tr, &count); | 951 | ret = trace_test_buffer(&tr->max_buffer, &count); |
951 | if (ret) | 952 | if (ret) |
952 | goto out; | 953 | goto out; |
953 | 954 | ||
@@ -973,11 +974,11 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * | |||
973 | /* stop the tracing. */ | 974 | /* stop the tracing. */ |
974 | tracing_stop(); | 975 | tracing_stop(); |
975 | /* check both trace buffers */ | 976 | /* check both trace buffers */ |
976 | ret = trace_test_buffer(tr, NULL); | 977 | ret = trace_test_buffer(&tr->trace_buffer, NULL); |
977 | if (ret) | 978 | if (ret) |
978 | goto out; | 979 | goto out; |
979 | 980 | ||
980 | ret = trace_test_buffer(&max_tr, &count); | 981 | ret = trace_test_buffer(&tr->max_buffer, &count); |
981 | 982 | ||
982 | if (!ret && !count) { | 983 | if (!ret && !count) { |
983 | printk(KERN_CONT ".. no entries found .."); | 984 | printk(KERN_CONT ".. no entries found .."); |
@@ -1084,10 +1085,10 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) | |||
1084 | /* stop the tracing. */ | 1085 | /* stop the tracing. */ |
1085 | tracing_stop(); | 1086 | tracing_stop(); |
1086 | /* check both trace buffers */ | 1087 | /* check both trace buffers */ |
1087 | ret = trace_test_buffer(tr, NULL); | 1088 | ret = trace_test_buffer(&tr->trace_buffer, NULL); |
1088 | printk("ret = %d\n", ret); | 1089 | printk("ret = %d\n", ret); |
1089 | if (!ret) | 1090 | if (!ret) |
1090 | ret = trace_test_buffer(&max_tr, &count); | 1091 | ret = trace_test_buffer(&tr->max_buffer, &count); |
1091 | 1092 | ||
1092 | 1093 | ||
1093 | trace->reset(tr); | 1094 | trace->reset(tr); |
@@ -1126,7 +1127,7 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr | |||
1126 | /* stop the tracing. */ | 1127 | /* stop the tracing. */ |
1127 | tracing_stop(); | 1128 | tracing_stop(); |
1128 | /* check the trace buffer */ | 1129 | /* check the trace buffer */ |
1129 | ret = trace_test_buffer(tr, &count); | 1130 | ret = trace_test_buffer(&tr->trace_buffer, &count); |
1130 | trace->reset(tr); | 1131 | trace->reset(tr); |
1131 | tracing_start(); | 1132 | tracing_start(); |
1132 | 1133 | ||
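The selftest helpers now take a struct trace_buffer * rather than a trace_array *, which is what lets the latency tests look at the snapshot through tr->max_buffer instead of the old stand-alone max_tr global; the dropped __ftrace_dump() forward declaration goes with the watchdog path calling plain ftrace_dump(DUMP_ALL) and turning tracing back on afterwards. The new call pattern, as used repeatedly above:

        ret = trace_test_buffer(&tr->trace_buffer, NULL);         /* live buffer          */
        if (!ret)
                ret = trace_test_buffer(&tr->max_buffer, &count); /* max-latency snapshot */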
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 83a8b5b7bd35..b20428c5efe2 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
@@ -20,13 +20,24 @@ | |||
20 | 20 | ||
21 | #define STACK_TRACE_ENTRIES 500 | 21 | #define STACK_TRACE_ENTRIES 500 |
22 | 22 | ||
23 | #ifdef CC_USING_FENTRY | ||
24 | # define fentry 1 | ||
25 | #else | ||
26 | # define fentry 0 | ||
27 | #endif | ||
28 | |||
23 | static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] = | 29 | static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] = |
24 | { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX }; | 30 | { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX }; |
25 | static unsigned stack_dump_index[STACK_TRACE_ENTRIES]; | 31 | static unsigned stack_dump_index[STACK_TRACE_ENTRIES]; |
26 | 32 | ||
33 | /* | ||
34 | * Reserve one entry for the passed in ip. This will allow | ||
35 | * us to remove most or all of the stack size overhead | ||
36 | * added by the stack tracer itself. | ||
37 | */ | ||
27 | static struct stack_trace max_stack_trace = { | 38 | static struct stack_trace max_stack_trace = { |
28 | .max_entries = STACK_TRACE_ENTRIES, | 39 | .max_entries = STACK_TRACE_ENTRIES - 1, |
29 | .entries = stack_dump_trace, | 40 | .entries = &stack_dump_trace[1], |
30 | }; | 41 | }; |
31 | 42 | ||
32 | static unsigned long max_stack_size; | 43 | static unsigned long max_stack_size; |
@@ -39,25 +50,34 @@ static DEFINE_MUTEX(stack_sysctl_mutex); | |||
39 | int stack_tracer_enabled; | 50 | int stack_tracer_enabled; |
40 | static int last_stack_tracer_enabled; | 51 | static int last_stack_tracer_enabled; |
41 | 52 | ||
42 | static inline void check_stack(void) | 53 | static inline void |
54 | check_stack(unsigned long ip, unsigned long *stack) | ||
43 | { | 55 | { |
44 | unsigned long this_size, flags; | 56 | unsigned long this_size, flags; |
45 | unsigned long *p, *top, *start; | 57 | unsigned long *p, *top, *start; |
58 | static int tracer_frame; | ||
59 | int frame_size = ACCESS_ONCE(tracer_frame); | ||
46 | int i; | 60 | int i; |
47 | 61 | ||
48 | this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1); | 62 | this_size = ((unsigned long)stack) & (THREAD_SIZE-1); |
49 | this_size = THREAD_SIZE - this_size; | 63 | this_size = THREAD_SIZE - this_size; |
64 | /* Remove the frame of the tracer */ | ||
65 | this_size -= frame_size; | ||
50 | 66 | ||
51 | if (this_size <= max_stack_size) | 67 | if (this_size <= max_stack_size) |
52 | return; | 68 | return; |
53 | 69 | ||
54 | /* we do not handle interrupt stacks yet */ | 70 | /* we do not handle interrupt stacks yet */ |
55 | if (!object_is_on_stack(&this_size)) | 71 | if (!object_is_on_stack(stack)) |
56 | return; | 72 | return; |
57 | 73 | ||
58 | local_irq_save(flags); | 74 | local_irq_save(flags); |
59 | arch_spin_lock(&max_stack_lock); | 75 | arch_spin_lock(&max_stack_lock); |
60 | 76 | ||
77 | /* In case another CPU set the tracer_frame on us */ | ||
78 | if (unlikely(!frame_size)) | ||
79 | this_size -= tracer_frame; | ||
80 | |||
61 | /* a race could have already updated it */ | 81 | /* a race could have already updated it */ |
62 | if (this_size <= max_stack_size) | 82 | if (this_size <= max_stack_size) |
63 | goto out; | 83 | goto out; |
@@ -70,10 +90,18 @@ static inline void check_stack(void) | |||
70 | save_stack_trace(&max_stack_trace); | 90 | save_stack_trace(&max_stack_trace); |
71 | 91 | ||
72 | /* | 92 | /* |
93 | * Add the passed in ip from the function tracer. | ||
94 | * Searching for this on the stack will skip over | ||
95 | * most of the overhead from the stack tracer itself. | ||
96 | */ | ||
97 | stack_dump_trace[0] = ip; | ||
98 | max_stack_trace.nr_entries++; | ||
99 | |||
100 | /* | ||
73 | * Now find where in the stack these are. | 101 | * Now find where in the stack these are. |
74 | */ | 102 | */ |
75 | i = 0; | 103 | i = 0; |
76 | start = &this_size; | 104 | start = stack; |
77 | top = (unsigned long *) | 105 | top = (unsigned long *) |
78 | (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE); | 106 | (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE); |
79 | 107 | ||
@@ -97,6 +125,18 @@ static inline void check_stack(void) | |||
97 | found = 1; | 125 | found = 1; |
98 | /* Start the search from here */ | 126 | /* Start the search from here */ |
99 | start = p + 1; | 127 | start = p + 1; |
128 | /* | ||
129 | * We do not want to show the overhead | ||
130 | * of the stack tracer stack in the | ||
131 | * max stack. If we haven't figured | ||
132 | * out what that is, then figure it out | ||
133 | * now. | ||
134 | */ | ||
135 | if (unlikely(!tracer_frame) && i == 1) { | ||
136 | tracer_frame = (p - stack) * | ||
137 | sizeof(unsigned long); | ||
138 | max_stack_size -= tracer_frame; | ||
139 | } | ||
100 | } | 140 | } |
101 | } | 141 | } |
102 | 142 | ||
@@ -113,6 +153,7 @@ static void | |||
113 | stack_trace_call(unsigned long ip, unsigned long parent_ip, | 153 | stack_trace_call(unsigned long ip, unsigned long parent_ip, |
114 | struct ftrace_ops *op, struct pt_regs *pt_regs) | 154 | struct ftrace_ops *op, struct pt_regs *pt_regs) |
115 | { | 155 | { |
156 | unsigned long stack; | ||
116 | int cpu; | 157 | int cpu; |
117 | 158 | ||
118 | preempt_disable_notrace(); | 159 | preempt_disable_notrace(); |
@@ -122,7 +163,26 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, | |||
122 | if (per_cpu(trace_active, cpu)++ != 0) | 163 | if (per_cpu(trace_active, cpu)++ != 0) |
123 | goto out; | 164 | goto out; |
124 | 165 | ||
125 | check_stack(); | 166 | /* |
167 | * When fentry is used, the traced function does not get | ||
168 | * its stack frame set up, and we lose the parent. | ||
169 | * The ip is pretty useless because the function tracer | ||
170 | * was called before that function set up its stack frame. | ||
171 | * In this case, we use the parent ip. | ||
172 | * | ||
173 | * By adding the return address of either the parent ip | ||
174 | * or the current ip we can disregard most of the stack usage | ||
175 | * caused by the stack tracer itself. | ||
176 | * | ||
177 | * The function tracer always reports the address of where the | ||
178 | * mcount call was, but the stack will hold the return address. | ||
179 | */ | ||
180 | if (fentry) | ||
181 | ip = parent_ip; | ||
182 | else | ||
183 | ip += MCOUNT_INSN_SIZE; | ||
184 | |||
185 | check_stack(ip, &stack); | ||
126 | 186 | ||
127 | out: | 187 | out: |
128 | per_cpu(trace_active, cpu)--; | 188 | per_cpu(trace_active, cpu)--; |
@@ -371,6 +431,8 @@ static __init int stack_trace_init(void) | |||
371 | struct dentry *d_tracer; | 431 | struct dentry *d_tracer; |
372 | 432 | ||
373 | d_tracer = tracing_init_dentry(); | 433 | d_tracer = tracing_init_dentry(); |
434 | if (!d_tracer) | ||
435 | return 0; | ||
374 | 436 | ||
375 | trace_create_file("stack_max_size", 0644, d_tracer, | 437 | trace_create_file("stack_max_size", 0644, d_tracer, |
376 | &max_stack_size, &stack_max_size_fops); | 438 | &max_stack_size, &stack_max_size_fops); |
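The stack tracer changes do two things. First, stack_trace_call() now hands check_stack() a usable return address: with fentry the traced function has not set up a frame yet, so the parent ip is used; without fentry the reported mcount call address is bumped by MCOUNT_INSN_SIZE so it matches the return address actually sitting on the stack. Second, check_stack() learns the stack tracer's own frame size the first time that injected address is located on the stack and subtracts it from every measurement, so stack_max_size reflects the traced code rather than the tracer. A hedged sketch of the accounting, names as in the hunks above (the index increment that makes the i == 1 test fire right after entry 0 matches is outside these hunks):

        /* current depth: distance from the top of the THREAD_SIZE-aligned
         * stack area down to the probe's local 'stack' variable */
        this_size  = THREAD_SIZE - (((unsigned long)stack) & (THREAD_SIZE - 1));
        this_size -= tracer_frame;              /* drop the tracer's own frame, once known */

        /* learning tracer_frame: when the injected ip (stack_dump_trace[0]) is
         * first found at stack slot p, everything between 'stack' and p was
         * put there by the stack tracer itself */
        tracer_frame    = (p - stack) * sizeof(unsigned long);
        max_stack_size -= tracer_frame;         /* retro-correct the stored maximum */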
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 96cffb269e73..847f88a6194b 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c | |||
@@ -307,6 +307,8 @@ static int tracing_stat_init(void) | |||
307 | struct dentry *d_tracing; | 307 | struct dentry *d_tracing; |
308 | 308 | ||
309 | d_tracing = tracing_init_dentry(); | 309 | d_tracing = tracing_init_dentry(); |
310 | if (!d_tracing) | ||
311 | return 0; | ||
310 | 312 | ||
311 | stat_dir = debugfs_create_dir("trace_stat", d_tracing); | 313 | stat_dir = debugfs_create_dir("trace_stat", d_tracing); |
312 | if (!stat_dir) | 314 | if (!stat_dir) |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 7a809e321058..8f2ac73c7a5f 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -12,10 +12,6 @@ | |||
12 | #include "trace.h" | 12 | #include "trace.h" |
13 | 13 | ||
14 | static DEFINE_MUTEX(syscall_trace_lock); | 14 | static DEFINE_MUTEX(syscall_trace_lock); |
15 | static int sys_refcount_enter; | ||
16 | static int sys_refcount_exit; | ||
17 | static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); | ||
18 | static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); | ||
19 | 15 | ||
20 | static int syscall_enter_register(struct ftrace_event_call *event, | 16 | static int syscall_enter_register(struct ftrace_event_call *event, |
21 | enum trace_reg type, void *data); | 17 | enum trace_reg type, void *data); |
@@ -41,7 +37,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name | |||
41 | /* | 37 | /* |
42 | * Only compare after the "sys" prefix. Archs that use | 38 | * Only compare after the "sys" prefix. Archs that use |
43 | * syscall wrappers may have syscalls symbols aliases prefixed | 39 | * syscall wrappers may have syscalls symbols aliases prefixed |
44 | * with "SyS" instead of "sys", leading to an unwanted | 40 | * with ".SyS" or ".sys" instead of "sys", leading to an unwanted |
45 | * mismatch. | 41 | * mismatch. |
46 | */ | 42 | */ |
47 | return !strcmp(sym + 3, name + 3); | 43 | return !strcmp(sym + 3, name + 3); |
@@ -265,7 +261,7 @@ static void free_syscall_print_fmt(struct ftrace_event_call *call) | |||
265 | kfree(call->print_fmt); | 261 | kfree(call->print_fmt); |
266 | } | 262 | } |
267 | 263 | ||
268 | static int syscall_enter_define_fields(struct ftrace_event_call *call) | 264 | static int __init syscall_enter_define_fields(struct ftrace_event_call *call) |
269 | { | 265 | { |
270 | struct syscall_trace_enter trace; | 266 | struct syscall_trace_enter trace; |
271 | struct syscall_metadata *meta = call->data; | 267 | struct syscall_metadata *meta = call->data; |
@@ -288,7 +284,7 @@ static int syscall_enter_define_fields(struct ftrace_event_call *call) | |||
288 | return ret; | 284 | return ret; |
289 | } | 285 | } |
290 | 286 | ||
291 | static int syscall_exit_define_fields(struct ftrace_event_call *call) | 287 | static int __init syscall_exit_define_fields(struct ftrace_event_call *call) |
292 | { | 288 | { |
293 | struct syscall_trace_exit trace; | 289 | struct syscall_trace_exit trace; |
294 | int ret; | 290 | int ret; |
@@ -303,8 +299,9 @@ static int syscall_exit_define_fields(struct ftrace_event_call *call) | |||
303 | return ret; | 299 | return ret; |
304 | } | 300 | } |
305 | 301 | ||
306 | static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) | 302 | static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) |
307 | { | 303 | { |
304 | struct trace_array *tr = data; | ||
308 | struct syscall_trace_enter *entry; | 305 | struct syscall_trace_enter *entry; |
309 | struct syscall_metadata *sys_data; | 306 | struct syscall_metadata *sys_data; |
310 | struct ring_buffer_event *event; | 307 | struct ring_buffer_event *event; |
@@ -315,7 +312,7 @@ static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
315 | syscall_nr = trace_get_syscall_nr(current, regs); | 312 | syscall_nr = trace_get_syscall_nr(current, regs); |
316 | if (syscall_nr < 0) | 313 | if (syscall_nr < 0) |
317 | return; | 314 | return; |
318 | if (!test_bit(syscall_nr, enabled_enter_syscalls)) | 315 | if (!test_bit(syscall_nr, tr->enabled_enter_syscalls)) |
319 | return; | 316 | return; |
320 | 317 | ||
321 | sys_data = syscall_nr_to_meta(syscall_nr); | 318 | sys_data = syscall_nr_to_meta(syscall_nr); |
@@ -324,7 +321,8 @@ static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
324 | 321 | ||
325 | size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; | 322 | size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; |
326 | 323 | ||
327 | event = trace_current_buffer_lock_reserve(&buffer, | 324 | buffer = tr->trace_buffer.buffer; |
325 | event = trace_buffer_lock_reserve(buffer, | ||
328 | sys_data->enter_event->event.type, size, 0, 0); | 326 | sys_data->enter_event->event.type, size, 0, 0); |
329 | if (!event) | 327 | if (!event) |
330 | return; | 328 | return; |
@@ -338,8 +336,9 @@ static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
338 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); | 336 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); |
339 | } | 337 | } |
340 | 338 | ||
341 | static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | 339 | static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) |
342 | { | 340 | { |
341 | struct trace_array *tr = data; | ||
343 | struct syscall_trace_exit *entry; | 342 | struct syscall_trace_exit *entry; |
344 | struct syscall_metadata *sys_data; | 343 | struct syscall_metadata *sys_data; |
345 | struct ring_buffer_event *event; | 344 | struct ring_buffer_event *event; |
@@ -349,14 +348,15 @@ static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
349 | syscall_nr = trace_get_syscall_nr(current, regs); | 348 | syscall_nr = trace_get_syscall_nr(current, regs); |
350 | if (syscall_nr < 0) | 349 | if (syscall_nr < 0) |
351 | return; | 350 | return; |
352 | if (!test_bit(syscall_nr, enabled_exit_syscalls)) | 351 | if (!test_bit(syscall_nr, tr->enabled_exit_syscalls)) |
353 | return; | 352 | return; |
354 | 353 | ||
355 | sys_data = syscall_nr_to_meta(syscall_nr); | 354 | sys_data = syscall_nr_to_meta(syscall_nr); |
356 | if (!sys_data) | 355 | if (!sys_data) |
357 | return; | 356 | return; |
358 | 357 | ||
359 | event = trace_current_buffer_lock_reserve(&buffer, | 358 | buffer = tr->trace_buffer.buffer; |
359 | event = trace_buffer_lock_reserve(buffer, | ||
360 | sys_data->exit_event->event.type, sizeof(*entry), 0, 0); | 360 | sys_data->exit_event->event.type, sizeof(*entry), 0, 0); |
361 | if (!event) | 361 | if (!event) |
362 | return; | 362 | return; |
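The enter and exit probes now receive the tracing instance through their callback data pointer, consult that instance's enabled_enter_syscalls/enabled_exit_syscalls bitmap, and reserve their events in the instance's own ring buffer rather than the global one. A minimal user-space sketch of the shape of that per-instance check (structure and names are illustrative, not the kernel's):

#include <stdbool.h>

#define NR_SYSCALLS_EXAMPLE 512			/* illustrative only */

/* Stand-in for the state the patch moves into struct trace_array:
 * each tracing instance carries its own enable flags and its own buffer. */
struct trace_instance {
	bool enabled_enter[NR_SYSCALLS_EXAMPLE];
	void *buffer;				/* this instance's ring buffer */
};

/* The probe gets its instance via the callback data pointer instead of
 * consulting global state. */
static void example_syscall_enter(void *data, int syscall_nr)
{
	struct trace_instance *tr = data;

	if (syscall_nr < 0 || syscall_nr >= NR_SYSCALLS_EXAMPLE)
		return;
	if (!tr->enabled_enter[syscall_nr])	/* per-instance filter */
		return;
	/* ... reserve an event in tr->buffer and record the syscall ... */
}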
@@ -370,8 +370,10 @@ static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
370 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); | 370 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); |
371 | } | 371 | } |
372 | 372 | ||
373 | static int reg_event_syscall_enter(struct ftrace_event_call *call) | 373 | static int reg_event_syscall_enter(struct ftrace_event_file *file, |
374 | struct ftrace_event_call *call) | ||
374 | { | 375 | { |
376 | struct trace_array *tr = file->tr; | ||
375 | int ret = 0; | 377 | int ret = 0; |
376 | int num; | 378 | int num; |
377 | 379 | ||
@@ -379,33 +381,37 @@ static int reg_event_syscall_enter(struct ftrace_event_call *call) | |||
379 | if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) | 381 | if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) |
380 | return -ENOSYS; | 382 | return -ENOSYS; |
381 | mutex_lock(&syscall_trace_lock); | 383 | mutex_lock(&syscall_trace_lock); |
382 | if (!sys_refcount_enter) | 384 | if (!tr->sys_refcount_enter) |
383 | ret = register_trace_sys_enter(ftrace_syscall_enter, NULL); | 385 | ret = register_trace_sys_enter(ftrace_syscall_enter, tr); |
384 | if (!ret) { | 386 | if (!ret) { |
385 | set_bit(num, enabled_enter_syscalls); | 387 | set_bit(num, tr->enabled_enter_syscalls); |
386 | sys_refcount_enter++; | 388 | tr->sys_refcount_enter++; |
387 | } | 389 | } |
388 | mutex_unlock(&syscall_trace_lock); | 390 | mutex_unlock(&syscall_trace_lock); |
389 | return ret; | 391 | return ret; |
390 | } | 392 | } |
391 | 393 | ||
392 | static void unreg_event_syscall_enter(struct ftrace_event_call *call) | 394 | static void unreg_event_syscall_enter(struct ftrace_event_file *file, |
395 | struct ftrace_event_call *call) | ||
393 | { | 396 | { |
397 | struct trace_array *tr = file->tr; | ||
394 | int num; | 398 | int num; |
395 | 399 | ||
396 | num = ((struct syscall_metadata *)call->data)->syscall_nr; | 400 | num = ((struct syscall_metadata *)call->data)->syscall_nr; |
397 | if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) | 401 | if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) |
398 | return; | 402 | return; |
399 | mutex_lock(&syscall_trace_lock); | 403 | mutex_lock(&syscall_trace_lock); |
400 | sys_refcount_enter--; | 404 | tr->sys_refcount_enter--; |
401 | clear_bit(num, enabled_enter_syscalls); | 405 | clear_bit(num, tr->enabled_enter_syscalls); |
402 | if (!sys_refcount_enter) | 406 | if (!tr->sys_refcount_enter) |
403 | unregister_trace_sys_enter(ftrace_syscall_enter, NULL); | 407 | unregister_trace_sys_enter(ftrace_syscall_enter, tr); |
404 | mutex_unlock(&syscall_trace_lock); | 408 | mutex_unlock(&syscall_trace_lock); |
405 | } | 409 | } |
406 | 410 | ||
407 | static int reg_event_syscall_exit(struct ftrace_event_call *call) | 411 | static int reg_event_syscall_exit(struct ftrace_event_file *file, |
412 | struct ftrace_event_call *call) | ||
408 | { | 413 | { |
414 | struct trace_array *tr = file->tr; | ||
409 | int ret = 0; | 415 | int ret = 0; |
410 | int num; | 416 | int num; |
411 | 417 | ||
@@ -413,28 +419,30 @@ static int reg_event_syscall_exit(struct ftrace_event_call *call) | |||
413 | if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) | 419 | if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) |
414 | return -ENOSYS; | 420 | return -ENOSYS; |
415 | mutex_lock(&syscall_trace_lock); | 421 | mutex_lock(&syscall_trace_lock); |
416 | if (!sys_refcount_exit) | 422 | if (!tr->sys_refcount_exit) |
417 | ret = register_trace_sys_exit(ftrace_syscall_exit, NULL); | 423 | ret = register_trace_sys_exit(ftrace_syscall_exit, tr); |
418 | if (!ret) { | 424 | if (!ret) { |
419 | set_bit(num, enabled_exit_syscalls); | 425 | set_bit(num, tr->enabled_exit_syscalls); |
420 | sys_refcount_exit++; | 426 | tr->sys_refcount_exit++; |
421 | } | 427 | } |
422 | mutex_unlock(&syscall_trace_lock); | 428 | mutex_unlock(&syscall_trace_lock); |
423 | return ret; | 429 | return ret; |
424 | } | 430 | } |
425 | 431 | ||
426 | static void unreg_event_syscall_exit(struct ftrace_event_call *call) | 432 | static void unreg_event_syscall_exit(struct ftrace_event_file *file, |
433 | struct ftrace_event_call *call) | ||
427 | { | 434 | { |
435 | struct trace_array *tr = file->tr; | ||
428 | int num; | 436 | int num; |
429 | 437 | ||
430 | num = ((struct syscall_metadata *)call->data)->syscall_nr; | 438 | num = ((struct syscall_metadata *)call->data)->syscall_nr; |
431 | if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) | 439 | if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) |
432 | return; | 440 | return; |
433 | mutex_lock(&syscall_trace_lock); | 441 | mutex_lock(&syscall_trace_lock); |
434 | sys_refcount_exit--; | 442 | tr->sys_refcount_exit--; |
435 | clear_bit(num, enabled_exit_syscalls); | 443 | clear_bit(num, tr->enabled_exit_syscalls); |
436 | if (!sys_refcount_exit) | 444 | if (!tr->sys_refcount_exit) |
437 | unregister_trace_sys_exit(ftrace_syscall_exit, NULL); | 445 | unregister_trace_sys_exit(ftrace_syscall_exit, tr); |
438 | mutex_unlock(&syscall_trace_lock); | 446 | mutex_unlock(&syscall_trace_lock); |
439 | } | 447 | } |
440 | 448 | ||
@@ -471,7 +479,7 @@ struct trace_event_functions exit_syscall_print_funcs = { | |||
471 | .trace = print_syscall_exit, | 479 | .trace = print_syscall_exit, |
472 | }; | 480 | }; |
473 | 481 | ||
474 | struct ftrace_event_class event_class_syscall_enter = { | 482 | struct ftrace_event_class __refdata event_class_syscall_enter = { |
475 | .system = "syscalls", | 483 | .system = "syscalls", |
476 | .reg = syscall_enter_register, | 484 | .reg = syscall_enter_register, |
477 | .define_fields = syscall_enter_define_fields, | 485 | .define_fields = syscall_enter_define_fields, |
@@ -479,7 +487,7 @@ struct ftrace_event_class event_class_syscall_enter = { | |||
479 | .raw_init = init_syscall_trace, | 487 | .raw_init = init_syscall_trace, |
480 | }; | 488 | }; |
481 | 489 | ||
482 | struct ftrace_event_class event_class_syscall_exit = { | 490 | struct ftrace_event_class __refdata event_class_syscall_exit = { |
483 | .system = "syscalls", | 491 | .system = "syscalls", |
484 | .reg = syscall_exit_register, | 492 | .reg = syscall_exit_register, |
485 | .define_fields = syscall_exit_define_fields, | 493 | .define_fields = syscall_exit_define_fields, |
@@ -685,11 +693,13 @@ static void perf_sysexit_disable(struct ftrace_event_call *call) | |||
685 | static int syscall_enter_register(struct ftrace_event_call *event, | 693 | static int syscall_enter_register(struct ftrace_event_call *event, |
686 | enum trace_reg type, void *data) | 694 | enum trace_reg type, void *data) |
687 | { | 695 | { |
696 | struct ftrace_event_file *file = data; | ||
697 | |||
688 | switch (type) { | 698 | switch (type) { |
689 | case TRACE_REG_REGISTER: | 699 | case TRACE_REG_REGISTER: |
690 | return reg_event_syscall_enter(event); | 700 | return reg_event_syscall_enter(file, event); |
691 | case TRACE_REG_UNREGISTER: | 701 | case TRACE_REG_UNREGISTER: |
692 | unreg_event_syscall_enter(event); | 702 | unreg_event_syscall_enter(file, event); |
693 | return 0; | 703 | return 0; |
694 | 704 | ||
695 | #ifdef CONFIG_PERF_EVENTS | 705 | #ifdef CONFIG_PERF_EVENTS |
@@ -711,11 +721,13 @@ static int syscall_enter_register(struct ftrace_event_call *event, | |||
711 | static int syscall_exit_register(struct ftrace_event_call *event, | 721 | static int syscall_exit_register(struct ftrace_event_call *event, |
712 | enum trace_reg type, void *data) | 722 | enum trace_reg type, void *data) |
713 | { | 723 | { |
724 | struct ftrace_event_file *file = data; | ||
725 | |||
714 | switch (type) { | 726 | switch (type) { |
715 | case TRACE_REG_REGISTER: | 727 | case TRACE_REG_REGISTER: |
716 | return reg_event_syscall_exit(event); | 728 | return reg_event_syscall_exit(file, event); |
717 | case TRACE_REG_UNREGISTER: | 729 | case TRACE_REG_UNREGISTER: |
718 | unreg_event_syscall_exit(event); | 730 | unreg_event_syscall_exit(file, event); |
719 | return 0; | 731 | return 0; |
720 | 732 | ||
721 | #ifdef CONFIG_PERF_EVENTS | 733 | #ifdef CONFIG_PERF_EVENTS |
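With the changes above, the ->reg() callbacks interpret their data argument as the ftrace_event_file being switched on or off and hand it down, so the per-instance trace_array is reachable as file->tr. A sketch of that plumbing with stand-in types (the real kernel structures are larger):

struct trace_instance_ex;			/* stands in for struct trace_array */

struct event_file_ex {				/* stands in for struct ftrace_event_file */
	struct trace_instance_ex *tr;		/* instance this file belongs to */
};

enum reg_type { REG_REGISTER, REG_UNREGISTER };

static int reg_enter(struct event_file_ex *file) { (void)file; return 0; }
static void unreg_enter(struct event_file_ex *file) { (void)file; }

/* The registration callback receives the event file as opaque data and
 * forwards it, so per-instance state stays reachable. */
static int example_enter_register(enum reg_type type, void *data)
{
	struct event_file_ex *file = data;

	switch (type) {
	case REG_REGISTER:
		return reg_enter(file);
	case REG_UNREGISTER:
		unreg_enter(file);
		return 0;
	}
	return 0;
}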
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 0c05a4592047..29f26540e9c9 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c | |||
@@ -112,7 +112,8 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, | |||
112 | int nr_probes = 0; | 112 | int nr_probes = 0; |
113 | struct tracepoint_func *old, *new; | 113 | struct tracepoint_func *old, *new; |
114 | 114 | ||
115 | WARN_ON(!probe); | 115 | if (WARN_ON(!probe)) |
116 | return ERR_PTR(-EINVAL); | ||
116 | 117 | ||
117 | debug_print_probes(entry); | 118 | debug_print_probes(entry); |
118 | old = entry->funcs; | 119 | old = entry->funcs; |
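The hunk above turns a warn-and-continue check into an early error return, so a NULL callback can never make it into the probe array. A sketch of the pattern (names are illustrative):

#include <errno.h>
#include <stdio.h>

static int add_probe_checked(void (*probe)(void *), void *data)
{
	(void)data;
	if (!probe) {
		fprintf(stderr, "WARN: NULL probe\n");	/* stands in for WARN_ON() */
		return -EINVAL;				/* the kernel returns ERR_PTR(-EINVAL) */
	}
	/* ... append (probe, data) to the entry's probe array ... */
	return 0;
}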
@@ -152,13 +153,18 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, | |||
152 | 153 | ||
153 | debug_print_probes(entry); | 154 | debug_print_probes(entry); |
154 | /* (N -> M), (N > 1, M >= 0) probes */ | 155 | /* (N -> M), (N > 1, M >= 0) probes */ |
155 | for (nr_probes = 0; old[nr_probes].func; nr_probes++) { | 156 | if (probe) { |
156 | if (!probe || | 157 | for (nr_probes = 0; old[nr_probes].func; nr_probes++) { |
157 | (old[nr_probes].func == probe && | 158 | if (old[nr_probes].func == probe && |
158 | old[nr_probes].data == data)) | 159 | old[nr_probes].data == data) |
159 | nr_del++; | 160 | nr_del++; |
161 | } | ||
160 | } | 162 | } |
161 | 163 | ||
164 | /* | ||
165 | * If probe is NULL, then nr_probes = nr_del = 0, and then the | ||
166 | * entire entry will be removed. | ||
167 | */ | ||
162 | if (nr_probes - nr_del == 0) { | 168 | if (nr_probes - nr_del == 0) { |
163 | /* N -> 0, (N > 1) */ | 169 | /* N -> 0, (N > 1) */ |
164 | entry->funcs = NULL; | 170 | entry->funcs = NULL; |
@@ -173,8 +179,7 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, | |||
173 | if (new == NULL) | 179 | if (new == NULL) |
174 | return ERR_PTR(-ENOMEM); | 180 | return ERR_PTR(-ENOMEM); |
175 | for (i = 0; old[i].func; i++) | 181 | for (i = 0; old[i].func; i++) |
176 | if (probe && | 182 | if (old[i].func != probe || old[i].data != data) |
177 | (old[i].func != probe || old[i].data != data)) | ||
178 | new[j++] = old[i]; | 183 | new[j++] = old[i]; |
179 | new[nr_probes - nr_del].func = NULL; | 184 | new[nr_probes - nr_del].func = NULL; |
180 | entry->refcount = nr_probes - nr_del; | 185 | entry->refcount = nr_probes - nr_del; |
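The rewritten removal path above only counts matches when a specific probe was passed in; a NULL probe leaves both counters at zero, so the whole entry is torn down (as the new comment states) and the copy loop no longer needs a NULL special case. A user-space sketch of the same count-then-compact logic:

#include <stdlib.h>

struct probe {
	void (*func)(void *);
	void *data;
};

/*
 * Count the entries matching (func, data), then copy the survivors into a
 * freshly allocated, NULL-terminated array.  With func == NULL both counts
 * stay zero and NULL is returned, meaning "remove the whole entry".
 */
static struct probe *remove_probe(struct probe *old, void (*func)(void *), void *data)
{
	int nr_probes = 0, nr_del = 0, i, j = 0;
	struct probe *new;

	if (func) {
		for (nr_probes = 0; old[nr_probes].func; nr_probes++)
			if (old[nr_probes].func == func && old[nr_probes].data == data)
				nr_del++;
	}

	if (nr_probes - nr_del == 0)	/* NULL func, or every probe matched */
		return NULL;

	new = calloc(nr_probes - nr_del + 1, sizeof(*new));
	if (!new)
		return old;		/* simplified: the kernel returns ERR_PTR(-ENOMEM) */

	for (i = 0; old[i].func; i++)
		if (old[i].func != func || old[i].data != data)
			new[j++] = old[i];
	/* calloc() already left the terminating entry's func NULL */
	return new;
}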