author     Ingo Molnar <mingo@elte.hu>   2009-06-17 06:52:15 -0400
committer  Ingo Molnar <mingo@elte.hu>   2009-06-17 06:56:49 -0400
commit     eadb8a091b27a840de7450f84ecff5ef13476424 (patch)
tree       58c3782d40def63baa8167f3d31e3048cb4c7660 /kernel/trace
parent     73874005cd8800440be4299bd095387fff4b90ac (diff)
parent     65795efbd380a832ae508b04dba8f8e53f0b84d9 (diff)
Merge branch 'linus' into tracing/hw-breakpoints
Conflicts:
arch/x86/Kconfig
arch/x86/kernel/traps.c
arch/x86/power/cpu.c
arch/x86/power/cpu_32.c
kernel/Makefile
Semantic conflict:
arch/x86/kernel/hw_breakpoint.c
Merge reason: Resolve the conflicts, move from put_cpu_no_sched() to
put_cpu() in arch/x86/kernel/hw_breakpoint.c.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
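For reference, get_cpu() returns the current CPU id with preemption disabled, and put_cpu() re-enables preemption; the put_cpu_no_sched() variant (which skipped the immediate reschedule check) was removed upstream, hence the semantic conflict. A minimal sketch of the pattern the resolution converges on, illustrative only rather than the actual hw_breakpoint code:

	static void example_percpu_section(void)
	{
		int cpu = get_cpu();	/* disables preemption, returns this CPU id */

		/* ... operate on per-CPU debug-register state for 'cpu' ... */

		put_cpu();		/* re-enables preemption */
	}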
Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                 |  70
-rw-r--r--  kernel/trace/Makefile                |   5
-rw-r--r--  kernel/trace/blktrace.c              |  91
-rw-r--r--  kernel/trace/ftrace.c                |  79
-rw-r--r--  kernel/trace/ring_buffer.c           | 114
-rw-r--r--  kernel/trace/trace.c                 |  26
-rw-r--r--  kernel/trace/trace_events.c          |   4
-rw-r--r--  kernel/trace/trace_events_filter.c   |   6
-rw-r--r--  kernel/trace/trace_functions_graph.c |   6
-rw-r--r--  kernel/trace/trace_output.c          |  85
-rw-r--r--  kernel/trace/trace_output.h          |   4
-rw-r--r--  kernel/trace/trace_stack.c           |   2
-rw-r--r--  kernel/trace/trace_sysprof.c         |   3
13 files changed, 357 insertions(+), 138 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d7f01e6e8ba5..ae048a2dbbe8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -56,6 +56,13 @@ config CONTEXT_SWITCH_TRACER | |||
56 | select MARKERS | 56 | select MARKERS |
57 | bool | 57 | bool |
58 | 58 | ||
59 | # All tracer options should select GENERIC_TRACER. Options that are | ||
60 | # enabled by all tracers (the context switch and event tracers) select | ||
61 | # TRACING instead. This allows those options to appear when no other tracer | ||
62 | # is selected, while hiding them when something else selects them. We need | ||
63 | # both GENERIC_TRACER and TRACING to avoid circular dependencies while | ||
64 | # hiding the automatic options. | ||
65 | |||
59 | config TRACING | 66 | config TRACING |
60 | bool | 67 | bool |
61 | select DEBUG_FS | 68 | select DEBUG_FS |
@@ -66,6 +73,10 @@ config TRACING | |||
66 | select BINARY_PRINTF | 73 | select BINARY_PRINTF |
67 | select EVENT_TRACING | 74 | select EVENT_TRACING |
68 | 75 | ||
76 | config GENERIC_TRACER | ||
77 | bool | ||
78 | select TRACING | ||
79 | |||
69 | # | 80 | # |
70 | # Minimum requirements an architecture has to meet for us to | 81 | # Minimum requirements an architecture has to meet for us to |
71 | # be able to offer generic tracing facilities: | 82 | # be able to offer generic tracing facilities: |
@@ -95,7 +106,7 @@ config FUNCTION_TRACER | |||
95 | depends on HAVE_FUNCTION_TRACER | 106 | depends on HAVE_FUNCTION_TRACER |
96 | select FRAME_POINTER | 107 | select FRAME_POINTER |
97 | select KALLSYMS | 108 | select KALLSYMS |
98 | select TRACING | 109 | select GENERIC_TRACER |
99 | select CONTEXT_SWITCH_TRACER | 110 | select CONTEXT_SWITCH_TRACER |
100 | help | 111 | help |
101 | Enable the kernel to trace every kernel function. This is done | 112 | Enable the kernel to trace every kernel function. This is done |
@@ -126,7 +137,7 @@ config IRQSOFF_TRACER | |||
126 | depends on TRACE_IRQFLAGS_SUPPORT | 137 | depends on TRACE_IRQFLAGS_SUPPORT |
127 | depends on GENERIC_TIME | 138 | depends on GENERIC_TIME |
128 | select TRACE_IRQFLAGS | 139 | select TRACE_IRQFLAGS |
129 | select TRACING | 140 | select GENERIC_TRACER |
130 | select TRACER_MAX_TRACE | 141 | select TRACER_MAX_TRACE |
131 | help | 142 | help |
132 | This option measures the time spent in irqs-off critical | 143 | This option measures the time spent in irqs-off critical |
@@ -136,7 +147,7 @@ config IRQSOFF_TRACER | |||
136 | disabled by default and can be runtime (re-)started | 147 | disabled by default and can be runtime (re-)started |
137 | via: | 148 | via: |
138 | 149 | ||
139 | echo 0 > /debugfs/tracing/tracing_max_latency | 150 | echo 0 > /sys/kernel/debug/tracing/tracing_max_latency |
140 | 151 | ||
141 | (Note that kernel size and overhead increase with this option | 152 | (Note that kernel size and overhead increase with this option |
142 | enabled. This option and the preempt-off timing option can be | 153 | enabled. This option and the preempt-off timing option can be |
@@ -147,7 +158,7 @@ config PREEMPT_TRACER | |||
147 | default n | 158 | default n |
148 | depends on GENERIC_TIME | 159 | depends on GENERIC_TIME |
149 | depends on PREEMPT | 160 | depends on PREEMPT |
150 | select TRACING | 161 | select GENERIC_TRACER |
151 | select TRACER_MAX_TRACE | 162 | select TRACER_MAX_TRACE |
152 | help | 163 | help |
153 | This option measures the time spent in preemption off critical | 164 | This option measures the time spent in preemption off critical |
@@ -157,7 +168,7 @@ config PREEMPT_TRACER | |||
157 | disabled by default and can be runtime (re-)started | 168 | disabled by default and can be runtime (re-)started |
158 | via: | 169 | via: |
159 | 170 | ||
160 | echo 0 > /debugfs/tracing/tracing_max_latency | 171 | echo 0 > /sys/kernel/debug/tracing/tracing_max_latency |
161 | 172 | ||
162 | (Note that kernel size and overhead increase with this option | 173 | (Note that kernel size and overhead increase with this option |
163 | enabled. This option and the irqs-off timing option can be | 174 | enabled. This option and the irqs-off timing option can be |
@@ -166,7 +177,7 @@ config PREEMPT_TRACER | |||
166 | config SYSPROF_TRACER | 177 | config SYSPROF_TRACER |
167 | bool "Sysprof Tracer" | 178 | bool "Sysprof Tracer" |
168 | depends on X86 | 179 | depends on X86 |
169 | select TRACING | 180 | select GENERIC_TRACER |
170 | select CONTEXT_SWITCH_TRACER | 181 | select CONTEXT_SWITCH_TRACER |
171 | help | 182 | help |
172 | This tracer provides the trace needed by the 'Sysprof' userspace | 183 | This tracer provides the trace needed by the 'Sysprof' userspace |
@@ -174,44 +185,33 @@ config SYSPROF_TRACER | |||
174 | 185 | ||
175 | config SCHED_TRACER | 186 | config SCHED_TRACER |
176 | bool "Scheduling Latency Tracer" | 187 | bool "Scheduling Latency Tracer" |
177 | select TRACING | 188 | select GENERIC_TRACER |
178 | select CONTEXT_SWITCH_TRACER | 189 | select CONTEXT_SWITCH_TRACER |
179 | select TRACER_MAX_TRACE | 190 | select TRACER_MAX_TRACE |
180 | help | 191 | help |
181 | This tracer tracks the latency of the highest priority task | 192 | This tracer tracks the latency of the highest priority task |
182 | to be scheduled in, starting from the point it has woken up. | 193 | to be scheduled in, starting from the point it has woken up. |
183 | 194 | ||
184 | config ENABLE_CONTEXT_SWITCH_TRACER | 195 | config ENABLE_DEFAULT_TRACERS |
185 | bool "Trace process context switches" | 196 | bool "Trace process context switches and events" |
186 | select TRACING | 197 | depends on !GENERIC_TRACER |
187 | select CONTEXT_SWITCH_TRACER | ||
188 | help | ||
189 | This tracer gets called from the context switch and records | ||
190 | all switching of tasks. | ||
191 | |||
192 | config ENABLE_EVENT_TRACING | ||
193 | bool "Trace various events in the kernel" | ||
194 | select TRACING | 198 | select TRACING |
195 | help | 199 | help |
196 | This tracer hooks to various trace points in the kernel | 200 | This tracer hooks to various trace points in the kernel |
197 | allowing the user to pick and choose which trace point they | 201 | allowing the user to pick and choose which trace point they |
198 | want to trace. | 202 | want to trace. It also includes the sched_switch tracer plugin. |
199 | |||
200 | Note, all tracers enable event tracing. This option is | ||
201 | only a convenience to enable event tracing when no other | ||
202 | tracers are selected. | ||
203 | 203 | ||
204 | config FTRACE_SYSCALLS | 204 | config FTRACE_SYSCALLS |
205 | bool "Trace syscalls" | 205 | bool "Trace syscalls" |
206 | depends on HAVE_FTRACE_SYSCALLS | 206 | depends on HAVE_FTRACE_SYSCALLS |
207 | select TRACING | 207 | select GENERIC_TRACER |
208 | select KALLSYMS | 208 | select KALLSYMS |
209 | help | 209 | help |
210 | Basic tracer to catch the syscall entry and exit events. | 210 | Basic tracer to catch the syscall entry and exit events. |
211 | 211 | ||
212 | config BOOT_TRACER | 212 | config BOOT_TRACER |
213 | bool "Trace boot initcalls" | 213 | bool "Trace boot initcalls" |
214 | select TRACING | 214 | select GENERIC_TRACER |
215 | select CONTEXT_SWITCH_TRACER | 215 | select CONTEXT_SWITCH_TRACER |
216 | help | 216 | help |
217 | This tracer helps developers to optimize boot times: it records | 217 | This tracer helps developers to optimize boot times: it records |
@@ -228,7 +228,7 @@ config BOOT_TRACER | |||
228 | 228 | ||
229 | config TRACE_BRANCH_PROFILING | 229 | config TRACE_BRANCH_PROFILING |
230 | bool | 230 | bool |
231 | select TRACING | 231 | select GENERIC_TRACER |
232 | 232 | ||
233 | choice | 233 | choice |
234 | prompt "Branch Profiling" | 234 | prompt "Branch Profiling" |
@@ -261,7 +261,7 @@ config PROFILE_ANNOTATED_BRANCHES | |||
261 | This tracer profiles all the likely and unlikely macros | 261 | This tracer profiles all the likely and unlikely macros |
262 | in the kernel. It will display the results in: | 262 | in the kernel. It will display the results in: |
263 | 263 | ||
264 | /debugfs/tracing/profile_annotated_branch | 264 | /sys/kernel/debug/tracing/profile_annotated_branch |
265 | 265 | ||
266 | Note: this will add a significant overhead, only turn this | 266 | Note: this will add a significant overhead, only turn this |
267 | on if you need to profile the system's use of these macros. | 267 | on if you need to profile the system's use of these macros. |
@@ -274,7 +274,7 @@ config PROFILE_ALL_BRANCHES | |||
274 | taken in the kernel is recorded whether it hit or miss. | 274 | taken in the kernel is recorded whether it hit or miss. |
275 | The results will be displayed in: | 275 | The results will be displayed in: |
276 | 276 | ||
277 | /debugfs/tracing/profile_branch | 277 | /sys/kernel/debug/tracing/profile_branch |
278 | 278 | ||
279 | This option also enables the likely/unlikely profiler. | 279 | This option also enables the likely/unlikely profiler. |
280 | 280 | ||
@@ -308,7 +308,7 @@ config BRANCH_TRACER | |||
308 | config POWER_TRACER | 308 | config POWER_TRACER |
309 | bool "Trace power consumption behavior" | 309 | bool "Trace power consumption behavior" |
310 | depends on X86 | 310 | depends on X86 |
311 | select TRACING | 311 | select GENERIC_TRACER |
312 | help | 312 | help |
313 | This tracer helps developers to analyze and optimize the kernel's | 313 | This tracer helps developers to analyze and optimize the kernel's |
314 | power management decisions, specifically the C-state and P-state | 314 | power management decisions, specifically the C-state and P-state |
@@ -344,7 +344,7 @@ config STACK_TRACER | |||
344 | select KALLSYMS | 344 | select KALLSYMS |
345 | help | 345 | help |
346 | This special tracer records the maximum stack footprint of the | 346 | This special tracer records the maximum stack footprint of the |
347 | kernel and displays it in debugfs/tracing/stack_trace. | 347 | kernel and displays it in /sys/kernel/debug/tracing/stack_trace. |
348 | 348 | ||
349 | This tracer works by hooking into every function call that the | 349 | This tracer works by hooking into every function call that the |
350 | kernel executes, and keeping a maximum stack depth value and | 350 | kernel executes, and keeping a maximum stack depth value and |
@@ -363,14 +363,14 @@ config STACK_TRACER | |||
363 | config HW_BRANCH_TRACER | 363 | config HW_BRANCH_TRACER |
364 | depends on HAVE_HW_BRANCH_TRACER | 364 | depends on HAVE_HW_BRANCH_TRACER |
365 | bool "Trace hw branches" | 365 | bool "Trace hw branches" |
366 | select TRACING | 366 | select GENERIC_TRACER |
367 | help | 367 | help |
368 | This tracer records all branches on the system in a circular | 368 | This tracer records all branches on the system in a circular |
369 | buffer giving access to the last N branches for each cpu. | 369 | buffer giving access to the last N branches for each cpu. |
370 | 370 | ||
371 | config KMEMTRACE | 371 | config KMEMTRACE |
372 | bool "Trace SLAB allocations" | 372 | bool "Trace SLAB allocations" |
373 | select TRACING | 373 | select GENERIC_TRACER |
374 | help | 374 | help |
375 | kmemtrace provides tracing for slab allocator functions, such as | 375 | kmemtrace provides tracing for slab allocator functions, such as |
376 | kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected | 376 | kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected |
@@ -390,7 +390,7 @@ config KMEMTRACE | |||
390 | 390 | ||
391 | config WORKQUEUE_TRACER | 391 | config WORKQUEUE_TRACER |
392 | bool "Trace workqueues" | 392 | bool "Trace workqueues" |
393 | select TRACING | 393 | select GENERIC_TRACER |
394 | help | 394 | help |
395 | The workqueue tracer provides some statistical information | 395 | The workqueue tracer provides some statistical information |
396 | about each cpu workqueue thread such as the number of the | 396 | about each cpu workqueue thread such as the number of the |
@@ -406,7 +406,7 @@ config BLK_DEV_IO_TRACE | |||
406 | select RELAY | 406 | select RELAY |
407 | select DEBUG_FS | 407 | select DEBUG_FS |
408 | select TRACEPOINTS | 408 | select TRACEPOINTS |
409 | select TRACING | 409 | select GENERIC_TRACER |
410 | select STACKTRACE | 410 | select STACKTRACE |
411 | help | 411 | help |
412 | Say Y here if you want to be able to trace the block layer actions | 412 | Say Y here if you want to be able to trace the block layer actions |
@@ -467,7 +467,7 @@ config FTRACE_SELFTEST | |||
467 | 467 | ||
468 | config FTRACE_STARTUP_TEST | 468 | config FTRACE_STARTUP_TEST |
469 | bool "Perform a startup test on ftrace" | 469 | bool "Perform a startup test on ftrace" |
470 | depends on TRACING | 470 | depends on GENERIC_TRACER |
471 | select FTRACE_SELFTEST | 471 | select FTRACE_SELFTEST |
472 | help | 472 | help |
473 | This option performs a series of startup tests on ftrace. On bootup | 473 | This option performs a series of startup tests on ftrace. On bootup |
@@ -478,7 +478,7 @@ config FTRACE_STARTUP_TEST | |||
478 | config MMIOTRACE | 478 | config MMIOTRACE |
479 | bool "Memory mapped IO tracing" | 479 | bool "Memory mapped IO tracing" |
480 | depends on HAVE_MMIOTRACE_SUPPORT && PCI | 480 | depends on HAVE_MMIOTRACE_SUPPORT && PCI |
481 | select TRACING | 481 | select GENERIC_TRACER |
482 | help | 482 | help |
483 | Mmiotrace traces Memory Mapped I/O access and is meant for | 483 | Mmiotrace traces Memory Mapped I/O access and is meant for |
484 | debugging and reverse engineering. It is called from the ioremap | 484 | debugging and reverse engineering. It is called from the ioremap |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 658aace8c41e..ce3b1cd02732 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -45,7 +45,10 @@ obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o | |||
45 | obj-$(CONFIG_POWER_TRACER) += trace_power.o | 45 | obj-$(CONFIG_POWER_TRACER) += trace_power.o |
46 | obj-$(CONFIG_KMEMTRACE) += kmemtrace.o | 46 | obj-$(CONFIG_KMEMTRACE) += kmemtrace.o |
47 | obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o | 47 | obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o |
48 | obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o | 48 | obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o |
49 | ifeq ($(CONFIG_BLOCK),y) | ||
50 | obj-$(CONFIG_EVENT_TRACING) += blktrace.o | ||
51 | endif | ||
49 | obj-$(CONFIG_EVENT_TRACING) += trace_events.o | 52 | obj-$(CONFIG_EVENT_TRACING) += trace_events.o |
50 | obj-$(CONFIG_EVENT_TRACING) += trace_export.o | 53 | obj-$(CONFIG_EVENT_TRACING) += trace_export.o |
51 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o | 54 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index e3abf55bc8e5..39af8af6fc30 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,10 +23,14 @@ | |||
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/debugfs.h> | 24 | #include <linux/debugfs.h> |
25 | #include <linux/time.h> | 25 | #include <linux/time.h> |
26 | #include <trace/block.h> | ||
27 | #include <linux/uaccess.h> | 26 | #include <linux/uaccess.h> |
27 | |||
28 | #include <trace/events/block.h> | ||
29 | |||
28 | #include "trace_output.h" | 30 | #include "trace_output.h" |
29 | 31 | ||
32 | #ifdef CONFIG_BLK_DEV_IO_TRACE | ||
33 | |||
30 | static unsigned int blktrace_seq __read_mostly = 1; | 34 | static unsigned int blktrace_seq __read_mostly = 1; |
31 | 35 | ||
32 | static struct trace_array *blk_tr; | 36 | static struct trace_array *blk_tr; |
@@ -665,12 +669,12 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, | |||
665 | 669 | ||
666 | if (blk_pc_request(rq)) { | 670 | if (blk_pc_request(rq)) { |
667 | what |= BLK_TC_ACT(BLK_TC_PC); | 671 | what |= BLK_TC_ACT(BLK_TC_PC); |
668 | __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, | 672 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, |
669 | rq->cmd_len, rq->cmd); | 673 | what, rq->errors, rq->cmd_len, rq->cmd); |
670 | } else { | 674 | } else { |
671 | what |= BLK_TC_ACT(BLK_TC_FS); | 675 | what |= BLK_TC_ACT(BLK_TC_FS); |
672 | __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, | 676 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw, |
673 | rw, what, rq->errors, 0, NULL); | 677 | what, rq->errors, 0, NULL); |
674 | } | 678 | } |
675 | } | 679 | } |
676 | 680 | ||
@@ -877,11 +881,11 @@ void blk_add_driver_data(struct request_queue *q, | |||
877 | return; | 881 | return; |
878 | 882 | ||
879 | if (blk_pc_request(rq)) | 883 | if (blk_pc_request(rq)) |
880 | __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, | 884 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, |
881 | rq->errors, len, data); | 885 | BLK_TA_DRV_DATA, rq->errors, len, data); |
882 | else | 886 | else |
883 | __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, | 887 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0, |
884 | 0, BLK_TA_DRV_DATA, rq->errors, len, data); | 888 | BLK_TA_DRV_DATA, rq->errors, len, data); |
885 | } | 889 | } |
886 | EXPORT_SYMBOL_GPL(blk_add_driver_data); | 890 | EXPORT_SYMBOL_GPL(blk_add_driver_data); |
887 | 891 | ||
@@ -1658,3 +1662,72 @@ int blk_trace_init_sysfs(struct device *dev) | |||
1658 | return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); | 1662 | return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); |
1659 | } | 1663 | } |
1660 | 1664 | ||
1665 | #endif /* CONFIG_BLK_DEV_IO_TRACE */ | ||
1666 | |||
1667 | #ifdef CONFIG_EVENT_TRACING | ||
1668 | |||
1669 | void blk_dump_cmd(char *buf, struct request *rq) | ||
1670 | { | ||
1671 | int i, end; | ||
1672 | int len = rq->cmd_len; | ||
1673 | unsigned char *cmd = rq->cmd; | ||
1674 | |||
1675 | if (!blk_pc_request(rq)) { | ||
1676 | buf[0] = '\0'; | ||
1677 | return; | ||
1678 | } | ||
1679 | |||
1680 | for (end = len - 1; end >= 0; end--) | ||
1681 | if (cmd[end]) | ||
1682 | break; | ||
1683 | end++; | ||
1684 | |||
1685 | for (i = 0; i < len; i++) { | ||
1686 | buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]); | ||
1687 | if (i == end && end != len - 1) { | ||
1688 | sprintf(buf, " .."); | ||
1689 | break; | ||
1690 | } | ||
1691 | } | ||
1692 | } | ||
1693 | |||
1694 | void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) | ||
1695 | { | ||
1696 | int i = 0; | ||
1697 | |||
1698 | if (rw & WRITE) | ||
1699 | rwbs[i++] = 'W'; | ||
1700 | else if (rw & 1 << BIO_RW_DISCARD) | ||
1701 | rwbs[i++] = 'D'; | ||
1702 | else if (bytes) | ||
1703 | rwbs[i++] = 'R'; | ||
1704 | else | ||
1705 | rwbs[i++] = 'N'; | ||
1706 | |||
1707 | if (rw & 1 << BIO_RW_AHEAD) | ||
1708 | rwbs[i++] = 'A'; | ||
1709 | if (rw & 1 << BIO_RW_BARRIER) | ||
1710 | rwbs[i++] = 'B'; | ||
1711 | if (rw & 1 << BIO_RW_SYNCIO) | ||
1712 | rwbs[i++] = 'S'; | ||
1713 | if (rw & 1 << BIO_RW_META) | ||
1714 | rwbs[i++] = 'M'; | ||
1715 | |||
1716 | rwbs[i] = '\0'; | ||
1717 | } | ||
1718 | |||
1719 | void blk_fill_rwbs_rq(char *rwbs, struct request *rq) | ||
1720 | { | ||
1721 | int rw = rq->cmd_flags & 0x03; | ||
1722 | int bytes; | ||
1723 | |||
1724 | if (blk_discard_rq(rq)) | ||
1725 | rw |= (1 << BIO_RW_DISCARD); | ||
1726 | |||
1727 | bytes = blk_rq_bytes(rq); | ||
1728 | |||
1729 | blk_fill_rwbs(rwbs, rw, bytes); | ||
1730 | } | ||
1731 | |||
1732 | #endif /* CONFIG_EVENT_TRACING */ | ||
1733 | |||
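The blk_fill_rwbs() helper added above encodes a request's direction and modifier flags into a short string such as "WS" for a synchronous write or "R" for a plain read. A self-contained userspace model of that encoding, with made-up flag values standing in for the kernel's WRITE and BIO_RW_* bits:

	#include <stdio.h>

	/* Illustrative stand-ins; the real bit positions live in the
	 * block layer headers. */
	#define F_WRITE   (1u << 0)
	#define F_AHEAD   (1u << 1)
	#define F_BARRIER (1u << 2)
	#define F_SYNC    (1u << 3)
	#define F_META    (1u << 4)
	#define F_DISCARD (1u << 5)

	static void fill_rwbs(char *rwbs, unsigned int rw, int bytes)
	{
		int i = 0;

		/* First character: exactly one of W/D/R/N. */
		if (rw & F_WRITE)
			rwbs[i++] = 'W';
		else if (rw & F_DISCARD)
			rwbs[i++] = 'D';
		else if (bytes)
			rwbs[i++] = 'R';
		else
			rwbs[i++] = 'N';	/* no data transferred */

		/* Modifiers are appended in a fixed order. */
		if (rw & F_AHEAD)
			rwbs[i++] = 'A';
		if (rw & F_BARRIER)
			rwbs[i++] = 'B';
		if (rw & F_SYNC)
			rwbs[i++] = 'S';
		if (rw & F_META)
			rwbs[i++] = 'M';

		rwbs[i] = '\0';
	}

	int main(void)
	{
		char rwbs[8];

		fill_rwbs(rwbs, F_WRITE | F_SYNC, 4096);
		printf("%s\n", rwbs);	/* prints "WS" */
		return 0;
	}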
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 140699a9a8a7..bb60732ade0c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -32,6 +32,7 @@ | |||
32 | #include <trace/events/sched.h> | 32 | #include <trace/events/sched.h> |
33 | 33 | ||
34 | #include <asm/ftrace.h> | 34 | #include <asm/ftrace.h> |
35 | #include <asm/setup.h> | ||
35 | 36 | ||
36 | #include "trace_output.h" | 37 | #include "trace_output.h" |
37 | #include "trace_stat.h" | 38 | #include "trace_stat.h" |
@@ -598,7 +599,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip) | |||
598 | local_irq_save(flags); | 599 | local_irq_save(flags); |
599 | 600 | ||
600 | stat = &__get_cpu_var(ftrace_profile_stats); | 601 | stat = &__get_cpu_var(ftrace_profile_stats); |
601 | if (!stat->hash) | 602 | if (!stat->hash || !ftrace_profile_enabled) |
602 | goto out; | 603 | goto out; |
603 | 604 | ||
604 | rec = ftrace_find_profiled_func(stat, ip); | 605 | rec = ftrace_find_profiled_func(stat, ip); |
@@ -629,7 +630,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) | |||
629 | 630 | ||
630 | local_irq_save(flags); | 631 | local_irq_save(flags); |
631 | stat = &__get_cpu_var(ftrace_profile_stats); | 632 | stat = &__get_cpu_var(ftrace_profile_stats); |
632 | if (!stat->hash) | 633 | if (!stat->hash || !ftrace_profile_enabled) |
633 | goto out; | 634 | goto out; |
634 | 635 | ||
635 | calltime = trace->rettime - trace->calltime; | 636 | calltime = trace->rettime - trace->calltime; |
@@ -723,6 +724,10 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, | |||
723 | ftrace_profile_enabled = 1; | 724 | ftrace_profile_enabled = 1; |
724 | } else { | 725 | } else { |
725 | ftrace_profile_enabled = 0; | 726 | ftrace_profile_enabled = 0; |
727 | /* | ||
728 | * unregister_ftrace_profiler calls stop_machine | ||
729 | * so this acts like a synchronize_sched(). | ||
730 | */ | ||
726 | unregister_ftrace_profiler(); | 731 | unregister_ftrace_profiler(); |
727 | } | 732 | } |
728 | } | 733 | } |
@@ -2369,6 +2374,45 @@ void ftrace_set_notrace(unsigned char *buf, int len, int reset) | |||
2369 | ftrace_set_regex(buf, len, reset, 0); | 2374 | ftrace_set_regex(buf, len, reset, 0); |
2370 | } | 2375 | } |
2371 | 2376 | ||
2377 | /* | ||
2378 | * command line interface to allow users to set filters on boot up. | ||
2379 | */ | ||
2380 | #define FTRACE_FILTER_SIZE COMMAND_LINE_SIZE | ||
2381 | static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; | ||
2382 | static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; | ||
2383 | |||
2384 | static int __init set_ftrace_notrace(char *str) | ||
2385 | { | ||
2386 | strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); | ||
2387 | return 1; | ||
2388 | } | ||
2389 | __setup("ftrace_notrace=", set_ftrace_notrace); | ||
2390 | |||
2391 | static int __init set_ftrace_filter(char *str) | ||
2392 | { | ||
2393 | strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); | ||
2394 | return 1; | ||
2395 | } | ||
2396 | __setup("ftrace_filter=", set_ftrace_filter); | ||
2397 | |||
2398 | static void __init set_ftrace_early_filter(char *buf, int enable) | ||
2399 | { | ||
2400 | char *func; | ||
2401 | |||
2402 | while (buf) { | ||
2403 | func = strsep(&buf, ","); | ||
2404 | ftrace_set_regex(func, strlen(func), 0, enable); | ||
2405 | } | ||
2406 | } | ||
2407 | |||
2408 | static void __init set_ftrace_early_filters(void) | ||
2409 | { | ||
2410 | if (ftrace_filter_buf[0]) | ||
2411 | set_ftrace_early_filter(ftrace_filter_buf, 1); | ||
2412 | if (ftrace_notrace_buf[0]) | ||
2413 | set_ftrace_early_filter(ftrace_notrace_buf, 0); | ||
2414 | } | ||
2415 | |||
2372 | static int | 2416 | static int |
2373 | ftrace_regex_release(struct inode *inode, struct file *file, int enable) | 2417 | ftrace_regex_release(struct inode *inode, struct file *file, int enable) |
2374 | { | 2418 | { |
@@ -2829,6 +2873,8 @@ void __init ftrace_init(void) | |||
2829 | if (ret) | 2873 | if (ret) |
2830 | pr_warning("Failed to register trace ftrace module notifier\n"); | 2874 | pr_warning("Failed to register trace ftrace module notifier\n"); |
2831 | 2875 | ||
2876 | set_ftrace_early_filters(); | ||
2877 | |||
2832 | return; | 2878 | return; |
2833 | failed: | 2879 | failed: |
2834 | ftrace_disabled = 1; | 2880 | ftrace_disabled = 1; |
@@ -3172,12 +3218,12 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) | |||
3172 | } | 3218 | } |
3173 | 3219 | ||
3174 | if (t->ret_stack == NULL) { | 3220 | if (t->ret_stack == NULL) { |
3175 | t->curr_ret_stack = -1; | ||
3176 | /* Make sure IRQs see the -1 first: */ | ||
3177 | barrier(); | ||
3178 | t->ret_stack = ret_stack_list[start++]; | ||
3179 | atomic_set(&t->tracing_graph_pause, 0); | 3221 | atomic_set(&t->tracing_graph_pause, 0); |
3180 | atomic_set(&t->trace_overrun, 0); | 3222 | atomic_set(&t->trace_overrun, 0); |
3223 | t->curr_ret_stack = -1; | ||
3224 | /* Make sure the tasks see the -1 first: */ | ||
3225 | smp_wmb(); | ||
3226 | t->ret_stack = ret_stack_list[start++]; | ||
3181 | } | 3227 | } |
3182 | } while_each_thread(g, t); | 3228 | } while_each_thread(g, t); |
3183 | 3229 | ||
@@ -3235,8 +3281,10 @@ static int start_graph_tracing(void) | |||
3235 | return -ENOMEM; | 3281 | return -ENOMEM; |
3236 | 3282 | ||
3237 | /* The cpu_boot init_task->ret_stack will never be freed */ | 3283 | /* The cpu_boot init_task->ret_stack will never be freed */ |
3238 | for_each_online_cpu(cpu) | 3284 | for_each_online_cpu(cpu) { |
3239 | ftrace_graph_init_task(idle_task(cpu)); | 3285 | if (!idle_task(cpu)->ret_stack) |
3286 | ftrace_graph_init_task(idle_task(cpu)); | ||
3287 | } | ||
3240 | 3288 | ||
3241 | do { | 3289 | do { |
3242 | ret = alloc_retstack_tasklist(ret_stack_list); | 3290 | ret = alloc_retstack_tasklist(ret_stack_list); |
@@ -3328,18 +3376,25 @@ void unregister_ftrace_graph(void) | |||
3328 | /* Allocate a return stack for newly created task */ | 3376 | /* Allocate a return stack for newly created task */ |
3329 | void ftrace_graph_init_task(struct task_struct *t) | 3377 | void ftrace_graph_init_task(struct task_struct *t) |
3330 | { | 3378 | { |
3379 | /* Make sure we do not use the parent ret_stack */ | ||
3380 | t->ret_stack = NULL; | ||
3381 | |||
3331 | if (ftrace_graph_active) { | 3382 | if (ftrace_graph_active) { |
3332 | t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH | 3383 | struct ftrace_ret_stack *ret_stack; |
3384 | |||
3385 | ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH | ||
3333 | * sizeof(struct ftrace_ret_stack), | 3386 | * sizeof(struct ftrace_ret_stack), |
3334 | GFP_KERNEL); | 3387 | GFP_KERNEL); |
3335 | if (!t->ret_stack) | 3388 | if (!ret_stack) |
3336 | return; | 3389 | return; |
3337 | t->curr_ret_stack = -1; | 3390 | t->curr_ret_stack = -1; |
3338 | atomic_set(&t->tracing_graph_pause, 0); | 3391 | atomic_set(&t->tracing_graph_pause, 0); |
3339 | atomic_set(&t->trace_overrun, 0); | 3392 | atomic_set(&t->trace_overrun, 0); |
3340 | t->ftrace_timestamp = 0; | 3393 | t->ftrace_timestamp = 0; |
3341 | } else | 3394 | /* make curr_ret_stack visible before we add the ret_stack */ |
3342 | t->ret_stack = NULL; | 3395 | smp_wmb(); |
3396 | t->ret_stack = ret_stack; | ||
3397 | } | ||
3343 | } | 3398 | } |
3344 | 3399 | ||
3345 | void ftrace_graph_exit_task(struct task_struct *t) | 3400 | void ftrace_graph_exit_task(struct task_struct *t) |
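The reordering in alloc_retstack_tasklist() and ftrace_graph_init_task() above, together with the smp_rmb() added to ftrace_push_return_trace() further down, forms a standard publish/consume pattern: initialize every field, issue a write barrier, then publish the pointer; the reader tests the pointer, issues a read barrier, then trusts the fields. A userspace analogue using C11 release/acquire atomics in place of the kernel barriers (all names here are illustrative):

	#include <stdatomic.h>
	#include <stddef.h>

	struct ret_stack_model {
		int curr_ret_stack;
		int trace_overrun;
	};

	static struct ret_stack_model storage;
	static _Atomic(struct ret_stack_model *) published;	/* starts NULL */

	/* Writer: initialize first, publish last. The kernel code uses
	 * smp_wmb() plus a plain store instead of a release store. */
	static void publish(void)
	{
		storage.curr_ret_stack = -1;
		storage.trace_overrun = 0;
		atomic_store_explicit(&published, &storage, memory_order_release);
	}

	/* Reader: test the pointer before touching anything else, as the
	 * smp_rmb() comment in trace_functions_graph.c below insists. */
	static int reader_sees_initialized(void)
	{
		struct ret_stack_model *s =
			atomic_load_explicit(&published, memory_order_acquire);

		if (!s)
			return 0;	/* not published yet */
		return s->curr_ret_stack == -1;
	}

	int main(void)
	{
		publish();
		return !reader_sees_initialized();	/* exits 0 on success */
	}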
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 16b24d49604c..dc4dc70171ce 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/debugfs.h> | 10 | #include <linux/debugfs.h> |
11 | #include <linux/uaccess.h> | 11 | #include <linux/uaccess.h> |
12 | #include <linux/hardirq.h> | 12 | #include <linux/hardirq.h> |
13 | #include <linux/kmemcheck.h> | ||
13 | #include <linux/module.h> | 14 | #include <linux/module.h> |
14 | #include <linux/percpu.h> | 15 | #include <linux/percpu.h> |
15 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
@@ -370,6 +371,9 @@ static inline int test_time_stamp(u64 delta) | |||
370 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ | 371 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ |
371 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) | 372 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) |
372 | 373 | ||
374 | /* Max number of timestamps that can fit on a page */ | ||
375 | #define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_STAMP) | ||
376 | |||
373 | int ring_buffer_print_page_header(struct trace_seq *s) | 377 | int ring_buffer_print_page_header(struct trace_seq *s) |
374 | { | 378 | { |
375 | struct buffer_data_page field; | 379 | struct buffer_data_page field; |
@@ -423,6 +427,8 @@ struct ring_buffer { | |||
423 | atomic_t record_disabled; | 427 | atomic_t record_disabled; |
424 | cpumask_var_t cpumask; | 428 | cpumask_var_t cpumask; |
425 | 429 | ||
430 | struct lock_class_key *reader_lock_key; | ||
431 | |||
426 | struct mutex mutex; | 432 | struct mutex mutex; |
427 | 433 | ||
428 | struct ring_buffer_per_cpu **buffers; | 434 | struct ring_buffer_per_cpu **buffers; |
@@ -562,6 +568,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
562 | cpu_buffer->cpu = cpu; | 568 | cpu_buffer->cpu = cpu; |
563 | cpu_buffer->buffer = buffer; | 569 | cpu_buffer->buffer = buffer; |
564 | spin_lock_init(&cpu_buffer->reader_lock); | 570 | spin_lock_init(&cpu_buffer->reader_lock); |
571 | lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); | ||
565 | cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 572 | cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; |
566 | INIT_LIST_HEAD(&cpu_buffer->pages); | 573 | INIT_LIST_HEAD(&cpu_buffer->pages); |
567 | 574 | ||
@@ -632,7 +639,8 @@ static int rb_cpu_notify(struct notifier_block *self, | |||
632 | * when the buffer wraps. If this flag is not set, the buffer will | 639 | * when the buffer wraps. If this flag is not set, the buffer will |
633 | * drop data when the tail hits the head. | 640 | * drop data when the tail hits the head. |
634 | */ | 641 | */ |
635 | struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) | 642 | struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, |
643 | struct lock_class_key *key) | ||
636 | { | 644 | { |
637 | struct ring_buffer *buffer; | 645 | struct ring_buffer *buffer; |
638 | int bsize; | 646 | int bsize; |
@@ -655,6 +663,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) | |||
655 | buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); | 663 | buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); |
656 | buffer->flags = flags; | 664 | buffer->flags = flags; |
657 | buffer->clock = trace_clock_local; | 665 | buffer->clock = trace_clock_local; |
666 | buffer->reader_lock_key = key; | ||
658 | 667 | ||
659 | /* need at least two pages */ | 668 | /* need at least two pages */ |
660 | if (buffer->pages == 1) | 669 | if (buffer->pages == 1) |
@@ -712,7 +721,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) | |||
712 | kfree(buffer); | 721 | kfree(buffer); |
713 | return NULL; | 722 | return NULL; |
714 | } | 723 | } |
715 | EXPORT_SYMBOL_GPL(ring_buffer_alloc); | 724 | EXPORT_SYMBOL_GPL(__ring_buffer_alloc); |
716 | 725 | ||
717 | /** | 726 | /** |
718 | * ring_buffer_free - free a ring buffer. | 727 | * ring_buffer_free - free a ring buffer. |
@@ -1262,6 +1271,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1262 | if (tail < BUF_PAGE_SIZE) { | 1271 | if (tail < BUF_PAGE_SIZE) { |
1263 | /* Mark the rest of the page with padding */ | 1272 | /* Mark the rest of the page with padding */ |
1264 | event = __rb_page_index(tail_page, tail); | 1273 | event = __rb_page_index(tail_page, tail); |
1274 | kmemcheck_annotate_bitfield(event, bitfield); | ||
1265 | rb_event_set_padding(event); | 1275 | rb_event_set_padding(event); |
1266 | } | 1276 | } |
1267 | 1277 | ||
@@ -1319,6 +1329,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1319 | return NULL; | 1329 | return NULL; |
1320 | 1330 | ||
1321 | event = __rb_page_index(tail_page, tail); | 1331 | event = __rb_page_index(tail_page, tail); |
1332 | kmemcheck_annotate_bitfield(event, bitfield); | ||
1322 | rb_update_event(event, type, length); | 1333 | rb_update_event(event, type, length); |
1323 | 1334 | ||
1324 | /* The passed in type is zero for DATA */ | 1335 | /* The passed in type is zero for DATA */ |
@@ -1335,6 +1346,38 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1335 | return event; | 1346 | return event; |
1336 | } | 1347 | } |
1337 | 1348 | ||
1349 | static inline int | ||
1350 | rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | ||
1351 | struct ring_buffer_event *event) | ||
1352 | { | ||
1353 | unsigned long new_index, old_index; | ||
1354 | struct buffer_page *bpage; | ||
1355 | unsigned long index; | ||
1356 | unsigned long addr; | ||
1357 | |||
1358 | new_index = rb_event_index(event); | ||
1359 | old_index = new_index + rb_event_length(event); | ||
1360 | addr = (unsigned long)event; | ||
1361 | addr &= PAGE_MASK; | ||
1362 | |||
1363 | bpage = cpu_buffer->tail_page; | ||
1364 | |||
1365 | if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { | ||
1366 | /* | ||
1367 | * This is on the tail page. It is possible that | ||
1368 | * a write could come in and move the tail page | ||
1369 | * and write to the next page. That is fine | ||
1370 | * because we just shorten what is on this page. | ||
1371 | */ | ||
1372 | index = local_cmpxchg(&bpage->write, old_index, new_index); | ||
1373 | if (index == old_index) | ||
1374 | return 1; | ||
1375 | } | ||
1376 | |||
1377 | /* could not discard */ | ||
1378 | return 0; | ||
1379 | } | ||
1380 | |||
1338 | static int | 1381 | static int |
1339 | rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | 1382 | rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, |
1340 | u64 *ts, u64 *delta) | 1383 | u64 *ts, u64 *delta) |
@@ -1377,16 +1420,23 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
1377 | event->array[0] = *delta >> TS_SHIFT; | 1420 | event->array[0] = *delta >> TS_SHIFT; |
1378 | } else { | 1421 | } else { |
1379 | cpu_buffer->commit_page->page->time_stamp = *ts; | 1422 | cpu_buffer->commit_page->page->time_stamp = *ts; |
1380 | event->time_delta = 0; | 1423 | /* try to discard, since we do not need this */ |
1381 | event->array[0] = 0; | 1424 | if (!rb_try_to_discard(cpu_buffer, event)) { |
1425 | /* nope, just zero it */ | ||
1426 | event->time_delta = 0; | ||
1427 | event->array[0] = 0; | ||
1428 | } | ||
1382 | } | 1429 | } |
1383 | cpu_buffer->write_stamp = *ts; | 1430 | cpu_buffer->write_stamp = *ts; |
1384 | /* let the caller know this was the commit */ | 1431 | /* let the caller know this was the commit */ |
1385 | ret = 1; | 1432 | ret = 1; |
1386 | } else { | 1433 | } else { |
1387 | /* Darn, this is just wasted space */ | 1434 | /* Try to discard the event */ |
1388 | event->time_delta = 0; | 1435 | if (!rb_try_to_discard(cpu_buffer, event)) { |
1389 | event->array[0] = 0; | 1436 | /* Darn, this is just wasted space */ |
1437 | event->time_delta = 0; | ||
1438 | event->array[0] = 0; | ||
1439 | } | ||
1390 | ret = 0; | 1440 | ret = 0; |
1391 | } | 1441 | } |
1392 | 1442 | ||
@@ -1682,10 +1732,6 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, | |||
1682 | struct ring_buffer_event *event) | 1732 | struct ring_buffer_event *event) |
1683 | { | 1733 | { |
1684 | struct ring_buffer_per_cpu *cpu_buffer; | 1734 | struct ring_buffer_per_cpu *cpu_buffer; |
1685 | unsigned long new_index, old_index; | ||
1686 | struct buffer_page *bpage; | ||
1687 | unsigned long index; | ||
1688 | unsigned long addr; | ||
1689 | int cpu; | 1735 | int cpu; |
1690 | 1736 | ||
1691 | /* The event is discarded regardless */ | 1737 | /* The event is discarded regardless */ |
@@ -1701,24 +1747,8 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, | |||
1701 | cpu = smp_processor_id(); | 1747 | cpu = smp_processor_id(); |
1702 | cpu_buffer = buffer->buffers[cpu]; | 1748 | cpu_buffer = buffer->buffers[cpu]; |
1703 | 1749 | ||
1704 | new_index = rb_event_index(event); | 1750 | if (!rb_try_to_discard(cpu_buffer, event)) |
1705 | old_index = new_index + rb_event_length(event); | 1751 | goto out; |
1706 | addr = (unsigned long)event; | ||
1707 | addr &= PAGE_MASK; | ||
1708 | |||
1709 | bpage = cpu_buffer->tail_page; | ||
1710 | |||
1711 | if (bpage == (void *)addr && rb_page_write(bpage) == old_index) { | ||
1712 | /* | ||
1713 | * This is on the tail page. It is possible that | ||
1714 | * a write could come in and move the tail page | ||
1715 | * and write to the next page. That is fine | ||
1716 | * because we just shorten what is on this page. | ||
1717 | */ | ||
1718 | index = local_cmpxchg(&bpage->write, old_index, new_index); | ||
1719 | if (index == old_index) | ||
1720 | goto out; | ||
1721 | } | ||
1722 | 1752 | ||
1723 | /* | 1753 | /* |
1724 | * The commit is still visible by the reader, so we | 1754 | * The commit is still visible by the reader, so we |
@@ -2253,8 +2283,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) | |||
2253 | * Check if we are at the end of the buffer. | 2283 | * Check if we are at the end of the buffer. |
2254 | */ | 2284 | */ |
2255 | if (iter->head >= rb_page_size(iter->head_page)) { | 2285 | if (iter->head >= rb_page_size(iter->head_page)) { |
2256 | if (RB_WARN_ON(buffer, | 2286 | /* discarded commits can make the page empty */ |
2257 | iter->head_page == cpu_buffer->commit_page)) | 2287 | if (iter->head_page == cpu_buffer->commit_page) |
2258 | return; | 2288 | return; |
2259 | rb_inc_iter(iter); | 2289 | rb_inc_iter(iter); |
2260 | return; | 2290 | return; |
@@ -2297,12 +2327,10 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2297 | /* | 2327 | /* |
2298 | * We repeat when a timestamp is encountered. It is possible | 2328 | * We repeat when a timestamp is encountered. It is possible |
2299 | * to get multiple timestamps from an interrupt entering just | 2329 | * to get multiple timestamps from an interrupt entering just |
2300 | * as one timestamp is about to be written. The max times | 2330 | * as one timestamp is about to be written, or from discarded |
2301 | * that this can happen is the number of nested interrupts we | 2331 | * commits. The most that we can have is the number on a single page. |
2302 | * can have. Nesting 10 deep of interrupts is clearly | ||
2303 | * an anomaly. | ||
2304 | */ | 2332 | */ |
2305 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) | 2333 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) |
2306 | return NULL; | 2334 | return NULL; |
2307 | 2335 | ||
2308 | reader = rb_get_reader_page(cpu_buffer); | 2336 | reader = rb_get_reader_page(cpu_buffer); |
@@ -2368,14 +2396,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2368 | 2396 | ||
2369 | again: | 2397 | again: |
2370 | /* | 2398 | /* |
2371 | * We repeat when a timestamp is encountered. It is possible | 2399 | * We repeat when a timestamp is encountered. |
2372 | * to get multiple timestamps from an interrupt entering just | 2400 | * We can get multiple timestamps by nested interrupts or also |
2373 | * as one timestamp is about to be written. The max times | 2401 | * if filtering is on (discarding commits). Since discarding |
2374 | * that this can happen is the number of nested interrupts we | 2402 | * commits can be frequent we can get a lot of timestamps. |
2375 | * can have. Nesting 10 deep of interrupts is clearly | 2403 | * But we limit them by not adding timestamps if they begin |
2376 | * an anomaly. | 2404 | * at the start of a page. |
2377 | */ | 2405 | */ |
2378 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) | 2406 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) |
2379 | return NULL; | 2407 | return NULL; |
2380 | 2408 | ||
2381 | if (rb_per_cpu_empty(cpu_buffer)) | 2409 | if (rb_per_cpu_empty(cpu_buffer)) |
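rb_try_to_discard(), factored out above, reclaims an event's space only while the event is still the last thing written on the tail page: a compare-and-exchange rewinds the page's write index over the event, and if another writer raced past, the cmpxchg fails and the caller just zeroes the event in place. A userspace model of the rewind, using a C11 atomic where the kernel uses local_cmpxchg() on a per-CPU index:

	#include <stdatomic.h>
	#include <stdio.h>

	static _Atomic unsigned long write_index;	/* bytes used on the page */

	/* Try to give back an event that starts at 'start' and is 'len'
	 * bytes long. Succeeds only if nothing was written after it. */
	static int try_to_discard(unsigned long start, unsigned long len)
	{
		unsigned long expected = start + len;	/* index just past the event */

		return atomic_compare_exchange_strong(&write_index,
						      &expected, start);
	}

	int main(void)
	{
		/* Reserve a 16-byte event at offset 0... */
		atomic_store(&write_index, 16);

		/* ...then discard it: the index rewinds to 0. */
		printf("discarded=%d index=%lu\n",
		       try_to_discard(0, 16),
		       atomic_load(&write_index));
		return 0;
	}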
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a3a8a87d7e91..c1878bfb2e1e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -344,7 +344,7 @@ static raw_spinlock_t ftrace_max_lock = | |||
344 | /* | 344 | /* |
345 | * Copy the new maximum trace into the separate maximum-trace | 345 | * Copy the new maximum trace into the separate maximum-trace |
346 | * structure. (this way the maximum trace is permanently saved, | 346 | * structure. (this way the maximum trace is permanently saved, |
347 | * for later retrieval via /debugfs/tracing/latency_trace) | 347 | * for later retrieval via /sys/kernel/debug/tracing/latency_trace) |
348 | */ | 348 | */ |
349 | static void | 349 | static void |
350 | __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) | 350 | __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) |
@@ -2414,21 +2414,20 @@ static const struct file_operations tracing_iter_fops = { | |||
2414 | 2414 | ||
2415 | static const char readme_msg[] = | 2415 | static const char readme_msg[] = |
2416 | "tracing mini-HOWTO:\n\n" | 2416 | "tracing mini-HOWTO:\n\n" |
2417 | "# mkdir /debug\n" | 2417 | "# mount -t debugfs nodev /sys/kernel/debug\n\n" |
2418 | "# mount -t debugfs nodev /debug\n\n" | 2418 | "# cat /sys/kernel/debug/tracing/available_tracers\n" |
2419 | "# cat /debug/tracing/available_tracers\n" | ||
2420 | "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n" | 2419 | "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n" |
2421 | "# cat /debug/tracing/current_tracer\n" | 2420 | "# cat /sys/kernel/debug/tracing/current_tracer\n" |
2422 | "nop\n" | 2421 | "nop\n" |
2423 | "# echo sched_switch > /debug/tracing/current_tracer\n" | 2422 | "# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n" |
2424 | "# cat /debug/tracing/current_tracer\n" | 2423 | "# cat /sys/kernel/debug/tracing/current_tracer\n" |
2425 | "sched_switch\n" | 2424 | "sched_switch\n" |
2426 | "# cat /debug/tracing/trace_options\n" | 2425 | "# cat /sys/kernel/debug/tracing/trace_options\n" |
2427 | "noprint-parent nosym-offset nosym-addr noverbose\n" | 2426 | "noprint-parent nosym-offset nosym-addr noverbose\n" |
2428 | "# echo print-parent > /debug/tracing/trace_options\n" | 2427 | "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" |
2429 | "# echo 1 > /debug/tracing/tracing_enabled\n" | 2428 | "# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n" |
2430 | "# cat /debug/tracing/trace > /tmp/trace.txt\n" | 2429 | "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n" |
2431 | "echo 0 > /debug/tracing/tracing_enabled\n" | 2430 | "# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n" |
2432 | ; | 2431 | ; |
2433 | 2432 | ||
2434 | static ssize_t | 2433 | static ssize_t |
@@ -2826,6 +2825,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
2826 | /* trace pipe does not show start of buffer */ | 2825 | /* trace pipe does not show start of buffer */ |
2827 | cpumask_setall(iter->started); | 2826 | cpumask_setall(iter->started); |
2828 | 2827 | ||
2828 | if (trace_flags & TRACE_ITER_LATENCY_FMT) | ||
2829 | iter->iter_flags |= TRACE_FILE_LAT_FMT; | ||
2830 | |||
2829 | iter->cpu_file = cpu_file; | 2831 | iter->cpu_file = cpu_file; |
2830 | iter->tr = &global_trace; | 2832 | iter->tr = &global_trace; |
2831 | mutex_init(&iter->mutex); | 2833 | mutex_init(&iter->mutex); |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6c81f9c21426..aa08be69a1b6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1050,12 +1050,13 @@ static void trace_module_remove_events(struct module *mod) | |||
1050 | struct ftrace_event_call *call, *p; | 1050 | struct ftrace_event_call *call, *p; |
1051 | bool found = false; | 1051 | bool found = false; |
1052 | 1052 | ||
1053 | down_write(&trace_event_mutex); | ||
1053 | list_for_each_entry_safe(call, p, &ftrace_events, list) { | 1054 | list_for_each_entry_safe(call, p, &ftrace_events, list) { |
1054 | if (call->mod == mod) { | 1055 | if (call->mod == mod) { |
1055 | found = true; | 1056 | found = true; |
1056 | ftrace_event_enable_disable(call, 0); | 1057 | ftrace_event_enable_disable(call, 0); |
1057 | if (call->event) | 1058 | if (call->event) |
1058 | unregister_ftrace_event(call->event); | 1059 | __unregister_ftrace_event(call->event); |
1059 | debugfs_remove_recursive(call->dir); | 1060 | debugfs_remove_recursive(call->dir); |
1060 | list_del(&call->list); | 1061 | list_del(&call->list); |
1061 | trace_destroy_fields(call); | 1062 | trace_destroy_fields(call); |
@@ -1079,6 +1080,7 @@ static void trace_module_remove_events(struct module *mod) | |||
1079 | */ | 1080 | */ |
1080 | if (found) | 1081 | if (found) |
1081 | tracing_reset_current_online_cpus(); | 1082 | tracing_reset_current_online_cpus(); |
1083 | up_write(&trace_event_mutex); | ||
1082 | } | 1084 | } |
1083 | 1085 | ||
1084 | static int trace_module_notify(struct notifier_block *self, | 1086 | static int trace_module_notify(struct notifier_block *self, |
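The locking change above is the classic locked/unlocked helper split: trace_event_mutex is now taken for write once around the whole module-removal loop, so the loop body must call __unregister_ftrace_event(), which assumes the lock is already held; calling the public unregister_ftrace_event(), which takes the lock itself, would deadlock. A small userspace sketch of the convention with a pthread rwlock (the struct and names are illustrative):

	#include <pthread.h>
	#include <stdio.h>

	struct event { int registered; };

	static pthread_rwlock_t event_lock = PTHREAD_RWLOCK_INITIALIZER;

	/* Double-underscore variant: caller already holds event_lock
	 * for writing. */
	static void __unregister_event(struct event *ev)
	{
		ev->registered = 0;
	}

	/* Public variant takes the lock itself. */
	static void unregister_event(struct event *ev)
	{
		pthread_rwlock_wrlock(&event_lock);
		__unregister_event(ev);
		pthread_rwlock_unlock(&event_lock);
	}

	int main(void)
	{
		struct event a = { 1 }, b = { 1 };

		/* Normal path. */
		unregister_event(&a);

		/* Batch path, as in trace_module_remove_events(): one
		 * write lock around the loop, __ variant inside. */
		pthread_rwlock_wrlock(&event_lock);
		__unregister_event(&b);
		pthread_rwlock_unlock(&event_lock);

		printf("a=%d b=%d\n", a.registered, b.registered);
		return 0;
	}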
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index a7430b16d243..db6e54bdb596 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -478,12 +478,12 @@ enum { | |||
478 | 478 | ||
479 | static int is_string_field(const char *type) | 479 | static int is_string_field(const char *type) |
480 | { | 480 | { |
481 | if (strstr(type, "__data_loc") && strstr(type, "char")) | ||
482 | return FILTER_DYN_STRING; | ||
483 | |||
481 | if (strchr(type, '[') && strstr(type, "char")) | 484 | if (strchr(type, '[') && strstr(type, "char")) |
482 | return FILTER_STATIC_STRING; | 485 | return FILTER_STATIC_STRING; |
483 | 486 | ||
484 | if (!strcmp(type, "__str_loc")) | ||
485 | return FILTER_DYN_STRING; | ||
486 | |||
487 | return 0; | 487 | return 0; |
488 | } | 488 | } |
489 | 489 | ||
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 10f6ad7d85f6..8b592418d8b2 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -65,6 +65,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) | |||
65 | if (!current->ret_stack) | 65 | if (!current->ret_stack) |
66 | return -EBUSY; | 66 | return -EBUSY; |
67 | 67 | ||
68 | /* | ||
69 | * We must make sure the ret_stack is tested before we read | ||
70 | * anything else. | ||
71 | */ | ||
72 | smp_rmb(); | ||
73 | |||
68 | /* The return trace stack is full */ | 74 | /* The return trace stack is full */ |
69 | if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { | 75 | if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { |
70 | atomic_inc(¤t->trace_overrun); | 76 | atomic_inc(¤t->trace_overrun); |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index c12d95db2f56..7938f3ae93e3 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -14,9 +14,10 @@ | |||
14 | /* must be a power of 2 */ | 14 | /* must be a power of 2 */ |
15 | #define EVENT_HASHSIZE 128 | 15 | #define EVENT_HASHSIZE 128 |
16 | 16 | ||
17 | static DECLARE_RWSEM(trace_event_mutex); | 17 | DECLARE_RWSEM(trace_event_mutex); |
18 | 18 | ||
19 | DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq); | 19 | DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq); |
20 | EXPORT_PER_CPU_SYMBOL(ftrace_event_seq); | ||
20 | 21 | ||
21 | static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; | 22 | static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; |
22 | 23 | ||
@@ -99,6 +100,38 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) | |||
99 | } | 100 | } |
100 | EXPORT_SYMBOL_GPL(trace_seq_printf); | 101 | EXPORT_SYMBOL_GPL(trace_seq_printf); |
101 | 102 | ||
103 | /** | ||
104 | * trace_seq_vprintf - sequence printing of trace information | ||
105 | * @s: trace sequence descriptor | ||
106 | * @fmt: printf format string | ||
107 | * | ||
108 | * The tracer may use either sequence operations or its own | ||
109 | * copy to user routines. To simplify formatting of a trace, | ||
110 | * trace_seq_printf is used to store strings into a special | ||
111 | * buffer (@s). Then the output may be either used by | ||
112 | * the sequencer or pulled into another buffer. | ||
113 | */ | ||
114 | int | ||
115 | trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) | ||
116 | { | ||
117 | int len = (PAGE_SIZE - 1) - s->len; | ||
118 | int ret; | ||
119 | |||
120 | if (!len) | ||
121 | return 0; | ||
122 | |||
123 | ret = vsnprintf(s->buffer + s->len, len, fmt, args); | ||
124 | |||
125 | /* If we can't write it all, don't bother writing anything */ | ||
126 | if (ret >= len) | ||
127 | return 0; | ||
128 | |||
129 | s->len += ret; | ||
130 | |||
131 | return len; | ||
132 | } | ||
133 | EXPORT_SYMBOL_GPL(trace_seq_vprintf); | ||
134 | |||
102 | int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) | 135 | int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) |
103 | { | 136 | { |
104 | int len = (PAGE_SIZE - 1) - s->len; | 137 | int len = (PAGE_SIZE - 1) - s->len; |
@@ -222,10 +255,9 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, | |||
222 | { | 255 | { |
223 | unsigned long mask; | 256 | unsigned long mask; |
224 | const char *str; | 257 | const char *str; |
258 | const char *ret = p->buffer + p->len; | ||
225 | int i; | 259 | int i; |
226 | 260 | ||
227 | trace_seq_init(p); | ||
228 | |||
229 | for (i = 0; flag_array[i].name && flags; i++) { | 261 | for (i = 0; flag_array[i].name && flags; i++) { |
230 | 262 | ||
231 | mask = flag_array[i].mask; | 263 | mask = flag_array[i].mask; |
@@ -248,16 +280,16 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, | |||
248 | 280 | ||
249 | trace_seq_putc(p, 0); | 281 | trace_seq_putc(p, 0); |
250 | 282 | ||
251 | return p->buffer; | 283 | return ret; |
252 | } | 284 | } |
285 | EXPORT_SYMBOL(ftrace_print_flags_seq); | ||
253 | 286 | ||
254 | const char * | 287 | const char * |
255 | ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, | 288 | ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, |
256 | const struct trace_print_flags *symbol_array) | 289 | const struct trace_print_flags *symbol_array) |
257 | { | 290 | { |
258 | int i; | 291 | int i; |
259 | 292 | const char *ret = p->buffer + p->len; | |
260 | trace_seq_init(p); | ||
261 | 293 | ||
262 | for (i = 0; symbol_array[i].name; i++) { | 294 | for (i = 0; symbol_array[i].name; i++) { |
263 | 295 | ||
@@ -273,8 +305,9 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, | |||
273 | 305 | ||
274 | trace_seq_putc(p, 0); | 306 | trace_seq_putc(p, 0); |
275 | 307 | ||
276 | return p->buffer; | 308 | return ret; |
277 | } | 309 | } |
310 | EXPORT_SYMBOL(ftrace_print_symbols_seq); | ||
278 | 311 | ||
279 | #ifdef CONFIG_KRETPROBES | 312 | #ifdef CONFIG_KRETPROBES |
280 | static inline const char *kretprobed(const char *name) | 313 | static inline const char *kretprobed(const char *name) |
@@ -386,17 +419,20 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, | |||
386 | 419 | ||
387 | if (ip == ULONG_MAX || !ret) | 420 | if (ip == ULONG_MAX || !ret) |
388 | break; | 421 | break; |
389 | if (i && ret) | 422 | if (ret) |
390 | ret = trace_seq_puts(s, " <- "); | 423 | ret = trace_seq_puts(s, " => "); |
391 | if (!ip) { | 424 | if (!ip) { |
392 | if (ret) | 425 | if (ret) |
393 | ret = trace_seq_puts(s, "??"); | 426 | ret = trace_seq_puts(s, "??"); |
427 | if (ret) | ||
428 | ret = trace_seq_puts(s, "\n"); | ||
394 | continue; | 429 | continue; |
395 | } | 430 | } |
396 | if (!ret) | 431 | if (!ret) |
397 | break; | 432 | break; |
398 | if (ret) | 433 | if (ret) |
399 | ret = seq_print_user_ip(s, mm, ip, sym_flags); | 434 | ret = seq_print_user_ip(s, mm, ip, sym_flags); |
435 | ret = trace_seq_puts(s, "\n"); | ||
400 | } | 436 | } |
401 | 437 | ||
402 | if (mm) | 438 | if (mm) |
@@ -666,6 +702,16 @@ int register_ftrace_event(struct trace_event *event) | |||
666 | } | 702 | } |
667 | EXPORT_SYMBOL_GPL(register_ftrace_event); | 703 | EXPORT_SYMBOL_GPL(register_ftrace_event); |
668 | 704 | ||
705 | /* | ||
706 | * Used by module code with the trace_event_mutex held for write. | ||
707 | */ | ||
708 | int __unregister_ftrace_event(struct trace_event *event) | ||
709 | { | ||
710 | hlist_del(&event->node); | ||
711 | list_del(&event->list); | ||
712 | return 0; | ||
713 | } | ||
714 | |||
669 | /** | 715 | /** |
670 | * unregister_ftrace_event - remove a no longer used event | 716 | * unregister_ftrace_event - remove a no longer used event |
671 | * @event: the event to remove | 717 | * @event: the event to remove |
@@ -673,8 +719,7 @@ EXPORT_SYMBOL_GPL(register_ftrace_event); | |||
673 | int unregister_ftrace_event(struct trace_event *event) | 719 | int unregister_ftrace_event(struct trace_event *event) |
674 | { | 720 | { |
675 | down_write(&trace_event_mutex); | 721 | down_write(&trace_event_mutex); |
676 | hlist_del(&event->node); | 722 | __unregister_ftrace_event(event); |
677 | list_del(&event->list); | ||
678 | up_write(&trace_event_mutex); | 723 | up_write(&trace_event_mutex); |
679 | 724 | ||
680 | return 0; | 725 | return 0; |
@@ -972,16 +1017,16 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, | |||
972 | 1017 | ||
973 | trace_assign_type(field, iter->ent); | 1018 | trace_assign_type(field, iter->ent); |
974 | 1019 | ||
1020 | if (!trace_seq_puts(s, "<stack trace>\n")) | ||
1021 | goto partial; | ||
975 | for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { | 1022 | for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { |
976 | if (!field->caller[i]) | 1023 | if (!field->caller[i] || (field->caller[i] == ULONG_MAX)) |
977 | break; | 1024 | break; |
978 | if (i) { | 1025 | if (!trace_seq_puts(s, " => ")) |
979 | if (!trace_seq_puts(s, " <= ")) | 1026 | goto partial; |
980 | goto partial; | ||
981 | 1027 | ||
982 | if (!seq_print_ip_sym(s, field->caller[i], flags)) | 1028 | if (!seq_print_ip_sym(s, field->caller[i], flags)) |
983 | goto partial; | 1029 | goto partial; |
984 | } | ||
985 | if (!trace_seq_puts(s, "\n")) | 1030 | if (!trace_seq_puts(s, "\n")) |
986 | goto partial; | 1031 | goto partial; |
987 | } | 1032 | } |
@@ -1009,10 +1054,10 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, | |||
1009 | 1054 | ||
1010 | trace_assign_type(field, iter->ent); | 1055 | trace_assign_type(field, iter->ent); |
1011 | 1056 | ||
1012 | if (!seq_print_userip_objs(field, s, flags)) | 1057 | if (!trace_seq_puts(s, "<user stack trace>\n")) |
1013 | goto partial; | 1058 | goto partial; |
1014 | 1059 | ||
1015 | if (!trace_seq_putc(s, '\n')) | 1060 | if (!seq_print_userip_objs(field, s, flags)) |
1016 | goto partial; | 1061 | goto partial; |
1017 | 1062 | ||
1018 | return TRACE_TYPE_HANDLED; | 1063 | return TRACE_TYPE_HANDLED; |
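trace_seq_vprintf() above deliberately refuses partial output: vsnprintf() reports how long the fully formatted string would be, and if that does not fit in what remains of the page-sized buffer, the committed length is left untouched so the truncated bytes are never exposed. A minimal userspace model of the same all-or-nothing policy (buffer size and names are illustrative):

	#include <stdarg.h>
	#include <stdio.h>

	#define SEQ_BUF_SIZE 64		/* stand-in for PAGE_SIZE */

	struct seq_model {
		char buffer[SEQ_BUF_SIZE];
		int len;
	};

	/* Append formatted text, or nothing at all if it cannot fit.
	 * Mirrors the kernel helper: returns the space that was
	 * available on success, 0 on failure. */
	static int seq_printf_model(struct seq_model *s, const char *fmt, ...)
	{
		int avail = (SEQ_BUF_SIZE - 1) - s->len;
		va_list args;
		int ret;

		if (!avail)
			return 0;

		va_start(args, fmt);
		ret = vsnprintf(s->buffer + s->len, avail, fmt, args);
		va_end(args);

		/* vsnprintf returns the would-be length; anything >= avail
		 * means truncation, so do not advance the committed length. */
		if (ret >= avail)
			return 0;

		s->len += ret;
		return avail;
	}

	int main(void)
	{
		struct seq_model s = { .len = 0 };

		seq_printf_model(&s, "pid=%d comm=%s", 1, "init");
		printf("%s (len=%d)\n", s.buffer, s.len);
		return 0;
	}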
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index ac240e76eb01..d38bec4a9c30 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -27,6 +27,10 @@ extern struct trace_event *ftrace_find_event(int type); | |||
27 | extern enum print_line_t trace_nop_print(struct trace_iterator *iter, | 27 | extern enum print_line_t trace_nop_print(struct trace_iterator *iter, |
28 | int flags); | 28 | int flags); |
29 | 29 | ||
30 | /* used by module unregistering */ | ||
31 | extern int __unregister_ftrace_event(struct trace_event *event); | ||
32 | extern struct rw_semaphore trace_event_mutex; | ||
33 | |||
30 | #define MAX_MEMHEX_BYTES 8 | 34 | #define MAX_MEMHEX_BYTES 8 |
31 | #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) | 35 | #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) |
32 | 36 | ||
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 1796f00524e1..2d7aebd71dbd 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -265,7 +265,7 @@ static int t_show(struct seq_file *m, void *v) | |||
265 | seq_printf(m, " Depth Size Location" | 265 | seq_printf(m, " Depth Size Location" |
266 | " (%d entries)\n" | 266 | " (%d entries)\n" |
267 | " ----- ---- --------\n", | 267 | " ----- ---- --------\n", |
268 | max_stack_trace.nr_entries); | 268 | max_stack_trace.nr_entries - 1); |
269 | 269 | ||
270 | if (!stack_tracer_enabled && !max_stack_size) | 270 | if (!stack_tracer_enabled && !max_stack_size) |
271 | print_disabled(m); | 271 | print_disabled(m); |
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index e04b76cc238a..f6693969287d 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -203,7 +203,8 @@ static void start_stack_timer(void *unused) | |||
203 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 203 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
204 | hrtimer->function = stack_trace_timer_fn; | 204 | hrtimer->function = stack_trace_timer_fn; |
205 | 205 | ||
206 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); | 206 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), |
207 | HRTIMER_MODE_REL_PINNED); | ||
207 | } | 208 | } |
208 | 209 | ||
209 | static void start_stack_timers(void) | 210 | static void start_stack_timers(void) |