author		Ingo Molnar <mingo@elte.hu>	2009-06-17 06:52:15 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-06-17 06:56:49 -0400
commit		eadb8a091b27a840de7450f84ecff5ef13476424 (patch)
tree		58c3782d40def63baa8167f3d31e3048cb4c7660 /kernel/trace
parent		73874005cd8800440be4299bd095387fff4b90ac (diff)
parent		65795efbd380a832ae508b04dba8f8e53f0b84d9 (diff)
Merge branch 'linus' into tracing/hw-breakpoints
Conflicts:
	arch/x86/Kconfig
	arch/x86/kernel/traps.c
	arch/x86/power/cpu.c
	arch/x86/power/cpu_32.c
	kernel/Makefile

Semantic conflict:
	arch/x86/kernel/hw_breakpoint.c

Merge reason: Resolve the conflicts, move from put_cpu_no_sched() to
put_cpu() in arch/x86/kernel/hw_breakpoint.c.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/trace')
-rw-r--r--	kernel/trace/Kconfig			|  70
-rw-r--r--	kernel/trace/Makefile			|   5
-rw-r--r--	kernel/trace/blktrace.c			|  91
-rw-r--r--	kernel/trace/ftrace.c			|  79
-rw-r--r--	kernel/trace/ring_buffer.c		| 114
-rw-r--r--	kernel/trace/trace.c			|  26
-rw-r--r--	kernel/trace/trace_events.c		|   4
-rw-r--r--	kernel/trace/trace_events_filter.c	|   6
-rw-r--r--	kernel/trace/trace_functions_graph.c	|   6
-rw-r--r--	kernel/trace/trace_output.c		|  85
-rw-r--r--	kernel/trace/trace_output.h		|   4
-rw-r--r--	kernel/trace/trace_stack.c		|   2
-rw-r--r--	kernel/trace/trace_sysprof.c		|   3
13 files changed, 357 insertions(+), 138 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d7f01e6e8ba5..ae048a2dbbe8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -56,6 +56,13 @@ config CONTEXT_SWITCH_TRACER
 	select MARKERS
 	bool
 
+# All tracer options should select GENERIC_TRACER. For those options that are
+# enabled by all tracers (context switch and event tracer) they select TRACING.
+# This allows those options to appear when no other tracer is selected. But the
+# options do not appear when something else selects it. We need the two options
+# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
+# hidding of the automatic options options.
+
 config TRACING
 	bool
 	select DEBUG_FS
@@ -66,6 +73,10 @@ config TRACING
 	select BINARY_PRINTF
 	select EVENT_TRACING
 
+config GENERIC_TRACER
+	bool
+	select TRACING
+
 #
 # Minimum requirements an architecture has to meet for us to
 # be able to offer generic tracing facilities:
@@ -95,7 +106,7 @@ config FUNCTION_TRACER
 	depends on HAVE_FUNCTION_TRACER
 	select FRAME_POINTER
 	select KALLSYMS
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	help
 	  Enable the kernel to trace every kernel function. This is done
@@ -126,7 +137,7 @@ config IRQSOFF_TRACER
 	depends on TRACE_IRQFLAGS_SUPPORT
 	depends on GENERIC_TIME
 	select TRACE_IRQFLAGS
-	select TRACING
+	select GENERIC_TRACER
 	select TRACER_MAX_TRACE
 	help
 	  This option measures the time spent in irqs-off critical
@@ -136,7 +147,7 @@ config IRQSOFF_TRACER
 	  disabled by default and can be runtime (re-)started
 	  via:
 
-	      echo 0 > /debugfs/tracing/tracing_max_latency
+	      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 
 	  (Note that kernel size and overhead increases with this option
 	  enabled. This option and the preempt-off timing option can be
@@ -147,7 +158,7 @@ config PREEMPT_TRACER
 	default n
 	depends on GENERIC_TIME
 	depends on PREEMPT
-	select TRACING
+	select GENERIC_TRACER
 	select TRACER_MAX_TRACE
 	help
 	  This option measures the time spent in preemption off critical
@@ -157,7 +168,7 @@ config PREEMPT_TRACER
 	  disabled by default and can be runtime (re-)started
 	  via:
 
-	      echo 0 > /debugfs/tracing/tracing_max_latency
+	      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 
 	  (Note that kernel size and overhead increases with this option
 	  enabled. This option and the irqs-off timing option can be
@@ -166,7 +177,7 @@ config PREEMPT_TRACER
 config SYSPROF_TRACER
 	bool "Sysprof Tracer"
 	depends on X86
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	help
 	  This tracer provides the trace needed by the 'Sysprof' userspace
@@ -174,44 +185,33 @@ config SYSPROF_TRACER
 
 config SCHED_TRACER
 	bool "Scheduling Latency Tracer"
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	select TRACER_MAX_TRACE
 	help
 	  This tracer tracks the latency of the highest priority task
 	  to be scheduled in, starting from the point it has woken up.
 
-config ENABLE_CONTEXT_SWITCH_TRACER
-	bool "Trace process context switches"
-	select TRACING
-	select CONTEXT_SWITCH_TRACER
-	help
-	  This tracer gets called from the context switch and records
-	  all switching of tasks.
-
-config ENABLE_EVENT_TRACING
-	bool "Trace various events in the kernel"
+config ENABLE_DEFAULT_TRACERS
+	bool "Trace process context switches and events"
+	depends on !GENERIC_TRACER
 	select TRACING
 	help
 	  This tracer hooks to various trace points in the kernel
 	  allowing the user to pick and choose which trace point they
-	  want to trace.
-
-	  Note, all tracers enable event tracing. This option is
-	  only a convenience to enable event tracing when no other
-	  tracers are selected.
+	  want to trace. It also includes the sched_switch tracer plugin.
 
 config FTRACE_SYSCALLS
 	bool "Trace syscalls"
 	depends on HAVE_FTRACE_SYSCALLS
-	select TRACING
+	select GENERIC_TRACER
 	select KALLSYMS
 	help
 	  Basic tracer to catch the syscall entry and exit events.
 
 config BOOT_TRACER
 	bool "Trace boot initcalls"
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	help
 	  This tracer helps developers to optimize boot times: it records
@@ -228,7 +228,7 @@ config BOOT_TRACER
 
 config TRACE_BRANCH_PROFILING
 	bool
-	select TRACING
+	select GENERIC_TRACER
 
 choice
 	prompt "Branch Profiling"
@@ -261,7 +261,7 @@ config PROFILE_ANNOTATED_BRANCHES
 	  This tracer profiles all the the likely and unlikely macros
 	  in the kernel. It will display the results in:
 
-	  /debugfs/tracing/profile_annotated_branch
+	  /sys/kernel/debug/tracing/profile_annotated_branch
 
 	  Note: this will add a significant overhead, only turn this
 	  on if you need to profile the system's use of these macros.
@@ -274,7 +274,7 @@ config PROFILE_ALL_BRANCHES
 	  taken in the kernel is recorded whether it hit or miss.
 	  The results will be displayed in:
 
-	  /debugfs/tracing/profile_branch
+	  /sys/kernel/debug/tracing/profile_branch
 
 	  This option also enables the likely/unlikely profiler.
 
@@ -308,7 +308,7 @@ config BRANCH_TRACER
 config POWER_TRACER
 	bool "Trace power consumption behavior"
 	depends on X86
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  This tracer helps developers to analyze and optimize the kernels
 	  power management decisions, specifically the C-state and P-state
@@ -344,7 +344,7 @@ config STACK_TRACER
 	select KALLSYMS
 	help
 	  This special tracer records the maximum stack footprint of the
-	  kernel and displays it in debugfs/tracing/stack_trace.
+	  kernel and displays it in /sys/kernel/debug/tracing/stack_trace.
 
 	  This tracer works by hooking into every function call that the
 	  kernel executes, and keeping a maximum stack depth value and
@@ -363,14 +363,14 @@ config STACK_TRACER
 config HW_BRANCH_TRACER
 	depends on HAVE_HW_BRANCH_TRACER
 	bool "Trace hw branches"
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  This tracer records all branches on the system in a circular
 	  buffer giving access to the last N branches for each cpu.
 
 config KMEMTRACE
 	bool "Trace SLAB allocations"
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  kmemtrace provides tracing for slab allocator functions, such as
 	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
@@ -390,7 +390,7 @@ config KMEMTRACE
 
 config WORKQUEUE_TRACER
 	bool "Trace workqueues"
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  The workqueue tracer provides some statistical informations
 	  about each cpu workqueue thread such as the number of the
@@ -406,7 +406,7 @@ config BLK_DEV_IO_TRACE
 	select RELAY
 	select DEBUG_FS
 	select TRACEPOINTS
-	select TRACING
+	select GENERIC_TRACER
 	select STACKTRACE
 	help
 	  Say Y here if you want to be able to trace the block layer actions
@@ -467,7 +467,7 @@ config FTRACE_SELFTEST
 
 config FTRACE_STARTUP_TEST
 	bool "Perform a startup test on ftrace"
-	depends on TRACING
+	depends on GENERIC_TRACER
 	select FTRACE_SELFTEST
 	help
 	  This option performs a series of startup tests on ftrace. On bootup
@@ -478,7 +478,7 @@ config FTRACE_STARTUP_TEST
 config MMIOTRACE
 	bool "Memory mapped IO tracing"
 	depends on HAVE_MMIOTRACE_SUPPORT && PCI
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  Mmiotrace traces Memory Mapped I/O access and is meant for
 	  debugging and reverse engineering. It is called from the ioremap
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 658aace8c41e..ce3b1cd02732 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -45,7 +45,10 @@ obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
 obj-$(CONFIG_POWER_TRACER) += trace_power.o
 obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
 obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+ifeq ($(CONFIG_BLOCK),y)
+obj-$(CONFIG_EVENT_TRACING) += blktrace.o
+endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events.o
 obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index e3abf55bc8e5..39af8af6fc30 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,10 +23,14 @@
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
 #include <linux/time.h>
-#include <trace/block.h>
 #include <linux/uaccess.h>
+
+#include <trace/events/block.h>
+
 #include "trace_output.h"
 
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+
 static unsigned int blktrace_seq __read_mostly = 1;
 
 static struct trace_array *blk_tr;
@@ -665,12 +669,12 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
 
 	if (blk_pc_request(rq)) {
 		what |= BLK_TC_ACT(BLK_TC_PC);
-		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
-				rq->cmd_len, rq->cmd);
+		__blk_add_trace(bt, 0, blk_rq_bytes(rq), rw,
+				what, rq->errors, rq->cmd_len, rq->cmd);
 	} else {
 		what |= BLK_TC_ACT(BLK_TC_FS);
-		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
-				rw, what, rq->errors, 0, NULL);
+		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw,
+				what, rq->errors, 0, NULL);
 	}
 }
 
@@ -877,11 +881,11 @@ void blk_add_driver_data(struct request_queue *q,
 		return;
 
 	if (blk_pc_request(rq))
-		__blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
-				rq->errors, len, data);
+		__blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
+				BLK_TA_DRV_DATA, rq->errors, len, data);
 	else
-		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
-				0, BLK_TA_DRV_DATA, rq->errors, len, data);
+		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0,
+				BLK_TA_DRV_DATA, rq->errors, len, data);
 }
 EXPORT_SYMBOL_GPL(blk_add_driver_data);
 
@@ -1658,3 +1662,72 @@ int blk_trace_init_sysfs(struct device *dev)
 	return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
 }
 
+#endif /* CONFIG_BLK_DEV_IO_TRACE */
+
+#ifdef CONFIG_EVENT_TRACING
+
+void blk_dump_cmd(char *buf, struct request *rq)
+{
+	int i, end;
+	int len = rq->cmd_len;
+	unsigned char *cmd = rq->cmd;
+
+	if (!blk_pc_request(rq)) {
+		buf[0] = '\0';
+		return;
+	}
+
+	for (end = len - 1; end >= 0; end--)
+		if (cmd[end])
+			break;
+	end++;
+
+	for (i = 0; i < len; i++) {
+		buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]);
+		if (i == end && end != len - 1) {
+			sprintf(buf, " ..");
+			break;
+		}
+	}
+}
+
+void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
+{
+	int i = 0;
+
+	if (rw & WRITE)
+		rwbs[i++] = 'W';
+	else if (rw & 1 << BIO_RW_DISCARD)
+		rwbs[i++] = 'D';
+	else if (bytes)
+		rwbs[i++] = 'R';
+	else
+		rwbs[i++] = 'N';
+
+	if (rw & 1 << BIO_RW_AHEAD)
+		rwbs[i++] = 'A';
+	if (rw & 1 << BIO_RW_BARRIER)
+		rwbs[i++] = 'B';
+	if (rw & 1 << BIO_RW_SYNCIO)
+		rwbs[i++] = 'S';
+	if (rw & 1 << BIO_RW_META)
+		rwbs[i++] = 'M';
+
+	rwbs[i] = '\0';
+}
+
+void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
+{
+	int rw = rq->cmd_flags & 0x03;
+	int bytes;
+
+	if (blk_discard_rq(rq))
+		rw |= (1 << BIO_RW_DISCARD);
+
+	bytes = blk_rq_bytes(rq);
+
+	blk_fill_rwbs(rwbs, rw, bytes);
+}
+
+#endif /* CONFIG_EVENT_TRACING */
+
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 140699a9a8a7..bb60732ade0c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -32,6 +32,7 @@
 #include <trace/events/sched.h>
 
 #include <asm/ftrace.h>
+#include <asm/setup.h>
 
 #include "trace_output.h"
 #include "trace_stat.h"
@@ -598,7 +599,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip)
 	local_irq_save(flags);
 
 	stat = &__get_cpu_var(ftrace_profile_stats);
-	if (!stat->hash)
+	if (!stat->hash || !ftrace_profile_enabled)
 		goto out;
 
 	rec = ftrace_find_profiled_func(stat, ip);
@@ -629,7 +630,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
 
 	local_irq_save(flags);
 	stat = &__get_cpu_var(ftrace_profile_stats);
-	if (!stat->hash)
+	if (!stat->hash || !ftrace_profile_enabled)
 		goto out;
 
 	calltime = trace->rettime - trace->calltime;
@@ -723,6 +724,10 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
 			ftrace_profile_enabled = 1;
 		} else {
 			ftrace_profile_enabled = 0;
+			/*
+			 * unregister_ftrace_profiler calls stop_machine
+			 * so this acts like an synchronize_sched.
+			 */
 			unregister_ftrace_profiler();
 		}
 	}
@@ -2369,6 +2374,45 @@ void ftrace_set_notrace(unsigned char *buf, int len, int reset)
 	ftrace_set_regex(buf, len, reset, 0);
 }
 
+/*
+ * command line interface to allow users to set filters on boot up.
+ */
+#define FTRACE_FILTER_SIZE		COMMAND_LINE_SIZE
+static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
+static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
+
+static int __init set_ftrace_notrace(char *str)
+{
+	strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
+	return 1;
+}
+__setup("ftrace_notrace=", set_ftrace_notrace);
+
+static int __init set_ftrace_filter(char *str)
+{
+	strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
+	return 1;
+}
+__setup("ftrace_filter=", set_ftrace_filter);
+
+static void __init set_ftrace_early_filter(char *buf, int enable)
+{
+	char *func;
+
+	while (buf) {
+		func = strsep(&buf, ",");
+		ftrace_set_regex(func, strlen(func), 0, enable);
+	}
+}
+
+static void __init set_ftrace_early_filters(void)
+{
+	if (ftrace_filter_buf[0])
+		set_ftrace_early_filter(ftrace_filter_buf, 1);
+	if (ftrace_notrace_buf[0])
+		set_ftrace_early_filter(ftrace_notrace_buf, 0);
+}
+
 static int
 ftrace_regex_release(struct inode *inode, struct file *file, int enable)
 {
@@ -2829,6 +2873,8 @@ void __init ftrace_init(void)
 	if (ret)
 		pr_warning("Failed to register trace ftrace module notifier\n");
 
+	set_ftrace_early_filters();
+
 	return;
  failed:
 	ftrace_disabled = 1;
@@ -3172,12 +3218,12 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
 		}
 
 		if (t->ret_stack == NULL) {
-			t->curr_ret_stack = -1;
-			/* Make sure IRQs see the -1 first: */
-			barrier();
-			t->ret_stack = ret_stack_list[start++];
 			atomic_set(&t->tracing_graph_pause, 0);
 			atomic_set(&t->trace_overrun, 0);
+			t->curr_ret_stack = -1;
+			/* Make sure the tasks see the -1 first: */
+			smp_wmb();
+			t->ret_stack = ret_stack_list[start++];
 		}
 	} while_each_thread(g, t);
 
@@ -3235,8 +3281,10 @@ static int start_graph_tracing(void)
 		return -ENOMEM;
 
 	/* The cpu_boot init_task->ret_stack will never be freed */
-	for_each_online_cpu(cpu)
-		ftrace_graph_init_task(idle_task(cpu));
+	for_each_online_cpu(cpu) {
+		if (!idle_task(cpu)->ret_stack)
+			ftrace_graph_init_task(idle_task(cpu));
+	}
 
 	do {
 		ret = alloc_retstack_tasklist(ret_stack_list);
@@ -3328,18 +3376,25 @@ void unregister_ftrace_graph(void)
 /* Allocate a return stack for newly created task */
 void ftrace_graph_init_task(struct task_struct *t)
 {
+	/* Make sure we do not use the parent ret_stack */
+	t->ret_stack = NULL;
+
 	if (ftrace_graph_active) {
-		t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
+		struct ftrace_ret_stack *ret_stack;
+
+		ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
 				* sizeof(struct ftrace_ret_stack),
 				GFP_KERNEL);
-		if (!t->ret_stack)
+		if (!ret_stack)
 			return;
 		t->curr_ret_stack = -1;
 		atomic_set(&t->tracing_graph_pause, 0);
 		atomic_set(&t->trace_overrun, 0);
 		t->ftrace_timestamp = 0;
-	} else
-		t->ret_stack = NULL;
+		/* make curr_ret_stack visable before we add the ret_stack */
+		smp_wmb();
+		t->ret_stack = ret_stack;
+	}
 }
 
 void ftrace_graph_exit_task(struct task_struct *t)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 16b24d49604c..dc4dc70171ce 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -10,6 +10,7 @@
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
+#include <linux/kmemcheck.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
@@ -370,6 +371,9 @@ static inline int test_time_stamp(u64 delta)
 /* Max payload is BUF_PAGE_SIZE - header (8bytes) */
 #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
 
+/* Max number of timestamps that can fit on a page */
+#define RB_TIMESTAMPS_PER_PAGE	(BUF_PAGE_SIZE / RB_LEN_TIME_STAMP)
+
 int ring_buffer_print_page_header(struct trace_seq *s)
 {
 	struct buffer_data_page field;
@@ -423,6 +427,8 @@ struct ring_buffer {
 	atomic_t			record_disabled;
 	cpumask_var_t			cpumask;
 
+	struct lock_class_key		*reader_lock_key;
+
 	struct mutex			mutex;
 
 	struct ring_buffer_per_cpu	**buffers;
@@ -562,6 +568,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	cpu_buffer->cpu = cpu;
 	cpu_buffer->buffer = buffer;
 	spin_lock_init(&cpu_buffer->reader_lock);
+	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
 	cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	INIT_LIST_HEAD(&cpu_buffer->pages);
 
@@ -632,7 +639,8 @@ static int rb_cpu_notify(struct notifier_block *self,
  * when the buffer wraps. If this flag is not set, the buffer will
  * drop data when the tail hits the head.
  */
-struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
+					struct lock_class_key *key)
 {
 	struct ring_buffer *buffer;
 	int bsize;
@@ -655,6 +663,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	buffer->flags = flags;
 	buffer->clock = trace_clock_local;
+	buffer->reader_lock_key = key;
 
 	/* need at least two pages */
 	if (buffer->pages == 1)
@@ -712,7 +721,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	kfree(buffer);
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(ring_buffer_alloc);
+EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
 
 /**
  * ring_buffer_free - free a ring buffer.
@@ -1262,6 +1271,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	if (tail < BUF_PAGE_SIZE) {
 		/* Mark the rest of the page with padding */
 		event = __rb_page_index(tail_page, tail);
+		kmemcheck_annotate_bitfield(event, bitfield);
 		rb_event_set_padding(event);
 	}
 
@@ -1319,6 +1329,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		return NULL;
 
 	event = __rb_page_index(tail_page, tail);
+	kmemcheck_annotate_bitfield(event, bitfield);
 	rb_update_event(event, type, length);
 
 	/* The passed in type is zero for DATA */
@@ -1335,6 +1346,38 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	return event;
 }
 
+static inline int
+rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
+		  struct ring_buffer_event *event)
+{
+	unsigned long new_index, old_index;
+	struct buffer_page *bpage;
+	unsigned long index;
+	unsigned long addr;
+
+	new_index = rb_event_index(event);
+	old_index = new_index + rb_event_length(event);
+	addr = (unsigned long)event;
+	addr &= PAGE_MASK;
+
+	bpage = cpu_buffer->tail_page;
+
+	if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
+		/*
+		 * This is on the tail page. It is possible that
+		 * a write could come in and move the tail page
+		 * and write to the next page. That is fine
+		 * because we just shorten what is on this page.
+		 */
+		index = local_cmpxchg(&bpage->write, old_index, new_index);
+		if (index == old_index)
+			return 1;
+	}
+
+	/* could not discard */
+	return 0;
+}
+
 static int
 rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		  u64 *ts, u64 *delta)
@@ -1377,16 +1420,23 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 			event->array[0] = *delta >> TS_SHIFT;
 		} else {
 			cpu_buffer->commit_page->page->time_stamp = *ts;
-			event->time_delta = 0;
-			event->array[0] = 0;
+			/* try to discard, since we do not need this */
+			if (!rb_try_to_discard(cpu_buffer, event)) {
+				/* nope, just zero it */
+				event->time_delta = 0;
+				event->array[0] = 0;
+			}
 		}
 		cpu_buffer->write_stamp = *ts;
 		/* let the caller know this was the commit */
 		ret = 1;
 	} else {
-		/* Darn, this is just wasted space */
-		event->time_delta = 0;
-		event->array[0] = 0;
+		/* Try to discard the event */
+		if (!rb_try_to_discard(cpu_buffer, event)) {
+			/* Darn, this is just wasted space */
+			event->time_delta = 0;
+			event->array[0] = 0;
+		}
 		ret = 0;
 	}
 
@@ -1682,10 +1732,6 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 			      struct ring_buffer_event *event)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
-	unsigned long new_index, old_index;
-	struct buffer_page *bpage;
-	unsigned long index;
-	unsigned long addr;
 	int cpu;
 
 	/* The event is discarded regardless */
@@ -1701,24 +1747,8 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 	cpu = smp_processor_id();
 	cpu_buffer = buffer->buffers[cpu];
 
-	new_index = rb_event_index(event);
-	old_index = new_index + rb_event_length(event);
-	addr = (unsigned long)event;
-	addr &= PAGE_MASK;
-
-	bpage = cpu_buffer->tail_page;
-
-	if (bpage == (void *)addr && rb_page_write(bpage) == old_index) {
-		/*
-		 * This is on the tail page. It is possible that
-		 * a write could come in and move the tail page
-		 * and write to the next page. That is fine
-		 * because we just shorten what is on this page.
-		 */
-		index = local_cmpxchg(&bpage->write, old_index, new_index);
-		if (index == old_index)
-			goto out;
-	}
+	if (!rb_try_to_discard(cpu_buffer, event))
+		goto out;
 
 	/*
 	 * The commit is still visible by the reader, so we
@@ -2253,8 +2283,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	 * Check if we are at the end of the buffer.
 	 */
 	if (iter->head >= rb_page_size(iter->head_page)) {
-		if (RB_WARN_ON(buffer,
-			       iter->head_page == cpu_buffer->commit_page))
+		/* discarded commits can make the page empty */
+		if (iter->head_page == cpu_buffer->commit_page)
 			return;
 		rb_inc_iter(iter);
 		return;
@@ -2297,12 +2327,10 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	/*
 	 * We repeat when a timestamp is encountered. It is possible
 	 * to get multiple timestamps from an interrupt entering just
-	 * as one timestamp is about to be written. The max times
-	 * that this can happen is the number of nested interrupts we
-	 * can have. Nesting 10 deep of interrupts is clearly
-	 * an anomaly.
+	 * as one timestamp is about to be written, or from discarded
+	 * commits. The most that we can have is the number on a single page.
 	 */
-	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
 		return NULL;
 
 	reader = rb_get_reader_page(cpu_buffer);
@@ -2368,14 +2396,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 
  again:
 	/*
-	 * We repeat when a timestamp is encountered. It is possible
-	 * to get multiple timestamps from an interrupt entering just
-	 * as one timestamp is about to be written. The max times
-	 * that this can happen is the number of nested interrupts we
-	 * can have. Nesting 10 deep of interrupts is clearly
-	 * an anomaly.
+	 * We repeat when a timestamp is encountered.
+	 * We can get multiple timestamps by nested interrupts or also
+	 * if filtering is on (discarding commits). Since discarding
+	 * commits can be frequent we can get a lot of timestamps.
+	 * But we limit them by not adding timestamps if they begin
+	 * at the start of a page.
 	 */
-	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
 		return NULL;
 
 	if (rb_per_cpu_empty(cpu_buffer))
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a3a8a87d7e91..c1878bfb2e1e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -344,7 +344,7 @@ static raw_spinlock_t ftrace_max_lock =
 /*
  * Copy the new maximum trace into the separate maximum-trace
  * structure. (this way the maximum trace is permanently saved,
- * for later retrieval via /debugfs/tracing/latency_trace)
+ * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
  */
 static void
 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
@@ -2414,21 +2414,20 @@ static const struct file_operations tracing_iter_fops = {
 
 static const char readme_msg[] =
 	"tracing mini-HOWTO:\n\n"
-	"# mkdir /debug\n"
-	"# mount -t debugfs nodev /debug\n\n"
-	"# cat /debug/tracing/available_tracers\n"
+	"# mount -t debugfs nodev /sys/kernel/debug\n\n"
+	"# cat /sys/kernel/debug/tracing/available_tracers\n"
	"wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
-	"# cat /debug/tracing/current_tracer\n"
+	"# cat /sys/kernel/debug/tracing/current_tracer\n"
 	"nop\n"
-	"# echo sched_switch > /debug/tracing/current_tracer\n"
-	"# cat /debug/tracing/current_tracer\n"
+	"# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n"
+	"# cat /sys/kernel/debug/tracing/current_tracer\n"
 	"sched_switch\n"
-	"# cat /debug/tracing/trace_options\n"
+	"# cat /sys/kernel/debug/tracing/trace_options\n"
 	"noprint-parent nosym-offset nosym-addr noverbose\n"
-	"# echo print-parent > /debug/tracing/trace_options\n"
-	"# echo 1 > /debug/tracing/tracing_enabled\n"
-	"# cat /debug/tracing/trace > /tmp/trace.txt\n"
-	"echo 0 > /debug/tracing/tracing_enabled\n"
+	"# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
+	"# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n"
+	"# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
+	"# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n"
 ;
 
 static ssize_t
@@ -2826,6 +2825,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	/* trace pipe does not show start of buffer */
 	cpumask_setall(iter->started);
 
+	if (trace_flags & TRACE_ITER_LATENCY_FMT)
+		iter->iter_flags |= TRACE_FILE_LAT_FMT;
+
 	iter->cpu_file = cpu_file;
 	iter->tr = &global_trace;
 	mutex_init(&iter->mutex);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6c81f9c21426..aa08be69a1b6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1050,12 +1050,13 @@ static void trace_module_remove_events(struct module *mod)
 	struct ftrace_event_call *call, *p;
 	bool found = false;
 
+	down_write(&trace_event_mutex);
 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
 		if (call->mod == mod) {
 			found = true;
 			ftrace_event_enable_disable(call, 0);
 			if (call->event)
-				unregister_ftrace_event(call->event);
+				__unregister_ftrace_event(call->event);
 			debugfs_remove_recursive(call->dir);
 			list_del(&call->list);
 			trace_destroy_fields(call);
@@ -1079,6 +1080,7 @@ static void trace_module_remove_events(struct module *mod)
 	 */
 	if (found)
 		tracing_reset_current_online_cpus();
+	up_write(&trace_event_mutex);
 }
 
 static int trace_module_notify(struct notifier_block *self,
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index a7430b16d243..db6e54bdb596 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -478,12 +478,12 @@ enum {
 
 static int is_string_field(const char *type)
 {
+	if (strstr(type, "__data_loc") && strstr(type, "char"))
+		return FILTER_DYN_STRING;
+
 	if (strchr(type, '[') && strstr(type, "char"))
 		return FILTER_STATIC_STRING;
 
-	if (!strcmp(type, "__str_loc"))
-		return FILTER_DYN_STRING;
-
 	return 0;
 }
 
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 10f6ad7d85f6..8b592418d8b2 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -65,6 +65,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
 	if (!current->ret_stack)
 		return -EBUSY;
 
+	/*
+	 * We must make sure the ret_stack is tested before we read
+	 * anything else.
+	 */
+	smp_rmb();
+
 	/* The return trace stack is full */
 	if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
 		atomic_inc(&current->trace_overrun);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index c12d95db2f56..7938f3ae93e3 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -14,9 +14,10 @@
 /* must be a power of 2 */
 #define EVENT_HASHSIZE	128
 
-static DECLARE_RWSEM(trace_event_mutex);
+DECLARE_RWSEM(trace_event_mutex);
 
 DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
+EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
 
 static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
@@ -99,6 +100,38 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 }
 EXPORT_SYMBOL_GPL(trace_seq_printf);
 
+/**
+ * trace_seq_vprintf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * The tracer may use either sequence operations or its own
+ * copy to user routines. To simplify formating of a trace
+ * trace_seq_printf is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+{
+	int len = (PAGE_SIZE - 1) - s->len;
+	int ret;
+
+	if (!len)
+		return 0;
+
+	ret = vsnprintf(s->buffer + s->len, len, fmt, args);
+
+	/* If we can't write it all, don't bother writing anything */
+	if (ret >= len)
+		return 0;
+
+	s->len += ret;
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(trace_seq_vprintf);
+
 int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 {
 	int len = (PAGE_SIZE - 1) - s->len;
@@ -222,10 +255,9 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 {
 	unsigned long mask;
 	const char *str;
+	const char *ret = p->buffer + p->len;
 	int i;
 
-	trace_seq_init(p);
-
 	for (i = 0; flag_array[i].name && flags; i++) {
 
 		mask = flag_array[i].mask;
@@ -248,16 +280,16 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 
 	trace_seq_putc(p, 0);
 
-	return p->buffer;
+	return ret;
 }
+EXPORT_SYMBOL(ftrace_print_flags_seq);
 
 const char *
 ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 			 const struct trace_print_flags *symbol_array)
 {
 	int i;
-
-	trace_seq_init(p);
+	const char *ret = p->buffer + p->len;
 
 	for (i = 0; symbol_array[i].name; i++) {
 
@@ -273,8 +305,9 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 
 	trace_seq_putc(p, 0);
 
-	return p->buffer;
+	return ret;
 }
+EXPORT_SYMBOL(ftrace_print_symbols_seq);
 
 #ifdef CONFIG_KRETPROBES
 static inline const char *kretprobed(const char *name)
@@ -386,17 +419,20 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
 
 		if (ip == ULONG_MAX || !ret)
 			break;
-		if (i && ret)
-			ret = trace_seq_puts(s, " <- ");
+		if (ret)
+			ret = trace_seq_puts(s, " => ");
 		if (!ip) {
 			if (ret)
 				ret = trace_seq_puts(s, "??");
+			if (ret)
+				ret = trace_seq_puts(s, "\n");
 			continue;
 		}
 		if (!ret)
 			break;
 		if (ret)
 			ret = seq_print_user_ip(s, mm, ip, sym_flags);
+		ret = trace_seq_puts(s, "\n");
 	}
 
 	if (mm)
@@ -666,6 +702,16 @@ int register_ftrace_event(struct trace_event *event)
 }
 EXPORT_SYMBOL_GPL(register_ftrace_event);
 
+/*
+ * Used by module code with the trace_event_mutex held for write.
+ */
+int __unregister_ftrace_event(struct trace_event *event)
+{
+	hlist_del(&event->node);
+	list_del(&event->list);
+	return 0;
+}
+
 /**
 * unregister_ftrace_event - remove a no longer used event
 * @event: the event to remove
@@ -673,8 +719,7 @@ EXPORT_SYMBOL_GPL(register_ftrace_event);
 int unregister_ftrace_event(struct trace_event *event)
 {
 	down_write(&trace_event_mutex);
-	hlist_del(&event->node);
-	list_del(&event->list);
+	__unregister_ftrace_event(event);
 	up_write(&trace_event_mutex);
 
 	return 0;
@@ -972,16 +1017,16 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 
 	trace_assign_type(field, iter->ent);
 
+	if (!trace_seq_puts(s, "<stack trace>\n"))
+		goto partial;
 	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-		if (!field->caller[i])
+		if (!field->caller[i] || (field->caller[i] == ULONG_MAX))
 			break;
-		if (i) {
-			if (!trace_seq_puts(s, " <= "))
-				goto partial;
+		if (!trace_seq_puts(s, " => "))
+			goto partial;
 
 		if (!seq_print_ip_sym(s, field->caller[i], flags))
 			goto partial;
-		}
 		if (!trace_seq_puts(s, "\n"))
 			goto partial;
 	}
@@ -1009,10 +1054,10 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
 
 	trace_assign_type(field, iter->ent);
 
-	if (!seq_print_userip_objs(field, s, flags))
+	if (!trace_seq_puts(s, "<user stack trace>\n"))
 		goto partial;
 
-	if (!trace_seq_putc(s, '\n'))
+	if (!seq_print_userip_objs(field, s, flags))
 		goto partial;
 
 	return TRACE_TYPE_HANDLED;
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index ac240e76eb01..d38bec4a9c30 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -27,6 +27,10 @@ extern struct trace_event *ftrace_find_event(int type);
 extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
 					 int flags);
 
+/* used by module unregistering */
+extern int __unregister_ftrace_event(struct trace_event *event);
+extern struct rw_semaphore trace_event_mutex;
+
 #define MAX_MEMHEX_BYTES	8
 #define HEX_CHARS		(MAX_MEMHEX_BYTES*2 + 1)
 
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 1796f00524e1..2d7aebd71dbd 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -265,7 +265,7 @@ static int t_show(struct seq_file *m, void *v)
 		seq_printf(m, "        Depth    Size   Location"
 			   "    (%d entries)\n"
 			   "        -----    ----   --------\n",
-			   max_stack_trace.nr_entries);
+			   max_stack_trace.nr_entries - 1);
 
 	if (!stack_tracer_enabled && !max_stack_size)
 		print_disabled(m);
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index e04b76cc238a..f6693969287d 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -203,7 +203,8 @@ static void start_stack_timer(void *unused)
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = stack_trace_timer_fn;
 
-	hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
+	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
+		      HRTIMER_MODE_REL_PINNED);
 }
 
 static void start_stack_timers(void)