path: root/kernel/trace
author     Andrea Bastoni <bastoni@cs.unc.edu>  2010-05-30 19:16:45 -0400
committer  Andrea Bastoni <bastoni@cs.unc.edu>  2010-05-30 19:16:45 -0400
commit     ada47b5fe13d89735805b566185f4885f5a3f750 (patch)
tree       644b88f8a71896307d71438e9b3af49126ffb22b /kernel/trace
parent     43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff)
parent     3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff)
Merge branch 'wip-2.6.34' into old-private-master (archived-private-master)
Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                  |  147
-rw-r--r--  kernel/trace/Makefile                 |    6
-rw-r--r--  kernel/trace/blktrace.c               |    6
-rw-r--r--  kernel/trace/ftrace.c                 |  533
-rw-r--r--  kernel/trace/power-traces.c           |    3
-rw-r--r--  kernel/trace/ring_buffer.c            |  137
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c  |   86
-rw-r--r--  kernel/trace/trace.c                  |  514
-rw-r--r--  kernel/trace/trace.h                  |  118
-rw-r--r--  kernel/trace/trace_branch.c           |   19
-rw-r--r--  kernel/trace/trace_clock.c            |   21
-rw-r--r--  kernel/trace/trace_entries.h          |   16
-rw-r--r--  kernel/trace/trace_event_perf.c       |  175
-rw-r--r--  kernel/trace/trace_event_profile.c    |  125
-rw-r--r--  kernel/trace/trace_events.c           |  288
-rw-r--r--  kernel/trace/trace_events_filter.c    |  439
-rw-r--r--  kernel/trace/trace_export.c           |  113
-rw-r--r--  kernel/trace/trace_functions_graph.c  |  267
-rw-r--r--  kernel/trace/trace_hw_branches.c      |   51
-rw-r--r--  kernel/trace/trace_irqsoff.c          |    2
-rw-r--r--  kernel/trace/trace_kprobe.c           | 1488
-rw-r--r--  kernel/trace/trace_ksym.c             |  520
-rw-r--r--  kernel/trace/trace_mmiotrace.c        |    1
-rw-r--r--  kernel/trace/trace_output.c           |   75
-rw-r--r--  kernel/trace/trace_sched_wakeup.c     |   16
-rw-r--r--  kernel/trace/trace_selftest.c         |   60
-rw-r--r--  kernel/trace/trace_stack.c            |   40
-rw-r--r--  kernel/trace/trace_stat.c             |    1
-rw-r--r--  kernel/trace/trace_syscalls.c         |  397
-rw-r--r--  kernel/trace/trace_sysprof.c          |    1
-rw-r--r--  kernel/trace/trace_workqueue.c        |    1
31 files changed, 4305 insertions(+), 1361 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b416512ad17f..13e13d428cd3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -12,39 +12,37 @@ config NOP_TRACER
12config HAVE_FTRACE_NMI_ENTER 12config HAVE_FTRACE_NMI_ENTER
13 bool 13 bool
14 help 14 help
15 See Documentation/trace/ftrace-implementation.txt 15 See Documentation/trace/ftrace-design.txt
16 16
17config HAVE_FUNCTION_TRACER 17config HAVE_FUNCTION_TRACER
18 bool 18 bool
19 help 19 help
20 See Documentation/trace/ftrace-implementation.txt 20 See Documentation/trace/ftrace-design.txt
21 21
22config HAVE_FUNCTION_GRAPH_TRACER 22config HAVE_FUNCTION_GRAPH_TRACER
23 bool 23 bool
24 help 24 help
25 See Documentation/trace/ftrace-implementation.txt 25 See Documentation/trace/ftrace-design.txt
26 26
27config HAVE_FUNCTION_GRAPH_FP_TEST 27config HAVE_FUNCTION_GRAPH_FP_TEST
28 bool 28 bool
29 help 29 help
30 An arch may pass in a unique value (frame pointer) to both the 30 See Documentation/trace/ftrace-design.txt
31 entering and exiting of a function. On exit, the value is compared
32 and if it does not match, then it will panic the kernel.
33 31
34config HAVE_FUNCTION_TRACE_MCOUNT_TEST 32config HAVE_FUNCTION_TRACE_MCOUNT_TEST
35 bool 33 bool
36 help 34 help
37 See Documentation/trace/ftrace-implementation.txt 35 See Documentation/trace/ftrace-design.txt
38 36
39config HAVE_DYNAMIC_FTRACE 37config HAVE_DYNAMIC_FTRACE
40 bool 38 bool
41 help 39 help
42 See Documentation/trace/ftrace-implementation.txt 40 See Documentation/trace/ftrace-design.txt
43 41
44config HAVE_FTRACE_MCOUNT_RECORD 42config HAVE_FTRACE_MCOUNT_RECORD
45 bool 43 bool
46 help 44 help
47 See Documentation/trace/ftrace-implementation.txt 45 See Documentation/trace/ftrace-design.txt
48 46
49config HAVE_HW_BRANCH_TRACER 47config HAVE_HW_BRANCH_TRACER
50 bool 48 bool
@@ -52,7 +50,7 @@ config HAVE_HW_BRANCH_TRACER
52config HAVE_SYSCALL_TRACEPOINTS 50config HAVE_SYSCALL_TRACEPOINTS
53 bool 51 bool
54 help 52 help
55 See Documentation/trace/ftrace-implementation.txt 53 See Documentation/trace/ftrace-design.txt
56 54
57config TRACER_MAX_TRACE 55config TRACER_MAX_TRACE
58 bool 56 bool
@@ -83,7 +81,7 @@ config RING_BUFFER_ALLOW_SWAP
83# This allows those options to appear when no other tracer is selected. But the 81# This allows those options to appear when no other tracer is selected. But the
84# options do not appear when something else selects it. We need the two options 82# options do not appear when something else selects it. We need the two options
85# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the 83# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
86# hidding of the automatic options. 84# hiding of the automatic options.
87 85
88config TRACING 86config TRACING
89 bool 87 bool
@@ -119,7 +117,7 @@ menuconfig FTRACE
119 bool "Tracers" 117 bool "Tracers"
120 default y if DEBUG_KERNEL 118 default y if DEBUG_KERNEL
121 help 119 help
122 Enable the kernel tracing infrastructure. 120 Enable the kernel tracing infrastructure.
123 121
124if FTRACE 122if FTRACE
125 123
@@ -133,7 +131,7 @@ config FUNCTION_TRACER
133 help 131 help
134 Enable the kernel to trace every kernel function. This is done 132 Enable the kernel to trace every kernel function. This is done
135 by using a compiler feature to insert a small, 5-byte No-Operation 133 by using a compiler feature to insert a small, 5-byte No-Operation
136 instruction to the beginning of every kernel function, which NOP 134 instruction at the beginning of every kernel function, which NOP
137 sequence is then dynamically patched into a tracer call when 135 sequence is then dynamically patched into a tracer call when
138 tracing is enabled by the administrator. If it's runtime disabled 136 tracing is enabled by the administrator. If it's runtime disabled
139 (the bootup default), then the overhead of the instructions is very 137 (the bootup default), then the overhead of the instructions is very
@@ -150,7 +148,7 @@ config FUNCTION_GRAPH_TRACER
150 and its entry. 148 and its entry.
151 Its first purpose is to trace the duration of functions and 149 Its first purpose is to trace the duration of functions and
152 draw a call graph for each thread with some information like 150 draw a call graph for each thread with some information like
153 the return value. This is done by setting the current return 151 the return value. This is done by setting the current return
154 address on the current task structure into a stack of calls. 152 address on the current task structure into a stack of calls.
155 153
156 154
@@ -173,7 +171,7 @@ config IRQSOFF_TRACER
173 171
174 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency 172 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
175 173
176 (Note that kernel size and overhead increases with this option 174 (Note that kernel size and overhead increase with this option
177 enabled. This option and the preempt-off timing option can be 175 enabled. This option and the preempt-off timing option can be
178 used together or separately.) 176 used together or separately.)
179 177
@@ -186,7 +184,7 @@ config PREEMPT_TRACER
186 select TRACER_MAX_TRACE 184 select TRACER_MAX_TRACE
187 select RING_BUFFER_ALLOW_SWAP 185 select RING_BUFFER_ALLOW_SWAP
188 help 186 help
189 This option measures the time spent in preemption off critical 187 This option measures the time spent in preemption-off critical
190 sections, with microsecond accuracy. 188 sections, with microsecond accuracy.
191 189
192 The default measurement method is a maximum search, which is 190 The default measurement method is a maximum search, which is
@@ -195,7 +193,7 @@ config PREEMPT_TRACER
195 193
196 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency 194 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
197 195
198 (Note that kernel size and overhead increases with this option 196 (Note that kernel size and overhead increase with this option
199 enabled. This option and the irqs-off timing option can be 197 enabled. This option and the irqs-off timing option can be
200 used together or separately.) 198 used together or separately.)
201 199
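
Both the irqsoff and preempt-off help texts above reset the recorded worst-case latency by writing 0 to tracing_max_latency before starting a new measurement. A minimal userspace sketch of that reset in C; the /sys/kernel/debug path is an assumption, adjust it to wherever debugfs is mounted:

/* Hedged sketch: clear the latency watermark before a new measurement run.
 * Assumes debugfs is mounted at /sys/kernel/debug. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/tracing/tracing_max_latency";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return 1;
	}
	if (write(fd, "0\n", 2) != 2)
		perror("write");
	close(fd);
	return 0;
}
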
@@ -222,7 +220,7 @@ config ENABLE_DEFAULT_TRACERS
222 depends on !GENERIC_TRACER 220 depends on !GENERIC_TRACER
223 select TRACING 221 select TRACING
224 help 222 help
225 This tracer hooks to various trace points in the kernel 223 This tracer hooks to various trace points in the kernel,
226 allowing the user to pick and choose which trace point they 224 allowing the user to pick and choose which trace point they
227 want to trace. It also includes the sched_switch tracer plugin. 225 want to trace. It also includes the sched_switch tracer plugin.
228 226
@@ -265,19 +263,19 @@ choice
265 The likely/unlikely profiler only looks at the conditions that 263 The likely/unlikely profiler only looks at the conditions that
266 are annotated with a likely or unlikely macro. 264 are annotated with a likely or unlikely macro.
267 265
268 The "all branch" profiler will profile every if statement in the 266 The "all branch" profiler will profile every if-statement in the
269 kernel. This profiler will also enable the likely/unlikely 267 kernel. This profiler will also enable the likely/unlikely
270 profiler as well. 268 profiler.
271 269
272 Either of the above profilers add a bit of overhead to the system. 270 Either of the above profilers adds a bit of overhead to the system.
273 If unsure choose "No branch profiling". 271 If unsure, choose "No branch profiling".
274 272
275config BRANCH_PROFILE_NONE 273config BRANCH_PROFILE_NONE
276 bool "No branch profiling" 274 bool "No branch profiling"
277 help 275 help
278 No branch profiling. Branch profiling adds a bit of overhead. 276 No branch profiling. Branch profiling adds a bit of overhead.
279 Only enable it if you want to analyse the branching behavior. 277 Only enable it if you want to analyse the branching behavior.
280 Otherwise keep it disabled. 278 Otherwise keep it disabled.
281 279
282config PROFILE_ANNOTATED_BRANCHES 280config PROFILE_ANNOTATED_BRANCHES
283 bool "Trace likely/unlikely profiler" 281 bool "Trace likely/unlikely profiler"
@@ -288,7 +286,7 @@ config PROFILE_ANNOTATED_BRANCHES
288 286
289 /sys/kernel/debug/tracing/profile_annotated_branch 287 /sys/kernel/debug/tracing/profile_annotated_branch
290 288
291 Note: this will add a significant overhead, only turn this 289 Note: this will add a significant overhead; only turn this
292 on if you need to profile the system's use of these macros. 290 on if you need to profile the system's use of these macros.
293 291
294config PROFILE_ALL_BRANCHES 292config PROFILE_ALL_BRANCHES
@@ -305,7 +303,7 @@ config PROFILE_ALL_BRANCHES
305 303
306 This configuration, when enabled, will impose a great overhead 304 This configuration, when enabled, will impose a great overhead
307 on the system. This should only be enabled when the system 305 on the system. This should only be enabled when the system
308 is to be analyzed 306 is to be analyzed in much detail.
309endchoice 307endchoice
310 308
311config TRACING_BRANCHES 309config TRACING_BRANCHES
@@ -330,15 +328,27 @@ config BRANCH_TRACER
330 328
331 Say N if unsure. 329 Say N if unsure.
332 330
333config POWER_TRACER 331config KSYM_TRACER
334 bool "Trace power consumption behavior" 332 bool "Trace read and write access on kernel memory locations"
335 depends on X86 333 depends on HAVE_HW_BREAKPOINT
336 select GENERIC_TRACER 334 select TRACING
335 help
336 This tracer helps find read and write operations on any given kernel
337 symbol i.e. /proc/kallsyms.
338
339config PROFILE_KSYM_TRACER
340 bool "Profile all kernel memory accesses on 'watched' variables"
341 depends on KSYM_TRACER
337 help 342 help
338 This tracer helps developers to analyze and optimize the kernels 343 This tracer profiles kernel accesses on variables watched through the
339 power management decisions, specifically the C-state and P-state 344 ksym tracer ftrace plugin. Depending upon the hardware, all read
340 behavior. 345 and write operations on kernel variables can be monitored for
346 accesses.
347
348 The results will be displayed in:
349 /debugfs/tracing/profile_ksym
341 350
351 Say N if unsure.
342 352
343config STACK_TRACER 353config STACK_TRACER
344 bool "Trace max stack" 354 bool "Trace max stack"
@@ -370,14 +380,14 @@ config HW_BRANCH_TRACER
370 select GENERIC_TRACER 380 select GENERIC_TRACER
371 help 381 help
372 This tracer records all branches on the system in a circular 382 This tracer records all branches on the system in a circular
373 buffer giving access to the last N branches for each cpu. 383 buffer, giving access to the last N branches for each cpu.
374 384
375config KMEMTRACE 385config KMEMTRACE
376 bool "Trace SLAB allocations" 386 bool "Trace SLAB allocations"
377 select GENERIC_TRACER 387 select GENERIC_TRACER
378 help 388 help
379 kmemtrace provides tracing for slab allocator functions, such as 389 kmemtrace provides tracing for slab allocator functions, such as
380 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected 390 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
381 data is then fed to the userspace application in order to analyse 391 data is then fed to the userspace application in order to analyse
382 allocation hotspots, internal fragmentation and so on, making it 392 allocation hotspots, internal fragmentation and so on, making it
383 possible to see how well an allocator performs, as well as debug 393 possible to see how well an allocator performs, as well as debug
@@ -396,15 +406,15 @@ config WORKQUEUE_TRACER
396 bool "Trace workqueues" 406 bool "Trace workqueues"
397 select GENERIC_TRACER 407 select GENERIC_TRACER
398 help 408 help
399 The workqueue tracer provides some statistical informations 409 The workqueue tracer provides some statistical information
400 about each cpu workqueue thread such as the number of the 410 about each cpu workqueue thread such as the number of the
401 works inserted and executed since their creation. It can help 411 works inserted and executed since their creation. It can help
402 to evaluate the amount of work each of them have to perform. 412 to evaluate the amount of work each of them has to perform.
403 For example it can help a developer to decide whether he should 413 For example it can help a developer to decide whether he should
404 choose a per cpu workqueue instead of a singlethreaded one. 414 choose a per-cpu workqueue instead of a singlethreaded one.
405 415
406config BLK_DEV_IO_TRACE 416config BLK_DEV_IO_TRACE
407 bool "Support for tracing block io actions" 417 bool "Support for tracing block IO actions"
408 depends on SYSFS 418 depends on SYSFS
409 depends on BLOCK 419 depends on BLOCK
410 select RELAY 420 select RELAY
@@ -428,38 +438,55 @@ config BLK_DEV_IO_TRACE
428 438
429 If unsure, say N. 439 If unsure, say N.
430 440
441config KPROBE_EVENT
442 depends on KPROBES
443 depends on HAVE_REGS_AND_STACK_ACCESS_API
444 bool "Enable kprobes-based dynamic events"
445 select TRACING
446 default y
447 help
448 This allows the user to add tracing events (similar to tracepoints)
449 on the fly via the ftrace interface. See
450 Documentation/trace/kprobetrace.txt for more details.
451
452 Those events can be inserted wherever kprobes can probe, and record
453 various register and memory values.
454
455 This option is also required by perf-probe subcommand of perf tools.
456 If you want to use perf tools, this option is strongly recommended.
457
431config DYNAMIC_FTRACE 458config DYNAMIC_FTRACE
432 bool "enable/disable ftrace tracepoints dynamically" 459 bool "enable/disable ftrace tracepoints dynamically"
433 depends on FUNCTION_TRACER 460 depends on FUNCTION_TRACER
434 depends on HAVE_DYNAMIC_FTRACE 461 depends on HAVE_DYNAMIC_FTRACE
435 default y 462 default y
436 help 463 help
437 This option will modify all the calls to ftrace dynamically 464 This option will modify all the calls to ftrace dynamically
438 (will patch them out of the binary image and replaces them 465 (will patch them out of the binary image and replace them
439 with a No-Op instruction) as they are called. A table is 466 with a No-Op instruction) as they are called. A table is
440 created to dynamically enable them again. 467 created to dynamically enable them again.
441 468
442 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise 469 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
443 has native performance as long as no tracing is active. 470 otherwise has native performance as long as no tracing is active.
444 471
445 The changes to the code are done by a kernel thread that 472 The changes to the code are done by a kernel thread that
446 wakes up once a second and checks to see if any ftrace calls 473 wakes up once a second and checks to see if any ftrace calls
447 were made. If so, it runs stop_machine (stops all CPUS) 474 were made. If so, it runs stop_machine (stops all CPUS)
448 and modifies the code to jump over the call to ftrace. 475 and modifies the code to jump over the call to ftrace.
449 476
450config FUNCTION_PROFILER 477config FUNCTION_PROFILER
451 bool "Kernel function profiler" 478 bool "Kernel function profiler"
452 depends on FUNCTION_TRACER 479 depends on FUNCTION_TRACER
453 default n 480 default n
454 help 481 help
455 This option enables the kernel function profiler. A file is created 482 This option enables the kernel function profiler. A file is created
456 in debugfs called function_profile_enabled which defaults to zero. 483 in debugfs called function_profile_enabled which defaults to zero.
457 When a 1 is echoed into this file profiling begins, and when a 484 When a 1 is echoed into this file profiling begins, and when a
458 zero is entered, profiling stops. A file in the trace_stats 485 zero is entered, profiling stops. A "functions" file is created in
459 directory called functions, that show the list of functions that 486 the trace_stats directory; this file shows the list of functions that
460 have been hit and their counters. 487 have been hit and their counters.
461 488
462 If in doubt, say N 489 If in doubt, say N.
463 490
464config FTRACE_MCOUNT_RECORD 491config FTRACE_MCOUNT_RECORD
465 def_bool y 492 def_bool y
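
The KPROBE_EVENT option added in the hunk above exposes dynamic probes through the kprobe_events file described in Documentation/trace/kprobetrace.txt. A hedged sketch of defining and enabling one probe from userspace; the event name "myprobe", the probed symbol do_sys_open, and the debugfs mount point are illustrative assumptions, not part of this patch:

/* Hedged sketch: add a kprobe event at run time and enable it.
 * The event name, probed symbol and mount point are illustrative only. */
#include <stdio.h>

static int write_str(const char *path, const char *s)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fputs(s, f);
	return fclose(f);
}

int main(void)
{
	/* "p:<name> <symbol>" defines a probe; see kprobetrace.txt for args. */
	write_str("/sys/kernel/debug/tracing/kprobe_events",
		  "p:myprobe do_sys_open\n");
	/* Every defined event gets an enable knob under events/kprobes/. */
	write_str("/sys/kernel/debug/tracing/events/kprobes/myprobe/enable",
		  "1\n");
	return 0;
}
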
@@ -518,8 +545,8 @@ config RING_BUFFER_BENCHMARK
518 tristate "Ring buffer benchmark stress tester" 545 tristate "Ring buffer benchmark stress tester"
519 depends on RING_BUFFER 546 depends on RING_BUFFER
520 help 547 help
521 This option creates a test to stress the ring buffer and bench mark it. 548 This option creates a test to stress the ring buffer and benchmark it.
522 It creates its own ring buffer such that it will not interfer with 549 It creates its own ring buffer such that it will not interfere with
523 any other users of the ring buffer (such as ftrace). It then creates 550 any other users of the ring buffer (such as ftrace). It then creates
524 a producer and consumer that will run for 10 seconds and sleep for 551 a producer and consumer that will run for 10 seconds and sleep for
525 10 seconds. Each interval it will print out the number of events 552 10 seconds. Each interval it will print out the number of events
@@ -528,7 +555,7 @@ config RING_BUFFER_BENCHMARK
528 It does not disable interrupts or raise its priority, so it may be 555 It does not disable interrupts or raise its priority, so it may be
529 affected by processes that are running. 556 affected by processes that are running.
530 557
531 If unsure, say N 558 If unsure, say N.
532 559
533endif # FTRACE 560endif # FTRACE
534 561
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 26f03ac07c2b..78edc6490038 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -51,8 +51,12 @@ endif
51obj-$(CONFIG_EVENT_TRACING) += trace_events.o 51obj-$(CONFIG_EVENT_TRACING) += trace_events.o
52obj-$(CONFIG_EVENT_TRACING) += trace_export.o 52obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54ifeq ($(CONFIG_PERF_EVENTS),y)
55obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
56endif
55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 57obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
58obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
59obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
56obj-$(CONFIG_EVENT_TRACING) += power-traces.o 60obj-$(CONFIG_EVENT_TRACING) += power-traces.o
57 61
58libftrace-y := ftrace.o 62libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d9d6206e0b14..b3bc91a3f510 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -21,6 +21,7 @@
21#include <linux/percpu.h> 21#include <linux/percpu.h>
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/slab.h>
24#include <linux/debugfs.h> 25#include <linux/debugfs.h>
25#include <linux/smp_lock.h> 26#include <linux/smp_lock.h>
26#include <linux/time.h> 27#include <linux/time.h>
@@ -540,9 +541,10 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
540 if (ret) 541 if (ret)
541 return ret; 542 return ret;
542 543
543 if (copy_to_user(arg, &buts, sizeof(buts))) 544 if (copy_to_user(arg, &buts, sizeof(buts))) {
545 blk_trace_remove(q);
544 return -EFAULT; 546 return -EFAULT;
545 547 }
546 return 0; 548 return 0;
547} 549}
548EXPORT_SYMBOL_GPL(blk_trace_setup); 550EXPORT_SYMBOL_GPL(blk_trace_setup);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6dc4e5ef7a01..2404b59b3097 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,12 +22,13 @@
22#include <linux/hardirq.h> 22#include <linux/hardirq.h>
23#include <linux/kthread.h> 23#include <linux/kthread.h>
24#include <linux/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/kprobes.h>
26#include <linux/ftrace.h> 25#include <linux/ftrace.h>
27#include <linux/sysctl.h> 26#include <linux/sysctl.h>
27#include <linux/slab.h>
28#include <linux/ctype.h> 28#include <linux/ctype.h>
29#include <linux/list.h> 29#include <linux/list.h>
30#include <linux/hash.h> 30#include <linux/hash.h>
31#include <linux/rcupdate.h>
31 32
32#include <trace/events/sched.h> 33#include <trace/events/sched.h>
33 34
@@ -60,6 +61,13 @@ static int last_ftrace_enabled;
60/* Quick disabling of function tracer. */ 61/* Quick disabling of function tracer. */
61int function_trace_stop; 62int function_trace_stop;
62 63
64/* List for set_ftrace_pid's pids. */
65LIST_HEAD(ftrace_pids);
66struct ftrace_pid {
67 struct list_head list;
68 struct pid *pid;
69};
70
63/* 71/*
64 * ftrace_disabled is set when an anomaly is discovered. 72 * ftrace_disabled is set when an anomaly is discovered.
65 * ftrace_disabled is much stronger than ftrace_enabled. 73 * ftrace_disabled is much stronger than ftrace_enabled.
@@ -78,18 +86,22 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
78ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; 86ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
79ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 87ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
80 88
89/*
90 * Traverse the ftrace_list, invoking all entries. The reason that we
91 * can use rcu_dereference_raw() is that elements removed from this list
92 * are simply leaked, so there is no need to interact with a grace-period
93 * mechanism. The rcu_dereference_raw() calls are needed to handle
94 * concurrent insertions into the ftrace_list.
95 *
96 * Silly Alpha and silly pointer-speculation compiler optimizations!
97 */
81static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 98static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
82{ 99{
83 struct ftrace_ops *op = ftrace_list; 100 struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
84
85 /* in case someone actually ports this to alpha! */
86 read_barrier_depends();
87 101
88 while (op != &ftrace_list_end) { 102 while (op != &ftrace_list_end) {
89 /* silly alpha */
90 read_barrier_depends();
91 op->func(ip, parent_ip); 103 op->func(ip, parent_ip);
92 op = op->next; 104 op = rcu_dereference_raw(op->next); /*see above*/
93 }; 105 };
94} 106}
95 107
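
The new comment above justifies walking ftrace_list with rcu_dereference_raw(): removed entries are intentionally leaked, so a reader can never touch freed memory, and the only ordering needed is between a writer publishing a new head (rcu_assign_pointer(), introduced in the next hunk) and readers following it. A self-contained, hedged sketch of the same publish-and-walk pattern, with C11 atomics standing in for the kernel primitives (acquire loads play the role of the dependency ordering rcu_dereference_raw relies on):

/* Hedged sketch of the pattern only; not the kernel implementation. */
#include <stdatomic.h>
#include <stdio.h>

struct op {
	void (*func)(unsigned long ip);
	struct op *_Atomic next;
};

static struct op list_end;			/* sentinel, never invoked */
static struct op *_Atomic op_list = &list_end;

/* Writer side: roughly what rcu_assign_pointer() does for ftrace_list. */
static void register_op(struct op *node)
{
	node->next = atomic_load_explicit(&op_list, memory_order_relaxed);
	/* release: the fully initialized node is visible before the new head */
	atomic_store_explicit(&op_list, node, memory_order_release);
}

/* Reader side: the walk that ftrace_list_func() performs. */
static void call_all(unsigned long ip)
{
	struct op *op = atomic_load_explicit(&op_list, memory_order_acquire);

	while (op != &list_end) {
		op->func(ip);
		op = atomic_load_explicit(&op->next, memory_order_acquire);
	}
}

static void print_ip(unsigned long ip)
{
	printf("callback hit at %#lx\n", ip);
}

int main(void)
{
	static struct op my_op = { .func = print_ip };

	register_op(&my_op);
	call_all(0x1234);
	return 0;
}
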
@@ -144,8 +156,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
144 * the ops->next pointer is valid before another CPU sees 156 * the ops->next pointer is valid before another CPU sees
145 * the ops pointer included into the ftrace_list. 157 * the ops pointer included into the ftrace_list.
146 */ 158 */
147 smp_wmb(); 159 rcu_assign_pointer(ftrace_list, ops);
148 ftrace_list = ops;
149 160
150 if (ftrace_enabled) { 161 if (ftrace_enabled) {
151 ftrace_func_t func; 162 ftrace_func_t func;
@@ -155,7 +166,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
155 else 166 else
156 func = ftrace_list_func; 167 func = ftrace_list_func;
157 168
158 if (ftrace_pid_trace) { 169 if (!list_empty(&ftrace_pids)) {
159 set_ftrace_pid_function(func); 170 set_ftrace_pid_function(func);
160 func = ftrace_pid_func; 171 func = ftrace_pid_func;
161 } 172 }
@@ -203,7 +214,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
203 if (ftrace_list->next == &ftrace_list_end) { 214 if (ftrace_list->next == &ftrace_list_end) {
204 ftrace_func_t func = ftrace_list->func; 215 ftrace_func_t func = ftrace_list->func;
205 216
206 if (ftrace_pid_trace) { 217 if (!list_empty(&ftrace_pids)) {
207 set_ftrace_pid_function(func); 218 set_ftrace_pid_function(func);
208 func = ftrace_pid_func; 219 func = ftrace_pid_func;
209 } 220 }
@@ -231,7 +242,7 @@ static void ftrace_update_pid_func(void)
231 func = __ftrace_trace_function; 242 func = __ftrace_trace_function;
232#endif 243#endif
233 244
234 if (ftrace_pid_trace) { 245 if (!list_empty(&ftrace_pids)) {
235 set_ftrace_pid_function(func); 246 set_ftrace_pid_function(func);
236 func = ftrace_pid_func; 247 func = ftrace_pid_func;
237 } else { 248 } else {
@@ -821,8 +832,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
821} 832}
822#endif /* CONFIG_FUNCTION_PROFILER */ 833#endif /* CONFIG_FUNCTION_PROFILER */
823 834
824/* set when tracing only a pid */
825struct pid *ftrace_pid_trace;
826static struct pid * const ftrace_swapper_pid = &init_struct_pid; 835static struct pid * const ftrace_swapper_pid = &init_struct_pid;
827 836
828#ifdef CONFIG_DYNAMIC_FTRACE 837#ifdef CONFIG_DYNAMIC_FTRACE
@@ -889,36 +898,6 @@ static struct dyn_ftrace *ftrace_free_records;
889 } \ 898 } \
890 } 899 }
891 900
892#ifdef CONFIG_KPROBES
893
894static int frozen_record_count;
895
896static inline void freeze_record(struct dyn_ftrace *rec)
897{
898 if (!(rec->flags & FTRACE_FL_FROZEN)) {
899 rec->flags |= FTRACE_FL_FROZEN;
900 frozen_record_count++;
901 }
902}
903
904static inline void unfreeze_record(struct dyn_ftrace *rec)
905{
906 if (rec->flags & FTRACE_FL_FROZEN) {
907 rec->flags &= ~FTRACE_FL_FROZEN;
908 frozen_record_count--;
909 }
910}
911
912static inline int record_frozen(struct dyn_ftrace *rec)
913{
914 return rec->flags & FTRACE_FL_FROZEN;
915}
916#else
917# define freeze_record(rec) ({ 0; })
918# define unfreeze_record(rec) ({ 0; })
919# define record_frozen(rec) ({ 0; })
920#endif /* CONFIG_KPROBES */
921
922static void ftrace_free_rec(struct dyn_ftrace *rec) 901static void ftrace_free_rec(struct dyn_ftrace *rec)
923{ 902{
924 rec->freelist = ftrace_free_records; 903 rec->freelist = ftrace_free_records;
@@ -1016,6 +995,21 @@ static void ftrace_bug(int failed, unsigned long ip)
1016} 995}
1017 996
1018 997
998/* Return 1 if the address range is reserved for ftrace */
999int ftrace_text_reserved(void *start, void *end)
1000{
1001 struct dyn_ftrace *rec;
1002 struct ftrace_page *pg;
1003
1004 do_for_each_ftrace_rec(pg, rec) {
1005 if (rec->ip <= (unsigned long)end &&
1006 rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
1007 return 1;
1008 } while_for_each_ftrace_rec();
1009 return 0;
1010}
1011
1012
1019static int 1013static int
1020__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1014__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1021{ 1015{
@@ -1067,14 +1061,6 @@ static void ftrace_replace_code(int enable)
1067 !(rec->flags & FTRACE_FL_CONVERTED)) 1061 !(rec->flags & FTRACE_FL_CONVERTED))
1068 continue; 1062 continue;
1069 1063
1070 /* ignore updates to this record's mcount site */
1071 if (get_kprobe((void *)rec->ip)) {
1072 freeze_record(rec);
1073 continue;
1074 } else {
1075 unfreeze_record(rec);
1076 }
1077
1078 failed = __ftrace_replace_code(rec, enable); 1064 failed = __ftrace_replace_code(rec, enable);
1079 if (failed) { 1065 if (failed) {
1080 rec->flags |= FTRACE_FL_FAILED; 1066 rec->flags |= FTRACE_FL_FAILED;
@@ -1261,12 +1247,34 @@ static int ftrace_update_code(struct module *mod)
1261 ftrace_new_addrs = p->newlist; 1247 ftrace_new_addrs = p->newlist;
1262 p->flags = 0L; 1248 p->flags = 0L;
1263 1249
1264 /* convert record (i.e, patch mcount-call with NOP) */ 1250 /*
1265 if (ftrace_code_disable(mod, p)) { 1251 * Do the initial record convertion from mcount jump
1266 p->flags |= FTRACE_FL_CONVERTED; 1252 * to the NOP instructions.
1267 ftrace_update_cnt++; 1253 */
1268 } else 1254 if (!ftrace_code_disable(mod, p)) {
1269 ftrace_free_rec(p); 1255 ftrace_free_rec(p);
1256 continue;
1257 }
1258
1259 p->flags |= FTRACE_FL_CONVERTED;
1260 ftrace_update_cnt++;
1261
1262 /*
1263 * If the tracing is enabled, go ahead and enable the record.
1264 *
1265 * The reason not to enable the record immediatelly is the
1266 * inherent check of ftrace_make_nop/ftrace_make_call for
1267 * correct previous instructions. Making first the NOP
1268 * conversion puts the module to the correct state, thus
1269 * passing the ftrace_make_call check.
1270 */
1271 if (ftrace_start_up) {
1272 int failed = __ftrace_replace_code(p, 1);
1273 if (failed) {
1274 ftrace_bug(failed, p->ip);
1275 ftrace_free_rec(p);
1276 }
1277 }
1270 } 1278 }
1271 1279
1272 stop = ftrace_now(raw_smp_processor_id()); 1280 stop = ftrace_now(raw_smp_processor_id());
@@ -1656,64 +1664,10 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
1656 return ret; 1664 return ret;
1657} 1665}
1658 1666
1659enum {
1660 MATCH_FULL,
1661 MATCH_FRONT_ONLY,
1662 MATCH_MIDDLE_ONLY,
1663 MATCH_END_ONLY,
1664};
1665
1666/*
1667 * (static function - no need for kernel doc)
1668 *
1669 * Pass in a buffer containing a glob and this function will
1670 * set search to point to the search part of the buffer and
1671 * return the type of search it is (see enum above).
1672 * This does modify buff.
1673 *
1674 * Returns enum type.
1675 * search returns the pointer to use for comparison.
1676 * not returns 1 if buff started with a '!'
1677 * 0 otherwise.
1678 */
1679static int
1680ftrace_setup_glob(char *buff, int len, char **search, int *not)
1681{
1682 int type = MATCH_FULL;
1683 int i;
1684
1685 if (buff[0] == '!') {
1686 *not = 1;
1687 buff++;
1688 len--;
1689 } else
1690 *not = 0;
1691
1692 *search = buff;
1693
1694 for (i = 0; i < len; i++) {
1695 if (buff[i] == '*') {
1696 if (!i) {
1697 *search = buff + 1;
1698 type = MATCH_END_ONLY;
1699 } else {
1700 if (type == MATCH_END_ONLY)
1701 type = MATCH_MIDDLE_ONLY;
1702 else
1703 type = MATCH_FRONT_ONLY;
1704 buff[i] = 0;
1705 break;
1706 }
1707 }
1708 }
1709
1710 return type;
1711}
1712
1713static int ftrace_match(char *str, char *regex, int len, int type) 1667static int ftrace_match(char *str, char *regex, int len, int type)
1714{ 1668{
1715 int matched = 0; 1669 int matched = 0;
1716 char *ptr; 1670 int slen;
1717 1671
1718 switch (type) { 1672 switch (type) {
1719 case MATCH_FULL: 1673 case MATCH_FULL:
@@ -1729,8 +1683,8 @@ static int ftrace_match(char *str, char *regex, int len, int type)
1729 matched = 1; 1683 matched = 1;
1730 break; 1684 break;
1731 case MATCH_END_ONLY: 1685 case MATCH_END_ONLY:
1732 ptr = strstr(str, regex); 1686 slen = strlen(str);
1733 if (ptr && (ptr[len] == 0)) 1687 if (slen >= len && memcmp(str + slen - len, regex, len) == 0)
1734 matched = 1; 1688 matched = 1;
1735 break; 1689 break;
1736 } 1690 }
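
The two hunks above drop ftrace's private glob parser in favour of the shared filter_parse_regex() and change MATCH_END_ONLY to compare the suffix directly rather than trusting the first strstr() hit. A hedged, standalone sketch of the four match modes as the filter code uses them; glob_match() is an illustrative name, not a kernel API:

/* Hedged sketch of the four glob match modes used by the ftrace filters. */
#include <stdio.h>
#include <string.h>

enum { MATCH_FULL, MATCH_FRONT_ONLY, MATCH_MIDDLE_ONLY, MATCH_END_ONLY };

static int glob_match(const char *str, const char *regex, int len, int type)
{
	int slen;

	switch (type) {
	case MATCH_FULL:		/* "func"  */
		return strcmp(str, regex) == 0;
	case MATCH_FRONT_ONLY:		/* "func*" */
		return strncmp(str, regex, len) == 0;
	case MATCH_MIDDLE_ONLY:		/* "*unc*" */
		return strstr(str, regex) != NULL;
	case MATCH_END_ONLY:		/* "*unc"  */
		slen = strlen(str);
		return slen >= len && memcmp(str + slen - len, regex, len) == 0;
	}
	return 0;
}

int main(void)
{
	/* "*_lock" parses to MATCH_END_ONLY with search = "_lock", len = 5 */
	printf("%d\n", glob_match("_spin_lock", "_lock", 5, MATCH_END_ONLY));
	return 0;
}
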
@@ -1747,7 +1701,7 @@ ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
1747 return ftrace_match(str, regex, len, type); 1701 return ftrace_match(str, regex, len, type);
1748} 1702}
1749 1703
1750static void ftrace_match_records(char *buff, int len, int enable) 1704static int ftrace_match_records(char *buff, int len, int enable)
1751{ 1705{
1752 unsigned int search_len; 1706 unsigned int search_len;
1753 struct ftrace_page *pg; 1707 struct ftrace_page *pg;
@@ -1756,9 +1710,10 @@ static void ftrace_match_records(char *buff, int len, int enable)
1756 char *search; 1710 char *search;
1757 int type; 1711 int type;
1758 int not; 1712 int not;
1713 int found = 0;
1759 1714
1760 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1715 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1761 type = ftrace_setup_glob(buff, len, &search, &not); 1716 type = filter_parse_regex(buff, len, &search, &not);
1762 1717
1763 search_len = strlen(search); 1718 search_len = strlen(search);
1764 1719
@@ -1773,6 +1728,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
1773 rec->flags &= ~flag; 1728 rec->flags &= ~flag;
1774 else 1729 else
1775 rec->flags |= flag; 1730 rec->flags |= flag;
1731 found = 1;
1776 } 1732 }
1777 /* 1733 /*
1778 * Only enable filtering if we have a function that 1734 * Only enable filtering if we have a function that
@@ -1782,6 +1738,8 @@ static void ftrace_match_records(char *buff, int len, int enable)
1782 ftrace_filtered = 1; 1738 ftrace_filtered = 1;
1783 } while_for_each_ftrace_rec(); 1739 } while_for_each_ftrace_rec();
1784 mutex_unlock(&ftrace_lock); 1740 mutex_unlock(&ftrace_lock);
1741
1742 return found;
1785} 1743}
1786 1744
1787static int 1745static int
@@ -1803,7 +1761,7 @@ ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
1803 return 1; 1761 return 1;
1804} 1762}
1805 1763
1806static void ftrace_match_module_records(char *buff, char *mod, int enable) 1764static int ftrace_match_module_records(char *buff, char *mod, int enable)
1807{ 1765{
1808 unsigned search_len = 0; 1766 unsigned search_len = 0;
1809 struct ftrace_page *pg; 1767 struct ftrace_page *pg;
@@ -1812,6 +1770,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1812 char *search = buff; 1770 char *search = buff;
1813 unsigned long flag; 1771 unsigned long flag;
1814 int not = 0; 1772 int not = 0;
1773 int found = 0;
1815 1774
1816 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1775 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1817 1776
@@ -1826,7 +1785,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1826 } 1785 }
1827 1786
1828 if (strlen(buff)) { 1787 if (strlen(buff)) {
1829 type = ftrace_setup_glob(buff, strlen(buff), &search, &not); 1788 type = filter_parse_regex(buff, strlen(buff), &search, &not);
1830 search_len = strlen(search); 1789 search_len = strlen(search);
1831 } 1790 }
1832 1791
@@ -1842,12 +1801,15 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1842 rec->flags &= ~flag; 1801 rec->flags &= ~flag;
1843 else 1802 else
1844 rec->flags |= flag; 1803 rec->flags |= flag;
1804 found = 1;
1845 } 1805 }
1846 if (enable && (rec->flags & FTRACE_FL_FILTER)) 1806 if (enable && (rec->flags & FTRACE_FL_FILTER))
1847 ftrace_filtered = 1; 1807 ftrace_filtered = 1;
1848 1808
1849 } while_for_each_ftrace_rec(); 1809 } while_for_each_ftrace_rec();
1850 mutex_unlock(&ftrace_lock); 1810 mutex_unlock(&ftrace_lock);
1811
1812 return found;
1851} 1813}
1852 1814
1853/* 1815/*
@@ -1876,8 +1838,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
1876 if (!strlen(mod)) 1838 if (!strlen(mod))
1877 return -EINVAL; 1839 return -EINVAL;
1878 1840
1879 ftrace_match_module_records(func, mod, enable); 1841 if (ftrace_match_module_records(func, mod, enable))
1880 return 0; 1842 return 0;
1843 return -EINVAL;
1881} 1844}
1882 1845
1883static struct ftrace_func_command ftrace_mod_cmd = { 1846static struct ftrace_func_command ftrace_mod_cmd = {
@@ -1991,7 +1954,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
1991 int count = 0; 1954 int count = 0;
1992 char *search; 1955 char *search;
1993 1956
1994 type = ftrace_setup_glob(glob, strlen(glob), &search, &not); 1957 type = filter_parse_regex(glob, strlen(glob), &search, &not);
1995 len = strlen(search); 1958 len = strlen(search);
1996 1959
1997 /* we do not support '!' for function probes */ 1960 /* we do not support '!' for function probes */
@@ -2068,7 +2031,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
2068 else if (glob) { 2031 else if (glob) {
2069 int not; 2032 int not;
2070 2033
2071 type = ftrace_setup_glob(glob, strlen(glob), &search, &not); 2034 type = filter_parse_regex(glob, strlen(glob), &search, &not);
2072 len = strlen(search); 2035 len = strlen(search);
2073 2036
2074 /* we do not support '!' for function probes */ 2037 /* we do not support '!' for function probes */
@@ -2174,8 +2137,9 @@ static int ftrace_process_regex(char *buff, int len, int enable)
2174 func = strsep(&next, ":"); 2137 func = strsep(&next, ":");
2175 2138
2176 if (!next) { 2139 if (!next) {
2177 ftrace_match_records(func, len, enable); 2140 if (ftrace_match_records(func, len, enable))
2178 return 0; 2141 return 0;
2142 return ret;
2179 } 2143 }
2180 2144
2181 /* command found */ 2145 /* command found */
@@ -2221,10 +2185,9 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2221 !trace_parser_cont(parser)) { 2185 !trace_parser_cont(parser)) {
2222 ret = ftrace_process_regex(parser->buffer, 2186 ret = ftrace_process_regex(parser->buffer,
2223 parser->idx, enable); 2187 parser->idx, enable);
2188 trace_parser_clear(parser);
2224 if (ret) 2189 if (ret)
2225 goto out_unlock; 2190 goto out_unlock;
2226
2227 trace_parser_clear(parser);
2228 } 2191 }
2229 2192
2230 ret = read; 2193 ret = read;
@@ -2312,6 +2275,34 @@ static int __init set_ftrace_filter(char *str)
2312} 2275}
2313__setup("ftrace_filter=", set_ftrace_filter); 2276__setup("ftrace_filter=", set_ftrace_filter);
2314 2277
2278#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2279static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
2280static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
2281
2282static int __init set_graph_function(char *str)
2283{
2284 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
2285 return 1;
2286}
2287__setup("ftrace_graph_filter=", set_graph_function);
2288
2289static void __init set_ftrace_early_graph(char *buf)
2290{
2291 int ret;
2292 char *func;
2293
2294 while (buf) {
2295 func = strsep(&buf, ",");
2296 /* we allow only one expression at a time */
2297 ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
2298 func);
2299 if (ret)
2300 printk(KERN_DEBUG "ftrace: function %s not "
2301 "traceable\n", func);
2302 }
2303}
2304#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2305
2315static void __init set_ftrace_early_filter(char *buf, int enable) 2306static void __init set_ftrace_early_filter(char *buf, int enable)
2316{ 2307{
2317 char *func; 2308 char *func;
@@ -2328,6 +2319,10 @@ static void __init set_ftrace_early_filters(void)
2328 set_ftrace_early_filter(ftrace_filter_buf, 1); 2319 set_ftrace_early_filter(ftrace_filter_buf, 1);
2329 if (ftrace_notrace_buf[0]) 2320 if (ftrace_notrace_buf[0])
2330 set_ftrace_early_filter(ftrace_notrace_buf, 0); 2321 set_ftrace_early_filter(ftrace_notrace_buf, 0);
2322#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2323 if (ftrace_graph_buf[0])
2324 set_ftrace_early_graph(ftrace_graph_buf);
2325#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2331} 2326}
2332 2327
2333static int 2328static int
@@ -2410,6 +2405,7 @@ static const struct file_operations ftrace_notrace_fops = {
2410static DEFINE_MUTEX(graph_lock); 2405static DEFINE_MUTEX(graph_lock);
2411 2406
2412int ftrace_graph_count; 2407int ftrace_graph_count;
2408int ftrace_graph_filter_enabled;
2413unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; 2409unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
2414 2410
2415static void * 2411static void *
@@ -2432,7 +2428,7 @@ static void *g_start(struct seq_file *m, loff_t *pos)
2432 mutex_lock(&graph_lock); 2428 mutex_lock(&graph_lock);
2433 2429
2434 /* Nothing, tell g_show to print all functions are enabled */ 2430 /* Nothing, tell g_show to print all functions are enabled */
2435 if (!ftrace_graph_count && !*pos) 2431 if (!ftrace_graph_filter_enabled && !*pos)
2436 return (void *)1; 2432 return (void *)1;
2437 2433
2438 return __g_next(m, pos); 2434 return __g_next(m, pos);
@@ -2478,6 +2474,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
2478 mutex_lock(&graph_lock); 2474 mutex_lock(&graph_lock);
2479 if ((file->f_mode & FMODE_WRITE) && 2475 if ((file->f_mode & FMODE_WRITE) &&
2480 (file->f_flags & O_TRUNC)) { 2476 (file->f_flags & O_TRUNC)) {
2477 ftrace_graph_filter_enabled = 0;
2481 ftrace_graph_count = 0; 2478 ftrace_graph_count = 0;
2482 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); 2479 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
2483 } 2480 }
@@ -2503,7 +2500,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2503 struct dyn_ftrace *rec; 2500 struct dyn_ftrace *rec;
2504 struct ftrace_page *pg; 2501 struct ftrace_page *pg;
2505 int search_len; 2502 int search_len;
2506 int found = 0; 2503 int fail = 1;
2507 int type, not; 2504 int type, not;
2508 char *search; 2505 char *search;
2509 bool exists; 2506 bool exists;
@@ -2513,39 +2510,52 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2513 return -ENODEV; 2510 return -ENODEV;
2514 2511
2515 /* decode regex */ 2512 /* decode regex */
2516 type = ftrace_setup_glob(buffer, strlen(buffer), &search, &not); 2513 type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
2517 if (not) 2514 if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
2518 return -EINVAL; 2515 return -EBUSY;
2519 2516
2520 search_len = strlen(search); 2517 search_len = strlen(search);
2521 2518
2522 mutex_lock(&ftrace_lock); 2519 mutex_lock(&ftrace_lock);
2523 do_for_each_ftrace_rec(pg, rec) { 2520 do_for_each_ftrace_rec(pg, rec) {
2524 2521
2525 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2526 break;
2527
2528 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) 2522 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
2529 continue; 2523 continue;
2530 2524
2531 if (ftrace_match_record(rec, search, search_len, type)) { 2525 if (ftrace_match_record(rec, search, search_len, type)) {
2532 /* ensure it is not already in the array */ 2526 /* if it is in the array */
2533 exists = false; 2527 exists = false;
2534 for (i = 0; i < *idx; i++) 2528 for (i = 0; i < *idx; i++) {
2535 if (array[i] == rec->ip) { 2529 if (array[i] == rec->ip) {
2536 exists = true; 2530 exists = true;
2537 break; 2531 break;
2538 } 2532 }
2539 if (!exists) { 2533 }
2540 array[(*idx)++] = rec->ip; 2534
2541 found = 1; 2535 if (!not) {
2536 fail = 0;
2537 if (!exists) {
2538 array[(*idx)++] = rec->ip;
2539 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2540 goto out;
2541 }
2542 } else {
2543 if (exists) {
2544 array[i] = array[--(*idx)];
2545 array[*idx] = 0;
2546 fail = 0;
2547 }
2542 } 2548 }
2543 } 2549 }
2544 } while_for_each_ftrace_rec(); 2550 } while_for_each_ftrace_rec();
2545 2551out:
2546 mutex_unlock(&ftrace_lock); 2552 mutex_unlock(&ftrace_lock);
2547 2553
2548 return found ? 0 : -EINVAL; 2554 if (fail)
2555 return -EINVAL;
2556
2557 ftrace_graph_filter_enabled = 1;
2558 return 0;
2549} 2559}
2550 2560
2551static ssize_t 2561static ssize_t
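
With the rewrite above, ftrace_set_func() also honours a leading '!' by removing a matching entry from the graph filter array; removal keeps the array dense by moving the last element into the vacated slot (array[i] = array[--(*idx)]) instead of shifting everything down. A small hedged sketch of that idiom in isolation; remove_entry() is an illustrative helper, not a kernel function:

/* Hedged sketch: order-agnostic removal from a dense array by swapping
 * the last element into the freed slot, as the graph filter code does. */
static int remove_entry(unsigned long *array, int *count, unsigned long value)
{
	int i;

	for (i = 0; i < *count; i++) {
		if (array[i] == value) {
			array[i] = array[--(*count)];	/* move last entry down */
			array[*count] = 0;		/* clear the old tail */
			return 0;
		}
	}
	return -1;				/* not found */
}
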
@@ -2555,16 +2565,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
2555 struct trace_parser parser; 2565 struct trace_parser parser;
2556 ssize_t read, ret; 2566 ssize_t read, ret;
2557 2567
2558 if (!cnt || cnt < 0) 2568 if (!cnt)
2559 return 0; 2569 return 0;
2560 2570
2561 mutex_lock(&graph_lock); 2571 mutex_lock(&graph_lock);
2562 2572
2563 if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
2564 ret = -EBUSY;
2565 goto out_unlock;
2566 }
2567
2568 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { 2573 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
2569 ret = -ENOMEM; 2574 ret = -ENOMEM;
2570 goto out_unlock; 2575 goto out_unlock;
@@ -2624,7 +2629,7 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
2624 return 0; 2629 return 0;
2625} 2630}
2626 2631
2627static int ftrace_convert_nops(struct module *mod, 2632static int ftrace_process_locs(struct module *mod,
2628 unsigned long *start, 2633 unsigned long *start,
2629 unsigned long *end) 2634 unsigned long *end)
2630{ 2635{
@@ -2684,7 +2689,7 @@ static void ftrace_init_module(struct module *mod,
2684{ 2689{
2685 if (ftrace_disabled || start == end) 2690 if (ftrace_disabled || start == end)
2686 return; 2691 return;
2687 ftrace_convert_nops(mod, start, end); 2692 ftrace_process_locs(mod, start, end);
2688} 2693}
2689 2694
2690static int ftrace_module_notify(struct notifier_block *self, 2695static int ftrace_module_notify(struct notifier_block *self,
@@ -2745,7 +2750,7 @@ void __init ftrace_init(void)
2745 2750
2746 last_ftrace_enabled = ftrace_enabled = 1; 2751 last_ftrace_enabled = ftrace_enabled = 1;
2747 2752
2748 ret = ftrace_convert_nops(NULL, 2753 ret = ftrace_process_locs(NULL,
2749 __start_mcount_loc, 2754 __start_mcount_loc,
2750 __stop_mcount_loc); 2755 __stop_mcount_loc);
2751 2756
@@ -2778,23 +2783,6 @@ static inline void ftrace_startup_enable(int command) { }
2778# define ftrace_shutdown_sysctl() do { } while (0) 2783# define ftrace_shutdown_sysctl() do { } while (0)
2779#endif /* CONFIG_DYNAMIC_FTRACE */ 2784#endif /* CONFIG_DYNAMIC_FTRACE */
2780 2785
2781static ssize_t
2782ftrace_pid_read(struct file *file, char __user *ubuf,
2783 size_t cnt, loff_t *ppos)
2784{
2785 char buf[64];
2786 int r;
2787
2788 if (ftrace_pid_trace == ftrace_swapper_pid)
2789 r = sprintf(buf, "swapper tasks\n");
2790 else if (ftrace_pid_trace)
2791 r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace));
2792 else
2793 r = sprintf(buf, "no pid\n");
2794
2795 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2796}
2797
2798static void clear_ftrace_swapper(void) 2786static void clear_ftrace_swapper(void)
2799{ 2787{
2800 struct task_struct *p; 2788 struct task_struct *p;
@@ -2845,14 +2833,12 @@ static void set_ftrace_pid(struct pid *pid)
2845 rcu_read_unlock(); 2833 rcu_read_unlock();
2846} 2834}
2847 2835
2848static void clear_ftrace_pid_task(struct pid **pid) 2836static void clear_ftrace_pid_task(struct pid *pid)
2849{ 2837{
2850 if (*pid == ftrace_swapper_pid) 2838 if (pid == ftrace_swapper_pid)
2851 clear_ftrace_swapper(); 2839 clear_ftrace_swapper();
2852 else 2840 else
2853 clear_ftrace_pid(*pid); 2841 clear_ftrace_pid(pid);
2854
2855 *pid = NULL;
2856} 2842}
2857 2843
2858static void set_ftrace_pid_task(struct pid *pid) 2844static void set_ftrace_pid_task(struct pid *pid)
@@ -2863,74 +2849,184 @@ static void set_ftrace_pid_task(struct pid *pid)
2863 set_ftrace_pid(pid); 2849 set_ftrace_pid(pid);
2864} 2850}
2865 2851
2866static ssize_t 2852static int ftrace_pid_add(int p)
2867ftrace_pid_write(struct file *filp, const char __user *ubuf,
2868 size_t cnt, loff_t *ppos)
2869{ 2853{
2870 struct pid *pid; 2854 struct pid *pid;
2871 char buf[64]; 2855 struct ftrace_pid *fpid;
2872 long val; 2856 int ret = -EINVAL;
2873 int ret;
2874 2857
2875 if (cnt >= sizeof(buf)) 2858 mutex_lock(&ftrace_lock);
2876 return -EINVAL;
2877 2859
2878 if (copy_from_user(&buf, ubuf, cnt)) 2860 if (!p)
2879 return -EFAULT; 2861 pid = ftrace_swapper_pid;
2862 else
2863 pid = find_get_pid(p);
2880 2864
2881 buf[cnt] = 0; 2865 if (!pid)
2866 goto out;
2882 2867
2883 ret = strict_strtol(buf, 10, &val); 2868 ret = 0;
2884 if (ret < 0)
2885 return ret;
2886 2869
2887 mutex_lock(&ftrace_lock); 2870 list_for_each_entry(fpid, &ftrace_pids, list)
2888 if (val < 0) { 2871 if (fpid->pid == pid)
2889 /* disable pid tracing */ 2872 goto out_put;
2890 if (!ftrace_pid_trace)
2891 goto out;
2892 2873
2893 clear_ftrace_pid_task(&ftrace_pid_trace); 2874 ret = -ENOMEM;
2894 2875
2895 } else { 2876 fpid = kmalloc(sizeof(*fpid), GFP_KERNEL);
2896 /* swapper task is special */ 2877 if (!fpid)
2897 if (!val) { 2878 goto out_put;
2898 pid = ftrace_swapper_pid;
2899 if (pid == ftrace_pid_trace)
2900 goto out;
2901 } else {
2902 pid = find_get_pid(val);
2903 2879
2904 if (pid == ftrace_pid_trace) { 2880 list_add(&fpid->list, &ftrace_pids);
2905 put_pid(pid); 2881 fpid->pid = pid;
2906 goto out;
2907 }
2908 }
2909 2882
2910 if (ftrace_pid_trace) 2883 set_ftrace_pid_task(pid);
2911 clear_ftrace_pid_task(&ftrace_pid_trace);
2912 2884
2913 if (!pid) 2885 ftrace_update_pid_func();
2914 goto out; 2886 ftrace_startup_enable(0);
2887
2888 mutex_unlock(&ftrace_lock);
2889 return 0;
2890
2891out_put:
2892 if (pid != ftrace_swapper_pid)
2893 put_pid(pid);
2894
2895out:
2896 mutex_unlock(&ftrace_lock);
2897 return ret;
2898}
2899
2900static void ftrace_pid_reset(void)
2901{
2902 struct ftrace_pid *fpid, *safe;
2903
2904 mutex_lock(&ftrace_lock);
2905 list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) {
2906 struct pid *pid = fpid->pid;
2915 2907
2916 ftrace_pid_trace = pid; 2908 clear_ftrace_pid_task(pid);
2917 2909
2918 set_ftrace_pid_task(ftrace_pid_trace); 2910 list_del(&fpid->list);
2911 kfree(fpid);
2919 } 2912 }
2920 2913
2921 /* update the function call */
2922 ftrace_update_pid_func(); 2914 ftrace_update_pid_func();
2923 ftrace_startup_enable(0); 2915 ftrace_startup_enable(0);
2924 2916
2925 out:
2926 mutex_unlock(&ftrace_lock); 2917 mutex_unlock(&ftrace_lock);
2918}
2927 2919
2928 return cnt; 2920static void *fpid_start(struct seq_file *m, loff_t *pos)
2921{
2922 mutex_lock(&ftrace_lock);
2923
2924 if (list_empty(&ftrace_pids) && (!*pos))
2925 return (void *) 1;
2926
2927 return seq_list_start(&ftrace_pids, *pos);
2928}
2929
2930static void *fpid_next(struct seq_file *m, void *v, loff_t *pos)
2931{
2932 if (v == (void *)1)
2933 return NULL;
2934
2935 return seq_list_next(v, &ftrace_pids, pos);
2936}
2937
2938static void fpid_stop(struct seq_file *m, void *p)
2939{
2940 mutex_unlock(&ftrace_lock);
2941}
2942
2943static int fpid_show(struct seq_file *m, void *v)
2944{
2945 const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list);
2946
2947 if (v == (void *)1) {
2948 seq_printf(m, "no pid\n");
2949 return 0;
2950 }
2951
2952 if (fpid->pid == ftrace_swapper_pid)
2953 seq_printf(m, "swapper tasks\n");
2954 else
2955 seq_printf(m, "%u\n", pid_vnr(fpid->pid));
2956
2957 return 0;
2958}
2959
2960static const struct seq_operations ftrace_pid_sops = {
2961 .start = fpid_start,
2962 .next = fpid_next,
2963 .stop = fpid_stop,
2964 .show = fpid_show,
2965};
2966
2967static int
2968ftrace_pid_open(struct inode *inode, struct file *file)
2969{
2970 int ret = 0;
2971
2972 if ((file->f_mode & FMODE_WRITE) &&
2973 (file->f_flags & O_TRUNC))
2974 ftrace_pid_reset();
2975
2976 if (file->f_mode & FMODE_READ)
2977 ret = seq_open(file, &ftrace_pid_sops);
2978
2979 return ret;
2980}
2981
2982static ssize_t
2983ftrace_pid_write(struct file *filp, const char __user *ubuf,
2984 size_t cnt, loff_t *ppos)
2985{
2986 char buf[64], *tmp;
2987 long val;
2988 int ret;
2989
2990 if (cnt >= sizeof(buf))
2991 return -EINVAL;
2992
2993 if (copy_from_user(&buf, ubuf, cnt))
2994 return -EFAULT;
2995
2996 buf[cnt] = 0;
2997
2998 /*
2999 * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid"
3000 * to clean the filter quietly.
3001 */
3002 tmp = strstrip(buf);
3003 if (strlen(tmp) == 0)
3004 return 1;
3005
3006 ret = strict_strtol(tmp, 10, &val);
3007 if (ret < 0)
3008 return ret;
3009
3010 ret = ftrace_pid_add(val);
3011
3012 return ret ? ret : cnt;
3013}
3014
3015static int
3016ftrace_pid_release(struct inode *inode, struct file *file)
3017{
3018 if (file->f_mode & FMODE_READ)
3019 seq_release(inode, file);
3020
3021 return 0;
2929} 3022}
2930 3023
2931static const struct file_operations ftrace_pid_fops = { 3024static const struct file_operations ftrace_pid_fops = {
2932 .read = ftrace_pid_read, 3025 .open = ftrace_pid_open,
2933 .write = ftrace_pid_write, 3026 .write = ftrace_pid_write,
3027 .read = seq_read,
3028 .llseek = seq_lseek,
3029 .release = ftrace_pid_release,
2934}; 3030};
2935 3031
2936static __init int ftrace_init_debugfs(void) 3032static __init int ftrace_init_debugfs(void)
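
The pid-filtering rewrite above replaces the single ftrace_pid_trace pointer with the ftrace_pids list, so set_ftrace_pid now accumulates pids: each write adds one, an O_TRUNC open (or an empty write) clears the list, and reads go through the new seq_file operations. A hedged userspace sketch of that interaction, assuming debugfs is mounted at /sys/kernel/debug:

/* Hedged sketch: add two pids to the ftrace pid filter, then clear it. */
#include <stdio.h>

#define SET_FTRACE_PID "/sys/kernel/debug/tracing/set_ftrace_pid"

static void write_pid(const char *mode, const char *s)
{
	FILE *f = fopen(SET_FTRACE_PID, mode);

	if (!f) {
		perror(SET_FTRACE_PID);
		return;
	}
	fputs(s, f);
	fclose(f);
}

int main(void)
{
	write_pid("a", "1234\n");	/* append: add pid 1234 to the list */
	write_pid("a", "5678\n");	/* append: add a second pid */
	write_pid("w", "");		/* O_TRUNC open resets the whole list */
	return 0;
}
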
@@ -3258,6 +3354,7 @@ void ftrace_graph_init_task(struct task_struct *t)
3258{ 3354{
3259 /* Make sure we do not use the parent ret_stack */ 3355 /* Make sure we do not use the parent ret_stack */
3260 t->ret_stack = NULL; 3356 t->ret_stack = NULL;
3357 t->curr_ret_stack = -1;
3261 3358
3262 if (ftrace_graph_active) { 3359 if (ftrace_graph_active) {
3263 struct ftrace_ret_stack *ret_stack; 3360 struct ftrace_ret_stack *ret_stack;
@@ -3267,7 +3364,6 @@ void ftrace_graph_init_task(struct task_struct *t)
3267 GFP_KERNEL); 3364 GFP_KERNEL);
3268 if (!ret_stack) 3365 if (!ret_stack)
3269 return; 3366 return;
3270 t->curr_ret_stack = -1;
3271 atomic_set(&t->tracing_graph_pause, 0); 3367 atomic_set(&t->tracing_graph_pause, 0);
3272 atomic_set(&t->trace_overrun, 0); 3368 atomic_set(&t->trace_overrun, 0);
3273 t->ftrace_timestamp = 0; 3369 t->ftrace_timestamp = 0;
@@ -3293,4 +3389,3 @@ void ftrace_graph_stop(void)
3293 ftrace_stop(); 3389 ftrace_stop();
3294} 3390}
3295#endif 3391#endif
3296
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index e06c6e3d56a3..a22582a06161 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -9,12 +9,9 @@
9#include <linux/workqueue.h> 9#include <linux/workqueue.h>
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/slab.h>
13 12
14#define CREATE_TRACE_POINTS 13#define CREATE_TRACE_POINTS
15#include <trace/events/power.h> 14#include <trace/events/power.h>
16 15
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
18EXPORT_TRACEPOINT_SYMBOL_GPL(power_end);
19EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); 16EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
20 17
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5dd017fea6f5..41ca394feb22 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -14,12 +14,14 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/percpu.h> 15#include <linux/percpu.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/slab.h>
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/hash.h> 19#include <linux/hash.h>
19#include <linux/list.h> 20#include <linux/list.h>
20#include <linux/cpu.h> 21#include <linux/cpu.h>
21#include <linux/fs.h> 22#include <linux/fs.h>
22 23
24#include <asm/local.h>
23#include "trace.h" 25#include "trace.h"
24 26
25/* 27/*
@@ -206,6 +208,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
206#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 208#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
207#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ 209#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
208 210
211#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
212# define RB_FORCE_8BYTE_ALIGNMENT 0
213# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
214#else
215# define RB_FORCE_8BYTE_ALIGNMENT 1
216# define RB_ARCH_ALIGNMENT 8U
217#endif
218
209/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ 219/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
210#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX 220#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
211 221
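
The new RB_ARCH_ALIGNMENT above keeps ring-buffer event payloads at the usual 4-byte alignment where unaligned accesses are cheap, but forces 8-byte alignment on 64-bit architectures without CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. A hedged sketch of the round-up arithmetic, parameterized here for illustration (the real code uses the compile-time constant):

/* Hedged sketch: rounding an event length up to the arch alignment.
 * With a 4-byte alignment a 10-byte payload rounds to 12 bytes;
 * with the forced 8-byte alignment it rounds to 16 bytes. */
static unsigned rb_align_length(unsigned length, unsigned align)
{
	return (length + (align - 1)) & ~(align - 1);
}
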
@@ -397,18 +407,21 @@ int ring_buffer_print_page_header(struct trace_seq *s)
397 int ret; 407 int ret;
398 408
399 ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" 409 ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
400 "offset:0;\tsize:%u;\n", 410 "offset:0;\tsize:%u;\tsigned:%u;\n",
401 (unsigned int)sizeof(field.time_stamp)); 411 (unsigned int)sizeof(field.time_stamp),
412 (unsigned int)is_signed_type(u64));
402 413
403 ret = trace_seq_printf(s, "\tfield: local_t commit;\t" 414 ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
404 "offset:%u;\tsize:%u;\n", 415 "offset:%u;\tsize:%u;\tsigned:%u;\n",
405 (unsigned int)offsetof(typeof(field), commit), 416 (unsigned int)offsetof(typeof(field), commit),
406 (unsigned int)sizeof(field.commit)); 417 (unsigned int)sizeof(field.commit),
418 (unsigned int)is_signed_type(long));
407 419
408 ret = trace_seq_printf(s, "\tfield: char data;\t" 420 ret = trace_seq_printf(s, "\tfield: char data;\t"
409 "offset:%u;\tsize:%u;\n", 421 "offset:%u;\tsize:%u;\tsigned:%u;\n",
410 (unsigned int)offsetof(typeof(field), data), 422 (unsigned int)offsetof(typeof(field), data),
411 (unsigned int)BUF_PAGE_SIZE); 423 (unsigned int)BUF_PAGE_SIZE,
424 (unsigned int)is_signed_type(char));
412 425
413 return ret; 426 return ret;
414} 427}
@@ -420,7 +433,7 @@ struct ring_buffer_per_cpu {
420 int cpu; 433 int cpu;
421 struct ring_buffer *buffer; 434 struct ring_buffer *buffer;
422 spinlock_t reader_lock; /* serialize readers */ 435 spinlock_t reader_lock; /* serialize readers */
423 raw_spinlock_t lock; 436 arch_spinlock_t lock;
424 struct lock_class_key lock_key; 437 struct lock_class_key lock_key;
425 struct list_head *pages; 438 struct list_head *pages;
426 struct buffer_page *head_page; /* read from head */ 439 struct buffer_page *head_page; /* read from head */
@@ -461,6 +474,8 @@ struct ring_buffer_iter {
461 struct ring_buffer_per_cpu *cpu_buffer; 474 struct ring_buffer_per_cpu *cpu_buffer;
462 unsigned long head; 475 unsigned long head;
463 struct buffer_page *head_page; 476 struct buffer_page *head_page;
477 struct buffer_page *cache_reader_page;
478 unsigned long cache_read;
464 u64 read_stamp; 479 u64 read_stamp;
465}; 480};
466 481
@@ -995,7 +1010,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
995 cpu_buffer->buffer = buffer; 1010 cpu_buffer->buffer = buffer;
996 spin_lock_init(&cpu_buffer->reader_lock); 1011 spin_lock_init(&cpu_buffer->reader_lock);
997 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); 1012 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
998 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1013 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
999 1014
1000 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1015 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1001 GFP_KERNEL, cpu_to_node(cpu)); 1016 GFP_KERNEL, cpu_to_node(cpu));
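The raw_spinlock_t to arch_spinlock_t conversion that runs through this file (and through trace.c below) is a pure rename of the lowest-level lock type; the usage pattern is unchanged. A minimal kernel-style sketch of the post-rename idiom, with a hypothetical my_lock that is not part of this patch, looks like this — the caller still has to disable interrupts itself, since arch spinlocks do no irq handling of their own.

#include <linux/spinlock.h>

/* Hypothetical lock protecting some CPU-local state (not from this patch). */
static arch_spinlock_t my_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

static void my_critical_section(void)
{
	unsigned long flags;

	/* arch_spin_lock() neither disables irqs nor goes through lockdep. */
	local_irq_save(flags);
	arch_spin_lock(&my_lock);

	/* ... touch the protected state ... */

	arch_spin_unlock(&my_lock);
	local_irq_restore(flags);
}
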
@@ -1190,30 +1205,25 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1190 struct list_head *p; 1205 struct list_head *p;
1191 unsigned i; 1206 unsigned i;
1192 1207
1193 atomic_inc(&cpu_buffer->record_disabled);
1194 synchronize_sched();
1195
1196 spin_lock_irq(&cpu_buffer->reader_lock); 1208 spin_lock_irq(&cpu_buffer->reader_lock);
1197 rb_head_page_deactivate(cpu_buffer); 1209 rb_head_page_deactivate(cpu_buffer);
1198 1210
1199 for (i = 0; i < nr_pages; i++) { 1211 for (i = 0; i < nr_pages; i++) {
1200 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) 1212 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1201 return; 1213 goto out;
1202 p = cpu_buffer->pages->next; 1214 p = cpu_buffer->pages->next;
1203 bpage = list_entry(p, struct buffer_page, list); 1215 bpage = list_entry(p, struct buffer_page, list);
1204 list_del_init(&bpage->list); 1216 list_del_init(&bpage->list);
1205 free_buffer_page(bpage); 1217 free_buffer_page(bpage);
1206 } 1218 }
1207 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) 1219 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1208 return; 1220 goto out;
1209 1221
1210 rb_reset_cpu(cpu_buffer); 1222 rb_reset_cpu(cpu_buffer);
1211 spin_unlock_irq(&cpu_buffer->reader_lock);
1212
1213 rb_check_pages(cpu_buffer); 1223 rb_check_pages(cpu_buffer);
1214 1224
1215 atomic_dec(&cpu_buffer->record_disabled); 1225out:
1216 1226 spin_unlock_irq(&cpu_buffer->reader_lock);
1217} 1227}
1218 1228
1219static void 1229static void
@@ -1224,26 +1234,22 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1224 struct list_head *p; 1234 struct list_head *p;
1225 unsigned i; 1235 unsigned i;
1226 1236
1227 atomic_inc(&cpu_buffer->record_disabled);
1228 synchronize_sched();
1229
1230 spin_lock_irq(&cpu_buffer->reader_lock); 1237 spin_lock_irq(&cpu_buffer->reader_lock);
1231 rb_head_page_deactivate(cpu_buffer); 1238 rb_head_page_deactivate(cpu_buffer);
1232 1239
1233 for (i = 0; i < nr_pages; i++) { 1240 for (i = 0; i < nr_pages; i++) {
1234 if (RB_WARN_ON(cpu_buffer, list_empty(pages))) 1241 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
1235 return; 1242 goto out;
1236 p = pages->next; 1243 p = pages->next;
1237 bpage = list_entry(p, struct buffer_page, list); 1244 bpage = list_entry(p, struct buffer_page, list);
1238 list_del_init(&bpage->list); 1245 list_del_init(&bpage->list);
1239 list_add_tail(&bpage->list, cpu_buffer->pages); 1246 list_add_tail(&bpage->list, cpu_buffer->pages);
1240 } 1247 }
1241 rb_reset_cpu(cpu_buffer); 1248 rb_reset_cpu(cpu_buffer);
1242 spin_unlock_irq(&cpu_buffer->reader_lock);
1243
1244 rb_check_pages(cpu_buffer); 1249 rb_check_pages(cpu_buffer);
1245 1250
1246 atomic_dec(&cpu_buffer->record_disabled); 1251out:
1252 spin_unlock_irq(&cpu_buffer->reader_lock);
1247} 1253}
1248 1254
1249/** 1255/**
@@ -1251,11 +1257,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1251 * @buffer: the buffer to resize. 1257 * @buffer: the buffer to resize.
1252 * @size: the new size. 1258 * @size: the new size.
1253 * 1259 *
1254 * The tracer is responsible for making sure that the buffer is
1255 * not being used while changing the size.
1256 * Note: We may be able to change the above requirement by using
1257 * RCU synchronizations.
1258 *
1259 * Minimum size is 2 * BUF_PAGE_SIZE. 1260 * Minimum size is 2 * BUF_PAGE_SIZE.
1260 * 1261 *
1261 * Returns -1 on failure. 1262 * Returns -1 on failure.
@@ -1287,6 +1288,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1287 if (size == buffer_size) 1288 if (size == buffer_size)
1288 return size; 1289 return size;
1289 1290
1291 atomic_inc(&buffer->record_disabled);
1292
1293 /* Make sure all writers are done with this buffer. */
1294 synchronize_sched();
1295
1290 mutex_lock(&buffer->mutex); 1296 mutex_lock(&buffer->mutex);
1291 get_online_cpus(); 1297 get_online_cpus();
1292 1298
@@ -1349,6 +1355,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1349 put_online_cpus(); 1355 put_online_cpus();
1350 mutex_unlock(&buffer->mutex); 1356 mutex_unlock(&buffer->mutex);
1351 1357
1358 atomic_dec(&buffer->record_disabled);
1359
1352 return size; 1360 return size;
1353 1361
1354 free_pages: 1362 free_pages:
@@ -1358,6 +1366,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1358 } 1366 }
1359 put_online_cpus(); 1367 put_online_cpus();
1360 mutex_unlock(&buffer->mutex); 1368 mutex_unlock(&buffer->mutex);
1369 atomic_dec(&buffer->record_disabled);
1361 return -ENOMEM; 1370 return -ENOMEM;
1362 1371
1363 /* 1372 /*
@@ -1367,6 +1376,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1367 out_fail: 1376 out_fail:
1368 put_online_cpus(); 1377 put_online_cpus();
1369 mutex_unlock(&buffer->mutex); 1378 mutex_unlock(&buffer->mutex);
1379 atomic_dec(&buffer->record_disabled);
1370 return -1; 1380 return -1;
1371} 1381}
1372EXPORT_SYMBOL_GPL(ring_buffer_resize); 1382EXPORT_SYMBOL_GPL(ring_buffer_resize);
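The resize hunks above move the writer quiescing out of rb_remove_pages()/rb_insert_pages() and into ring_buffer_resize() itself, and make every error path re-enable recording. A heavily condensed sketch of the new ordering, using a hypothetical demo_buffer/demo_resize pair rather than the real struct ring_buffer:

#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <asm/atomic.h>

/* Hypothetical stand-in for struct ring_buffer (only the fields used here). */
struct demo_buffer {
	atomic_t	record_disabled;
	struct mutex	mutex;
};

static int demo_resize(struct demo_buffer *buffer)
{
	int ret = 0;

	/* Stop new writers and wait for in-flight writes to drain before
	 * touching the page lists. */
	atomic_inc(&buffer->record_disabled);
	synchronize_sched();

	mutex_lock(&buffer->mutex);

	/* ... add or remove buffer pages here; errors must fall through so
	 * the unlock and the atomic_dec() below always run ... */

	mutex_unlock(&buffer->mutex);

	/* Re-enable recording on every exit path, success or failure. */
	atomic_dec(&buffer->record_disabled);
	return ret;
}
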
@@ -1548,7 +1558,7 @@ rb_update_event(struct ring_buffer_event *event,
1548 1558
1549 case 0: 1559 case 0:
1550 length -= RB_EVNT_HDR_SIZE; 1560 length -= RB_EVNT_HDR_SIZE;
1551 if (length > RB_MAX_SMALL_DATA) 1561 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
1552 event->array[0] = length; 1562 event->array[0] = length;
1553 else 1563 else
1554 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); 1564 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
@@ -1723,11 +1733,11 @@ static unsigned rb_calculate_event_length(unsigned length)
1723 if (!length) 1733 if (!length)
1724 length = 1; 1734 length = 1;
1725 1735
1726 if (length > RB_MAX_SMALL_DATA) 1736 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
1727 length += sizeof(event.array[0]); 1737 length += sizeof(event.array[0]);
1728 1738
1729 length += RB_EVNT_HDR_SIZE; 1739 length += RB_EVNT_HDR_SIZE;
1730 length = ALIGN(length, RB_ALIGNMENT); 1740 length = ALIGN(length, RB_ARCH_ALIGNMENT);
1731 1741
1732 return length; 1742 return length;
1733} 1743}
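The RB_FORCE_8BYTE_ALIGNMENT/RB_ARCH_ALIGNMENT pair introduced earlier in this file only changes the arithmetic in rb_calculate_event_length(): on 64-bit machines without efficient unaligned access, every event gets a length word and is padded to an 8-byte boundary. A standalone user-space sketch of that arithmetic follows; RB_EVNT_HDR_SIZE and RB_MAX_SMALL_DATA are assumed values for the sketch, not copied from the kernel headers.

#include <stdio.h>

#define RB_ALIGNMENT		4U
#define RB_EVNT_HDR_SIZE	4U			/* assumed: one 32-bit header word */
#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * 28U)	/* assumed small-event limit */

/* Mirror of the new #if block: define FORCE_8BYTE to model a 64-bit arch
 * without efficient unaligned access. */
#ifdef FORCE_8BYTE
# define RB_FORCE_8BYTE_ALIGNMENT	1
# define RB_ARCH_ALIGNMENT		8U
#else
# define RB_FORCE_8BYTE_ALIGNMENT	0
# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
#endif

#define ALIGN_UP(x, a)	(((x) + ((a) - 1)) & ~((a) - 1))

static unsigned int calc_event_length(unsigned int length)
{
	if (!length)
		length = 1;
	/* Large payloads, or forced 8-byte alignment, store the size in array[0]. */
	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
		length += 4;			/* sizeof(event.array[0]) */
	length += RB_EVNT_HDR_SIZE;
	return ALIGN_UP(length, RB_ARCH_ALIGNMENT);
}

int main(void)
{
	unsigned int len;

	for (len = 1; len <= 16; len++)
		printf("payload %2u -> reserved %u\n", len, calc_event_length(len));
	return 0;
}
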
@@ -1787,9 +1797,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1787static struct ring_buffer_event * 1797static struct ring_buffer_event *
1788rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, 1798rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1789 unsigned long length, unsigned long tail, 1799 unsigned long length, unsigned long tail,
1790 struct buffer_page *commit_page,
1791 struct buffer_page *tail_page, u64 *ts) 1800 struct buffer_page *tail_page, u64 *ts)
1792{ 1801{
1802 struct buffer_page *commit_page = cpu_buffer->commit_page;
1793 struct ring_buffer *buffer = cpu_buffer->buffer; 1803 struct ring_buffer *buffer = cpu_buffer->buffer;
1794 struct buffer_page *next_page; 1804 struct buffer_page *next_page;
1795 int ret; 1805 int ret;
@@ -1892,13 +1902,10 @@ static struct ring_buffer_event *
1892__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 1902__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1893 unsigned type, unsigned long length, u64 *ts) 1903 unsigned type, unsigned long length, u64 *ts)
1894{ 1904{
1895 struct buffer_page *tail_page, *commit_page; 1905 struct buffer_page *tail_page;
1896 struct ring_buffer_event *event; 1906 struct ring_buffer_event *event;
1897 unsigned long tail, write; 1907 unsigned long tail, write;
1898 1908
1899 commit_page = cpu_buffer->commit_page;
1900 /* we just need to protect against interrupts */
1901 barrier();
1902 tail_page = cpu_buffer->tail_page; 1909 tail_page = cpu_buffer->tail_page;
1903 write = local_add_return(length, &tail_page->write); 1910 write = local_add_return(length, &tail_page->write);
1904 1911
@@ -1909,7 +1916,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1909 /* See if we shot pass the end of this buffer page */ 1916 /* See if we shot pass the end of this buffer page */
1910 if (write > BUF_PAGE_SIZE) 1917 if (write > BUF_PAGE_SIZE)
1911 return rb_move_tail(cpu_buffer, length, tail, 1918 return rb_move_tail(cpu_buffer, length, tail,
1912 commit_page, tail_page, ts); 1919 tail_page, ts);
1913 1920
1914 /* We reserved something on the buffer */ 1921 /* We reserved something on the buffer */
1915 1922
@@ -2237,12 +2244,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2237 if (ring_buffer_flags != RB_BUFFERS_ON) 2244 if (ring_buffer_flags != RB_BUFFERS_ON)
2238 return NULL; 2245 return NULL;
2239 2246
2240 if (atomic_read(&buffer->record_disabled))
2241 return NULL;
2242
2243 /* If we are tracing schedule, we don't want to recurse */ 2247 /* If we are tracing schedule, we don't want to recurse */
2244 resched = ftrace_preempt_disable(); 2248 resched = ftrace_preempt_disable();
2245 2249
2250 if (atomic_read(&buffer->record_disabled))
2251 goto out_nocheck;
2252
2246 if (trace_recursive_lock()) 2253 if (trace_recursive_lock())
2247 goto out_nocheck; 2254 goto out_nocheck;
2248 2255
@@ -2474,11 +2481,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
2474 if (ring_buffer_flags != RB_BUFFERS_ON) 2481 if (ring_buffer_flags != RB_BUFFERS_ON)
2475 return -EBUSY; 2482 return -EBUSY;
2476 2483
2477 if (atomic_read(&buffer->record_disabled))
2478 return -EBUSY;
2479
2480 resched = ftrace_preempt_disable(); 2484 resched = ftrace_preempt_disable();
2481 2485
2486 if (atomic_read(&buffer->record_disabled))
2487 goto out;
2488
2482 cpu = raw_smp_processor_id(); 2489 cpu = raw_smp_processor_id();
2483 2490
2484 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2491 if (!cpumask_test_cpu(cpu, buffer->cpumask))
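The two hunks above move the record_disabled test from before to after the preempt-disable in the write paths. The point is ordering: a resize that does atomic_inc(&buffer->record_disabled) followed by synchronize_sched() can only rely on that to flush out writers if the writers read the flag inside a preempt-disabled region. A self-contained sketch of a writer following that rule (demo_wbuffer/demo_write are made-up names, and plain preempt_disable() stands in for ftrace_preempt_disable()):

#include <linux/preempt.h>
#include <linux/errno.h>
#include <asm/atomic.h>

struct demo_wbuffer {
	atomic_t	record_disabled;
};

static int demo_write(struct demo_wbuffer *buffer)
{
	int ret = -EBUSY;

	preempt_disable();		/* acts as an sched-RCU read-side section */

	/* Checked only after preemption is off, so synchronize_sched() in a
	 * concurrent resize is guaranteed to wait for this writer. */
	if (atomic_read(&buffer->record_disabled))
		goto out;

	/* ... reserve and commit the event on this CPU ... */
	ret = 0;
 out:
	preempt_enable();
	return ret;
}
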
@@ -2546,7 +2553,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
2546 * @buffer: The ring buffer to enable writes 2553 * @buffer: The ring buffer to enable writes
2547 * 2554 *
2548 * Note, multiple disables will need the same number of enables 2555 * Note, multiple disables will need the same number of enables
2549 * to truely enable the writing (much like preempt_disable). 2556 * to truly enable the writing (much like preempt_disable).
2550 */ 2557 */
2551void ring_buffer_record_enable(struct ring_buffer *buffer) 2558void ring_buffer_record_enable(struct ring_buffer *buffer)
2552{ 2559{
@@ -2582,7 +2589,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
2582 * @cpu: The CPU to enable. 2589 * @cpu: The CPU to enable.
2583 * 2590 *
2584 * Note, multiple disables will need the same number of enables 2591 * Note, multiple disables will need the same number of enables
2585 * to truely enable the writing (much like preempt_disable). 2592 * to truly enable the writing (much like preempt_disable).
2586 */ 2593 */
2587void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) 2594void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2588{ 2595{
@@ -2723,6 +2730,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2723 iter->read_stamp = cpu_buffer->read_stamp; 2730 iter->read_stamp = cpu_buffer->read_stamp;
2724 else 2731 else
2725 iter->read_stamp = iter->head_page->page->time_stamp; 2732 iter->read_stamp = iter->head_page->page->time_stamp;
2733 iter->cache_reader_page = cpu_buffer->reader_page;
2734 iter->cache_read = cpu_buffer->read;
2726} 2735}
2727 2736
2728/** 2737/**
@@ -2834,7 +2843,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2834 int ret; 2843 int ret;
2835 2844
2836 local_irq_save(flags); 2845 local_irq_save(flags);
2837 __raw_spin_lock(&cpu_buffer->lock); 2846 arch_spin_lock(&cpu_buffer->lock);
2838 2847
2839 again: 2848 again:
2840 /* 2849 /*
@@ -2876,7 +2885,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2876 * Splice the empty reader page into the list around the head. 2885 * Splice the empty reader page into the list around the head.
2877 */ 2886 */
2878 reader = rb_set_head_page(cpu_buffer); 2887 reader = rb_set_head_page(cpu_buffer);
2879 cpu_buffer->reader_page->list.next = reader->list.next; 2888 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
2880 cpu_buffer->reader_page->list.prev = reader->list.prev; 2889 cpu_buffer->reader_page->list.prev = reader->list.prev;
2881 2890
2882 /* 2891 /*
@@ -2913,7 +2922,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2913 * 2922 *
2914 * Now make the new head point back to the reader page. 2923 * Now make the new head point back to the reader page.
2915 */ 2924 */
2916 reader->list.next->prev = &cpu_buffer->reader_page->list; 2925 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
2917 rb_inc_page(cpu_buffer, &cpu_buffer->head_page); 2926 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
2918 2927
2919 /* Finally update the reader page to the new head */ 2928 /* Finally update the reader page to the new head */
@@ -2923,7 +2932,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2923 goto again; 2932 goto again;
2924 2933
2925 out: 2934 out:
2926 __raw_spin_unlock(&cpu_buffer->lock); 2935 arch_spin_unlock(&cpu_buffer->lock);
2927 local_irq_restore(flags); 2936 local_irq_restore(flags);
2928 2937
2929 return reader; 2938 return reader;
@@ -3067,13 +3076,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3067 struct ring_buffer_event *event; 3076 struct ring_buffer_event *event;
3068 int nr_loops = 0; 3077 int nr_loops = 0;
3069 3078
3070 if (ring_buffer_iter_empty(iter))
3071 return NULL;
3072
3073 cpu_buffer = iter->cpu_buffer; 3079 cpu_buffer = iter->cpu_buffer;
3074 buffer = cpu_buffer->buffer; 3080 buffer = cpu_buffer->buffer;
3075 3081
3082 /*
3083 * Check if someone performed a consuming read to
3084 * the buffer. A consuming read invalidates the iterator
3085 * and we need to reset the iterator in this case.
3086 */
3087 if (unlikely(iter->cache_read != cpu_buffer->read ||
3088 iter->cache_reader_page != cpu_buffer->reader_page))
3089 rb_iter_reset(iter);
3090
3076 again: 3091 again:
3092 if (ring_buffer_iter_empty(iter))
3093 return NULL;
3094
3077 /* 3095 /*
3078 * We repeat when a timestamp is encountered. 3096 * We repeat when a timestamp is encountered.
3079 * We can get multiple timestamps by nested interrupts or also 3097 * We can get multiple timestamps by nested interrupts or also
@@ -3088,6 +3106,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3088 if (rb_per_cpu_empty(cpu_buffer)) 3106 if (rb_per_cpu_empty(cpu_buffer))
3089 return NULL; 3107 return NULL;
3090 3108
3109 if (iter->head >= local_read(&iter->head_page->page->commit)) {
3110 rb_inc_iter(iter);
3111 goto again;
3112 }
3113
3091 event = rb_iter_head_event(iter); 3114 event = rb_iter_head_event(iter);
3092 3115
3093 switch (event->type_len) { 3116 switch (event->type_len) {
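The cache_reader_page/cache_read fields added to struct ring_buffer_iter, together with the check at the top of rb_iter_peek(), let the iterator notice that a consuming read has pulled pages out from under it and reset itself instead of walking recycled pages. A stripped-down, self-contained sketch of the idea (the demo_* names are hypothetical):

/* Stand-ins for the real per-cpu buffer and iterator structures. */
struct demo_cpu_buffer {
	void		*reader_page;
	unsigned long	read;		/* count of consumed events */
};

struct demo_iter {
	struct demo_cpu_buffer	*cpu_buffer;
	void			*cache_reader_page;
	unsigned long		cache_read;
};

/* Reset path: remember the reader state the iterator was built against. */
static void demo_iter_snapshot(struct demo_iter *iter)
{
	iter->cache_reader_page = iter->cpu_buffer->reader_page;
	iter->cache_read = iter->cpu_buffer->read;
}

/* Peek path: if a consuming read happened since the snapshot, the cached
 * head page may no longer be valid, so the caller must reset and restart. */
static int demo_iter_is_stale(const struct demo_iter *iter)
{
	return iter->cache_read != iter->cpu_buffer->read ||
	       iter->cache_reader_page != iter->cpu_buffer->reader_page;
}
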
@@ -3286,9 +3309,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
3286 synchronize_sched(); 3309 synchronize_sched();
3287 3310
3288 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3311 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3289 __raw_spin_lock(&cpu_buffer->lock); 3312 arch_spin_lock(&cpu_buffer->lock);
3290 rb_iter_reset(iter); 3313 rb_iter_reset(iter);
3291 __raw_spin_unlock(&cpu_buffer->lock); 3314 arch_spin_unlock(&cpu_buffer->lock);
3292 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3315 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3293 3316
3294 return iter; 3317 return iter;
@@ -3408,11 +3431,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
3408 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) 3431 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3409 goto out; 3432 goto out;
3410 3433
3411 __raw_spin_lock(&cpu_buffer->lock); 3434 arch_spin_lock(&cpu_buffer->lock);
3412 3435
3413 rb_reset_cpu(cpu_buffer); 3436 rb_reset_cpu(cpu_buffer);
3414 3437
3415 __raw_spin_unlock(&cpu_buffer->lock); 3438 arch_spin_unlock(&cpu_buffer->lock);
3416 3439
3417 out: 3440 out:
3418 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3441 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 573d3cc762c3..df74c7982255 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -8,6 +8,7 @@
8#include <linux/kthread.h> 8#include <linux/kthread.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <asm/local.h>
11 12
12struct rb_page { 13struct rb_page {
13 u64 ts; 14 u64 ts;
@@ -35,6 +36,28 @@ static int disable_reader;
35module_param(disable_reader, uint, 0644); 36module_param(disable_reader, uint, 0644);
36MODULE_PARM_DESC(disable_reader, "only run producer"); 37MODULE_PARM_DESC(disable_reader, "only run producer");
37 38
39static int write_iteration = 50;
40module_param(write_iteration, uint, 0644);
41MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings");
42
43static int producer_nice = 19;
44static int consumer_nice = 19;
45
46static int producer_fifo = -1;
47static int consumer_fifo = -1;
48
49module_param(producer_nice, uint, 0644);
50MODULE_PARM_DESC(producer_nice, "nice prio for producer");
51
52module_param(consumer_nice, uint, 0644);
53MODULE_PARM_DESC(consumer_nice, "nice prio for consumer");
54
55module_param(producer_fifo, uint, 0644);
56MODULE_PARM_DESC(producer_fifo, "fifo prio for producer");
57
58module_param(consumer_fifo, uint, 0644);
59MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer");
60
38static int read_events; 61static int read_events;
39 62
40static int kill_test; 63static int kill_test;
@@ -208,15 +231,18 @@ static void ring_buffer_producer(void)
208 do { 231 do {
209 struct ring_buffer_event *event; 232 struct ring_buffer_event *event;
210 int *entry; 233 int *entry;
211 234 int i;
212 event = ring_buffer_lock_reserve(buffer, 10); 235
213 if (!event) { 236 for (i = 0; i < write_iteration; i++) {
214 missed++; 237 event = ring_buffer_lock_reserve(buffer, 10);
215 } else { 238 if (!event) {
216 hit++; 239 missed++;
217 entry = ring_buffer_event_data(event); 240 } else {
218 *entry = smp_processor_id(); 241 hit++;
219 ring_buffer_unlock_commit(buffer, event); 242 entry = ring_buffer_event_data(event);
243 *entry = smp_processor_id();
244 ring_buffer_unlock_commit(buffer, event);
245 }
220 } 246 }
221 do_gettimeofday(&end_tv); 247 do_gettimeofday(&end_tv);
222 248
@@ -263,6 +289,27 @@ static void ring_buffer_producer(void)
263 289
264 if (kill_test) 290 if (kill_test)
265 trace_printk("ERROR!\n"); 291 trace_printk("ERROR!\n");
292
293 if (!disable_reader) {
294 if (consumer_fifo < 0)
295 trace_printk("Running Consumer at nice: %d\n",
296 consumer_nice);
297 else
298 trace_printk("Running Consumer at SCHED_FIFO %d\n",
299 consumer_fifo);
300 }
301 if (producer_fifo < 0)
302 trace_printk("Running Producer at nice: %d\n",
303 producer_nice);
304 else
305 trace_printk("Running Producer at SCHED_FIFO %d\n",
306 producer_fifo);
307
308 /* Let the user know that the test is running at low priority */
309 if (producer_fifo < 0 && consumer_fifo < 0 &&
310 producer_nice == 19 && consumer_nice == 19)
311 trace_printk("WARNING!!! This test is running at lowest priority.\n");
312
266 trace_printk("Time: %lld (usecs)\n", time); 313 trace_printk("Time: %lld (usecs)\n", time);
267 trace_printk("Overruns: %lld\n", overruns); 314 trace_printk("Overruns: %lld\n", overruns);
268 if (disable_reader) 315 if (disable_reader)
@@ -392,6 +439,27 @@ static int __init ring_buffer_benchmark_init(void)
392 if (IS_ERR(producer)) 439 if (IS_ERR(producer))
393 goto out_kill; 440 goto out_kill;
394 441
442 /*
443 * Run them as low-prio background tasks by default:
444 */
445 if (!disable_reader) {
446 if (consumer_fifo >= 0) {
447 struct sched_param param = {
448 .sched_priority = consumer_fifo
449 };
450 sched_setscheduler(consumer, SCHED_FIFO, &param);
451 } else
452 set_user_nice(consumer, consumer_nice);
453 }
454
455 if (producer_fifo >= 0) {
456 struct sched_param param = {
457 .sched_priority = producer_fifo
458 };
459 sched_setscheduler(producer, SCHED_FIFO, &param);
460 } else
461 set_user_nice(producer, producer_nice);
462
395 return 0; 463 return 0;
396 464
397 out_kill: 465 out_kill:
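Giving a kernel thread either a SCHED_FIFO priority or a nice level, as the benchmark init code now does for its producer and consumer, is the usual sched_setscheduler()/set_user_nice() pattern. A small kernel-style sketch with a hypothetical helper (demo_thread_setup is not part of the patch):

#include <linux/sched.h>

/* Apply a FIFO priority when fifo >= 0, otherwise fall back to a nice level. */
static void demo_thread_setup(struct task_struct *tsk, int fifo, int nice)
{
	if (fifo >= 0) {
		struct sched_param param = {
			.sched_priority = fifo,
		};
		sched_setscheduler(tsk, SCHED_FIFO, &param);
	} else {
		set_user_nice(tsk, nice);
	}
}

Calling it as demo_thread_setup(producer, producer_fifo, producer_nice) and demo_thread_setup(consumer, consumer_fifo, consumer_nice) would collapse the two near-identical blocks above into one place.
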
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b20d3ec75de9..44f916a04065 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -12,7 +12,7 @@
12 * Copyright (C) 2004 William Lee Irwin III 12 * Copyright (C) 2004 William Lee Irwin III
13 */ 13 */
14#include <linux/ring_buffer.h> 14#include <linux/ring_buffer.h>
15#include <linux/utsrelease.h> 15#include <generated/utsrelease.h>
16#include <linux/stacktrace.h> 16#include <linux/stacktrace.h>
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
@@ -32,10 +32,11 @@
32#include <linux/splice.h> 32#include <linux/splice.h>
33#include <linux/kdebug.h> 33#include <linux/kdebug.h>
34#include <linux/string.h> 34#include <linux/string.h>
35#include <linux/rwsem.h>
36#include <linux/slab.h>
35#include <linux/ctype.h> 37#include <linux/ctype.h>
36#include <linux/init.h> 38#include <linux/init.h>
37#include <linux/poll.h> 39#include <linux/poll.h>
38#include <linux/gfp.h>
39#include <linux/fs.h> 40#include <linux/fs.h>
40 41
41#include "trace.h" 42#include "trace.h"
@@ -86,25 +87,22 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
86 */ 87 */
87static int tracing_disabled = 1; 88static int tracing_disabled = 1;
88 89
89DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 90DEFINE_PER_CPU(int, ftrace_cpu_disabled);
90 91
91static inline void ftrace_disable_cpu(void) 92static inline void ftrace_disable_cpu(void)
92{ 93{
93 preempt_disable(); 94 preempt_disable();
94 local_inc(&__get_cpu_var(ftrace_cpu_disabled)); 95 __this_cpu_inc(ftrace_cpu_disabled);
95} 96}
96 97
97static inline void ftrace_enable_cpu(void) 98static inline void ftrace_enable_cpu(void)
98{ 99{
99 local_dec(&__get_cpu_var(ftrace_cpu_disabled)); 100 __this_cpu_dec(ftrace_cpu_disabled);
100 preempt_enable(); 101 preempt_enable();
101} 102}
102 103
103static cpumask_var_t __read_mostly tracing_buffer_mask; 104static cpumask_var_t __read_mostly tracing_buffer_mask;
104 105
105/* Define which cpu buffers are currently read in trace_pipe */
106static cpumask_var_t tracing_reader_cpumask;
107
108#define for_each_tracing_cpu(cpu) \ 106#define for_each_tracing_cpu(cpu) \
109 for_each_cpu(cpu, tracing_buffer_mask) 107 for_each_cpu(cpu, tracing_buffer_mask)
110 108
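ftrace_cpu_disabled changes from a per-cpu local_t manipulated with local_inc()/local_dec() to a plain per-cpu int updated with __this_cpu_inc()/__this_cpu_dec(); this is safe because the counter is only ever touched with preemption disabled. The same idiom in isolation, with a hypothetical demo_disabled counter:

#include <linux/percpu.h>
#include <linux/types.h>

/* A plain per-cpu int is enough once updates are preemption-protected. */
static DEFINE_PER_CPU(int, demo_disabled);

static inline void demo_disable_this_cpu(void)
{
	preempt_disable();			/* pin to the current CPU */
	__this_cpu_inc(demo_disabled);		/* non-atomic, CPU-local update */
}

static inline void demo_enable_this_cpu(void)
{
	__this_cpu_dec(demo_disabled);
	preempt_enable();
}

static inline bool demo_this_cpu_disabled(void)
{
	return __this_cpu_read(demo_disabled) != 0;
}
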
@@ -129,7 +127,7 @@ static int tracing_set_tracer(const char *buf);
129static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; 127static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
130static char *default_bootup_tracer; 128static char *default_bootup_tracer;
131 129
132static int __init set_ftrace(char *str) 130static int __init set_cmdline_ftrace(char *str)
133{ 131{
134 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); 132 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
135 default_bootup_tracer = bootup_tracer_buf; 133 default_bootup_tracer = bootup_tracer_buf;
@@ -137,7 +135,7 @@ static int __init set_ftrace(char *str)
137 ring_buffer_expanded = 1; 135 ring_buffer_expanded = 1;
138 return 1; 136 return 1;
139} 137}
140__setup("ftrace=", set_ftrace); 138__setup("ftrace=", set_cmdline_ftrace);
141 139
142static int __init set_ftrace_dump_on_oops(char *str) 140static int __init set_ftrace_dump_on_oops(char *str)
143{ 141{
@@ -203,7 +201,7 @@ cycle_t ftrace_now(int cpu)
203 */ 201 */
204static struct trace_array max_tr; 202static struct trace_array max_tr;
205 203
206static DEFINE_PER_CPU(struct trace_array_cpu, max_data); 204static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
207 205
208/* tracer_enabled is used to toggle activation of a tracer */ 206/* tracer_enabled is used to toggle activation of a tracer */
209static int tracer_enabled = 1; 207static int tracer_enabled = 1;
@@ -243,12 +241,91 @@ static struct tracer *current_trace __read_mostly;
243 241
244/* 242/*
245 * trace_types_lock is used to protect the trace_types list. 243 * trace_types_lock is used to protect the trace_types list.
246 * This lock is also used to keep user access serialized.
247 * Accesses from userspace will grab this lock while userspace
248 * activities happen inside the kernel.
249 */ 244 */
250static DEFINE_MUTEX(trace_types_lock); 245static DEFINE_MUTEX(trace_types_lock);
251 246
247/*
248 * serialize the access of the ring buffer
249 *
250 * The ring buffer serializes readers, but that is only low-level protection.
251 * The validity of the events (returned by ring_buffer_peek() etc.)
252 * is not protected by the ring buffer.
253 *
254 * The content of events may become garbage if we allow other processes to
255 * consume these events concurrently:
256 * A) the page holding the consumed events may become a normal page
257 * (not a reader page) in the ring buffer, and this page will be rewritten
258 * by the event producer.
259 * B) The page holding the consumed events may become a page used for
260 * splice_read, and this page will be returned to the system.
261 *
262 * These primitives allow multiple processes to access different cpu ring
263 * buffers concurrently.
264 *
265 * These primitives don't distinguish read-only from read-consume access.
266 * Multiple read-only accesses are also serialized.
267 */
268
269#ifdef CONFIG_SMP
270static DECLARE_RWSEM(all_cpu_access_lock);
271static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
272
273static inline void trace_access_lock(int cpu)
274{
275 if (cpu == TRACE_PIPE_ALL_CPU) {
276 /* gain it for accessing the whole ring buffer. */
277 down_write(&all_cpu_access_lock);
278 } else {
279 /* gain it for accessing a cpu ring buffer. */
280
281 /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
282 down_read(&all_cpu_access_lock);
283
284 /* Secondly block other access to this @cpu ring buffer. */
285 mutex_lock(&per_cpu(cpu_access_lock, cpu));
286 }
287}
288
289static inline void trace_access_unlock(int cpu)
290{
291 if (cpu == TRACE_PIPE_ALL_CPU) {
292 up_write(&all_cpu_access_lock);
293 } else {
294 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
295 up_read(&all_cpu_access_lock);
296 }
297}
298
299static inline void trace_access_lock_init(void)
300{
301 int cpu;
302
303 for_each_possible_cpu(cpu)
304 mutex_init(&per_cpu(cpu_access_lock, cpu));
305}
306
307#else
308
309static DEFINE_MUTEX(access_lock);
310
311static inline void trace_access_lock(int cpu)
312{
313 (void)cpu;
314 mutex_lock(&access_lock);
315}
316
317static inline void trace_access_unlock(int cpu)
318{
319 (void)cpu;
320 mutex_unlock(&access_lock);
321}
322
323static inline void trace_access_lock_init(void)
324{
325}
326
327#endif
328
252/* trace_wait is a waitqueue for tasks blocked on trace_poll */ 329/* trace_wait is a waitqueue for tasks blocked on trace_poll */
253static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 330static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
254 331
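From the readers' side the new primitives are used bracket-style: per-cpu readers take trace_access_lock(cpu), whole-buffer readers take trace_access_lock(TRACE_PIPE_ALL_CPU), and the SMP implementation above turns that into a read vs. write acquisition of all_cpu_access_lock plus a per-cpu mutex. A usage sketch as it would sit inside trace.c (the demo_* function names are hypothetical; the later hunks wire the real call sites into tracing_read_pipe(), the splice path and s_start()/s_stop()):

/* Per-cpu reader: serialized against other readers of this cpu and
 * against any whole-buffer reader. */
static void demo_read_one_cpu(int cpu)
{
	trace_access_lock(cpu);

	/* ... ring_buffer_peek()/ring_buffer_consume() for this cpu ... */

	trace_access_unlock(cpu);
}

/* Whole-buffer reader: excludes every per-cpu reader at once by taking
 * all_cpu_access_lock for writing (on SMP). */
static void demo_read_all_cpus(void)
{
	trace_access_lock(TRACE_PIPE_ALL_CPU);

	/* ... iterate over all cpu buffers ... */

	trace_access_unlock(TRACE_PIPE_ALL_CPU);
}
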
@@ -297,6 +374,21 @@ static int __init set_buf_size(char *str)
297} 374}
298__setup("trace_buf_size=", set_buf_size); 375__setup("trace_buf_size=", set_buf_size);
299 376
377static int __init set_tracing_thresh(char *str)
378{
379 unsigned long threshhold;
380 int ret;
381
382 if (!str)
383 return 0;
384 ret = strict_strtoul(str, 0, &threshhold);
385 if (ret < 0)
386 return 0;
387 tracing_thresh = threshhold * 1000;
388 return 1;
389}
390__setup("tracing_thresh=", set_tracing_thresh);
391
300unsigned long nsecs_to_usecs(unsigned long nsecs) 392unsigned long nsecs_to_usecs(unsigned long nsecs)
301{ 393{
302 return nsecs / 1000; 394 return nsecs / 1000;
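The new tracing_thresh= handler is a standard __setup() boot-parameter parser: the value appears to be taken in microseconds and stored internally in nanoseconds (hence the * 1000). An analogous, self-contained sketch with a hypothetical demo_thresh= parameter:

#include <linux/kernel.h>
#include <linux/init.h>

static unsigned long demo_thresh_ns;

/* Parse "demo_thresh=<usecs>" from the kernel command line. */
static int __init set_demo_thresh(char *str)
{
	unsigned long usecs;

	if (!str)
		return 0;
	if (strict_strtoul(str, 0, &usecs) < 0)
		return 0;			/* malformed value: ignore it */
	demo_thresh_ns = usecs * 1000;		/* keep nanoseconds internally */
	return 1;				/* parameter consumed */
}
__setup("demo_thresh=", set_demo_thresh);

With the real handler above, booting with tracing_thresh=100 should arm a 100-microsecond threshold before any tracer is registered.
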
@@ -313,7 +405,6 @@ static const char *trace_options[] = {
313 "bin", 405 "bin",
314 "block", 406 "block",
315 "stacktrace", 407 "stacktrace",
316 "sched-tree",
317 "trace_printk", 408 "trace_printk",
318 "ftrace_preempt", 409 "ftrace_preempt",
319 "branch", 410 "branch",
@@ -493,19 +584,20 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
493 * protected by per_cpu spinlocks. But the action of the swap 584 * protected by per_cpu spinlocks. But the action of the swap
494 * needs its own lock. 585 * needs its own lock.
495 * 586 *
496 * This is defined as a raw_spinlock_t in order to help 587 * This is defined as a arch_spinlock_t in order to help
497 * with performance when lockdep debugging is enabled. 588 * with performance when lockdep debugging is enabled.
498 * 589 *
499 * It is also used in other places outside the update_max_tr 590 * It is also used in other places outside the update_max_tr
500 * so it needs to be defined outside of the 591 * so it needs to be defined outside of the
501 * CONFIG_TRACER_MAX_TRACE. 592 * CONFIG_TRACER_MAX_TRACE.
502 */ 593 */
503static raw_spinlock_t ftrace_max_lock = 594static arch_spinlock_t ftrace_max_lock =
504 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 595 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
596
597unsigned long __read_mostly tracing_thresh;
505 598
506#ifdef CONFIG_TRACER_MAX_TRACE 599#ifdef CONFIG_TRACER_MAX_TRACE
507unsigned long __read_mostly tracing_max_latency; 600unsigned long __read_mostly tracing_max_latency;
508unsigned long __read_mostly tracing_thresh;
509 601
510/* 602/*
511 * Copy the new maximum trace into the separate maximum-trace 603 * Copy the new maximum trace into the separate maximum-trace
@@ -516,7 +608,7 @@ static void
516__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 608__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
517{ 609{
518 struct trace_array_cpu *data = tr->data[cpu]; 610 struct trace_array_cpu *data = tr->data[cpu];
519 struct trace_array_cpu *max_data = tr->data[cpu]; 611 struct trace_array_cpu *max_data;
520 612
521 max_tr.cpu = cpu; 613 max_tr.cpu = cpu;
522 max_tr.time_start = data->preempt_timestamp; 614 max_tr.time_start = data->preempt_timestamp;
@@ -526,7 +618,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
526 max_data->critical_start = data->critical_start; 618 max_data->critical_start = data->critical_start;
527 max_data->critical_end = data->critical_end; 619 max_data->critical_end = data->critical_end;
528 620
529 memcpy(data->comm, tsk->comm, TASK_COMM_LEN); 621 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
530 max_data->pid = tsk->pid; 622 max_data->pid = tsk->pid;
531 max_data->uid = task_uid(tsk); 623 max_data->uid = task_uid(tsk);
532 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; 624 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
@@ -555,13 +647,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
555 return; 647 return;
556 648
557 WARN_ON_ONCE(!irqs_disabled()); 649 WARN_ON_ONCE(!irqs_disabled());
558 __raw_spin_lock(&ftrace_max_lock); 650 arch_spin_lock(&ftrace_max_lock);
559 651
560 tr->buffer = max_tr.buffer; 652 tr->buffer = max_tr.buffer;
561 max_tr.buffer = buf; 653 max_tr.buffer = buf;
562 654
563 __update_max_tr(tr, tsk, cpu); 655 __update_max_tr(tr, tsk, cpu);
564 __raw_spin_unlock(&ftrace_max_lock); 656 arch_spin_unlock(&ftrace_max_lock);
565} 657}
566 658
567/** 659/**
@@ -581,7 +673,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
581 return; 673 return;
582 674
583 WARN_ON_ONCE(!irqs_disabled()); 675 WARN_ON_ONCE(!irqs_disabled());
584 __raw_spin_lock(&ftrace_max_lock); 676 arch_spin_lock(&ftrace_max_lock);
585 677
586 ftrace_disable_cpu(); 678 ftrace_disable_cpu();
587 679
@@ -603,7 +695,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
603 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 695 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
604 696
605 __update_max_tr(tr, tsk, cpu); 697 __update_max_tr(tr, tsk, cpu);
606 __raw_spin_unlock(&ftrace_max_lock); 698 arch_spin_unlock(&ftrace_max_lock);
607} 699}
608#endif /* CONFIG_TRACER_MAX_TRACE */ 700#endif /* CONFIG_TRACER_MAX_TRACE */
609 701
@@ -748,10 +840,10 @@ out:
748 mutex_unlock(&trace_types_lock); 840 mutex_unlock(&trace_types_lock);
749} 841}
750 842
751static void __tracing_reset(struct trace_array *tr, int cpu) 843static void __tracing_reset(struct ring_buffer *buffer, int cpu)
752{ 844{
753 ftrace_disable_cpu(); 845 ftrace_disable_cpu();
754 ring_buffer_reset_cpu(tr->buffer, cpu); 846 ring_buffer_reset_cpu(buffer, cpu);
755 ftrace_enable_cpu(); 847 ftrace_enable_cpu();
756} 848}
757 849
@@ -763,7 +855,7 @@ void tracing_reset(struct trace_array *tr, int cpu)
763 855
764 /* Make sure all commits have finished */ 856 /* Make sure all commits have finished */
765 synchronize_sched(); 857 synchronize_sched();
766 __tracing_reset(tr, cpu); 858 __tracing_reset(buffer, cpu);
767 859
768 ring_buffer_record_enable(buffer); 860 ring_buffer_record_enable(buffer);
769} 861}
@@ -781,7 +873,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
781 tr->time_start = ftrace_now(tr->cpu); 873 tr->time_start = ftrace_now(tr->cpu);
782 874
783 for_each_online_cpu(cpu) 875 for_each_online_cpu(cpu)
784 __tracing_reset(tr, cpu); 876 __tracing_reset(buffer, cpu);
785 877
786 ring_buffer_record_enable(buffer); 878 ring_buffer_record_enable(buffer);
787} 879}
@@ -802,7 +894,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
802static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 894static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
803static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; 895static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
804static int cmdline_idx; 896static int cmdline_idx;
805static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; 897static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
806 898
807/* temporary disable recording */ 899/* temporary disable recording */
808static atomic_t trace_record_cmdline_disabled __read_mostly; 900static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -858,6 +950,8 @@ void tracing_start(void)
858 goto out; 950 goto out;
859 } 951 }
860 952
953 /* Prevent the buffers from switching */
954 arch_spin_lock(&ftrace_max_lock);
861 955
862 buffer = global_trace.buffer; 956 buffer = global_trace.buffer;
863 if (buffer) 957 if (buffer)
@@ -867,6 +961,8 @@ void tracing_start(void)
867 if (buffer) 961 if (buffer)
868 ring_buffer_record_enable(buffer); 962 ring_buffer_record_enable(buffer);
869 963
964 arch_spin_unlock(&ftrace_max_lock);
965
870 ftrace_start(); 966 ftrace_start();
871 out: 967 out:
872 spin_unlock_irqrestore(&tracing_start_lock, flags); 968 spin_unlock_irqrestore(&tracing_start_lock, flags);
@@ -888,6 +984,9 @@ void tracing_stop(void)
888 if (trace_stop_count++) 984 if (trace_stop_count++)
889 goto out; 985 goto out;
890 986
987 /* Prevent the buffers from switching */
988 arch_spin_lock(&ftrace_max_lock);
989
891 buffer = global_trace.buffer; 990 buffer = global_trace.buffer;
892 if (buffer) 991 if (buffer)
893 ring_buffer_record_disable(buffer); 992 ring_buffer_record_disable(buffer);
@@ -896,6 +995,8 @@ void tracing_stop(void)
896 if (buffer) 995 if (buffer)
897 ring_buffer_record_disable(buffer); 996 ring_buffer_record_disable(buffer);
898 997
998 arch_spin_unlock(&ftrace_max_lock);
999
899 out: 1000 out:
900 spin_unlock_irqrestore(&tracing_start_lock, flags); 1001 spin_unlock_irqrestore(&tracing_start_lock, flags);
901} 1002}
@@ -915,7 +1016,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
915 * nor do we want to disable interrupts, 1016 * nor do we want to disable interrupts,
916 * so if we miss here, then better luck next time. 1017 * so if we miss here, then better luck next time.
917 */ 1018 */
918 if (!__raw_spin_trylock(&trace_cmdline_lock)) 1019 if (!arch_spin_trylock(&trace_cmdline_lock))
919 return; 1020 return;
920 1021
921 idx = map_pid_to_cmdline[tsk->pid]; 1022 idx = map_pid_to_cmdline[tsk->pid];
@@ -940,7 +1041,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
940 1041
941 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); 1042 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
942 1043
943 __raw_spin_unlock(&trace_cmdline_lock); 1044 arch_spin_unlock(&trace_cmdline_lock);
944} 1045}
945 1046
946void trace_find_cmdline(int pid, char comm[]) 1047void trace_find_cmdline(int pid, char comm[])
@@ -952,20 +1053,25 @@ void trace_find_cmdline(int pid, char comm[])
952 return; 1053 return;
953 } 1054 }
954 1055
1056 if (WARN_ON_ONCE(pid < 0)) {
1057 strcpy(comm, "<XXX>");
1058 return;
1059 }
1060
955 if (pid > PID_MAX_DEFAULT) { 1061 if (pid > PID_MAX_DEFAULT) {
956 strcpy(comm, "<...>"); 1062 strcpy(comm, "<...>");
957 return; 1063 return;
958 } 1064 }
959 1065
960 preempt_disable(); 1066 preempt_disable();
961 __raw_spin_lock(&trace_cmdline_lock); 1067 arch_spin_lock(&trace_cmdline_lock);
962 map = map_pid_to_cmdline[pid]; 1068 map = map_pid_to_cmdline[pid];
963 if (map != NO_CMDLINE_MAP) 1069 if (map != NO_CMDLINE_MAP)
964 strcpy(comm, saved_cmdlines[map]); 1070 strcpy(comm, saved_cmdlines[map]);
965 else 1071 else
966 strcpy(comm, "<...>"); 1072 strcpy(comm, "<...>");
967 1073
968 __raw_spin_unlock(&trace_cmdline_lock); 1074 arch_spin_unlock(&trace_cmdline_lock);
969 preempt_enable(); 1075 preempt_enable();
970} 1076}
971 1077
@@ -1085,7 +1191,7 @@ trace_function(struct trace_array *tr,
1085 struct ftrace_entry *entry; 1191 struct ftrace_entry *entry;
1086 1192
1087 /* If we are reading the ring buffer, don't trace */ 1193 /* If we are reading the ring buffer, don't trace */
1088 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 1194 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1089 return; 1195 return;
1090 1196
1091 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), 1197 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@ -1151,6 +1257,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1151 __ftrace_trace_stack(tr->buffer, flags, skip, pc); 1257 __ftrace_trace_stack(tr->buffer, flags, skip, pc);
1152} 1258}
1153 1259
1260/**
1261 * trace_dump_stack - record a stack back trace in the trace buffer
1262 */
1263void trace_dump_stack(void)
1264{
1265 unsigned long flags;
1266
1267 if (tracing_disabled || tracing_selftest_running)
1268 return;
1269
1270 local_save_flags(flags);
1271
1272 /* skipping 3 traces seems to get us to the caller of this function */
1273 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
1274}
1275
1154void 1276void
1155ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) 1277ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1156{ 1278{
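trace_dump_stack() gives any kernel code a one-call way to drop the current backtrace into the trace buffer instead of the console, with the 3-frame skip tuned so the trace starts at the caller. A minimal usage sketch (demo_suspicious_path is hypothetical, and the assumption here is that the declaration ends up in linux/kernel.h next to the other tracing helpers):

#include <linux/kernel.h>	/* assumed home of the trace_dump_stack() declaration */

static void demo_suspicious_path(void)
{
	/* Record how we got here in the ftrace ring buffer rather than
	 * spamming the console with dump_stack(). */
	trace_dump_stack();
}
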
@@ -1162,6 +1284,13 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1162 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 1284 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1163 return; 1285 return;
1164 1286
1287 /*
1288 * NMIs cannot handle page faults, even with fixups.
1289 * Saving the user stack can (and often does) fault.
1290 */
1291 if (unlikely(in_nmi()))
1292 return;
1293
1165 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 1294 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1166 sizeof(*entry), flags, pc); 1295 sizeof(*entry), flags, pc);
1167 if (!event) 1296 if (!event)
@@ -1251,8 +1380,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1251 */ 1380 */
1252int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 1381int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1253{ 1382{
1254 static raw_spinlock_t trace_buf_lock = 1383 static arch_spinlock_t trace_buf_lock =
1255 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1384 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1256 static u32 trace_buf[TRACE_BUF_SIZE]; 1385 static u32 trace_buf[TRACE_BUF_SIZE];
1257 1386
1258 struct ftrace_event_call *call = &event_bprint; 1387 struct ftrace_event_call *call = &event_bprint;
@@ -1283,7 +1412,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1283 1412
1284 /* Lockdep uses trace_printk for lock tracing */ 1413 /* Lockdep uses trace_printk for lock tracing */
1285 local_irq_save(flags); 1414 local_irq_save(flags);
1286 __raw_spin_lock(&trace_buf_lock); 1415 arch_spin_lock(&trace_buf_lock);
1287 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1416 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1288 1417
1289 if (len > TRACE_BUF_SIZE || len < 0) 1418 if (len > TRACE_BUF_SIZE || len < 0)
@@ -1300,11 +1429,13 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1300 entry->fmt = fmt; 1429 entry->fmt = fmt;
1301 1430
1302 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1431 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1303 if (!filter_check_discard(call, entry, buffer, event)) 1432 if (!filter_check_discard(call, entry, buffer, event)) {
1304 ring_buffer_unlock_commit(buffer, event); 1433 ring_buffer_unlock_commit(buffer, event);
1434 ftrace_trace_stack(buffer, flags, 6, pc);
1435 }
1305 1436
1306out_unlock: 1437out_unlock:
1307 __raw_spin_unlock(&trace_buf_lock); 1438 arch_spin_unlock(&trace_buf_lock);
1308 local_irq_restore(flags); 1439 local_irq_restore(flags);
1309 1440
1310out: 1441out:
@@ -1334,7 +1465,7 @@ int trace_array_printk(struct trace_array *tr,
1334int trace_array_vprintk(struct trace_array *tr, 1465int trace_array_vprintk(struct trace_array *tr,
1335 unsigned long ip, const char *fmt, va_list args) 1466 unsigned long ip, const char *fmt, va_list args)
1336{ 1467{
1337 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1468 static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1338 static char trace_buf[TRACE_BUF_SIZE]; 1469 static char trace_buf[TRACE_BUF_SIZE];
1339 1470
1340 struct ftrace_event_call *call = &event_print; 1471 struct ftrace_event_call *call = &event_print;
@@ -1360,12 +1491,9 @@ int trace_array_vprintk(struct trace_array *tr,
1360 1491
1361 pause_graph_tracing(); 1492 pause_graph_tracing();
1362 raw_local_irq_save(irq_flags); 1493 raw_local_irq_save(irq_flags);
1363 __raw_spin_lock(&trace_buf_lock); 1494 arch_spin_lock(&trace_buf_lock);
1364 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1495 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1365 1496
1366 len = min(len, TRACE_BUF_SIZE-1);
1367 trace_buf[len] = 0;
1368
1369 size = sizeof(*entry) + len + 1; 1497 size = sizeof(*entry) + len + 1;
1370 buffer = tr->buffer; 1498 buffer = tr->buffer;
1371 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 1499 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
@@ -1373,15 +1501,17 @@ int trace_array_vprintk(struct trace_array *tr,
1373 if (!event) 1501 if (!event)
1374 goto out_unlock; 1502 goto out_unlock;
1375 entry = ring_buffer_event_data(event); 1503 entry = ring_buffer_event_data(event);
1376 entry->ip = ip; 1504 entry->ip = ip;
1377 1505
1378 memcpy(&entry->buf, trace_buf, len); 1506 memcpy(&entry->buf, trace_buf, len);
1379 entry->buf[len] = 0; 1507 entry->buf[len] = '\0';
1380 if (!filter_check_discard(call, entry, buffer, event)) 1508 if (!filter_check_discard(call, entry, buffer, event)) {
1381 ring_buffer_unlock_commit(buffer, event); 1509 ring_buffer_unlock_commit(buffer, event);
1510 ftrace_trace_stack(buffer, irq_flags, 6, pc);
1511 }
1382 1512
1383 out_unlock: 1513 out_unlock:
1384 __raw_spin_unlock(&trace_buf_lock); 1514 arch_spin_unlock(&trace_buf_lock);
1385 raw_local_irq_restore(irq_flags); 1515 raw_local_irq_restore(irq_flags);
1386 unpause_graph_tracing(); 1516 unpause_graph_tracing();
1387 out: 1517 out:
@@ -1515,6 +1645,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1515 int i = (int)*pos; 1645 int i = (int)*pos;
1516 void *ent; 1646 void *ent;
1517 1647
1648 WARN_ON_ONCE(iter->leftover);
1649
1518 (*pos)++; 1650 (*pos)++;
1519 1651
1520 /* can't go backwards */ 1652 /* can't go backwards */
@@ -1566,12 +1698,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1566} 1698}
1567 1699
1568/* 1700/*
1569 * No necessary locking here. The worst thing which can
1570 * happen is loosing events consumed at the same time
1571 * by a trace_pipe reader.
1572 * Other than that, we don't risk to crash the ring buffer
1573 * because it serializes the readers.
1574 *
1575 * The current tracer is copied to avoid a global locking 1701 * The current tracer is copied to avoid a global locking
1576 * all around. 1702 * all around.
1577 */ 1703 */
@@ -1609,21 +1735,34 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1609 1735
1610 ftrace_enable_cpu(); 1736 ftrace_enable_cpu();
1611 1737
1738 iter->leftover = 0;
1612 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1739 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1613 ; 1740 ;
1614 1741
1615 } else { 1742 } else {
1616 l = *pos - 1; 1743 /*
1617 p = s_next(m, p, &l); 1744 * If we overflowed the seq_file before, then we want
1745 * to just reuse the trace_seq buffer again.
1746 */
1747 if (iter->leftover)
1748 p = iter;
1749 else {
1750 l = *pos - 1;
1751 p = s_next(m, p, &l);
1752 }
1618 } 1753 }
1619 1754
1620 trace_event_read_lock(); 1755 trace_event_read_lock();
1756 trace_access_lock(cpu_file);
1621 return p; 1757 return p;
1622} 1758}
1623 1759
1624static void s_stop(struct seq_file *m, void *p) 1760static void s_stop(struct seq_file *m, void *p)
1625{ 1761{
1762 struct trace_iterator *iter = m->private;
1763
1626 atomic_dec(&trace_record_cmdline_disabled); 1764 atomic_dec(&trace_record_cmdline_disabled);
1765 trace_access_unlock(iter->cpu_file);
1627 trace_event_read_unlock(); 1766 trace_event_read_unlock();
1628} 1767}
1629 1768
@@ -1922,6 +2061,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1922static int s_show(struct seq_file *m, void *v) 2061static int s_show(struct seq_file *m, void *v)
1923{ 2062{
1924 struct trace_iterator *iter = v; 2063 struct trace_iterator *iter = v;
2064 int ret;
1925 2065
1926 if (iter->ent == NULL) { 2066 if (iter->ent == NULL) {
1927 if (iter->tr) { 2067 if (iter->tr) {
@@ -1941,9 +2081,27 @@ static int s_show(struct seq_file *m, void *v)
1941 if (!(trace_flags & TRACE_ITER_VERBOSE)) 2081 if (!(trace_flags & TRACE_ITER_VERBOSE))
1942 print_func_help_header(m); 2082 print_func_help_header(m);
1943 } 2083 }
2084 } else if (iter->leftover) {
2085 /*
2086 * If we filled the seq_file buffer earlier, we
2087 * want to just show it now.
2088 */
2089 ret = trace_print_seq(m, &iter->seq);
2090
2091 /* ret should this time be zero, but you never know */
2092 iter->leftover = ret;
2093
1944 } else { 2094 } else {
1945 print_trace_line(iter); 2095 print_trace_line(iter);
1946 trace_print_seq(m, &iter->seq); 2096 ret = trace_print_seq(m, &iter->seq);
2097 /*
2098 * If we overflow the seq_file buffer, then it will
2099 * ask us for this data again at start up.
2100 * Use that instead.
2101 * ret is 0 if seq_file write succeeded.
2102 * -1 otherwise.
2103 */
2104 iter->leftover = ret;
1947 } 2105 }
1948 2106
1949 return 0; 2107 return 0;
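The leftover logic in s_show() boils down to "do not treat an entry as consumed until its formatted text actually fit into the seq_file": trace_print_seq() now reports that with 0 on success and -1 on overflow, and s_start() replays the buffered text instead of advancing. A stand-alone toy model of that state machine (all demo_* names are made up):

#include <string.h>

struct demo_iter {
	char	fmt_buf[128];	/* plays the role of iter->seq */
	int	leftover;	/* plays the role of iter->leftover */
};

/* Same contract as the new trace_print_seq(): 0 if the text fit into the
 * output buffer, -1 if it overflowed and must be offered again. */
static int demo_flush(struct demo_iter *iter, char *out, size_t out_len)
{
	size_t need = strlen(iter->fmt_buf);

	if (need >= out_len)
		return -1;
	memcpy(out, iter->fmt_buf, need + 1);
	return 0;
}

/* One ->show() step: only format a fresh entry when the previous one was
 * fully flushed, otherwise retry the text still sitting in fmt_buf. */
static void demo_show(struct demo_iter *iter, char *out, size_t out_len)
{
	if (!iter->leftover) {
		/* format the current trace entry into fmt_buf here */
	}
	iter->leftover = demo_flush(iter, out, out_len);
}
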
@@ -2253,7 +2411,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2253 mutex_lock(&tracing_cpumask_update_lock); 2411 mutex_lock(&tracing_cpumask_update_lock);
2254 2412
2255 local_irq_disable(); 2413 local_irq_disable();
2256 __raw_spin_lock(&ftrace_max_lock); 2414 arch_spin_lock(&ftrace_max_lock);
2257 for_each_tracing_cpu(cpu) { 2415 for_each_tracing_cpu(cpu) {
2258 /* 2416 /*
2259 * Increase/decrease the disabled counter if we are 2417 * Increase/decrease the disabled counter if we are
@@ -2268,7 +2426,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2268 atomic_dec(&global_trace.data[cpu]->disabled); 2426 atomic_dec(&global_trace.data[cpu]->disabled);
2269 } 2427 }
2270 } 2428 }
2271 __raw_spin_unlock(&ftrace_max_lock); 2429 arch_spin_unlock(&ftrace_max_lock);
2272 local_irq_enable(); 2430 local_irq_enable();
2273 2431
2274 cpumask_copy(tracing_cpumask, tracing_cpumask_new); 2432 cpumask_copy(tracing_cpumask, tracing_cpumask_new);
@@ -2290,67 +2448,49 @@ static const struct file_operations tracing_cpumask_fops = {
2290 .write = tracing_cpumask_write, 2448 .write = tracing_cpumask_write,
2291}; 2449};
2292 2450
2293static ssize_t 2451static int tracing_trace_options_show(struct seq_file *m, void *v)
2294tracing_trace_options_read(struct file *filp, char __user *ubuf,
2295 size_t cnt, loff_t *ppos)
2296{ 2452{
2297 struct tracer_opt *trace_opts; 2453 struct tracer_opt *trace_opts;
2298 u32 tracer_flags; 2454 u32 tracer_flags;
2299 int len = 0;
2300 char *buf;
2301 int r = 0;
2302 int i; 2455 int i;
2303 2456
2304
2305 /* calculate max size */
2306 for (i = 0; trace_options[i]; i++) {
2307 len += strlen(trace_options[i]);
2308 len += 3; /* "no" and newline */
2309 }
2310
2311 mutex_lock(&trace_types_lock); 2457 mutex_lock(&trace_types_lock);
2312 tracer_flags = current_trace->flags->val; 2458 tracer_flags = current_trace->flags->val;
2313 trace_opts = current_trace->flags->opts; 2459 trace_opts = current_trace->flags->opts;
2314 2460
2315 /*
2316 * Increase the size with names of options specific
2317 * of the current tracer.
2318 */
2319 for (i = 0; trace_opts[i].name; i++) {
2320 len += strlen(trace_opts[i].name);
2321 len += 3; /* "no" and newline */
2322 }
2323
2324 /* +1 for \0 */
2325 buf = kmalloc(len + 1, GFP_KERNEL);
2326 if (!buf) {
2327 mutex_unlock(&trace_types_lock);
2328 return -ENOMEM;
2329 }
2330
2331 for (i = 0; trace_options[i]; i++) { 2461 for (i = 0; trace_options[i]; i++) {
2332 if (trace_flags & (1 << i)) 2462 if (trace_flags & (1 << i))
2333 r += sprintf(buf + r, "%s\n", trace_options[i]); 2463 seq_printf(m, "%s\n", trace_options[i]);
2334 else 2464 else
2335 r += sprintf(buf + r, "no%s\n", trace_options[i]); 2465 seq_printf(m, "no%s\n", trace_options[i]);
2336 } 2466 }
2337 2467
2338 for (i = 0; trace_opts[i].name; i++) { 2468 for (i = 0; trace_opts[i].name; i++) {
2339 if (tracer_flags & trace_opts[i].bit) 2469 if (tracer_flags & trace_opts[i].bit)
2340 r += sprintf(buf + r, "%s\n", 2470 seq_printf(m, "%s\n", trace_opts[i].name);
2341 trace_opts[i].name);
2342 else 2471 else
2343 r += sprintf(buf + r, "no%s\n", 2472 seq_printf(m, "no%s\n", trace_opts[i].name);
2344 trace_opts[i].name);
2345 } 2473 }
2346 mutex_unlock(&trace_types_lock); 2474 mutex_unlock(&trace_types_lock);
2347 2475
2348 WARN_ON(r >= len + 1); 2476 return 0;
2477}
2349 2478
2350 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2479static int __set_tracer_option(struct tracer *trace,
2480 struct tracer_flags *tracer_flags,
2481 struct tracer_opt *opts, int neg)
2482{
2483 int ret;
2351 2484
2352 kfree(buf); 2485 ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
2353 return r; 2486 if (ret)
2487 return ret;
2488
2489 if (neg)
2490 tracer_flags->val &= ~opts->bit;
2491 else
2492 tracer_flags->val |= opts->bit;
2493 return 0;
2354} 2494}
2355 2495
2356/* Try to assign a tracer specific option */ 2496/* Try to assign a tracer specific option */
@@ -2358,33 +2498,17 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2358{ 2498{
2359 struct tracer_flags *tracer_flags = trace->flags; 2499 struct tracer_flags *tracer_flags = trace->flags;
2360 struct tracer_opt *opts = NULL; 2500 struct tracer_opt *opts = NULL;
2361 int ret = 0, i = 0; 2501 int i;
2362 int len;
2363 2502
2364 for (i = 0; tracer_flags->opts[i].name; i++) { 2503 for (i = 0; tracer_flags->opts[i].name; i++) {
2365 opts = &tracer_flags->opts[i]; 2504 opts = &tracer_flags->opts[i];
2366 len = strlen(opts->name);
2367 2505
2368 if (strncmp(cmp, opts->name, len) == 0) { 2506 if (strcmp(cmp, opts->name) == 0)
2369 ret = trace->set_flag(tracer_flags->val, 2507 return __set_tracer_option(trace, trace->flags,
2370 opts->bit, !neg); 2508 opts, neg);
2371 break;
2372 }
2373 } 2509 }
2374 /* Not found */
2375 if (!tracer_flags->opts[i].name)
2376 return -EINVAL;
2377
2378 /* Refused to handle */
2379 if (ret)
2380 return ret;
2381
2382 if (neg)
2383 tracer_flags->val &= ~opts->bit;
2384 else
2385 tracer_flags->val |= opts->bit;
2386 2510
2387 return 0; 2511 return -EINVAL;
2388} 2512}
2389 2513
2390static void set_tracer_flags(unsigned int mask, int enabled) 2514static void set_tracer_flags(unsigned int mask, int enabled)
@@ -2404,7 +2528,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2404 size_t cnt, loff_t *ppos) 2528 size_t cnt, loff_t *ppos)
2405{ 2529{
2406 char buf[64]; 2530 char buf[64];
2407 char *cmp = buf; 2531 char *cmp;
2408 int neg = 0; 2532 int neg = 0;
2409 int ret; 2533 int ret;
2410 int i; 2534 int i;
@@ -2416,16 +2540,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2416 return -EFAULT; 2540 return -EFAULT;
2417 2541
2418 buf[cnt] = 0; 2542 buf[cnt] = 0;
2543 cmp = strstrip(buf);
2419 2544
2420 if (strncmp(buf, "no", 2) == 0) { 2545 if (strncmp(cmp, "no", 2) == 0) {
2421 neg = 1; 2546 neg = 1;
2422 cmp += 2; 2547 cmp += 2;
2423 } 2548 }
2424 2549
2425 for (i = 0; trace_options[i]; i++) { 2550 for (i = 0; trace_options[i]; i++) {
2426 int len = strlen(trace_options[i]); 2551 if (strcmp(cmp, trace_options[i]) == 0) {
2427
2428 if (strncmp(cmp, trace_options[i], len) == 0) {
2429 set_tracer_flags(1 << i, !neg); 2552 set_tracer_flags(1 << i, !neg);
2430 break; 2553 break;
2431 } 2554 }
@@ -2445,9 +2568,18 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2445 return cnt; 2568 return cnt;
2446} 2569}
2447 2570
2571static int tracing_trace_options_open(struct inode *inode, struct file *file)
2572{
2573 if (tracing_disabled)
2574 return -ENODEV;
2575 return single_open(file, tracing_trace_options_show, NULL);
2576}
2577
2448static const struct file_operations tracing_iter_fops = { 2578static const struct file_operations tracing_iter_fops = {
2449 .open = tracing_open_generic, 2579 .open = tracing_trace_options_open,
2450 .read = tracing_trace_options_read, 2580 .read = seq_read,
2581 .llseek = seq_lseek,
2582 .release = single_release,
2451 .write = tracing_trace_options_write, 2583 .write = tracing_trace_options_write,
2452}; 2584};
2453 2585
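The trace_options file above is converted from a hand-rolled kmalloc()/simple_read_from_buffer() read handler to the seq_file single_open() pattern, which removes the size pre-calculation entirely. The general shape of that pattern, with a hypothetical demo_show()/demo_fops:

#include <linux/fs.h>
#include <linux/seq_file.h>

/* ->show() just emits lines; seq_file handles buffering, partial reads
 * and llseek on its own. */
static int demo_show(struct seq_file *m, void *v)
{
	seq_printf(m, "option-a\n");
	seq_printf(m, "nooption-b\n");
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	return single_open(file, demo_show, NULL);
}

static const struct file_operations demo_fops = {
	.open		= demo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
	/* a .write handler can be added alongside, as tracing_iter_fops does */
};
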
@@ -2821,22 +2953,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2821 2953
2822 mutex_lock(&trace_types_lock); 2954 mutex_lock(&trace_types_lock);
2823 2955
2824 /* We only allow one reader per cpu */
2825 if (cpu_file == TRACE_PIPE_ALL_CPU) {
2826 if (!cpumask_empty(tracing_reader_cpumask)) {
2827 ret = -EBUSY;
2828 goto out;
2829 }
2830 cpumask_setall(tracing_reader_cpumask);
2831 } else {
2832 if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2833 cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2834 else {
2835 ret = -EBUSY;
2836 goto out;
2837 }
2838 }
2839
2840 /* create a buffer to store the information to pass to userspace */ 2956 /* create a buffer to store the information to pass to userspace */
2841 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 2957 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2842 if (!iter) { 2958 if (!iter) {
@@ -2892,10 +3008,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
2892 3008
2893 mutex_lock(&trace_types_lock); 3009 mutex_lock(&trace_types_lock);
2894 3010
2895 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) 3011 if (iter->trace->pipe_close)
2896 cpumask_clear(tracing_reader_cpumask); 3012 iter->trace->pipe_close(iter);
2897 else
2898 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2899 3013
2900 mutex_unlock(&trace_types_lock); 3014 mutex_unlock(&trace_types_lock);
2901 3015
@@ -3055,6 +3169,7 @@ waitagain:
3055 iter->pos = -1; 3169 iter->pos = -1;
3056 3170
3057 trace_event_read_lock(); 3171 trace_event_read_lock();
3172 trace_access_lock(iter->cpu_file);
3058 while (find_next_entry_inc(iter) != NULL) { 3173 while (find_next_entry_inc(iter) != NULL) {
3059 enum print_line_t ret; 3174 enum print_line_t ret;
3060 int len = iter->seq.len; 3175 int len = iter->seq.len;
@@ -3071,6 +3186,7 @@ waitagain:
3071 if (iter->seq.len >= cnt) 3186 if (iter->seq.len >= cnt)
3072 break; 3187 break;
3073 } 3188 }
3189 trace_access_unlock(iter->cpu_file);
3074 trace_event_read_unlock(); 3190 trace_event_read_unlock();
3075 3191
3076 /* Now copy what we have to the user */ 3192 /* Now copy what we have to the user */
@@ -3103,7 +3219,7 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3103 __free_page(spd->pages[idx]); 3219 __free_page(spd->pages[idx]);
3104} 3220}
3105 3221
3106static struct pipe_buf_operations tracing_pipe_buf_ops = { 3222static const struct pipe_buf_operations tracing_pipe_buf_ops = {
3107 .can_merge = 0, 3223 .can_merge = 0,
3108 .map = generic_pipe_buf_map, 3224 .map = generic_pipe_buf_map,
3109 .unmap = generic_pipe_buf_unmap, 3225 .unmap = generic_pipe_buf_unmap,
@@ -3196,6 +3312,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3196 } 3312 }
3197 3313
3198 trace_event_read_lock(); 3314 trace_event_read_lock();
3315 trace_access_lock(iter->cpu_file);
3199 3316
3200 /* Fill as many pages as possible. */ 3317 /* Fill as many pages as possible. */
3201 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { 3318 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
@@ -3219,6 +3336,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3219 trace_seq_init(&iter->seq); 3336 trace_seq_init(&iter->seq);
3220 } 3337 }
3221 3338
3339 trace_access_unlock(iter->cpu_file);
3222 trace_event_read_unlock(); 3340 trace_event_read_unlock();
3223 mutex_unlock(&iter->mutex); 3341 mutex_unlock(&iter->mutex);
3224 3342
@@ -3334,7 +3452,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3334 size_t cnt, loff_t *fpos) 3452 size_t cnt, loff_t *fpos)
3335{ 3453{
3336 char *buf; 3454 char *buf;
3337 char *end;
3338 3455
3339 if (tracing_disabled) 3456 if (tracing_disabled)
3340 return -EINVAL; 3457 return -EINVAL;
@@ -3342,7 +3459,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3342 if (cnt > TRACE_BUF_SIZE) 3459 if (cnt > TRACE_BUF_SIZE)
3343 cnt = TRACE_BUF_SIZE; 3460 cnt = TRACE_BUF_SIZE;
3344 3461
3345 buf = kmalloc(cnt + 1, GFP_KERNEL); 3462 buf = kmalloc(cnt + 2, GFP_KERNEL);
3346 if (buf == NULL) 3463 if (buf == NULL)
3347 return -ENOMEM; 3464 return -ENOMEM;
3348 3465
@@ -3350,35 +3467,31 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3350 kfree(buf); 3467 kfree(buf);
3351 return -EFAULT; 3468 return -EFAULT;
3352 } 3469 }
3470 if (buf[cnt-1] != '\n') {
3471 buf[cnt] = '\n';
3472 buf[cnt+1] = '\0';
3473 } else
3474 buf[cnt] = '\0';
3353 3475
3354 /* Cut from the first nil or newline. */ 3476 cnt = mark_printk("%s", buf);
3355 buf[cnt] = '\0';
3356 end = strchr(buf, '\n');
3357 if (end)
3358 *end = '\0';
3359
3360 cnt = mark_printk("%s\n", buf);
3361 kfree(buf); 3477 kfree(buf);
3362 *fpos += cnt; 3478 *fpos += cnt;
3363 3479
3364 return cnt; 3480 return cnt;
3365} 3481}
3366 3482
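tracing_mark_write() above now allocates cnt + 2 bytes and appends a newline when the user did not supply one, instead of cutting the message off at the first '\n'. A small user-space sketch of that normalization; normalize_mark() and its caller are illustrative only:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Copy cnt bytes of user text and guarantee it ends with "\n\0". */
static char *normalize_mark(const char *ubuf, size_t cnt)
{
    /* +2: room for an appended '\n' plus the terminating '\0'. */
    char *buf = malloc(cnt + 2);

    if (!buf)
        return NULL;
    memcpy(buf, ubuf, cnt);
    if (cnt == 0 || buf[cnt - 1] != '\n') {
        buf[cnt] = '\n';
        buf[cnt + 1] = '\0';
    } else {
        buf[cnt] = '\0';
    }
    return buf;
}

int main(void)
{
    char *a = normalize_mark("hello", 5);
    char *b = normalize_mark("world\n", 6);

    printf("%s%s", a, b);   /* both lines end with exactly one newline */
    free(a);
    free(b);
    return 0;
}
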
3367static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf, 3483static int tracing_clock_show(struct seq_file *m, void *v)
3368 size_t cnt, loff_t *ppos)
3369{ 3484{
3370 char buf[64];
3371 int bufiter = 0;
3372 int i; 3485 int i;
3373 3486
3374 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 3487 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3375 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, 3488 seq_printf(m,
3376 "%s%s%s%s", i ? " " : "", 3489 "%s%s%s%s", i ? " " : "",
3377 i == trace_clock_id ? "[" : "", trace_clocks[i].name, 3490 i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3378 i == trace_clock_id ? "]" : ""); 3491 i == trace_clock_id ? "]" : "");
3379 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n"); 3492 seq_putc(m, '\n');
3380 3493
3381 return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter); 3494 return 0;
3382} 3495}
3383 3496
3384static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 3497static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
@@ -3420,6 +3533,13 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3420 return cnt; 3533 return cnt;
3421} 3534}
3422 3535
3536static int tracing_clock_open(struct inode *inode, struct file *file)
3537{
3538 if (tracing_disabled)
3539 return -ENODEV;
3540 return single_open(file, tracing_clock_show, NULL);
3541}
3542
3423static const struct file_operations tracing_max_lat_fops = { 3543static const struct file_operations tracing_max_lat_fops = {
3424 .open = tracing_open_generic, 3544 .open = tracing_open_generic,
3425 .read = tracing_max_lat_read, 3545 .read = tracing_max_lat_read,
@@ -3458,8 +3578,10 @@ static const struct file_operations tracing_mark_fops = {
3458}; 3578};
3459 3579
3460static const struct file_operations trace_clock_fops = { 3580static const struct file_operations trace_clock_fops = {
3461 .open = tracing_open_generic, 3581 .open = tracing_clock_open,
3462 .read = tracing_clock_read, 3582 .read = seq_read,
3583 .llseek = seq_lseek,
3584 .release = single_release,
3463 .write = tracing_clock_write, 3585 .write = tracing_clock_write,
3464}; 3586};
3465 3587
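Both trace_options and trace_clock are switched above from hand-rolled read() implementations to the single_open()/seq_read() pattern, which takes care of buffering, offsets and partial reads. A hedged, stand-alone sketch of that pattern as a debugfs file in a toy module; the file name, the string printed and the module boilerplate are assumptions for the demo, not part of this patch:

#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/seq_file.h>

static struct dentry *demo_file;

/* Everything printed here is buffered and paged out by seq_read(). */
static int demo_show(struct seq_file *m, void *v)
{
    seq_puts(m, "local [global]\n");
    return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
    return single_open(file, demo_show, inode->i_private);
}

static const struct file_operations demo_fops = {
    .open    = demo_open,
    .read    = seq_read,
    .llseek  = seq_lseek,
    .release = single_release,
};

static int __init demo_init(void)
{
    /* NULL parent: create the file at the debugfs root for simplicity. */
    demo_file = debugfs_create_file("seqfile_demo", 0444, NULL,
                                    NULL, &demo_fops);
    return demo_file ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
    debugfs_remove(demo_file);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
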
@@ -3516,10 +3638,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3516 3638
3517 info->read = 0; 3639 info->read = 0;
3518 3640
3641 trace_access_lock(info->cpu);
3519 ret = ring_buffer_read_page(info->tr->buffer, 3642 ret = ring_buffer_read_page(info->tr->buffer,
3520 &info->spare, 3643 &info->spare,
3521 count, 3644 count,
3522 info->cpu, 0); 3645 info->cpu, 0);
3646 trace_access_unlock(info->cpu);
3523 if (ret < 0) 3647 if (ret < 0)
3524 return 0; 3648 return 0;
3525 3649
@@ -3589,7 +3713,7 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3589} 3713}
3590 3714
3591/* Pipe buffer operations for a buffer. */ 3715/* Pipe buffer operations for a buffer. */
3592static struct pipe_buf_operations buffer_pipe_buf_ops = { 3716static const struct pipe_buf_operations buffer_pipe_buf_ops = {
3593 .can_merge = 0, 3717 .can_merge = 0,
3594 .map = generic_pipe_buf_map, 3718 .map = generic_pipe_buf_map,
3595 .unmap = generic_pipe_buf_unmap, 3719 .unmap = generic_pipe_buf_unmap,
@@ -3647,6 +3771,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3647 len &= PAGE_MASK; 3771 len &= PAGE_MASK;
3648 } 3772 }
3649 3773
3774 trace_access_lock(info->cpu);
3650 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3775 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3651 3776
3652 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { 3777 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
@@ -3694,6 +3819,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3694 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3819 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3695 } 3820 }
3696 3821
3822 trace_access_unlock(info->cpu);
3697 spd.nr_pages = i; 3823 spd.nr_pages = i;
3698 3824
3699 /* did we read anything? */ 3825 /* did we read anything? */
@@ -3730,7 +3856,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3730 3856
3731 s = kmalloc(sizeof(*s), GFP_KERNEL); 3857 s = kmalloc(sizeof(*s), GFP_KERNEL);
3732 if (!s) 3858 if (!s)
3733 return ENOMEM; 3859 return -ENOMEM;
3734 3860
3735 trace_seq_init(s); 3861 trace_seq_init(s);
3736 3862
@@ -3920,39 +4046,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3920 if (ret < 0) 4046 if (ret < 0)
3921 return ret; 4047 return ret;
3922 4048
3923 ret = 0; 4049 if (val != 0 && val != 1)
3924 switch (val) { 4050 return -EINVAL;
3925 case 0:
3926 /* do nothing if already cleared */
3927 if (!(topt->flags->val & topt->opt->bit))
3928 break;
3929
3930 mutex_lock(&trace_types_lock);
3931 if (current_trace->set_flag)
3932 ret = current_trace->set_flag(topt->flags->val,
3933 topt->opt->bit, 0);
3934 mutex_unlock(&trace_types_lock);
3935 if (ret)
3936 return ret;
3937 topt->flags->val &= ~topt->opt->bit;
3938 break;
3939 case 1:
3940 /* do nothing if already set */
3941 if (topt->flags->val & topt->opt->bit)
3942 break;
3943 4051
4052 if (!!(topt->flags->val & topt->opt->bit) != val) {
3944 mutex_lock(&trace_types_lock); 4053 mutex_lock(&trace_types_lock);
3945 if (current_trace->set_flag) 4054 ret = __set_tracer_option(current_trace, topt->flags,
3946 ret = current_trace->set_flag(topt->flags->val, 4055 topt->opt, !val);
3947 topt->opt->bit, 1);
3948 mutex_unlock(&trace_types_lock); 4056 mutex_unlock(&trace_types_lock);
3949 if (ret) 4057 if (ret)
3950 return ret; 4058 return ret;
3951 topt->flags->val |= topt->opt->bit;
3952 break;
3953
3954 default:
3955 return -EINVAL;
3956 } 4059 }
3957 4060
3958 *ppos += cnt; 4061 *ppos += cnt;
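The rewritten trace_options_write() above replaces the 0/1 switch with a single guard: reject any value other than 0 or 1, and only call back into the tracer when the requested state differs from the current flag state. The same idempotent toggle, reduced to plain C with a hypothetical flags word:

#include <stdio.h>

#define OPT_BIT 0x4

static unsigned int flags = OPT_BIT;

/* Apply val (0 or 1) to OPT_BIT only when it actually changes state. */
static int set_option(unsigned long val)
{
    if (val != 0 && val != 1)
        return -1;

    /* !! normalizes the masked bit to 0/1 before comparing with val. */
    if (!!(flags & OPT_BIT) != val) {
        if (val)
            flags |= OPT_BIT;
        else
            flags &= ~OPT_BIT;
        printf("flag changed, now %#x\n", flags);
    }
    return 0;
}

int main(void)
{
    set_option(1);  /* already set: nothing changes, nothing printed */
    set_option(0);  /* clears the bit */
    set_option(2);  /* rejected */
    return 0;
}
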
@@ -4153,6 +4256,8 @@ static __init int tracer_init_debugfs(void)
4153 struct dentry *d_tracer; 4256 struct dentry *d_tracer;
4154 int cpu; 4257 int cpu;
4155 4258
4259 trace_access_lock_init();
4260
4156 d_tracer = tracing_init_dentry(); 4261 d_tracer = tracing_init_dentry();
4157 4262
4158 trace_create_file("tracing_enabled", 0644, d_tracer, 4263 trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4176,10 +4281,10 @@ static __init int tracer_init_debugfs(void)
4176#ifdef CONFIG_TRACER_MAX_TRACE 4281#ifdef CONFIG_TRACER_MAX_TRACE
4177 trace_create_file("tracing_max_latency", 0644, d_tracer, 4282 trace_create_file("tracing_max_latency", 0644, d_tracer,
4178 &tracing_max_latency, &tracing_max_lat_fops); 4283 &tracing_max_latency, &tracing_max_lat_fops);
4284#endif
4179 4285
4180 trace_create_file("tracing_thresh", 0644, d_tracer, 4286 trace_create_file("tracing_thresh", 0644, d_tracer,
4181 &tracing_thresh, &tracing_max_lat_fops); 4287 &tracing_thresh, &tracing_max_lat_fops);
4182#endif
4183 4288
4184 trace_create_file("README", 0444, d_tracer, 4289 trace_create_file("README", 0444, d_tracer,
4185 NULL, &tracing_readme_fops); 4290 NULL, &tracing_readme_fops);
@@ -4279,8 +4384,8 @@ trace_printk_seq(struct trace_seq *s)
4279 4384
4280static void __ftrace_dump(bool disable_tracing) 4385static void __ftrace_dump(bool disable_tracing)
4281{ 4386{
4282 static raw_spinlock_t ftrace_dump_lock = 4387 static arch_spinlock_t ftrace_dump_lock =
4283 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 4388 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
4284 /* use static because iter can be a bit big for the stack */ 4389 /* use static because iter can be a bit big for the stack */
4285 static struct trace_iterator iter; 4390 static struct trace_iterator iter;
4286 unsigned int old_userobj; 4391 unsigned int old_userobj;
@@ -4290,7 +4395,7 @@ static void __ftrace_dump(bool disable_tracing)
4290 4395
4291 /* only one dump */ 4396 /* only one dump */
4292 local_irq_save(flags); 4397 local_irq_save(flags);
4293 __raw_spin_lock(&ftrace_dump_lock); 4398 arch_spin_lock(&ftrace_dump_lock);
4294 if (dump_ran) 4399 if (dump_ran)
4295 goto out; 4400 goto out;
4296 4401
@@ -4365,7 +4470,7 @@ static void __ftrace_dump(bool disable_tracing)
4365 } 4470 }
4366 4471
4367 out: 4472 out:
4368 __raw_spin_unlock(&ftrace_dump_lock); 4473 arch_spin_unlock(&ftrace_dump_lock);
4369 local_irq_restore(flags); 4474 local_irq_restore(flags);
4370} 4475}
4371 4476
@@ -4387,9 +4492,6 @@ __init static int tracer_alloc_buffers(void)
4387 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) 4492 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
4388 goto out_free_buffer_mask; 4493 goto out_free_buffer_mask;
4389 4494
4390 if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
4391 goto out_free_tracing_cpumask;
4392
4393 /* To save memory, keep the ring buffer size to its minimum */ 4495 /* To save memory, keep the ring buffer size to its minimum */
4394 if (ring_buffer_expanded) 4496 if (ring_buffer_expanded)
4395 ring_buf_size = trace_buf_size; 4497 ring_buf_size = trace_buf_size;
@@ -4426,7 +4528,7 @@ __init static int tracer_alloc_buffers(void)
4426 /* Allocate the first page for all buffers */ 4528 /* Allocate the first page for all buffers */
4427 for_each_tracing_cpu(i) { 4529 for_each_tracing_cpu(i) {
4428 global_trace.data[i] = &per_cpu(global_trace_cpu, i); 4530 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4429 max_tr.data[i] = &per_cpu(max_data, i); 4531 max_tr.data[i] = &per_cpu(max_tr_data, i);
4430 } 4532 }
4431 4533
4432 trace_init_cmdlines(); 4534 trace_init_cmdlines();
@@ -4447,8 +4549,6 @@ __init static int tracer_alloc_buffers(void)
4447 return 0; 4549 return 0;
4448 4550
4449out_free_cpumask: 4551out_free_cpumask:
4450 free_cpumask_var(tracing_reader_cpumask);
4451out_free_tracing_cpumask:
4452 free_cpumask_var(tracing_cpumask); 4552 free_cpumask_var(tracing_cpumask);
4453out_free_buffer_mask: 4553out_free_buffer_mask:
4454 free_cpumask_var(tracing_buffer_mask); 4554 free_cpumask_var(tracing_buffer_mask);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 405cb850b75d..2825ef2c0b15 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h> 12#include <trace/boot.h>
13#include <linux/kmemtrace.h> 13#include <linux/kmemtrace.h>
14#include <linux/hw_breakpoint.h>
14 15
15#include <linux/trace_seq.h> 16#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 17#include <linux/ftrace_event.h>
@@ -37,6 +38,7 @@ enum trace_type {
37 TRACE_KMEM_ALLOC, 38 TRACE_KMEM_ALLOC,
38 TRACE_KMEM_FREE, 39 TRACE_KMEM_FREE,
39 TRACE_BLK, 40 TRACE_BLK,
41 TRACE_KSYM,
40 42
41 __TRACE_LAST_TYPE, 43 __TRACE_LAST_TYPE,
42}; 44};
@@ -98,9 +100,32 @@ struct syscall_trace_enter {
98struct syscall_trace_exit { 100struct syscall_trace_exit {
99 struct trace_entry ent; 101 struct trace_entry ent;
100 int nr; 102 int nr;
101 unsigned long ret; 103 long ret;
102}; 104};
103 105
106struct kprobe_trace_entry {
107 struct trace_entry ent;
108 unsigned long ip;
109 int nargs;
110 unsigned long args[];
111};
112
113#define SIZEOF_KPROBE_TRACE_ENTRY(n) \
114 (offsetof(struct kprobe_trace_entry, args) + \
115 (sizeof(unsigned long) * (n)))
116
117struct kretprobe_trace_entry {
118 struct trace_entry ent;
119 unsigned long func;
120 unsigned long ret_ip;
121 int nargs;
122 unsigned long args[];
123};
124
125#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
126 (offsetof(struct kretprobe_trace_entry, args) + \
127 (sizeof(unsigned long) * (n)))
128
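The kprobe and kretprobe entry records added above end in a C99 flexible array member, which is why their size is computed as offsetof(..., args) plus nargs words rather than with sizeof() alone. A self-contained illustration of that sizing idiom; the struct and macro names here are invented for the demo:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Demo record with a flexible array member, like kprobe_trace_entry. */
struct demo_entry {
    unsigned long ip;
    int           nargs;
    unsigned long args[];   /* flexible array member */
};

/* sizeof(struct demo_entry) does NOT include args[]; add it by hand. */
#define SIZEOF_DEMO_ENTRY(n) \
    (offsetof(struct demo_entry, args) + sizeof(unsigned long) * (n))

int main(void)
{
    int nargs = 3;
    struct demo_entry *e = malloc(SIZEOF_DEMO_ENTRY(nargs));

    if (!e)
        return 1;
    e->ip = 0xdeadbeef;
    e->nargs = nargs;
    for (int i = 0; i < nargs; i++)
        e->args[i] = i;

    printf("header %zu bytes, full record %zu bytes\n",
           offsetof(struct demo_entry, args), SIZEOF_DEMO_ENTRY(nargs));
    free(e);
    return 0;
}
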
104/* 129/*
105 * trace_flag_type is an enumeration that holds different 130 * trace_flag_type is an enumeration that holds different
106 * states when a trace occurs. These are: 131 * states when a trace occurs. These are:
@@ -209,6 +234,7 @@ extern void __ftrace_bad_type(void);
209 TRACE_KMEM_ALLOC); \ 234 TRACE_KMEM_ALLOC); \
210 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 235 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
211 TRACE_KMEM_FREE); \ 236 TRACE_KMEM_FREE); \
237 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
212 __ftrace_bad_type(); \ 238 __ftrace_bad_type(); \
213 } while (0) 239 } while (0)
214 240
@@ -246,6 +272,7 @@ struct tracer_flags {
246 * @pipe_open: called when the trace_pipe file is opened 272 * @pipe_open: called when the trace_pipe file is opened
247 * @wait_pipe: override how the user waits for traces on trace_pipe 273 * @wait_pipe: override how the user waits for traces on trace_pipe
248 * @close: called when the trace file is released 274 * @close: called when the trace file is released
275 * @pipe_close: called when the trace_pipe file is released
249 * @read: override the default read callback on trace_pipe 276 * @read: override the default read callback on trace_pipe
250 * @splice_read: override the default splice_read callback on trace_pipe 277 * @splice_read: override the default splice_read callback on trace_pipe
251 * @selftest: selftest to run on boot (see trace_selftest.c) 278 * @selftest: selftest to run on boot (see trace_selftest.c)
@@ -264,6 +291,7 @@ struct tracer {
264 void (*pipe_open)(struct trace_iterator *iter); 291 void (*pipe_open)(struct trace_iterator *iter);
265 void (*wait_pipe)(struct trace_iterator *iter); 292 void (*wait_pipe)(struct trace_iterator *iter);
266 void (*close)(struct trace_iterator *iter); 293 void (*close)(struct trace_iterator *iter);
294 void (*pipe_close)(struct trace_iterator *iter);
267 ssize_t (*read)(struct trace_iterator *iter, 295 ssize_t (*read)(struct trace_iterator *iter,
268 struct file *filp, char __user *ubuf, 296 struct file *filp, char __user *ubuf,
269 size_t cnt, loff_t *ppos); 297 size_t cnt, loff_t *ppos);
@@ -364,11 +392,14 @@ int register_tracer(struct tracer *type);
364void unregister_tracer(struct tracer *type); 392void unregister_tracer(struct tracer *type);
365int is_tracing_stopped(void); 393int is_tracing_stopped(void);
366 394
395extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
396
367extern unsigned long nsecs_to_usecs(unsigned long nsecs); 397extern unsigned long nsecs_to_usecs(unsigned long nsecs);
368 398
399extern unsigned long tracing_thresh;
400
369#ifdef CONFIG_TRACER_MAX_TRACE 401#ifdef CONFIG_TRACER_MAX_TRACE
370extern unsigned long tracing_max_latency; 402extern unsigned long tracing_max_latency;
371extern unsigned long tracing_thresh;
372 403
373void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); 404void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
374void update_max_tr_single(struct trace_array *tr, 405void update_max_tr_single(struct trace_array *tr,
@@ -413,7 +444,7 @@ extern int DYN_FTRACE_TEST_NAME(void);
413 444
414extern int ring_buffer_expanded; 445extern int ring_buffer_expanded;
415extern bool tracing_selftest_disabled; 446extern bool tracing_selftest_disabled;
416DECLARE_PER_CPU(local_t, ftrace_cpu_disabled); 447DECLARE_PER_CPU(int, ftrace_cpu_disabled);
417 448
418#ifdef CONFIG_FTRACE_STARTUP_TEST 449#ifdef CONFIG_FTRACE_STARTUP_TEST
419extern int trace_selftest_startup_function(struct tracer *trace, 450extern int trace_selftest_startup_function(struct tracer *trace,
@@ -438,6 +469,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
438 struct trace_array *tr); 469 struct trace_array *tr);
439extern int trace_selftest_startup_hw_branches(struct tracer *trace, 470extern int trace_selftest_startup_hw_branches(struct tracer *trace,
440 struct trace_array *tr); 471 struct trace_array *tr);
472extern int trace_selftest_startup_ksym(struct tracer *trace,
473 struct trace_array *tr);
441#endif /* CONFIG_FTRACE_STARTUP_TEST */ 474#endif /* CONFIG_FTRACE_STARTUP_TEST */
442 475
443extern void *head_page(struct trace_array_cpu *data); 476extern void *head_page(struct trace_array_cpu *data);
@@ -465,6 +498,7 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
465#ifdef CONFIG_DYNAMIC_FTRACE 498#ifdef CONFIG_DYNAMIC_FTRACE
466/* TODO: make this variable */ 499/* TODO: make this variable */
467#define FTRACE_GRAPH_MAX_FUNCS 32 500#define FTRACE_GRAPH_MAX_FUNCS 32
501extern int ftrace_graph_filter_enabled;
468extern int ftrace_graph_count; 502extern int ftrace_graph_count;
469extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; 503extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
470 504
@@ -472,7 +506,7 @@ static inline int ftrace_graph_addr(unsigned long addr)
472{ 506{
473 int i; 507 int i;
474 508
475 if (!ftrace_graph_count || test_tsk_trace_graph(current)) 509 if (!ftrace_graph_filter_enabled)
476 return 1; 510 return 1;
477 511
478 for (i = 0; i < ftrace_graph_count; i++) { 512 for (i = 0; i < ftrace_graph_count; i++) {
@@ -483,10 +517,6 @@ static inline int ftrace_graph_addr(unsigned long addr)
483 return 0; 517 return 0;
484} 518}
485#else 519#else
486static inline int ftrace_trace_addr(unsigned long addr)
487{
488 return 1;
489}
490static inline int ftrace_graph_addr(unsigned long addr) 520static inline int ftrace_graph_addr(unsigned long addr)
491{ 521{
492 return 1; 522 return 1;
@@ -500,12 +530,12 @@ print_graph_function(struct trace_iterator *iter)
500} 530}
501#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 531#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
502 532
503extern struct pid *ftrace_pid_trace; 533extern struct list_head ftrace_pids;
504 534
505#ifdef CONFIG_FUNCTION_TRACER 535#ifdef CONFIG_FUNCTION_TRACER
506static inline int ftrace_trace_task(struct task_struct *task) 536static inline int ftrace_trace_task(struct task_struct *task)
507{ 537{
508 if (!ftrace_pid_trace) 538 if (list_empty(&ftrace_pids))
509 return 1; 539 return 1;
510 540
511 return test_tsk_trace_trace(task); 541 return test_tsk_trace_trace(task);
@@ -521,7 +551,7 @@ static inline int ftrace_trace_task(struct task_struct *task)
521 * struct trace_parser - serves for reading the user input separated by spaces 551
522 * @cont: set if the input is not complete - no final space char was found 552 * @cont: set if the input is not complete - no final space char was found
523 * @buffer: holds the parsed user input 553 * @buffer: holds the parsed user input
524 * @idx: user input lenght 554 * @idx: user input length
525 * @size: buffer size 555 * @size: buffer size
526 */ 556 */
527struct trace_parser { 557struct trace_parser {
@@ -569,18 +599,17 @@ enum trace_iterator_flags {
569 TRACE_ITER_BIN = 0x40, 599 TRACE_ITER_BIN = 0x40,
570 TRACE_ITER_BLOCK = 0x80, 600 TRACE_ITER_BLOCK = 0x80,
571 TRACE_ITER_STACKTRACE = 0x100, 601 TRACE_ITER_STACKTRACE = 0x100,
572 TRACE_ITER_SCHED_TREE = 0x200, 602 TRACE_ITER_PRINTK = 0x200,
573 TRACE_ITER_PRINTK = 0x400, 603 TRACE_ITER_PREEMPTONLY = 0x400,
574 TRACE_ITER_PREEMPTONLY = 0x800, 604 TRACE_ITER_BRANCH = 0x800,
575 TRACE_ITER_BRANCH = 0x1000, 605 TRACE_ITER_ANNOTATE = 0x1000,
576 TRACE_ITER_ANNOTATE = 0x2000, 606 TRACE_ITER_USERSTACKTRACE = 0x2000,
577 TRACE_ITER_USERSTACKTRACE = 0x4000, 607 TRACE_ITER_SYM_USEROBJ = 0x4000,
578 TRACE_ITER_SYM_USEROBJ = 0x8000, 608 TRACE_ITER_PRINTK_MSGONLY = 0x8000,
579 TRACE_ITER_PRINTK_MSGONLY = 0x10000, 609 TRACE_ITER_CONTEXT_INFO = 0x10000, /* Print pid/cpu/time */
580 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ 610 TRACE_ITER_LATENCY_FMT = 0x20000,
581 TRACE_ITER_LATENCY_FMT = 0x40000, 611 TRACE_ITER_SLEEP_TIME = 0x40000,
582 TRACE_ITER_SLEEP_TIME = 0x80000, 612 TRACE_ITER_GRAPH_TIME = 0x80000,
583 TRACE_ITER_GRAPH_TIME = 0x100000,
584}; 613};
585 614
586/* 615/*
@@ -687,7 +716,6 @@ struct event_filter {
687 int n_preds; 716 int n_preds;
688 struct filter_pred **preds; 717 struct filter_pred **preds;
689 char *filter_string; 718 char *filter_string;
690 bool no_reset;
691}; 719};
692 720
693struct event_subsystem { 721struct event_subsystem {
@@ -699,22 +727,40 @@ struct event_subsystem {
699}; 727};
700 728
701struct filter_pred; 729struct filter_pred;
730struct regex;
702 731
703typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, 732typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
704 int val1, int val2); 733 int val1, int val2);
705 734
735typedef int (*regex_match_func)(char *str, struct regex *r, int len);
736
737enum regex_type {
738 MATCH_FULL = 0,
739 MATCH_FRONT_ONLY,
740 MATCH_MIDDLE_ONLY,
741 MATCH_END_ONLY,
742};
743
744struct regex {
745 char pattern[MAX_FILTER_STR_VAL];
746 int len;
747 int field_len;
748 regex_match_func match;
749};
750
706struct filter_pred { 751struct filter_pred {
707 filter_pred_fn_t fn; 752 filter_pred_fn_t fn;
708 u64 val; 753 u64 val;
709 char str_val[MAX_FILTER_STR_VAL]; 754 struct regex regex;
710 int str_len; 755 char *field_name;
711 char *field_name; 756 int offset;
712 int offset; 757 int not;
713 int not; 758 int op;
714 int op; 759 int pop_n;
715 int pop_n;
716}; 760};
717 761
762extern enum regex_type
763filter_parse_regex(char *buff, int len, char **search, int *not);
718extern void print_event_filter(struct ftrace_event_call *call, 764extern void print_event_filter(struct ftrace_event_call *call,
719 struct trace_seq *s); 765 struct trace_seq *s);
720extern int apply_event_filter(struct ftrace_event_call *call, 766extern int apply_event_filter(struct ftrace_event_call *call,
@@ -730,7 +776,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
730 struct ring_buffer *buffer, 776 struct ring_buffer *buffer,
731 struct ring_buffer_event *event) 777 struct ring_buffer_event *event)
732{ 778{
733 if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { 779 if (unlikely(call->filter_active) &&
780 !filter_match_preds(call->filter, rec)) {
734 ring_buffer_discard_commit(buffer, event); 781 ring_buffer_discard_commit(buffer, event);
735 return 1; 782 return 1;
736 } 783 }
@@ -746,7 +793,8 @@ extern const char *__stop___trace_bprintk_fmt[];
746 793
747#undef FTRACE_ENTRY 794#undef FTRACE_ENTRY
748#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ 795#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
749 extern struct ftrace_event_call event_##call; 796 extern struct ftrace_event_call \
797 __attribute__((__aligned__(4))) event_##call;
750#undef FTRACE_ENTRY_DUP 798#undef FTRACE_ENTRY_DUP
751#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ 799#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
752 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 800 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 4a194f08f88c..b9bc4d470177 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -307,8 +307,23 @@ static int annotated_branch_stat_cmp(void *p1, void *p2)
307 return -1; 307 return -1;
308 if (percent_a > percent_b) 308 if (percent_a > percent_b)
309 return 1; 309 return 1;
310 else 310
311 return 0; 311 if (a->incorrect < b->incorrect)
312 return -1;
313 if (a->incorrect > b->incorrect)
314 return 1;
315
316 /*
317 * Since the above shows worse (incorrect) cases
318 * first, we continue that by showing best (correct)
319 * cases last.
320 */
321 if (a->correct > b->correct)
322 return -1;
323 if (a->correct < b->correct)
324 return 1;
325
326 return 0;
312} 327}
313 328
314static struct tracer_stat annotated_branch_stats = { 329static struct tracer_stat annotated_branch_stats = {
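The comparator change above extends the branch-statistics sort from one key to three: miss percentage, then incorrect count, then correct count, so that on ties the best-predicted branches are listed last. A user-space qsort() comparator with the same cascaded tie-breaking shape; the struct, the sign convention and the sample data are illustrative, not the tracer_stat framework's:

#include <stdio.h>
#include <stdlib.h>

struct branch_stat {
    unsigned long correct;
    unsigned long incorrect;
    int percent;        /* precomputed miss percentage */
};

/* Worst offenders first: percent desc, incorrect desc, then correct asc. */
static int stat_cmp(const void *p1, const void *p2)
{
    const struct branch_stat *a = p1, *b = p2;

    if (a->percent > b->percent)
        return -1;
    if (a->percent < b->percent)
        return 1;

    if (a->incorrect > b->incorrect)
        return -1;
    if (a->incorrect < b->incorrect)
        return 1;

    /* Ties: show the best-predicted (most correct) entries last. */
    if (a->correct > b->correct)
        return 1;
    if (a->correct < b->correct)
        return -1;
    return 0;
}

int main(void)
{
    struct branch_stat s[] = {
        { 100, 0, 0 }, { 10, 10, 50 }, { 0, 5, 100 },
    };

    qsort(s, 3, sizeof(s[0]), stat_cmp);
    for (int i = 0; i < 3; i++)
        printf("%d%% miss, %lu wrong, %lu right\n",
               s[i].percent, s[i].incorrect, s[i].correct);
    return 0;
}
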
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 20c5f92e28a8..9d589d8dcd1a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -13,6 +13,7 @@
13 * Tracer plugins will chose a default from these clocks. 13 * Tracer plugins will chose a default from these clocks.
14 */ 14 */
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/irqflags.h>
16#include <linux/hardirq.h> 17#include <linux/hardirq.h>
17#include <linux/module.h> 18#include <linux/module.h>
18#include <linux/percpu.h> 19#include <linux/percpu.h>
@@ -20,6 +21,8 @@
20#include <linux/ktime.h> 21#include <linux/ktime.h>
21#include <linux/trace_clock.h> 22#include <linux/trace_clock.h>
22 23
24#include "trace.h"
25
23/* 26/*
24 * trace_clock_local(): the simplest and least coherent tracing clock. 27 * trace_clock_local(): the simplest and least coherent tracing clock.
25 * 28 *
@@ -28,17 +31,17 @@
28 */ 31 */
29u64 notrace trace_clock_local(void) 32u64 notrace trace_clock_local(void)
30{ 33{
31 unsigned long flags;
32 u64 clock; 34 u64 clock;
35 int resched;
33 36
34 /* 37 /*
35 * sched_clock() is an architecture implemented, fast, scalable, 38 * sched_clock() is an architecture implemented, fast, scalable,
36 * lockless clock. It is not guaranteed to be coherent across 39 * lockless clock. It is not guaranteed to be coherent across
37 * CPUs, nor across CPU idle events. 40 * CPUs, nor across CPU idle events.
38 */ 41 */
39 raw_local_irq_save(flags); 42 resched = ftrace_preempt_disable();
40 clock = sched_clock(); 43 clock = sched_clock();
41 raw_local_irq_restore(flags); 44 ftrace_preempt_enable(resched);
42 45
43 return clock; 46 return clock;
44} 47}
@@ -69,10 +72,10 @@ u64 notrace trace_clock(void)
69/* keep prev_time and lock in the same cacheline. */ 72/* keep prev_time and lock in the same cacheline. */
70static struct { 73static struct {
71 u64 prev_time; 74 u64 prev_time;
72 raw_spinlock_t lock; 75 arch_spinlock_t lock;
73} trace_clock_struct ____cacheline_aligned_in_smp = 76} trace_clock_struct ____cacheline_aligned_in_smp =
74 { 77 {
75 .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED, 78 .lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
76 }; 79 };
77 80
78u64 notrace trace_clock_global(void) 81u64 notrace trace_clock_global(void)
@@ -81,7 +84,7 @@ u64 notrace trace_clock_global(void)
81 int this_cpu; 84 int this_cpu;
82 u64 now; 85 u64 now;
83 86
84 raw_local_irq_save(flags); 87 local_irq_save(flags);
85 88
86 this_cpu = raw_smp_processor_id(); 89 this_cpu = raw_smp_processor_id();
87 now = cpu_clock(this_cpu); 90 now = cpu_clock(this_cpu);
@@ -92,7 +95,7 @@ u64 notrace trace_clock_global(void)
92 if (unlikely(in_nmi())) 95 if (unlikely(in_nmi()))
93 goto out; 96 goto out;
94 97
95 __raw_spin_lock(&trace_clock_struct.lock); 98 arch_spin_lock(&trace_clock_struct.lock);
96 99
97 /* 100 /*
98 * TODO: if this happens often then maybe we should reset 101 * TODO: if this happens often then maybe we should reset
@@ -104,10 +107,10 @@ u64 notrace trace_clock_global(void)
104 107
105 trace_clock_struct.prev_time = now; 108 trace_clock_struct.prev_time = now;
106 109
107 __raw_spin_unlock(&trace_clock_struct.lock); 110 arch_spin_unlock(&trace_clock_struct.lock);
108 111
109 out: 112 out:
110 raw_local_irq_restore(flags); 113 local_irq_restore(flags);
111 114
112 return now; 115 return now;
113} 116}
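trace_clock_global() above serializes callers behind one arch spinlock and clamps the value it returns so the global clock never appears to run backwards relative to prev_time. A rough user-space sketch of that idea, using a pthread mutex and CLOCK_MONOTONIC as stand-ins for the kernel primitives:

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t clock_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long long prev_time;    /* last value handed out */

/* Raw per-caller clock; may jitter between callers. */
static unsigned long long raw_clock(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Global clock: never returns a value smaller than one already returned. */
static unsigned long long global_clock(void)
{
    unsigned long long now = raw_clock();

    pthread_mutex_lock(&clock_lock);
    if (now < prev_time)
        now = prev_time;    /* clamp, i.e. max(now, prev_time) */
    prev_time = now;
    pthread_mutex_unlock(&clock_lock);

    return now;
}

int main(void)
{
    printf("%llu\n%llu\n", global_clock(), global_clock());
    return 0;
}
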
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ead3d724599d..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
364 F_printk("type:%u call_site:%lx ptr:%p", 364 F_printk("type:%u call_site:%lx ptr:%p",
365 __entry->type_id, __entry->call_site, __entry->ptr) 365 __entry->type_id, __entry->call_site, __entry->ptr)
366); 366);
367
368FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
369
370 TRACE_KSYM,
371
372 F_STRUCT(
373 __field( unsigned long, ip )
374 __field( unsigned char, type )
375 __array( char , cmd, TASK_COMM_LEN )
376 __field( unsigned long, addr )
377 ),
378
379 F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
380 (void *)__entry->ip, (unsigned int)__entry->type,
381 (void *)__entry->addr, __entry->cmd)
382);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
new file mode 100644
index 000000000000..0565bb42566f
--- /dev/null
+++ b/kernel/trace/trace_event_perf.c
@@ -0,0 +1,175 @@
1/*
2 * trace event based perf event profiling/tracing
3 *
4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
5 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
6 */
7
8#include <linux/module.h>
9#include <linux/kprobes.h>
10#include "trace.h"
11
12DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
13EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);
14
15EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
16
17static char *perf_trace_buf;
18static char *perf_trace_buf_nmi;
19
20/*
21 * Force it to be aligned to unsigned long to avoid misaligned access
22 * surprises
23 */
24typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
25 perf_trace_t;
26
27/* Count the events in use (per event id, not per instance) */
28static int total_ref_count;
29
30static int perf_trace_event_enable(struct ftrace_event_call *event)
31{
32 char *buf;
33 int ret = -ENOMEM;
34
35 if (event->perf_refcount++ > 0)
36 return 0;
37
38 if (!total_ref_count) {
39 buf = (char *)alloc_percpu(perf_trace_t);
40 if (!buf)
41 goto fail_buf;
42
43 rcu_assign_pointer(perf_trace_buf, buf);
44
45 buf = (char *)alloc_percpu(perf_trace_t);
46 if (!buf)
47 goto fail_buf_nmi;
48
49 rcu_assign_pointer(perf_trace_buf_nmi, buf);
50 }
51
52 ret = event->perf_event_enable(event);
53 if (!ret) {
54 total_ref_count++;
55 return 0;
56 }
57
58fail_buf_nmi:
59 if (!total_ref_count) {
60 free_percpu(perf_trace_buf_nmi);
61 free_percpu(perf_trace_buf);
62 perf_trace_buf_nmi = NULL;
63 perf_trace_buf = NULL;
64 }
65fail_buf:
66 event->perf_refcount--;
67
68 return ret;
69}
70
71int perf_trace_enable(int event_id)
72{
73 struct ftrace_event_call *event;
74 int ret = -EINVAL;
75
76 mutex_lock(&event_mutex);
77 list_for_each_entry(event, &ftrace_events, list) {
78 if (event->id == event_id && event->perf_event_enable &&
79 try_module_get(event->mod)) {
80 ret = perf_trace_event_enable(event);
81 break;
82 }
83 }
84 mutex_unlock(&event_mutex);
85
86 return ret;
87}
88
89static void perf_trace_event_disable(struct ftrace_event_call *event)
90{
91 char *buf, *nmi_buf;
92
93 if (--event->perf_refcount > 0)
94 return;
95
96 event->perf_event_disable(event);
97
98 if (!--total_ref_count) {
99 buf = perf_trace_buf;
100 rcu_assign_pointer(perf_trace_buf, NULL);
101
102 nmi_buf = perf_trace_buf_nmi;
103 rcu_assign_pointer(perf_trace_buf_nmi, NULL);
104
105 /*
106 * Ensure all events in profiling have finished before
107 * releasing the buffers
108 */
109 synchronize_sched();
110
111 free_percpu(buf);
112 free_percpu(nmi_buf);
113 }
114}
115
116void perf_trace_disable(int event_id)
117{
118 struct ftrace_event_call *event;
119
120 mutex_lock(&event_mutex);
121 list_for_each_entry(event, &ftrace_events, list) {
122 if (event->id == event_id) {
123 perf_trace_event_disable(event);
124 module_put(event->mod);
125 break;
126 }
127 }
128 mutex_unlock(&event_mutex);
129}
130
131__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
132 int *rctxp, unsigned long *irq_flags)
133{
134 struct trace_entry *entry;
135 char *trace_buf, *raw_data;
136 int pc, cpu;
137
138 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
139
140 pc = preempt_count();
141
142 /* Protect the per cpu buffer, begin the rcu read side */
143 local_irq_save(*irq_flags);
144
145 *rctxp = perf_swevent_get_recursion_context();
146 if (*rctxp < 0)
147 goto err_recursion;
148
149 cpu = smp_processor_id();
150
151 if (in_nmi())
152 trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
153 else
154 trace_buf = rcu_dereference_sched(perf_trace_buf);
155
156 if (!trace_buf)
157 goto err;
158
159 raw_data = per_cpu_ptr(trace_buf, cpu);
160
161 /* zero the trailing alignment bytes so stack data is not leaked to userspace */
162 memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
163
164 entry = (struct trace_entry *)raw_data;
165 tracing_generic_entry_update(entry, *irq_flags, pc);
166 entry->type = type;
167
168 return raw_data;
169err:
170 perf_swevent_put_recursion_context(*rctxp);
171err_recursion:
172 local_irq_restore(*irq_flags);
173 return NULL;
174}
175EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
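perf_trace_event_enable() and perf_trace_event_disable() above share one pair of per-cpu buffers across every event: the buffers are allocated when total_ref_count goes from 0 to 1 and freed (after a grace period) when it drops back to 0. The refcounted lazy-allocation skeleton, stripped down to user-space C; the names, the single malloc'd buffer and the mutex are illustrative, and the RCU and per-cpu details are omitted:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t ref_lock = PTHREAD_MUTEX_INITIALIZER;
static int total_ref_count;     /* how many users share the buffer */
static char *shared_buf;        /* allocated on first use, freed on last */

static int buf_get(void)
{
    int ret = 0;

    pthread_mutex_lock(&ref_lock);
    if (total_ref_count == 0) {
        shared_buf = malloc(4096);
        if (!shared_buf) {
            ret = -1;
            goto out;
        }
    }
    total_ref_count++;
out:
    pthread_mutex_unlock(&ref_lock);
    return ret;
}

static void buf_put(void)
{
    pthread_mutex_lock(&ref_lock);
    if (--total_ref_count == 0) {
        /* last user gone: safe to release the shared buffer */
        free(shared_buf);
        shared_buf = NULL;
    }
    pthread_mutex_unlock(&ref_lock);
}

int main(void)
{
    buf_get();  /* allocates */
    buf_get();  /* just bumps the count */
    buf_put();
    buf_put();  /* frees */
    printf("refs now %d\n", total_ref_count);
    return 0;
}
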
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
deleted file mode 100644
index 8d5c171cc998..000000000000
--- a/kernel/trace/trace_event_profile.c
+++ /dev/null
@@ -1,125 +0,0 @@
1/*
2 * trace event based perf counter profiling
3 *
4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
5 *
6 */
7
8#include <linux/module.h>
9#include "trace.h"
10
11/*
12 * We can't use a size but a type in alloc_percpu()
13 * So let's create a dummy type that matches the desired size
14 */
15typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
16
17char *trace_profile_buf;
18EXPORT_SYMBOL_GPL(trace_profile_buf);
19
20char *trace_profile_buf_nmi;
21EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
22
23/* Count the events in use (per event id, not per instance) */
24static int total_profile_count;
25
26static int ftrace_profile_enable_event(struct ftrace_event_call *event)
27{
28 char *buf;
29 int ret = -ENOMEM;
30
31 if (atomic_inc_return(&event->profile_count))
32 return 0;
33
34 if (!total_profile_count) {
35 buf = (char *)alloc_percpu(profile_buf_t);
36 if (!buf)
37 goto fail_buf;
38
39 rcu_assign_pointer(trace_profile_buf, buf);
40
41 buf = (char *)alloc_percpu(profile_buf_t);
42 if (!buf)
43 goto fail_buf_nmi;
44
45 rcu_assign_pointer(trace_profile_buf_nmi, buf);
46 }
47
48 ret = event->profile_enable();
49 if (!ret) {
50 total_profile_count++;
51 return 0;
52 }
53
54fail_buf_nmi:
55 if (!total_profile_count) {
56 free_percpu(trace_profile_buf_nmi);
57 free_percpu(trace_profile_buf);
58 trace_profile_buf_nmi = NULL;
59 trace_profile_buf = NULL;
60 }
61fail_buf:
62 atomic_dec(&event->profile_count);
63
64 return ret;
65}
66
67int ftrace_profile_enable(int event_id)
68{
69 struct ftrace_event_call *event;
70 int ret = -EINVAL;
71
72 mutex_lock(&event_mutex);
73 list_for_each_entry(event, &ftrace_events, list) {
74 if (event->id == event_id && event->profile_enable &&
75 try_module_get(event->mod)) {
76 ret = ftrace_profile_enable_event(event);
77 break;
78 }
79 }
80 mutex_unlock(&event_mutex);
81
82 return ret;
83}
84
85static void ftrace_profile_disable_event(struct ftrace_event_call *event)
86{
87 char *buf, *nmi_buf;
88
89 if (!atomic_add_negative(-1, &event->profile_count))
90 return;
91
92 event->profile_disable();
93
94 if (!--total_profile_count) {
95 buf = trace_profile_buf;
96 rcu_assign_pointer(trace_profile_buf, NULL);
97
98 nmi_buf = trace_profile_buf_nmi;
99 rcu_assign_pointer(trace_profile_buf_nmi, NULL);
100
101 /*
102 * Ensure every events in profiling have finished before
103 * releasing the buffers
104 */
105 synchronize_sched();
106
107 free_percpu(buf);
108 free_percpu(nmi_buf);
109 }
110}
111
112void ftrace_profile_disable(int event_id)
113{
114 struct ftrace_event_call *event;
115
116 mutex_lock(&event_mutex);
117 list_for_each_entry(event, &ftrace_events, list) {
118 if (event->id == event_id) {
119 ftrace_profile_disable_event(event);
120 module_put(event->mod);
121 break;
122 }
123 }
124 mutex_unlock(&event_mutex);
125}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index d128f65778e6..c697c7043349 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -15,6 +15,7 @@
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/slab.h>
18#include <linux/delay.h> 19#include <linux/delay.h>
19 20
20#include <asm/setup.h> 21#include <asm/setup.h>
@@ -60,10 +61,8 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
60 return 0; 61 return 0;
61 62
62err: 63err:
63 if (field) { 64 if (field)
64 kfree(field->name); 65 kfree(field->name);
65 kfree(field->type);
66 }
67 kfree(field); 66 kfree(field);
68 67
69 return -ENOMEM; 68 return -ENOMEM;
@@ -78,7 +77,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
78 if (ret) \ 77 if (ret) \
79 return ret; 78 return ret;
80 79
81int trace_define_common_fields(struct ftrace_event_call *call) 80static int trace_define_common_fields(struct ftrace_event_call *call)
82{ 81{
83 int ret; 82 int ret;
84 struct trace_entry ent; 83 struct trace_entry ent;
@@ -91,11 +90,8 @@ int trace_define_common_fields(struct ftrace_event_call *call)
91 90
92 return ret; 91 return ret;
93} 92}
94EXPORT_SYMBOL_GPL(trace_define_common_fields);
95
96#ifdef CONFIG_MODULES
97 93
98static void trace_destroy_fields(struct ftrace_event_call *call) 94void trace_destroy_fields(struct ftrace_event_call *call)
99{ 95{
100 struct ftrace_event_field *field, *next; 96 struct ftrace_event_field *field, *next;
101 97
@@ -107,27 +103,49 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
107 } 103 }
108} 104}
109 105
110#endif /* CONFIG_MODULES */ 106int trace_event_raw_init(struct ftrace_event_call *call)
107{
108 int id;
111 109
112static void ftrace_event_enable_disable(struct ftrace_event_call *call, 110 id = register_ftrace_event(call->event);
111 if (!id)
112 return -ENODEV;
113 call->id = id;
114 INIT_LIST_HEAD(&call->fields);
115
116 return 0;
117}
118EXPORT_SYMBOL_GPL(trace_event_raw_init);
119
120static int ftrace_event_enable_disable(struct ftrace_event_call *call,
113 int enable) 121 int enable)
114{ 122{
123 int ret = 0;
124
115 switch (enable) { 125 switch (enable) {
116 case 0: 126 case 0:
117 if (call->enabled) { 127 if (call->enabled) {
118 call->enabled = 0; 128 call->enabled = 0;
119 tracing_stop_cmdline_record(); 129 tracing_stop_cmdline_record();
120 call->unregfunc(call->data); 130 call->unregfunc(call);
121 } 131 }
122 break; 132 break;
123 case 1: 133 case 1:
124 if (!call->enabled) { 134 if (!call->enabled) {
125 call->enabled = 1;
126 tracing_start_cmdline_record(); 135 tracing_start_cmdline_record();
127 call->regfunc(call->data); 136 ret = call->regfunc(call);
137 if (ret) {
138 tracing_stop_cmdline_record();
139 pr_info("event trace: Could not enable event "
140 "%s\n", call->name);
141 break;
142 }
143 call->enabled = 1;
128 } 144 }
129 break; 145 break;
130 } 146 }
147
148 return ret;
131} 149}
132 150
133static void ftrace_clear_events(void) 151static void ftrace_clear_events(void)
@@ -406,7 +424,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
406 case 0: 424 case 0:
407 case 1: 425 case 1:
408 mutex_lock(&event_mutex); 426 mutex_lock(&event_mutex);
409 ftrace_event_enable_disable(call, val); 427 ret = ftrace_event_enable_disable(call, val);
410 mutex_unlock(&event_mutex); 428 mutex_unlock(&event_mutex);
411 break; 429 break;
412 430
@@ -416,7 +434,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
416 434
417 *ppos += cnt; 435 *ppos += cnt;
418 436
419 return cnt; 437 return ret ? ret : cnt;
420} 438}
421 439
422static ssize_t 440static ssize_t
@@ -501,41 +519,16 @@ out:
501 return ret; 519 return ret;
502} 520}
503 521
504extern char *__bad_type_size(void);
505
506#undef FIELD
507#define FIELD(type, name) \
508 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
509 #type, "common_" #name, offsetof(typeof(field), name), \
510 sizeof(field.name)
511
512static int trace_write_header(struct trace_seq *s)
513{
514 struct trace_entry field;
515
516 /* struct trace_entry */
517 return trace_seq_printf(s,
518 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
519 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
520 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
521 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
522 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
523 "\n",
524 FIELD(unsigned short, type),
525 FIELD(unsigned char, flags),
526 FIELD(unsigned char, preempt_count),
527 FIELD(int, pid),
528 FIELD(int, lock_depth));
529}
530
531static ssize_t 522static ssize_t
532event_format_read(struct file *filp, char __user *ubuf, size_t cnt, 523event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
533 loff_t *ppos) 524 loff_t *ppos)
534{ 525{
535 struct ftrace_event_call *call = filp->private_data; 526 struct ftrace_event_call *call = filp->private_data;
527 struct ftrace_event_field *field;
536 struct trace_seq *s; 528 struct trace_seq *s;
529 int common_field_count = 5;
537 char *buf; 530 char *buf;
538 int r; 531 int r = 0;
539 532
540 if (*ppos) 533 if (*ppos)
541 return 0; 534 return 0;
@@ -546,14 +539,48 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
546 539
547 trace_seq_init(s); 540 trace_seq_init(s);
548 541
549 /* If any of the first writes fail, so will the show_format. */
550
551 trace_seq_printf(s, "name: %s\n", call->name); 542 trace_seq_printf(s, "name: %s\n", call->name);
552 trace_seq_printf(s, "ID: %d\n", call->id); 543 trace_seq_printf(s, "ID: %d\n", call->id);
553 trace_seq_printf(s, "format:\n"); 544 trace_seq_printf(s, "format:\n");
554 trace_write_header(s);
555 545
556 r = call->show_format(call, s); 546 list_for_each_entry_reverse(field, &call->fields, link) {
547 /*
548 * Smartly shows the array type (except dynamic arrays).
549 * Normal:
550 * field:TYPE VAR
551 * If TYPE := TYPE[LEN], it is shown:
552 * field:TYPE VAR[LEN]
553 */
554 const char *array_descriptor = strchr(field->type, '[');
555
556 if (!strncmp(field->type, "__data_loc", 10))
557 array_descriptor = NULL;
558
559 if (!array_descriptor) {
560 r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
561 "\tsize:%u;\tsigned:%d;\n",
562 field->type, field->name, field->offset,
563 field->size, !!field->is_signed);
564 } else {
565 r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
566 "\tsize:%u;\tsigned:%d;\n",
567 (int)(array_descriptor - field->type),
568 field->type, field->name,
569 array_descriptor, field->offset,
570 field->size, !!field->is_signed);
571 }
572
573 if (--common_field_count == 0)
574 r = trace_seq_printf(s, "\n");
575
576 if (!r)
577 break;
578 }
579
580 if (r)
581 r = trace_seq_printf(s, "\nprint fmt: %s\n",
582 call->print_fmt);
583
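The new event_format_read() above prints each field itself and special-cases array types by splitting the stored type string at its '[', so the "[LEN]" descriptor is moved after the field name as the comment describes. A tiny user-space sketch of that split; the field names and type strings are hard-coded for the demo:

#include <stdio.h>
#include <string.h>

/* Print "field:TYPE NAME[LEN];" the way the format file does. */
static void print_field(const char *type, const char *name)
{
    const char *array = strchr(type, '[');

    /* __data_loc fields keep their type verbatim in the real code. */
    if (!array) {
        printf("\tfield:%s %s;\n", type, name);
    } else {
        /* "char[16]" -> "char" printed first, "[16]" appended after the name */
        printf("\tfield:%.*s %s%s;\n",
               (int)(array - type), type, name, array);
    }
}

int main(void)
{
    print_field("unsigned long", "ip");   /* field:unsigned long ip;  */
    print_field("char[16]", "comm");      /* field:char comm[16];     */
    return 0;
}
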
557 if (!r) { 584 if (!r) {
558 /* 585 /*
559 * ug! The format output is bigger than a PAGE!! 586 * ug! The format output is bigger than a PAGE!!
@@ -878,9 +905,9 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
878 "'%s/filter' entry\n", name); 905 "'%s/filter' entry\n", name);
879 } 906 }
880 907
881 entry = trace_create_file("enable", 0644, system->entry, 908 trace_create_file("enable", 0644, system->entry,
882 (void *)system->name, 909 (void *)system->name,
883 &ftrace_system_enable_fops); 910 &ftrace_system_enable_fops);
884 911
885 return system->entry; 912 return system->entry;
886} 913}
@@ -892,7 +919,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
892 const struct file_operations *filter, 919 const struct file_operations *filter,
893 const struct file_operations *format) 920 const struct file_operations *format)
894{ 921{
895 struct dentry *entry;
896 int ret; 922 int ret;
897 923
898 /* 924 /*
@@ -910,55 +936,72 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
910 } 936 }
911 937
912 if (call->regfunc) 938 if (call->regfunc)
913 entry = trace_create_file("enable", 0644, call->dir, call, 939 trace_create_file("enable", 0644, call->dir, call,
914 enable); 940 enable);
915 941
916 if (call->id && call->profile_enable) 942 if (call->id && call->perf_event_enable)
917 entry = trace_create_file("id", 0444, call->dir, call, 943 trace_create_file("id", 0444, call->dir, call,
918 id); 944 id);
919 945
920 if (call->define_fields) { 946 if (call->define_fields) {
921 ret = call->define_fields(call); 947 ret = trace_define_common_fields(call);
948 if (!ret)
949 ret = call->define_fields(call);
922 if (ret < 0) { 950 if (ret < 0) {
923 pr_warning("Could not initialize trace point" 951 pr_warning("Could not initialize trace point"
924 " events/%s\n", call->name); 952 " events/%s\n", call->name);
925 return ret; 953 return ret;
926 } 954 }
927 entry = trace_create_file("filter", 0644, call->dir, call, 955 trace_create_file("filter", 0644, call->dir, call,
928 filter); 956 filter);
929 } 957 }
930 958
931 /* A trace may not want to export its format */ 959 trace_create_file("format", 0444, call->dir, call,
932 if (!call->show_format) 960 format);
933 return 0;
934
935 entry = trace_create_file("format", 0444, call->dir, call,
936 format);
937 961
938 return 0; 962 return 0;
939} 963}
940 964
941#define for_each_event(event, start, end) \ 965static int __trace_add_event_call(struct ftrace_event_call *call)
942 for (event = start; \ 966{
943 (unsigned long)event < (unsigned long)end; \ 967 struct dentry *d_events;
944 event++) 968 int ret;
945 969
946#ifdef CONFIG_MODULES 970 if (!call->name)
971 return -EINVAL;
947 972
948static LIST_HEAD(ftrace_module_file_list); 973 if (call->raw_init) {
974 ret = call->raw_init(call);
975 if (ret < 0) {
976 if (ret != -ENOSYS)
977 pr_warning("Could not initialize trace "
978 "events/%s\n", call->name);
979 return ret;
980 }
981 }
949 982
950/* 983 d_events = event_trace_events_dir();
951 * Modules must own their file_operations to keep up with 984 if (!d_events)
952 * reference counting. 985 return -ENOENT;
953 */ 986
954struct ftrace_module_file_ops { 987 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
955 struct list_head list; 988 &ftrace_enable_fops, &ftrace_event_filter_fops,
956 struct module *mod; 989 &ftrace_event_format_fops);
957 struct file_operations id; 990 if (!ret)
958 struct file_operations enable; 991 list_add(&call->list, &ftrace_events);
959 struct file_operations format; 992
960 struct file_operations filter; 993 return ret;
961}; 994}
995
996/* Add an additional event_call dynamically */
997int trace_add_event_call(struct ftrace_event_call *call)
998{
999 int ret;
1000 mutex_lock(&event_mutex);
1001 ret = __trace_add_event_call(call);
1002 mutex_unlock(&event_mutex);
1003 return ret;
1004}
962 1005
963static void remove_subsystem_dir(const char *name) 1006static void remove_subsystem_dir(const char *name)
964{ 1007{
@@ -986,6 +1029,53 @@ static void remove_subsystem_dir(const char *name)
986 } 1029 }
987} 1030}
988 1031
1032/*
1033 * Must be called with both event_mutex and trace_event_mutex held.
1034 */
1035static void __trace_remove_event_call(struct ftrace_event_call *call)
1036{
1037 ftrace_event_enable_disable(call, 0);
1038 if (call->event)
1039 __unregister_ftrace_event(call->event);
1040 debugfs_remove_recursive(call->dir);
1041 list_del(&call->list);
1042 trace_destroy_fields(call);
1043 destroy_preds(call);
1044 remove_subsystem_dir(call->system);
1045}
1046
1047/* Remove an event_call */
1048void trace_remove_event_call(struct ftrace_event_call *call)
1049{
1050 mutex_lock(&event_mutex);
1051 down_write(&trace_event_mutex);
1052 __trace_remove_event_call(call);
1053 up_write(&trace_event_mutex);
1054 mutex_unlock(&event_mutex);
1055}
1056
1057#define for_each_event(event, start, end) \
1058 for (event = start; \
1059 (unsigned long)event < (unsigned long)end; \
1060 event++)
1061
1062#ifdef CONFIG_MODULES
1063
1064static LIST_HEAD(ftrace_module_file_list);
1065
1066/*
1067 * Modules must own their file_operations to keep up with
1068 * reference counting.
1069 */
1070struct ftrace_module_file_ops {
1071 struct list_head list;
1072 struct module *mod;
1073 struct file_operations id;
1074 struct file_operations enable;
1075 struct file_operations format;
1076 struct file_operations filter;
1077};
1078
989static struct ftrace_module_file_ops * 1079static struct ftrace_module_file_ops *
990trace_create_file_ops(struct module *mod) 1080trace_create_file_ops(struct module *mod)
991{ 1081{
@@ -1043,7 +1133,7 @@ static void trace_module_add_events(struct module *mod)
1043 if (!call->name) 1133 if (!call->name)
1044 continue; 1134 continue;
1045 if (call->raw_init) { 1135 if (call->raw_init) {
1046 ret = call->raw_init(); 1136 ret = call->raw_init(call);
1047 if (ret < 0) { 1137 if (ret < 0) {
1048 if (ret != -ENOSYS) 1138 if (ret != -ENOSYS)
1049 pr_warning("Could not initialize trace " 1139 pr_warning("Could not initialize trace "
@@ -1061,10 +1151,11 @@ static void trace_module_add_events(struct module *mod)
1061 return; 1151 return;
1062 } 1152 }
1063 call->mod = mod; 1153 call->mod = mod;
1064 list_add(&call->list, &ftrace_events); 1154 ret = event_create_dir(call, d_events,
1065 event_create_dir(call, d_events, 1155 &file_ops->id, &file_ops->enable,
1066 &file_ops->id, &file_ops->enable, 1156 &file_ops->filter, &file_ops->format);
1067 &file_ops->filter, &file_ops->format); 1157 if (!ret)
1158 list_add(&call->list, &ftrace_events);
1068 } 1159 }
1069} 1160}
1070 1161
@@ -1078,14 +1169,7 @@ static void trace_module_remove_events(struct module *mod)
1078 list_for_each_entry_safe(call, p, &ftrace_events, list) { 1169 list_for_each_entry_safe(call, p, &ftrace_events, list) {
1079 if (call->mod == mod) { 1170 if (call->mod == mod) {
1080 found = true; 1171 found = true;
1081 ftrace_event_enable_disable(call, 0); 1172 __trace_remove_event_call(call);
1082 if (call->event)
1083 __unregister_ftrace_event(call->event);
1084 debugfs_remove_recursive(call->dir);
1085 list_del(&call->list);
1086 trace_destroy_fields(call);
1087 destroy_preds(call);
1088 remove_subsystem_dir(call->system);
1089 } 1173 }
1090 } 1174 }
1091 1175
@@ -1203,7 +1287,7 @@ static __init int event_trace_init(void)
1203 if (!call->name) 1287 if (!call->name)
1204 continue; 1288 continue;
1205 if (call->raw_init) { 1289 if (call->raw_init) {
1206 ret = call->raw_init(); 1290 ret = call->raw_init(call);
1207 if (ret < 0) { 1291 if (ret < 0) {
1208 if (ret != -ENOSYS) 1292 if (ret != -ENOSYS)
1209 pr_warning("Could not initialize trace " 1293 pr_warning("Could not initialize trace "
@@ -1211,10 +1295,12 @@ static __init int event_trace_init(void)
1211 continue; 1295 continue;
1212 } 1296 }
1213 } 1297 }
1214 list_add(&call->list, &ftrace_events); 1298 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1215 event_create_dir(call, d_events, &ftrace_event_id_fops, 1299 &ftrace_enable_fops,
1216 &ftrace_enable_fops, &ftrace_event_filter_fops, 1300 &ftrace_event_filter_fops,
1217 &ftrace_event_format_fops); 1301 &ftrace_event_format_fops);
1302 if (!ret)
1303 list_add(&call->list, &ftrace_events);
1218 } 1304 }
1219 1305
1220 while (true) { 1306 while (true) {
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 98a6cc5c64ed..88c0b6dbd7fe 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -18,11 +18,11 @@
18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> 18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
19 */ 19 */
20 20
21#include <linux/debugfs.h>
22#include <linux/uaccess.h>
23#include <linux/module.h> 21#include <linux/module.h>
24#include <linux/ctype.h> 22#include <linux/ctype.h>
25#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/perf_event.h>
25#include <linux/slab.h>
26 26
27#include "trace.h" 27#include "trace.h"
28#include "trace_output.h" 28#include "trace_output.h"
@@ -31,6 +31,7 @@ enum filter_op_ids
31{ 31{
32 OP_OR, 32 OP_OR,
33 OP_AND, 33 OP_AND,
34 OP_GLOB,
34 OP_NE, 35 OP_NE,
35 OP_EQ, 36 OP_EQ,
36 OP_LT, 37 OP_LT,
@@ -48,16 +49,17 @@ struct filter_op {
48}; 49};
49 50
50static struct filter_op filter_ops[] = { 51static struct filter_op filter_ops[] = {
51 { OP_OR, "||", 1 }, 52 { OP_OR, "||", 1 },
52 { OP_AND, "&&", 2 }, 53 { OP_AND, "&&", 2 },
53 { OP_NE, "!=", 4 }, 54 { OP_GLOB, "~", 4 },
54 { OP_EQ, "==", 4 }, 55 { OP_NE, "!=", 4 },
55 { OP_LT, "<", 5 }, 56 { OP_EQ, "==", 4 },
56 { OP_LE, "<=", 5 }, 57 { OP_LT, "<", 5 },
57 { OP_GT, ">", 5 }, 58 { OP_LE, "<=", 5 },
58 { OP_GE, ">=", 5 }, 59 { OP_GT, ">", 5 },
59 { OP_NONE, "OP_NONE", 0 }, 60 { OP_GE, ">=", 5 },
60 { OP_OPEN_PAREN, "(", 0 }, 61 { OP_NONE, "OP_NONE", 0 },
62 { OP_OPEN_PAREN, "(", 0 },
61}; 63};
62 64
63enum { 65enum {
@@ -197,9 +199,9 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
197 char *addr = (char *)(event + pred->offset); 199 char *addr = (char *)(event + pred->offset);
198 int cmp, match; 200 int cmp, match;
199 201
200 cmp = strncmp(addr, pred->str_val, pred->str_len); 202 cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len);
201 203
202 match = (!cmp) ^ pred->not; 204 match = cmp ^ pred->not;
203 205
204 return match; 206 return match;
205} 207}
@@ -210,10 +212,11 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
210{ 212{
211 char **addr = (char **)(event + pred->offset); 213 char **addr = (char **)(event + pred->offset);
212 int cmp, match; 214 int cmp, match;
215 int len = strlen(*addr) + 1; /* including tailing '\0' */
213 216
214 cmp = strncmp(*addr, pred->str_val, pred->str_len); 217 cmp = pred->regex.match(*addr, &pred->regex, len);
215 218
216 match = (!cmp) ^ pred->not; 219 match = cmp ^ pred->not;
217 220
218 return match; 221 return match;
219} 222}
@@ -237,9 +240,9 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event,
237 char *addr = (char *)(event + str_loc); 240 char *addr = (char *)(event + str_loc);
238 int cmp, match; 241 int cmp, match;
239 242
240 cmp = strncmp(addr, pred->str_val, str_len); 243 cmp = pred->regex.match(addr, &pred->regex, str_len);
241 244
242 match = (!cmp) ^ pred->not; 245 match = cmp ^ pred->not;
243 246
244 return match; 247 return match;
245} 248}
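/*
 * Editorial aside (not part of the patch): the string predicates above now
 * call regex.match(), which returns 1 on a match, whereas strncmp() returned
 * 0 on a match -- hence "(!cmp) ^ pred->not" becoming "cmp ^ pred->not".
 */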
@@ -250,10 +253,133 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
250 return 0; 253 return 0;
251} 254}
252 255
256/*
257 * regex_match_foo - Basic regex callbacks
258 *
259 * @str: the string to be searched
260 * @r: the regex structure containing the pattern string
261 * @len: the length of the string to be searched (including '\0')
262 *
263 * Note:
264 * - @str might not be NULL-terminated if it's of type DYN_STRING
265 * or STATIC_STRING
266 */
267
268static int regex_match_full(char *str, struct regex *r, int len)
269{
270 if (strncmp(str, r->pattern, len) == 0)
271 return 1;
272 return 0;
273}
274
275static int regex_match_front(char *str, struct regex *r, int len)
276{
277 if (strncmp(str, r->pattern, r->len) == 0)
278 return 1;
279 return 0;
280}
281
282static int regex_match_middle(char *str, struct regex *r, int len)
283{
284 if (strnstr(str, r->pattern, len))
285 return 1;
286 return 0;
287}
288
289static int regex_match_end(char *str, struct regex *r, int len)
290{
291 int strlen = len - 1;
292
293 if (strlen >= r->len &&
294 memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
295 return 1;
296 return 0;
297}
298
299/**
300 * filter_parse_regex - parse a basic regex
301 * @buff: the raw regex
302 * @len: length of the regex
303 * @search: will point to the beginning of the string to compare
304 * @not: tell whether the match will have to be inverted
305 *
 306 * The caller passes in a buffer containing a regex; this function
 307 * sets search to point to the search part of the buffer and
 308 * returns the type of search it is (see enum above).
309 * This does modify buff.
310 *
311 * Returns enum type.
312 * search returns the pointer to use for comparison.
313 * not returns 1 if buff started with a '!'
314 * 0 otherwise.
315 */
316enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
317{
318 int type = MATCH_FULL;
319 int i;
320
321 if (buff[0] == '!') {
322 *not = 1;
323 buff++;
324 len--;
325 } else
326 *not = 0;
327
328 *search = buff;
329
330 for (i = 0; i < len; i++) {
331 if (buff[i] == '*') {
332 if (!i) {
333 *search = buff + 1;
334 type = MATCH_END_ONLY;
335 } else {
336 if (type == MATCH_END_ONLY)
337 type = MATCH_MIDDLE_ONLY;
338 else
339 type = MATCH_FRONT_ONLY;
340 buff[i] = 0;
341 break;
342 }
343 }
344 }
345
346 return type;
347}
348
349static void filter_build_regex(struct filter_pred *pred)
350{
351 struct regex *r = &pred->regex;
352 char *search;
353 enum regex_type type = MATCH_FULL;
354 int not = 0;
355
356 if (pred->op == OP_GLOB) {
357 type = filter_parse_regex(r->pattern, r->len, &search, &not);
358 r->len = strlen(search);
359 memmove(r->pattern, search, r->len+1);
360 }
361
362 switch (type) {
363 case MATCH_FULL:
364 r->match = regex_match_full;
365 break;
366 case MATCH_FRONT_ONLY:
367 r->match = regex_match_front;
368 break;
369 case MATCH_MIDDLE_ONLY:
370 r->match = regex_match_middle;
371 break;
372 case MATCH_END_ONLY:
373 r->match = regex_match_end;
374 break;
375 }
376
377 pred->not ^= not;
378}
379
253/* return 1 if event matches, 0 otherwise (discard) */ 380/* return 1 if event matches, 0 otherwise (discard) */
254int filter_match_preds(struct ftrace_event_call *call, void *rec) 381int filter_match_preds(struct event_filter *filter, void *rec)
255{ 382{
256 struct event_filter *filter = call->filter;
257 int match, top = 0, val1 = 0, val2 = 0; 383 int match, top = 0, val1 = 0, val2 = 0;
258 int stack[MAX_FILTER_PRED]; 384 int stack[MAX_FILTER_PRED];
259 struct filter_pred *pred; 385 struct filter_pred *pred;
@@ -396,7 +522,7 @@ static void filter_clear_pred(struct filter_pred *pred)
396{ 522{
397 kfree(pred->field_name); 523 kfree(pred->field_name);
398 pred->field_name = NULL; 524 pred->field_name = NULL;
399 pred->str_len = 0; 525 pred->regex.len = 0;
400} 526}
401 527
402static int filter_set_pred(struct filter_pred *dest, 528static int filter_set_pred(struct filter_pred *dest,
@@ -426,9 +552,8 @@ static void filter_disable_preds(struct ftrace_event_call *call)
426 filter->preds[i]->fn = filter_pred_none; 552 filter->preds[i]->fn = filter_pred_none;
427} 553}
428 554
429void destroy_preds(struct ftrace_event_call *call) 555static void __free_preds(struct event_filter *filter)
430{ 556{
431 struct event_filter *filter = call->filter;
432 int i; 557 int i;
433 558
434 if (!filter) 559 if (!filter)
@@ -441,21 +566,24 @@ void destroy_preds(struct ftrace_event_call *call)
441 kfree(filter->preds); 566 kfree(filter->preds);
442 kfree(filter->filter_string); 567 kfree(filter->filter_string);
443 kfree(filter); 568 kfree(filter);
569}
570
571void destroy_preds(struct ftrace_event_call *call)
572{
573 __free_preds(call->filter);
444 call->filter = NULL; 574 call->filter = NULL;
575 call->filter_active = 0;
445} 576}
446 577
447static int init_preds(struct ftrace_event_call *call) 578static struct event_filter *__alloc_preds(void)
448{ 579{
449 struct event_filter *filter; 580 struct event_filter *filter;
450 struct filter_pred *pred; 581 struct filter_pred *pred;
451 int i; 582 int i;
452 583
453 if (call->filter) 584 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
454 return 0; 585 if (!filter)
455 586 return ERR_PTR(-ENOMEM);
456 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
457 if (!call->filter)
458 return -ENOMEM;
459 587
460 filter->n_preds = 0; 588 filter->n_preds = 0;
461 589
@@ -471,12 +599,24 @@ static int init_preds(struct ftrace_event_call *call)
471 filter->preds[i] = pred; 599 filter->preds[i] = pred;
472 } 600 }
473 601
474 return 0; 602 return filter;
475 603
476oom: 604oom:
477 destroy_preds(call); 605 __free_preds(filter);
606 return ERR_PTR(-ENOMEM);
607}
608
609static int init_preds(struct ftrace_event_call *call)
610{
611 if (call->filter)
612 return 0;
478 613
479 return -ENOMEM; 614 call->filter_active = 0;
615 call->filter = __alloc_preds();
616 if (IS_ERR(call->filter))
617 return PTR_ERR(call->filter);
618
619 return 0;
480} 620}
481 621
482static int init_subsystem_preds(struct event_subsystem *system) 622static int init_subsystem_preds(struct event_subsystem *system)
@@ -499,14 +639,7 @@ static int init_subsystem_preds(struct event_subsystem *system)
499 return 0; 639 return 0;
500} 640}
501 641
502enum { 642static void filter_free_subsystem_preds(struct event_subsystem *system)
503 FILTER_DISABLE_ALL,
504 FILTER_INIT_NO_RESET,
505 FILTER_SKIP_NO_RESET,
506};
507
508static void filter_free_subsystem_preds(struct event_subsystem *system,
509 int flag)
510{ 643{
511 struct ftrace_event_call *call; 644 struct ftrace_event_call *call;
512 645
@@ -517,14 +650,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
517 if (strcmp(call->system, system->name) != 0) 650 if (strcmp(call->system, system->name) != 0)
518 continue; 651 continue;
519 652
520 if (flag == FILTER_INIT_NO_RESET) {
521 call->filter->no_reset = false;
522 continue;
523 }
524
525 if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
526 continue;
527
528 filter_disable_preds(call); 653 filter_disable_preds(call);
529 remove_filter_string(call->filter); 654 remove_filter_string(call->filter);
530 } 655 }
@@ -532,10 +657,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
532 657
533static int filter_add_pred_fn(struct filter_parse_state *ps, 658static int filter_add_pred_fn(struct filter_parse_state *ps,
534 struct ftrace_event_call *call, 659 struct ftrace_event_call *call,
660 struct event_filter *filter,
535 struct filter_pred *pred, 661 struct filter_pred *pred,
536 filter_pred_fn_t fn) 662 filter_pred_fn_t fn)
537{ 663{
538 struct event_filter *filter = call->filter;
539 int idx, err; 664 int idx, err;
540 665
541 if (filter->n_preds == MAX_FILTER_PRED) { 666 if (filter->n_preds == MAX_FILTER_PRED) {
@@ -550,7 +675,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
550 return err; 675 return err;
551 676
552 filter->n_preds++; 677 filter->n_preds++;
553 call->filter_active = 1;
554 678
555 return 0; 679 return 0;
556} 680}
@@ -575,7 +699,10 @@ static bool is_string_field(struct ftrace_event_field *field)
575 699
576static int is_legal_op(struct ftrace_event_field *field, int op) 700static int is_legal_op(struct ftrace_event_field *field, int op)
577{ 701{
578 if (is_string_field(field) && (op != OP_EQ && op != OP_NE)) 702 if (is_string_field(field) &&
703 (op != OP_EQ && op != OP_NE && op != OP_GLOB))
704 return 0;
705 if (!is_string_field(field) && op == OP_GLOB)
579 return 0; 706 return 0;
580 707
581 return 1; 708 return 1;
@@ -626,6 +753,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
626 753
627static int filter_add_pred(struct filter_parse_state *ps, 754static int filter_add_pred(struct filter_parse_state *ps,
628 struct ftrace_event_call *call, 755 struct ftrace_event_call *call,
756 struct event_filter *filter,
629 struct filter_pred *pred, 757 struct filter_pred *pred,
630 bool dry_run) 758 bool dry_run)
631{ 759{
@@ -660,21 +788,20 @@ static int filter_add_pred(struct filter_parse_state *ps,
660 } 788 }
661 789
662 if (is_string_field(field)) { 790 if (is_string_field(field)) {
663 pred->str_len = field->size; 791 filter_build_regex(pred);
664 792
665 if (field->filter_type == FILTER_STATIC_STRING) 793 if (field->filter_type == FILTER_STATIC_STRING) {
666 fn = filter_pred_string; 794 fn = filter_pred_string;
667 else if (field->filter_type == FILTER_DYN_STRING) 795 pred->regex.field_len = field->size;
796 } else if (field->filter_type == FILTER_DYN_STRING)
668 fn = filter_pred_strloc; 797 fn = filter_pred_strloc;
669 else { 798 else
670 fn = filter_pred_pchar; 799 fn = filter_pred_pchar;
671 pred->str_len = strlen(pred->str_val);
672 }
673 } else { 800 } else {
674 if (field->is_signed) 801 if (field->is_signed)
675 ret = strict_strtoll(pred->str_val, 0, &val); 802 ret = strict_strtoll(pred->regex.pattern, 0, &val);
676 else 803 else
677 ret = strict_strtoull(pred->str_val, 0, &val); 804 ret = strict_strtoull(pred->regex.pattern, 0, &val);
678 if (ret) { 805 if (ret) {
679 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); 806 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
680 return -EINVAL; 807 return -EINVAL;
@@ -694,45 +821,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
694 821
695add_pred_fn: 822add_pred_fn:
696 if (!dry_run) 823 if (!dry_run)
697 return filter_add_pred_fn(ps, call, pred, fn); 824 return filter_add_pred_fn(ps, call, filter, pred, fn);
698 return 0;
699}
700
701static int filter_add_subsystem_pred(struct filter_parse_state *ps,
702 struct event_subsystem *system,
703 struct filter_pred *pred,
704 char *filter_string,
705 bool dry_run)
706{
707 struct ftrace_event_call *call;
708 int err = 0;
709 bool fail = true;
710
711 list_for_each_entry(call, &ftrace_events, list) {
712
713 if (!call->define_fields)
714 continue;
715
716 if (strcmp(call->system, system->name))
717 continue;
718
719 if (call->filter->no_reset)
720 continue;
721
722 err = filter_add_pred(ps, call, pred, dry_run);
723 if (err)
724 call->filter->no_reset = true;
725 else
726 fail = false;
727
728 if (!dry_run)
729 replace_filter_string(call->filter, filter_string);
730 }
731
732 if (fail) {
733 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
734 return err;
735 }
736 return 0; 825 return 0;
737} 826}
738 827
@@ -1045,8 +1134,8 @@ static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
1045 return NULL; 1134 return NULL;
1046 } 1135 }
1047 1136
1048 strcpy(pred->str_val, operand2); 1137 strcpy(pred->regex.pattern, operand2);
1049 pred->str_len = strlen(operand2); 1138 pred->regex.len = strlen(pred->regex.pattern);
1050 1139
1051 pred->op = op; 1140 pred->op = op;
1052 1141
@@ -1090,8 +1179,8 @@ static int check_preds(struct filter_parse_state *ps)
1090 return 0; 1179 return 0;
1091} 1180}
1092 1181
1093static int replace_preds(struct event_subsystem *system, 1182static int replace_preds(struct ftrace_event_call *call,
1094 struct ftrace_event_call *call, 1183 struct event_filter *filter,
1095 struct filter_parse_state *ps, 1184 struct filter_parse_state *ps,
1096 char *filter_string, 1185 char *filter_string,
1097 bool dry_run) 1186 bool dry_run)
@@ -1138,11 +1227,7 @@ static int replace_preds(struct event_subsystem *system,
1138add_pred: 1227add_pred:
1139 if (!pred) 1228 if (!pred)
1140 return -ENOMEM; 1229 return -ENOMEM;
1141 if (call) 1230 err = filter_add_pred(ps, call, filter, pred, dry_run);
1142 err = filter_add_pred(ps, call, pred, false);
1143 else
1144 err = filter_add_subsystem_pred(ps, system, pred,
1145 filter_string, dry_run);
1146 filter_free_pred(pred); 1231 filter_free_pred(pred);
1147 if (err) 1232 if (err)
1148 return err; 1233 return err;
@@ -1153,10 +1238,50 @@ add_pred:
1153 return 0; 1238 return 0;
1154} 1239}
1155 1240
1156int apply_event_filter(struct ftrace_event_call *call, char *filter_string) 1241static int replace_system_preds(struct event_subsystem *system,
1242 struct filter_parse_state *ps,
1243 char *filter_string)
1157{ 1244{
1245 struct ftrace_event_call *call;
1246 bool fail = true;
1158 int err; 1247 int err;
1159 1248
1249 list_for_each_entry(call, &ftrace_events, list) {
1250 struct event_filter *filter = call->filter;
1251
1252 if (!call->define_fields)
1253 continue;
1254
1255 if (strcmp(call->system, system->name) != 0)
1256 continue;
1257
1258 /* try to see if the filter can be applied */
1259 err = replace_preds(call, filter, ps, filter_string, true);
1260 if (err)
1261 continue;
1262
1263 /* really apply the filter */
1264 filter_disable_preds(call);
1265 err = replace_preds(call, filter, ps, filter_string, false);
1266 if (err)
1267 filter_disable_preds(call);
1268 else {
1269 call->filter_active = 1;
1270 replace_filter_string(filter, filter_string);
1271 }
1272 fail = false;
1273 }
1274
1275 if (fail) {
1276 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
1277 return -EINVAL;
1278 }
1279 return 0;
1280}
1281
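/*
 * Editorial aside (not part of the patch): replace_system_preds() above uses
 * a two-pass pattern -- replace_preds() is first called with dry_run == true
 * to check that the filter can be applied to an event's fields, and only then
 * called again with dry_run == false to really install the predicates.
 */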
1282int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1283{
1284 int err;
1160 struct filter_parse_state *ps; 1285 struct filter_parse_state *ps;
1161 1286
1162 mutex_lock(&event_mutex); 1287 mutex_lock(&event_mutex);
@@ -1168,8 +1293,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1168 if (!strcmp(strstrip(filter_string), "0")) { 1293 if (!strcmp(strstrip(filter_string), "0")) {
1169 filter_disable_preds(call); 1294 filter_disable_preds(call);
1170 remove_filter_string(call->filter); 1295 remove_filter_string(call->filter);
1171 mutex_unlock(&event_mutex); 1296 goto out_unlock;
1172 return 0;
1173 } 1297 }
1174 1298
1175 err = -ENOMEM; 1299 err = -ENOMEM;
@@ -1187,10 +1311,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1187 goto out; 1311 goto out;
1188 } 1312 }
1189 1313
1190 err = replace_preds(NULL, call, ps, filter_string, false); 1314 err = replace_preds(call, call->filter, ps, filter_string, false);
1191 if (err) 1315 if (err)
1192 append_filter_err(ps, call->filter); 1316 append_filter_err(ps, call->filter);
1193 1317 else
1318 call->filter_active = 1;
1194out: 1319out:
1195 filter_opstack_clear(ps); 1320 filter_opstack_clear(ps);
1196 postfix_clear(ps); 1321 postfix_clear(ps);
@@ -1205,7 +1330,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1205 char *filter_string) 1330 char *filter_string)
1206{ 1331{
1207 int err; 1332 int err;
1208
1209 struct filter_parse_state *ps; 1333 struct filter_parse_state *ps;
1210 1334
1211 mutex_lock(&event_mutex); 1335 mutex_lock(&event_mutex);
@@ -1215,10 +1339,9 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1215 goto out_unlock; 1339 goto out_unlock;
1216 1340
1217 if (!strcmp(strstrip(filter_string), "0")) { 1341 if (!strcmp(strstrip(filter_string), "0")) {
1218 filter_free_subsystem_preds(system, FILTER_DISABLE_ALL); 1342 filter_free_subsystem_preds(system);
1219 remove_filter_string(system->filter); 1343 remove_filter_string(system->filter);
1220 mutex_unlock(&event_mutex); 1344 goto out_unlock;
1221 return 0;
1222 } 1345 }
1223 1346
1224 err = -ENOMEM; 1347 err = -ENOMEM;
@@ -1235,31 +1358,87 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1235 goto out; 1358 goto out;
1236 } 1359 }
1237 1360
1238 filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET); 1361 err = replace_system_preds(system, ps, filter_string);
1239 1362 if (err)
1240 /* try to see the filter can be applied to which events */
1241 err = replace_preds(system, NULL, ps, filter_string, true);
1242 if (err) {
1243 append_filter_err(ps, system->filter); 1363 append_filter_err(ps, system->filter);
1244 goto out; 1364
1365out:
1366 filter_opstack_clear(ps);
1367 postfix_clear(ps);
1368 kfree(ps);
1369out_unlock:
1370 mutex_unlock(&event_mutex);
1371
1372 return err;
1373}
1374
1375#ifdef CONFIG_PERF_EVENTS
1376
1377void ftrace_profile_free_filter(struct perf_event *event)
1378{
1379 struct event_filter *filter = event->filter;
1380
1381 event->filter = NULL;
1382 __free_preds(filter);
1383}
1384
1385int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1386 char *filter_str)
1387{
1388 int err;
1389 struct event_filter *filter;
1390 struct filter_parse_state *ps;
1391 struct ftrace_event_call *call = NULL;
1392
1393 mutex_lock(&event_mutex);
1394
1395 list_for_each_entry(call, &ftrace_events, list) {
1396 if (call->id == event_id)
1397 break;
1245 } 1398 }
1246 1399
1247 filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET); 1400 err = -EINVAL;
1401 if (!call)
1402 goto out_unlock;
1403
1404 err = -EEXIST;
1405 if (event->filter)
1406 goto out_unlock;
1248 1407
1249 /* really apply the filter to the events */ 1408 filter = __alloc_preds();
1250 err = replace_preds(system, NULL, ps, filter_string, false); 1409 if (IS_ERR(filter)) {
1251 if (err) { 1410 err = PTR_ERR(filter);
1252 append_filter_err(ps, system->filter); 1411 goto out_unlock;
1253 filter_free_subsystem_preds(system, 2);
1254 } 1412 }
1255 1413
1256out: 1414 err = -ENOMEM;
1415 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1416 if (!ps)
1417 goto free_preds;
1418
1419 parse_init(ps, filter_ops, filter_str);
1420 err = filter_parse(ps);
1421 if (err)
1422 goto free_ps;
1423
1424 err = replace_preds(call, filter, ps, filter_str, false);
1425 if (!err)
1426 event->filter = filter;
1427
1428free_ps:
1257 filter_opstack_clear(ps); 1429 filter_opstack_clear(ps);
1258 postfix_clear(ps); 1430 postfix_clear(ps);
1259 kfree(ps); 1431 kfree(ps);
1432
1433free_preds:
1434 if (err)
1435 __free_preds(filter);
1436
1260out_unlock: 1437out_unlock:
1261 mutex_unlock(&event_mutex); 1438 mutex_unlock(&event_mutex);
1262 1439
1263 return err; 1440 return err;
1264} 1441}
1265 1442
1443#endif /* CONFIG_PERF_EVENTS */
1444
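Editorial aside, not part of the patch: the trace_events_filter.c changes above add a glob operator ('~') and classify patterns in filter_parse_regex(). A minimal userspace sketch of that classification follows; the *_demo names are hypothetical and the logic simply mirrors the function shown above.

#include <stdio.h>
#include <string.h>

enum regex_type_demo { MATCH_FULL, MATCH_FRONT_ONLY, MATCH_MIDDLE_ONLY, MATCH_END_ONLY };

/* Mirror of filter_parse_regex(): classify a glob and strip its '*'s. */
static enum regex_type_demo classify_demo(char *buff, char **search, int *not)
{
	enum regex_type_demo type = MATCH_FULL;
	int len = strlen(buff);
	int i;

	if (buff[0] == '!') {			/* leading '!' inverts the match */
		*not = 1;
		buff++;
		len--;
	} else
		*not = 0;

	*search = buff;

	for (i = 0; i < len; i++) {
		if (buff[i] == '*') {
			if (!i) {
				/* leading '*': compare against the end */
				*search = buff + 1;
				type = MATCH_END_ONLY;
			} else {
				/* later '*': front match, or middle match
				 * if a leading '*' was already seen */
				if (type == MATCH_END_ONLY)
					type = MATCH_MIDDLE_ONLY;
				else
					type = MATCH_FRONT_ONLY;
				buff[i] = 0;
				break;
			}
		}
	}
	return type;
}

int main(void)
{
	char front[] = "sched*", end[] = "*lock", middle[] = "*page*";
	char *s;
	int not;

	/* Prints 1 3 2: FRONT_ONLY, END_ONLY, MIDDLE_ONLY */
	printf("%d %d %d\n",
	       classify_demo(front, &s, &not),
	       classify_demo(end, &s, &not),
	       classify_demo(middle, &s, &not));
	return 0;
}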
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 9753fcc61bc5..e091f64ba6ce 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -48,11 +48,11 @@
48struct ____ftrace_##name { \ 48struct ____ftrace_##name { \
49 tstruct \ 49 tstruct \
50}; \ 50}; \
51static void __used ____ftrace_check_##name(void) \ 51static void __always_unused ____ftrace_check_##name(void) \
52{ \ 52{ \
53 struct ____ftrace_##name *__entry = NULL; \ 53 struct ____ftrace_##name *__entry = NULL; \
54 \ 54 \
55 /* force cmpile-time check on F_printk() */ \ 55 /* force compile-time check on F_printk() */ \
56 printk(print); \ 56 printk(print); \
57} 57}
58 58
@@ -62,76 +62,6 @@ static void __used ____ftrace_check_##name(void) \
62 62
63#include "trace_entries.h" 63#include "trace_entries.h"
64 64
65
66#undef __field
67#define __field(type, item) \
68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
69 "offset:%zu;\tsize:%zu;\n", \
70 offsetof(typeof(field), item), \
71 sizeof(field.item)); \
72 if (!ret) \
73 return 0;
74
75#undef __field_desc
76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\n", \
79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item)); \
81 if (!ret) \
82 return 0;
83
84#undef __array
85#define __array(type, item, len) \
86 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
87 "offset:%zu;\tsize:%zu;\n", \
88 offsetof(typeof(field), item), \
89 sizeof(field.item)); \
90 if (!ret) \
91 return 0;
92
93#undef __array_desc
94#define __array_desc(type, container, item, len) \
95 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
96 "offset:%zu;\tsize:%zu;\n", \
97 offsetof(typeof(field), container.item), \
98 sizeof(field.container.item)); \
99 if (!ret) \
100 return 0;
101
102#undef __dynamic_array
103#define __dynamic_array(type, item) \
104 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
105 "offset:%zu;\tsize:0;\n", \
106 offsetof(typeof(field), item)); \
107 if (!ret) \
108 return 0;
109
110#undef F_printk
111#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
112
113#undef __entry
114#define __entry REC
115
116#undef FTRACE_ENTRY
117#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
118static int \
119ftrace_format_##name(struct ftrace_event_call *unused, \
120 struct trace_seq *s) \
121{ \
122 struct struct_name field __attribute__((unused)); \
123 int ret = 0; \
124 \
125 tstruct; \
126 \
127 trace_seq_printf(s, "\nprint fmt: " print); \
128 \
129 return ret; \
130}
131
132#include "trace_entries.h"
133
134
135#undef __field 65#undef __field
136#define __field(type, item) \ 66#define __field(type, item) \
137 ret = trace_define_field(event_call, #type, #item, \ 67 ret = trace_define_field(event_call, #type, #item, \
@@ -156,7 +86,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
156 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 86 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
157 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 87 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
158 offsetof(typeof(field), item), \ 88 offsetof(typeof(field), item), \
159 sizeof(field.item), 0, FILTER_OTHER); \ 89 sizeof(field.item), \
90 is_signed_type(type), FILTER_OTHER); \
160 if (ret) \ 91 if (ret) \
161 return ret; 92 return ret;
162 93
@@ -166,13 +97,18 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
166 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 97 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
167 offsetof(typeof(field), \ 98 offsetof(typeof(field), \
168 container.item), \ 99 container.item), \
169 sizeof(field.container.item), 0, \ 100 sizeof(field.container.item), \
170 FILTER_OTHER); \ 101 is_signed_type(type), FILTER_OTHER); \
171 if (ret) \ 102 if (ret) \
172 return ret; 103 return ret;
173 104
174#undef __dynamic_array 105#undef __dynamic_array
175#define __dynamic_array(type, item) 106#define __dynamic_array(type, item) \
107 ret = trace_define_field(event_call, #type, #item, \
108 offsetof(typeof(field), item), \
109 0, is_signed_type(type), FILTER_OTHER);\
110 if (ret) \
111 return ret;
176 112
177#undef FTRACE_ENTRY 113#undef FTRACE_ENTRY
178#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ 114#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
@@ -182,10 +118,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
182 struct struct_name field; \ 118 struct struct_name field; \
183 int ret; \ 119 int ret; \
184 \ 120 \
185 ret = trace_define_common_fields(event_call); \
186 if (ret) \
187 return ret; \
188 \
189 tstruct; \ 121 tstruct; \
190 \ 122 \
191 return ret; \ 123 return ret; \
@@ -193,6 +125,14 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
193 125
194#include "trace_entries.h" 126#include "trace_entries.h"
195 127
128static int ftrace_raw_init_event(struct ftrace_event_call *call)
129{
130 INIT_LIST_HEAD(&call->fields);
131 return 0;
132}
133
134#undef __entry
135#define __entry REC
196 136
197#undef __field 137#undef __field
198#define __field(type, item) 138#define __field(type, item)
@@ -209,9 +149,11 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
209#undef __dynamic_array 149#undef __dynamic_array
210#define __dynamic_array(type, item) 150#define __dynamic_array(type, item)
211 151
152#undef F_printk
153#define F_printk(fmt, args...) #fmt ", " __stringify(args)
154
212#undef FTRACE_ENTRY 155#undef FTRACE_ENTRY
213#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 156#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
214static int ftrace_raw_init_event_##call(void); \
215 \ 157 \
216struct ftrace_event_call __used \ 158struct ftrace_event_call __used \
217__attribute__((__aligned__(4))) \ 159__attribute__((__aligned__(4))) \
@@ -219,14 +161,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
219 .name = #call, \ 161 .name = #call, \
220 .id = type, \ 162 .id = type, \
221 .system = __stringify(TRACE_SYSTEM), \ 163 .system = __stringify(TRACE_SYSTEM), \
222 .raw_init = ftrace_raw_init_event_##call, \ 164 .raw_init = ftrace_raw_init_event, \
223 .show_format = ftrace_format_##call, \ 165 .print_fmt = print, \
224 .define_fields = ftrace_define_fields_##call, \ 166 .define_fields = ftrace_define_fields_##call, \
225}; \ 167}; \
226static int ftrace_raw_init_event_##call(void) \
227{ \
228 INIT_LIST_HEAD(&event_##call.fields); \
229 return 0; \
230} \
231 168
232#include "trace_entries.h" 169#include "trace_entries.h"
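Editorial aside, not part of the patch: the trace_export.c hunks above replace a hard-coded 0 with is_signed_type(type) when defining fields. A stand-alone sketch of what such a signedness test can look like (the real macro lives in the tracing headers; the _demo name is hypothetical):

#include <stdio.h>

/* Hypothetical stand-in for the kernel's is_signed_type() macro. */
#define is_signed_type_demo(type)	(((type)(-1)) < (type)1)

int main(void)
{
	printf("int: %d  unsigned long: %d  char: %d\n",
	       is_signed_type_demo(int),
	       is_signed_type_demo(unsigned long),
	       is_signed_type_demo(char));	/* char signedness is ABI-dependent */
	return 0;
}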
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 45e6c01b2e4d..9aed1a5cf553 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -9,14 +9,27 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <linux/slab.h>
12#include <linux/fs.h> 13#include <linux/fs.h>
13 14
14#include "trace.h" 15#include "trace.h"
15#include "trace_output.h" 16#include "trace_output.h"
16 17
17struct fgraph_data { 18struct fgraph_cpu_data {
18 pid_t last_pid; 19 pid_t last_pid;
19 int depth; 20 int depth;
21 int ignore;
22 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
23};
24
25struct fgraph_data {
26 struct fgraph_cpu_data *cpu_data;
27
28 /* Place to preserve last processed entry. */
29 struct ftrace_graph_ent_entry ent;
30 struct ftrace_graph_ret_entry ret;
31 int failed;
32 int cpu;
20}; 33};
21 34
22#define TRACE_GRAPH_INDENT 2 35#define TRACE_GRAPH_INDENT 2
@@ -176,7 +189,7 @@ static int __trace_graph_entry(struct trace_array *tr,
176 struct ring_buffer *buffer = tr->buffer; 189 struct ring_buffer *buffer = tr->buffer;
177 struct ftrace_graph_ent_entry *entry; 190 struct ftrace_graph_ent_entry *entry;
178 191
179 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 192 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
180 return 0; 193 return 0;
181 194
182 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, 195 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -201,13 +214,11 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
201 int cpu; 214 int cpu;
202 int pc; 215 int pc;
203 216
204 if (unlikely(!tr))
205 return 0;
206
207 if (!ftrace_trace_task(current)) 217 if (!ftrace_trace_task(current))
208 return 0; 218 return 0;
209 219
 210 if (!ftrace_graph_addr(trace->func)) 220 /* trace it when it is nested in, or is, an enabled function. */
221 if (!(trace->depth || ftrace_graph_addr(trace->func)))
211 return 0; 222 return 0;
212 223
213 local_irq_save(flags); 224 local_irq_save(flags);
@@ -220,9 +231,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
220 } else { 231 } else {
221 ret = 0; 232 ret = 0;
222 } 233 }
223 /* Only do the atomic if it is not already set */
224 if (!test_tsk_trace_graph(current))
225 set_tsk_trace_graph(current);
226 234
227 atomic_dec(&data->disabled); 235 atomic_dec(&data->disabled);
228 local_irq_restore(flags); 236 local_irq_restore(flags);
@@ -230,6 +238,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
230 return ret; 238 return ret;
231} 239}
232 240
241int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
242{
243 if (tracing_thresh)
244 return 1;
245 else
246 return trace_graph_entry(trace);
247}
248
233static void __trace_graph_return(struct trace_array *tr, 249static void __trace_graph_return(struct trace_array *tr,
234 struct ftrace_graph_ret *trace, 250 struct ftrace_graph_ret *trace,
235 unsigned long flags, 251 unsigned long flags,
@@ -240,7 +256,7 @@ static void __trace_graph_return(struct trace_array *tr,
240 struct ring_buffer *buffer = tr->buffer; 256 struct ring_buffer *buffer = tr->buffer;
241 struct ftrace_graph_ret_entry *entry; 257 struct ftrace_graph_ret_entry *entry;
242 258
243 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 259 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
244 return; 260 return;
245 261
246 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, 262 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@ -270,19 +286,39 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
270 pc = preempt_count(); 286 pc = preempt_count();
271 __trace_graph_return(tr, trace, flags, pc); 287 __trace_graph_return(tr, trace, flags, pc);
272 } 288 }
273 if (!trace->depth)
274 clear_tsk_trace_graph(current);
275 atomic_dec(&data->disabled); 289 atomic_dec(&data->disabled);
276 local_irq_restore(flags); 290 local_irq_restore(flags);
277} 291}
278 292
293void set_graph_array(struct trace_array *tr)
294{
295 graph_array = tr;
296
297 /* Make graph_array visible before we start tracing */
298
299 smp_mb();
300}
301
302void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
303{
304 if (tracing_thresh &&
305 (trace->rettime - trace->calltime < tracing_thresh))
306 return;
307 else
308 trace_graph_return(trace);
309}
310
279static int graph_trace_init(struct trace_array *tr) 311static int graph_trace_init(struct trace_array *tr)
280{ 312{
281 int ret; 313 int ret;
282 314
283 graph_array = tr; 315 set_graph_array(tr);
284 ret = register_ftrace_graph(&trace_graph_return, 316 if (tracing_thresh)
285 &trace_graph_entry); 317 ret = register_ftrace_graph(&trace_graph_thresh_return,
318 &trace_graph_thresh_entry);
319 else
320 ret = register_ftrace_graph(&trace_graph_return,
321 &trace_graph_entry);
286 if (ret) 322 if (ret)
287 return ret; 323 return ret;
288 tracing_start_cmdline_record(); 324 tracing_start_cmdline_record();
@@ -290,11 +326,6 @@ static int graph_trace_init(struct trace_array *tr)
290 return 0; 326 return 0;
291} 327}
292 328
293void set_graph_array(struct trace_array *tr)
294{
295 graph_array = tr;
296}
297
298static void graph_trace_reset(struct trace_array *tr) 329static void graph_trace_reset(struct trace_array *tr)
299{ 330{
300 tracing_stop_cmdline_record(); 331 tracing_stop_cmdline_record();
@@ -384,7 +415,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
384 if (!data) 415 if (!data)
385 return TRACE_TYPE_HANDLED; 416 return TRACE_TYPE_HANDLED;
386 417
387 last_pid = &(per_cpu_ptr(data, cpu)->last_pid); 418 last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
388 419
389 if (*last_pid == pid) 420 if (*last_pid == pid)
390 return TRACE_TYPE_HANDLED; 421 return TRACE_TYPE_HANDLED;
@@ -435,26 +466,49 @@ static struct ftrace_graph_ret_entry *
435get_return_for_leaf(struct trace_iterator *iter, 466get_return_for_leaf(struct trace_iterator *iter,
436 struct ftrace_graph_ent_entry *curr) 467 struct ftrace_graph_ent_entry *curr)
437{ 468{
438 struct ring_buffer_iter *ring_iter; 469 struct fgraph_data *data = iter->private;
470 struct ring_buffer_iter *ring_iter = NULL;
439 struct ring_buffer_event *event; 471 struct ring_buffer_event *event;
440 struct ftrace_graph_ret_entry *next; 472 struct ftrace_graph_ret_entry *next;
441 473
442 ring_iter = iter->buffer_iter[iter->cpu]; 474 /*
475 * If the previous output failed to write to the seq buffer,
476 * then we just reuse the data from before.
477 */
478 if (data && data->failed) {
479 curr = &data->ent;
480 next = &data->ret;
481 } else {
443 482
444 /* First peek to compare current entry and the next one */ 483 ring_iter = iter->buffer_iter[iter->cpu];
445 if (ring_iter) 484
446 event = ring_buffer_iter_peek(ring_iter, NULL); 485 /* First peek to compare current entry and the next one */
447 else { 486 if (ring_iter)
448 /* We need to consume the current entry to see the next one */ 487 event = ring_buffer_iter_peek(ring_iter, NULL);
449 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); 488 else {
450 event = ring_buffer_peek(iter->tr->buffer, iter->cpu, 489 /*
451 NULL); 490 * We need to consume the current entry to see
452 } 491 * the next one.
492 */
493 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
494 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
495 NULL);
496 }
453 497
454 if (!event) 498 if (!event)
455 return NULL; 499 return NULL;
456 500
457 next = ring_buffer_event_data(event); 501 next = ring_buffer_event_data(event);
502
503 if (data) {
504 /*
505 * Save current and next entries for later reference
506 * if the output fails.
507 */
508 data->ent = *curr;
509 data->ret = *next;
510 }
511 }
458 512
459 if (next->ent.type != TRACE_GRAPH_RET) 513 if (next->ent.type != TRACE_GRAPH_RET)
460 return NULL; 514 return NULL;
@@ -639,15 +693,21 @@ print_graph_entry_leaf(struct trace_iterator *iter,
639 duration = graph_ret->rettime - graph_ret->calltime; 693 duration = graph_ret->rettime - graph_ret->calltime;
640 694
641 if (data) { 695 if (data) {
696 struct fgraph_cpu_data *cpu_data;
642 int cpu = iter->cpu; 697 int cpu = iter->cpu;
643 int *depth = &(per_cpu_ptr(data, cpu)->depth); 698
699 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
644 700
645 /* 701 /*
646 * Comments display at + 1 to depth. Since 702 * Comments display at + 1 to depth. Since
647 * this is a leaf function, keep the comments 703 * this is a leaf function, keep the comments
648 * equal to this depth. 704 * equal to this depth.
649 */ 705 */
650 *depth = call->depth - 1; 706 cpu_data->depth = call->depth - 1;
707
708 /* No need to keep this function around for this depth */
709 if (call->depth < FTRACE_RETFUNC_DEPTH)
710 cpu_data->enter_funcs[call->depth] = 0;
651 } 711 }
652 712
653 /* Overhead */ 713 /* Overhead */
@@ -687,10 +747,15 @@ print_graph_entry_nested(struct trace_iterator *iter,
687 int i; 747 int i;
688 748
689 if (data) { 749 if (data) {
750 struct fgraph_cpu_data *cpu_data;
690 int cpu = iter->cpu; 751 int cpu = iter->cpu;
691 int *depth = &(per_cpu_ptr(data, cpu)->depth);
692 752
693 *depth = call->depth; 753 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
754 cpu_data->depth = call->depth;
755
756 /* Save this function pointer to see if the exit matches */
757 if (call->depth < FTRACE_RETFUNC_DEPTH)
758 cpu_data->enter_funcs[call->depth] = call->func;
694 } 759 }
695 760
696 /* No overhead */ 761 /* No overhead */
@@ -782,19 +847,34 @@ static enum print_line_t
782print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 847print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
783 struct trace_iterator *iter) 848 struct trace_iterator *iter)
784{ 849{
785 int cpu = iter->cpu; 850 struct fgraph_data *data = iter->private;
786 struct ftrace_graph_ent *call = &field->graph_ent; 851 struct ftrace_graph_ent *call = &field->graph_ent;
787 struct ftrace_graph_ret_entry *leaf_ret; 852 struct ftrace_graph_ret_entry *leaf_ret;
853 static enum print_line_t ret;
854 int cpu = iter->cpu;
788 855
789 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) 856 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
790 return TRACE_TYPE_PARTIAL_LINE; 857 return TRACE_TYPE_PARTIAL_LINE;
791 858
792 leaf_ret = get_return_for_leaf(iter, field); 859 leaf_ret = get_return_for_leaf(iter, field);
793 if (leaf_ret) 860 if (leaf_ret)
794 return print_graph_entry_leaf(iter, field, leaf_ret, s); 861 ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
795 else 862 else
796 return print_graph_entry_nested(iter, field, s, cpu); 863 ret = print_graph_entry_nested(iter, field, s, cpu);
864
865 if (data) {
866 /*
867 * If we failed to write our output, then we need to make
868 * note of it. Because we already consumed our entry.
869 */
870 if (s->full) {
871 data->failed = 1;
872 data->cpu = cpu;
873 } else
874 data->failed = 0;
875 }
797 876
877 return ret;
798} 878}
799 879
800static enum print_line_t 880static enum print_line_t
@@ -805,19 +885,28 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
805 struct fgraph_data *data = iter->private; 885 struct fgraph_data *data = iter->private;
806 pid_t pid = ent->pid; 886 pid_t pid = ent->pid;
807 int cpu = iter->cpu; 887 int cpu = iter->cpu;
888 int func_match = 1;
808 int ret; 889 int ret;
809 int i; 890 int i;
810 891
811 if (data) { 892 if (data) {
893 struct fgraph_cpu_data *cpu_data;
812 int cpu = iter->cpu; 894 int cpu = iter->cpu;
813 int *depth = &(per_cpu_ptr(data, cpu)->depth); 895
896 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
814 897
815 /* 898 /*
816 * Comments display at + 1 to depth. This is the 899 * Comments display at + 1 to depth. This is the
817 * return from a function, we now want the comments 900 * return from a function, we now want the comments
818 * to display at the same level of the bracket. 901 * to display at the same level of the bracket.
819 */ 902 */
820 *depth = trace->depth - 1; 903 cpu_data->depth = trace->depth - 1;
904
905 if (trace->depth < FTRACE_RETFUNC_DEPTH) {
906 if (cpu_data->enter_funcs[trace->depth] != trace->func)
907 func_match = 0;
908 cpu_data->enter_funcs[trace->depth] = 0;
909 }
821 } 910 }
822 911
823 if (print_graph_prologue(iter, s, 0, 0)) 912 if (print_graph_prologue(iter, s, 0, 0))
@@ -842,9 +931,21 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
842 return TRACE_TYPE_PARTIAL_LINE; 931 return TRACE_TYPE_PARTIAL_LINE;
843 } 932 }
844 933
845 ret = trace_seq_printf(s, "}\n"); 934 /*
846 if (!ret) 935 * If the return function does not have a matching entry,
847 return TRACE_TYPE_PARTIAL_LINE; 936 * then the entry was lost. Instead of just printing
937 * the '}' and letting the user guess what function this
938 * belongs to, write out the function name.
939 */
940 if (func_match) {
941 ret = trace_seq_printf(s, "}\n");
942 if (!ret)
943 return TRACE_TYPE_PARTIAL_LINE;
944 } else {
945 ret = trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
946 if (!ret)
947 return TRACE_TYPE_PARTIAL_LINE;
948 }
848 949
849 /* Overrun */ 950 /* Overrun */
850 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { 951 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
@@ -873,7 +974,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
873 int i; 974 int i;
874 975
875 if (data) 976 if (data)
876 depth = per_cpu_ptr(data, iter->cpu)->depth; 977 depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
877 978
878 if (print_graph_prologue(iter, s, 0, 0)) 979 if (print_graph_prologue(iter, s, 0, 0))
879 return TRACE_TYPE_PARTIAL_LINE; 980 return TRACE_TYPE_PARTIAL_LINE;
@@ -941,8 +1042,33 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
941enum print_line_t 1042enum print_line_t
942print_graph_function(struct trace_iterator *iter) 1043print_graph_function(struct trace_iterator *iter)
943{ 1044{
1045 struct ftrace_graph_ent_entry *field;
1046 struct fgraph_data *data = iter->private;
944 struct trace_entry *entry = iter->ent; 1047 struct trace_entry *entry = iter->ent;
945 struct trace_seq *s = &iter->seq; 1048 struct trace_seq *s = &iter->seq;
1049 int cpu = iter->cpu;
1050 int ret;
1051
1052 if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
1053 per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
1054 return TRACE_TYPE_HANDLED;
1055 }
1056
1057 /*
1058 * If the last output failed, there's a possibility we need
1059 * to print out the missing entry which would never go out.
1060 */
1061 if (data && data->failed) {
1062 field = &data->ent;
1063 iter->cpu = data->cpu;
1064 ret = print_graph_entry(field, s, iter);
1065 if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
1066 per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
1067 ret = TRACE_TYPE_NO_CONSUME;
1068 }
1069 iter->cpu = cpu;
1070 return ret;
1071 }
946 1072
947 switch (entry->type) { 1073 switch (entry->type) {
948 case TRACE_GRAPH_ENT: { 1074 case TRACE_GRAPH_ENT: {
@@ -952,7 +1078,7 @@ print_graph_function(struct trace_iterator *iter)
952 * sizeof(struct ftrace_graph_ent_entry) is very small, 1078 * sizeof(struct ftrace_graph_ent_entry) is very small,
953 * it can be safely saved at the stack. 1079 * it can be safely saved at the stack.
954 */ 1080 */
955 struct ftrace_graph_ent_entry *field, saved; 1081 struct ftrace_graph_ent_entry saved;
956 trace_assign_type(field, entry); 1082 trace_assign_type(field, entry);
957 saved = *field; 1083 saved = *field;
958 return print_graph_entry(&saved, s, iter); 1084 return print_graph_entry(&saved, s, iter);
@@ -1030,31 +1156,54 @@ static void print_graph_headers(struct seq_file *s)
1030static void graph_trace_open(struct trace_iterator *iter) 1156static void graph_trace_open(struct trace_iterator *iter)
1031{ 1157{
1032 /* pid and depth on the last trace processed */ 1158 /* pid and depth on the last trace processed */
1033 struct fgraph_data *data = alloc_percpu(struct fgraph_data); 1159 struct fgraph_data *data;
1034 int cpu; 1160 int cpu;
1035 1161
1162 iter->private = NULL;
1163
1164 data = kzalloc(sizeof(*data), GFP_KERNEL);
1036 if (!data) 1165 if (!data)
1037 pr_warning("function graph tracer: not enough memory\n"); 1166 goto out_err;
1038 else 1167
1039 for_each_possible_cpu(cpu) { 1168 data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
1040 pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid); 1169 if (!data->cpu_data)
1041 int *depth = &(per_cpu_ptr(data, cpu)->depth); 1170 goto out_err_free;
1042 *pid = -1; 1171
1043 *depth = 0; 1172 for_each_possible_cpu(cpu) {
1044 } 1173 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1174 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1175 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1176 *pid = -1;
1177 *depth = 0;
1178 *ignore = 0;
1179 }
1045 1180
1046 iter->private = data; 1181 iter->private = data;
1182
1183 return;
1184
1185 out_err_free:
1186 kfree(data);
1187 out_err:
1188 pr_warning("function graph tracer: not enough memory\n");
1047} 1189}
1048 1190
1049static void graph_trace_close(struct trace_iterator *iter) 1191static void graph_trace_close(struct trace_iterator *iter)
1050{ 1192{
1051 free_percpu(iter->private); 1193 struct fgraph_data *data = iter->private;
1194
1195 if (data) {
1196 free_percpu(data->cpu_data);
1197 kfree(data);
1198 }
1052} 1199}
1053 1200
1054static struct tracer graph_trace __read_mostly = { 1201static struct tracer graph_trace __read_mostly = {
1055 .name = "function_graph", 1202 .name = "function_graph",
1056 .open = graph_trace_open, 1203 .open = graph_trace_open,
1204 .pipe_open = graph_trace_open,
1057 .close = graph_trace_close, 1205 .close = graph_trace_close,
1206 .pipe_close = graph_trace_close,
1058 .wait_pipe = poll_wait_pipe, 1207 .wait_pipe = poll_wait_pipe,
1059 .init = graph_trace_init, 1208 .init = graph_trace_init,
1060 .reset = graph_trace_reset, 1209 .reset = graph_trace_reset,
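Editorial aside, not part of the patch: the function-graph changes above record the entered function per depth in enter_funcs[] and compare it on return, so a lost entry can be reported as "} /* func */". A tiny stand-alone model of that bookkeeping (the *_demo names and DEPTH_MAX are hypothetical stand-ins):

#include <stdio.h>

#define DEPTH_MAX 8	/* stands in for FTRACE_RETFUNC_DEPTH */

static unsigned long enter_funcs_demo[DEPTH_MAX];

static void record_entry_demo(int depth, unsigned long func)
{
	if (depth < DEPTH_MAX)
		enter_funcs_demo[depth] = func;	/* remember what was entered */
}

static int record_return_demo(int depth, unsigned long func)
{
	int match = 1;

	if (depth < DEPTH_MAX) {
		if (enter_funcs_demo[depth] != func)
			match = 0;		/* the entry event was lost */
		enter_funcs_demo[depth] = 0;
	}
	return match;
}

int main(void)
{
	record_entry_demo(0, 0x1000);
	printf("%d\n", record_return_demo(0, 0x1000));	/* 1: entry matched */
	printf("%d\n", record_return_demo(0, 0x2000));	/* 0: entry was lost */
	return 0;
}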
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 69543a905cd5..7b97000745f5 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -20,10 +20,10 @@
20 20
21#define BTS_BUFFER_SIZE (1 << 13) 21#define BTS_BUFFER_SIZE (1 << 13)
22 22
23static DEFINE_PER_CPU(struct bts_tracer *, tracer); 23static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer);
24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); 24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer);
25 25
26#define this_tracer per_cpu(tracer, smp_processor_id()) 26#define this_tracer per_cpu(hwb_tracer, smp_processor_id())
27 27
28static int trace_hw_branches_enabled __read_mostly; 28static int trace_hw_branches_enabled __read_mostly;
29static int trace_hw_branches_suspended __read_mostly; 29static int trace_hw_branches_suspended __read_mostly;
@@ -32,12 +32,13 @@ static struct trace_array *hw_branch_trace __read_mostly;
32 32
33static void bts_trace_init_cpu(int cpu) 33static void bts_trace_init_cpu(int cpu)
34{ 34{
35 per_cpu(tracer, cpu) = 35 per_cpu(hwb_tracer, cpu) =
36 ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE, 36 ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu),
37 NULL, (size_t)-1, BTS_KERNEL); 37 BTS_BUFFER_SIZE, NULL, (size_t)-1,
38 BTS_KERNEL);
38 39
39 if (IS_ERR(per_cpu(tracer, cpu))) 40 if (IS_ERR(per_cpu(hwb_tracer, cpu)))
40 per_cpu(tracer, cpu) = NULL; 41 per_cpu(hwb_tracer, cpu) = NULL;
41} 42}
42 43
43static int bts_trace_init(struct trace_array *tr) 44static int bts_trace_init(struct trace_array *tr)
@@ -51,7 +52,7 @@ static int bts_trace_init(struct trace_array *tr)
51 for_each_online_cpu(cpu) { 52 for_each_online_cpu(cpu) {
52 bts_trace_init_cpu(cpu); 53 bts_trace_init_cpu(cpu);
53 54
54 if (likely(per_cpu(tracer, cpu))) 55 if (likely(per_cpu(hwb_tracer, cpu)))
55 trace_hw_branches_enabled = 1; 56 trace_hw_branches_enabled = 1;
56 } 57 }
57 trace_hw_branches_suspended = 0; 58 trace_hw_branches_suspended = 0;
@@ -67,9 +68,9 @@ static void bts_trace_reset(struct trace_array *tr)
67 68
68 get_online_cpus(); 69 get_online_cpus();
69 for_each_online_cpu(cpu) { 70 for_each_online_cpu(cpu) {
70 if (likely(per_cpu(tracer, cpu))) { 71 if (likely(per_cpu(hwb_tracer, cpu))) {
71 ds_release_bts(per_cpu(tracer, cpu)); 72 ds_release_bts(per_cpu(hwb_tracer, cpu));
72 per_cpu(tracer, cpu) = NULL; 73 per_cpu(hwb_tracer, cpu) = NULL;
73 } 74 }
74 } 75 }
75 trace_hw_branches_enabled = 0; 76 trace_hw_branches_enabled = 0;
@@ -83,8 +84,8 @@ static void bts_trace_start(struct trace_array *tr)
83 84
84 get_online_cpus(); 85 get_online_cpus();
85 for_each_online_cpu(cpu) 86 for_each_online_cpu(cpu)
86 if (likely(per_cpu(tracer, cpu))) 87 if (likely(per_cpu(hwb_tracer, cpu)))
87 ds_resume_bts(per_cpu(tracer, cpu)); 88 ds_resume_bts(per_cpu(hwb_tracer, cpu));
88 trace_hw_branches_suspended = 0; 89 trace_hw_branches_suspended = 0;
89 put_online_cpus(); 90 put_online_cpus();
90} 91}
@@ -95,8 +96,8 @@ static void bts_trace_stop(struct trace_array *tr)
95 96
96 get_online_cpus(); 97 get_online_cpus();
97 for_each_online_cpu(cpu) 98 for_each_online_cpu(cpu)
98 if (likely(per_cpu(tracer, cpu))) 99 if (likely(per_cpu(hwb_tracer, cpu)))
99 ds_suspend_bts(per_cpu(tracer, cpu)); 100 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
100 trace_hw_branches_suspended = 1; 101 trace_hw_branches_suspended = 1;
101 put_online_cpus(); 102 put_online_cpus();
102} 103}
@@ -114,16 +115,16 @@ static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
114 bts_trace_init_cpu(cpu); 115 bts_trace_init_cpu(cpu);
115 116
116 if (trace_hw_branches_suspended && 117 if (trace_hw_branches_suspended &&
117 likely(per_cpu(tracer, cpu))) 118 likely(per_cpu(hwb_tracer, cpu)))
118 ds_suspend_bts(per_cpu(tracer, cpu)); 119 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
119 } 120 }
120 break; 121 break;
121 122
122 case CPU_DOWN_PREPARE: 123 case CPU_DOWN_PREPARE:
123 /* The notification is sent with interrupts enabled. */ 124 /* The notification is sent with interrupts enabled. */
124 if (likely(per_cpu(tracer, cpu))) { 125 if (likely(per_cpu(hwb_tracer, cpu))) {
125 ds_release_bts(per_cpu(tracer, cpu)); 126 ds_release_bts(per_cpu(hwb_tracer, cpu));
126 per_cpu(tracer, cpu) = NULL; 127 per_cpu(hwb_tracer, cpu) = NULL;
127 } 128 }
128 } 129 }
129 130
@@ -258,8 +259,8 @@ static void trace_bts_prepare(struct trace_iterator *iter)
258 259
259 get_online_cpus(); 260 get_online_cpus();
260 for_each_online_cpu(cpu) 261 for_each_online_cpu(cpu)
261 if (likely(per_cpu(tracer, cpu))) 262 if (likely(per_cpu(hwb_tracer, cpu)))
262 ds_suspend_bts(per_cpu(tracer, cpu)); 263 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
263 /* 264 /*
264 * We need to collect the trace on the respective cpu since ftrace 265 * We need to collect the trace on the respective cpu since ftrace
265 * implicitly adds the record for the current cpu. 266 * implicitly adds the record for the current cpu.
@@ -268,8 +269,8 @@ static void trace_bts_prepare(struct trace_iterator *iter)
268 on_each_cpu(trace_bts_cpu, iter->tr, 1); 269 on_each_cpu(trace_bts_cpu, iter->tr, 1);
269 270
270 for_each_online_cpu(cpu) 271 for_each_online_cpu(cpu)
271 if (likely(per_cpu(tracer, cpu))) 272 if (likely(per_cpu(hwb_tracer, cpu)))
272 ds_resume_bts(per_cpu(tracer, cpu)); 273 ds_resume_bts(per_cpu(hwb_tracer, cpu));
273 put_online_cpus(); 274 put_online_cpus();
274} 275}
275 276
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 3aa7eaa2114c..2974bc7538c7 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -151,6 +151,8 @@ check_critical_timing(struct trace_array *tr,
151 goto out_unlock; 151 goto out_unlock;
152 152
153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
154 /* Skip 5 functions to get to the irq/preempt enable function */
155 __trace_stack(tr, flags, 5, pc);
154 156
155 if (data->critical_sequence != max_sequence) 157 if (data->critical_sequence != max_sequence)
156 goto out_unlock; 158 goto out_unlock;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
new file mode 100644
index 000000000000..1251e367bae9
--- /dev/null
+++ b/kernel/trace/trace_kprobe.c
@@ -0,0 +1,1488 @@
1/*
2 * Kprobes-based tracing events
3 *
4 * Created by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/module.h>
21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32
33#include "trace.h"
34#include "trace_output.h"
35
36#define MAX_TRACE_ARGS 128
37#define MAX_ARGSTR_LEN 63
38#define MAX_EVENT_NAME_LEN 64
39#define KPROBE_EVENT_SYSTEM "kprobes"
40
41/* Reserved field names */
42#define FIELD_STRING_IP "__probe_ip"
43#define FIELD_STRING_NARGS "__probe_nargs"
44#define FIELD_STRING_RETIP "__probe_ret_ip"
45#define FIELD_STRING_FUNC "__probe_func"
46
47const char *reserved_field_names[] = {
48 "common_type",
49 "common_flags",
50 "common_preempt_count",
51 "common_pid",
52 "common_tgid",
53 "common_lock_depth",
54 FIELD_STRING_IP,
55 FIELD_STRING_NARGS,
56 FIELD_STRING_RETIP,
57 FIELD_STRING_FUNC,
58};
59
60struct fetch_func {
61 unsigned long (*func)(struct pt_regs *, void *);
62 void *data;
63};
64
65static __kprobes unsigned long call_fetch(struct fetch_func *f,
66 struct pt_regs *regs)
67{
68 return f->func(regs, f->data);
69}
70
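/*
 * Editorial aside (not part of this file): call_fetch() above is a plain
 * "function pointer plus opaque data" indirection.  A stand-alone model
 * with hypothetical *_demo names:
 */
struct fetch_demo {
	long (*func)(void *data);
	void *data;
};

static long fetch_const_demo(void *data)
{
	return (long)data;		/* the value is baked into data */
}

static long call_fetch_demo(struct fetch_demo *f)
{
	return f->func(f->data);	/* mirrors call_fetch() above */
}
/* With struct fetch_demo f = { fetch_const_demo, (void *)42L },
 * call_fetch_demo(&f) returns 42. */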
71/* fetch handlers */
72static __kprobes unsigned long fetch_register(struct pt_regs *regs,
73 void *offset)
74{
75 return regs_get_register(regs, (unsigned int)((unsigned long)offset));
76}
77
78static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
79 void *num)
80{
81 return regs_get_kernel_stack_nth(regs,
82 (unsigned int)((unsigned long)num));
83}
84
85static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
86{
87 unsigned long retval;
88
89 if (probe_kernel_address(addr, retval))
90 return 0;
91 return retval;
92}
93
94static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
95 void *dummy)
96{
97 return regs_return_value(regs);
98}
99
100static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
101 void *dummy)
102{
103 return kernel_stack_pointer(regs);
104}
105
106/* Memory fetching by symbol */
107struct symbol_cache {
108 char *symbol;
109 long offset;
110 unsigned long addr;
111};
112
113static unsigned long update_symbol_cache(struct symbol_cache *sc)
114{
115 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
116 if (sc->addr)
117 sc->addr += sc->offset;
118 return sc->addr;
119}
120
121static void free_symbol_cache(struct symbol_cache *sc)
122{
123 kfree(sc->symbol);
124 kfree(sc);
125}
126
127static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
128{
129 struct symbol_cache *sc;
130
131 if (!sym || strlen(sym) == 0)
132 return NULL;
133 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
134 if (!sc)
135 return NULL;
136
137 sc->symbol = kstrdup(sym, GFP_KERNEL);
138 if (!sc->symbol) {
139 kfree(sc);
140 return NULL;
141 }
142 sc->offset = offset;
143
144 update_symbol_cache(sc);
145 return sc;
146}
147
148static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
149{
150 struct symbol_cache *sc = data;
151
152 if (sc->addr)
153 return fetch_memory(regs, (void *)sc->addr);
154 else
155 return 0;
156}
157
158/* Special indirect memory access interface */
159struct indirect_fetch_data {
160 struct fetch_func orig;
161 long offset;
162};
163
164static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
165{
166 struct indirect_fetch_data *ind = data;
167 unsigned long addr;
168
169 addr = call_fetch(&ind->orig, regs);
170 if (addr) {
171 addr += ind->offset;
172 return fetch_memory(regs, (void *)addr);
173 } else
174 return 0;
175}
176
177static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
178{
179 if (data->orig.func == fetch_indirect)
180 free_indirect_fetch_data(data->orig.data);
181 else if (data->orig.func == fetch_symbol)
182 free_symbol_cache(data->orig.data);
183 kfree(data);
184}
185
186/**
187 * Kprobe event core functions
188 */
189
190struct probe_arg {
191 struct fetch_func fetch;
192 const char *name;
193};
194
195/* Flags for trace_probe */
196#define TP_FLAG_TRACE 1
197#define TP_FLAG_PROFILE 2
198
199struct trace_probe {
200 struct list_head list;
201 struct kretprobe rp; /* Use rp.kp for kprobe use */
202 unsigned long nhit;
203 unsigned int flags; /* For TP_FLAG_* */
204 const char *symbol; /* symbol name */
205 struct ftrace_event_call call;
206 struct trace_event event;
207 unsigned int nr_args;
208 struct probe_arg args[];
209};
210
211#define SIZEOF_TRACE_PROBE(n) \
212 (offsetof(struct trace_probe, args) + \
213 (sizeof(struct probe_arg) * (n)))
214
215static __kprobes int probe_is_return(struct trace_probe *tp)
216{
217 return tp->rp.handler != NULL;
218}
219
220static __kprobes const char *probe_symbol(struct trace_probe *tp)
221{
222 return tp->symbol ? tp->symbol : "unknown";
223}
224
225static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
226{
227 int ret = -EINVAL;
228
229 if (ff->func == fetch_register) {
230 const char *name;
231 name = regs_query_register_name((unsigned int)((long)ff->data));
232 ret = snprintf(buf, n, "%%%s", name);
233 } else if (ff->func == fetch_stack)
234 ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data);
235 else if (ff->func == fetch_memory)
236 ret = snprintf(buf, n, "@0x%p", ff->data);
237 else if (ff->func == fetch_symbol) {
238 struct symbol_cache *sc = ff->data;
239 if (sc->offset)
240 ret = snprintf(buf, n, "@%s%+ld", sc->symbol,
241 sc->offset);
242 else
243 ret = snprintf(buf, n, "@%s", sc->symbol);
244 } else if (ff->func == fetch_retvalue)
245 ret = snprintf(buf, n, "$retval");
246 else if (ff->func == fetch_stack_address)
247 ret = snprintf(buf, n, "$stack");
248 else if (ff->func == fetch_indirect) {
249 struct indirect_fetch_data *id = ff->data;
250 size_t l = 0;
251 ret = snprintf(buf, n, "%+ld(", id->offset);
252 if (ret >= n)
253 goto end;
254 l += ret;
255 ret = probe_arg_string(buf + l, n - l, &id->orig);
256 if (ret < 0)
257 goto end;
258 l += ret;
259 ret = snprintf(buf + l, n - l, ")");
260 ret += l;
261 }
262end:
263 if (ret >= n)
264 return -ENOSPC;
265 return ret;
266}
267
268static int register_probe_event(struct trace_probe *tp);
269static void unregister_probe_event(struct trace_probe *tp);
270
271static DEFINE_MUTEX(probe_lock);
272static LIST_HEAD(probe_list);
273
274static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
275static int kretprobe_dispatcher(struct kretprobe_instance *ri,
276 struct pt_regs *regs);
277
278/* Check the name is good for event/group */
279static int check_event_name(const char *name)
280{
281 if (!isalpha(*name) && *name != '_')
282 return 0;
283 while (*++name != '\0') {
284 if (!isalpha(*name) && !isdigit(*name) && *name != '_')
285 return 0;
286 }
287 return 1;
288}
289
290/*
291 * Allocate new trace_probe and initialize it (including kprobes).
292 */
293static struct trace_probe *alloc_trace_probe(const char *group,
294 const char *event,
295 void *addr,
296 const char *symbol,
297 unsigned long offs,
298 int nargs, int is_return)
299{
300 struct trace_probe *tp;
301 int ret = -ENOMEM;
302
303 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
304 if (!tp)
305 return ERR_PTR(ret);
306
307 if (symbol) {
308 tp->symbol = kstrdup(symbol, GFP_KERNEL);
309 if (!tp->symbol)
310 goto error;
311 tp->rp.kp.symbol_name = tp->symbol;
312 tp->rp.kp.offset = offs;
313 } else
314 tp->rp.kp.addr = addr;
315
316 if (is_return)
317 tp->rp.handler = kretprobe_dispatcher;
318 else
319 tp->rp.kp.pre_handler = kprobe_dispatcher;
320
321 if (!event || !check_event_name(event)) {
322 ret = -EINVAL;
323 goto error;
324 }
325
326 tp->call.name = kstrdup(event, GFP_KERNEL);
327 if (!tp->call.name)
328 goto error;
329
330 if (!group || !check_event_name(group)) {
331 ret = -EINVAL;
332 goto error;
333 }
334
335 tp->call.system = kstrdup(group, GFP_KERNEL);
336 if (!tp->call.system)
337 goto error;
338
339 INIT_LIST_HEAD(&tp->list);
340 return tp;
341error:
342 kfree(tp->call.name);
343 kfree(tp->symbol);
344 kfree(tp);
345 return ERR_PTR(ret);
346}
347
348static void free_probe_arg(struct probe_arg *arg)
349{
350 if (arg->fetch.func == fetch_symbol)
351 free_symbol_cache(arg->fetch.data);
352 else if (arg->fetch.func == fetch_indirect)
353 free_indirect_fetch_data(arg->fetch.data);
354 kfree(arg->name);
355}
356
357static void free_trace_probe(struct trace_probe *tp)
358{
359 int i;
360
361 for (i = 0; i < tp->nr_args; i++)
362 free_probe_arg(&tp->args[i]);
363
364 kfree(tp->call.system);
365 kfree(tp->call.name);
366 kfree(tp->symbol);
367 kfree(tp);
368}
369
370static struct trace_probe *find_probe_event(const char *event,
371 const char *group)
372{
373 struct trace_probe *tp;
374
375 list_for_each_entry(tp, &probe_list, list)
376 if (strcmp(tp->call.name, event) == 0 &&
377 strcmp(tp->call.system, group) == 0)
378 return tp;
379 return NULL;
380}
381
382/* Unregister a trace_probe and probe_event: call with locking probe_lock */
383static void unregister_trace_probe(struct trace_probe *tp)
384{
385 if (probe_is_return(tp))
386 unregister_kretprobe(&tp->rp);
387 else
388 unregister_kprobe(&tp->rp.kp);
389 list_del(&tp->list);
390 unregister_probe_event(tp);
391}
392
393/* Register a trace_probe and probe_event */
394static int register_trace_probe(struct trace_probe *tp)
395{
396 struct trace_probe *old_tp;
397 int ret;
398
399 mutex_lock(&probe_lock);
400
401 /* register as an event */
402 old_tp = find_probe_event(tp->call.name, tp->call.system);
403 if (old_tp) {
404 /* delete old event */
405 unregister_trace_probe(old_tp);
406 free_trace_probe(old_tp);
407 }
408 ret = register_probe_event(tp);
409 if (ret) {
410 pr_warning("Failed to register probe event(%d)\n", ret);
411 goto end;
412 }
413
414 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
415 if (probe_is_return(tp))
416 ret = register_kretprobe(&tp->rp);
417 else
418 ret = register_kprobe(&tp->rp.kp);
419
420 if (ret) {
421 pr_warning("Could not insert probe(%d)\n", ret);
422 if (ret == -EILSEQ) {
423 pr_warning("Probing address (0x%p) is not an "
424 "instruction boundary.\n",
425 tp->rp.kp.addr);
426 ret = -EINVAL;
427 }
428 unregister_probe_event(tp);
429 } else
430 list_add_tail(&tp->list, &probe_list);
431end:
432 mutex_unlock(&probe_lock);
433 return ret;
434}
435
436/* Split symbol and offset. */
437static int split_symbol_offset(char *symbol, unsigned long *offset)
438{
439 char *tmp;
440 int ret;
441
442 if (!offset)
443 return -EINVAL;
444
445 tmp = strchr(symbol, '+');
446 if (tmp) {
447 /* skip sign because strict_strtoul doesn't accept '+' */
448 ret = strict_strtoul(tmp + 1, 0, offset);
449 if (ret)
450 return ret;
451 *tmp = '\0';
452 } else
453 *offset = 0;
454 return 0;
455}
456
457#define PARAM_MAX_ARGS 16
458#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
459
460static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
461{
462 int ret = 0;
463 unsigned long param;
464
465 if (strcmp(arg, "retval") == 0) {
466 if (is_return) {
467 ff->func = fetch_retvalue;
468 ff->data = NULL;
469 } else
470 ret = -EINVAL;
471 } else if (strncmp(arg, "stack", 5) == 0) {
472 if (arg[5] == '\0') {
473 ff->func = fetch_stack_address;
474 ff->data = NULL;
475 } else if (isdigit(arg[5])) {
476 ret = strict_strtoul(arg + 5, 10, &param);
477 if (ret || param > PARAM_MAX_STACK)
478 ret = -EINVAL;
479 else {
480 ff->func = fetch_stack;
481 ff->data = (void *)param;
482 }
483 } else
484 ret = -EINVAL;
485 } else
486 ret = -EINVAL;
487 return ret;
488}
489
490/* Recursive argument parser */
491static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
492{
493 int ret = 0;
494 unsigned long param;
495 long offset;
496 char *tmp;
497
498 switch (arg[0]) {
499 case '$':
500 ret = parse_probe_vars(arg + 1, ff, is_return);
501 break;
502 case '%': /* named register */
503 ret = regs_query_register_offset(arg + 1);
504 if (ret >= 0) {
505 ff->func = fetch_register;
506 ff->data = (void *)(unsigned long)ret;
507 ret = 0;
508 }
509 break;
510 case '@': /* memory or symbol */
511 if (isdigit(arg[1])) {
512 ret = strict_strtoul(arg + 1, 0, &param);
513 if (ret)
514 break;
515 ff->func = fetch_memory;
516 ff->data = (void *)param;
517 } else {
518 ret = split_symbol_offset(arg + 1, &offset);
519 if (ret)
520 break;
521 ff->data = alloc_symbol_cache(arg + 1, offset);
522 if (ff->data)
523 ff->func = fetch_symbol;
524 else
525 ret = -EINVAL;
526 }
527 break;
528 case '+': /* indirect memory */
529 case '-':
530 tmp = strchr(arg, '(');
531 if (!tmp) {
532 ret = -EINVAL;
533 break;
534 }
535 *tmp = '\0';
536 ret = strict_strtol(arg + 1, 0, &offset);
537 if (ret)
538 break;
539 if (arg[0] == '-')
540 offset = -offset;
541 arg = tmp + 1;
542 tmp = strrchr(arg, ')');
543 if (tmp) {
544 struct indirect_fetch_data *id;
545 *tmp = '\0';
546 id = kzalloc(sizeof(struct indirect_fetch_data),
547 GFP_KERNEL);
548 if (!id)
549 return -ENOMEM;
550 id->offset = offset;
551 ret = __parse_probe_arg(arg, &id->orig, is_return);
552 if (ret)
553 kfree(id);
554 else {
555 ff->func = fetch_indirect;
556 ff->data = (void *)id;
557 }
558 } else
559 ret = -EINVAL;
560 break;
561 default:
562 /* TODO: support custom handler */
563 ret = -EINVAL;
564 }
565 return ret;
566}
567
568/* String length checking wrapper */
569static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
570{
571 if (strlen(arg) > MAX_ARGSTR_LEN) {
572 pr_info("Argument is too long: %s\n", arg);
573 return -ENOSPC;
574 }
575 return __parse_probe_arg(arg, ff, is_return);
576}
577
578/* Return 1 if name is reserved or already used by another argument */
579static int conflict_field_name(const char *name,
580 struct probe_arg *args, int narg)
581{
582 int i;
583 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
584 if (strcmp(reserved_field_names[i], name) == 0)
585 return 1;
586 for (i = 0; i < narg; i++)
587 if (strcmp(args[i].name, name) == 0)
588 return 1;
589 return 0;
590}
591
592static int create_trace_probe(int argc, char **argv)
593{
594 /*
595 * Argument syntax:
596 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
597 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
598 * Fetch args:
599 * $retval : fetch return value
600 * $stack : fetch stack address
601 * $stackN : fetch Nth of stack (N:0-)
602 * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
603 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
604 * %REG : fetch register REG
605 * Indirect memory fetch:
606 * +|-offs(ARG) : fetch memory at ARG +|- offs address.
607 * Alias name of args:
608 * NAME=FETCHARG : set NAME as alias of FETCHARG.
609 */
610 struct trace_probe *tp;
611 int i, ret = 0;
612 int is_return = 0, is_delete = 0;
613 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
614 unsigned long offset = 0;
615 void *addr = NULL;
616 char buf[MAX_EVENT_NAME_LEN];
617
618 /* argc must be >= 1 */
619 if (argv[0][0] == 'p')
620 is_return = 0;
621 else if (argv[0][0] == 'r')
622 is_return = 1;
623 else if (argv[0][0] == '-')
624 is_delete = 1;
625 else {
626 pr_info("Probe definition must start with 'p', 'r' or"
627 " '-'.\n");
628 return -EINVAL;
629 }
630
631 if (argv[0][1] == ':') {
632 event = &argv[0][2];
633 if (strchr(event, '/')) {
634 group = event;
635 event = strchr(group, '/') + 1;
636 event[-1] = '\0';
637 if (strlen(group) == 0) {
638 pr_info("Group name is not specified\n");
639 return -EINVAL;
640 }
641 }
642 if (strlen(event) == 0) {
643 pr_info("Event name is not specified\n");
644 return -EINVAL;
645 }
646 }
647 if (!group)
648 group = KPROBE_EVENT_SYSTEM;
649
650 if (is_delete) {
651 if (!event) {
652 pr_info("Delete command needs an event name.\n");
653 return -EINVAL;
654 }
655 tp = find_probe_event(event, group);
656 if (!tp) {
657 pr_info("Event %s/%s doesn't exist.\n", group, event);
658 return -ENOENT;
659 }
660 /* delete an event */
661 unregister_trace_probe(tp);
662 free_trace_probe(tp);
663 return 0;
664 }
665
666 if (argc < 2) {
667 pr_info("Probe point is not specified.\n");
668 return -EINVAL;
669 }
670 if (isdigit(argv[1][0])) {
671 if (is_return) {
672 pr_info("Return probe point must be a symbol.\n");
673 return -EINVAL;
674 }
675 /* an address specified */
676 ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
677 if (ret) {
678 pr_info("Failed to parse address.\n");
679 return ret;
680 }
681 } else {
682 /* a symbol specified */
683 symbol = argv[1];
684 /* TODO: support .init module functions */
685 ret = split_symbol_offset(symbol, &offset);
686 if (ret) {
687 pr_info("Failed to parse symbol.\n");
688 return ret;
689 }
690 if (offset && is_return) {
691 pr_info("Return probe must be used without offset.\n");
692 return -EINVAL;
693 }
694 }
695 argc -= 2; argv += 2;
696
697 /* setup a probe */
698 if (!event) {
699 /* Make a new event name */
700 if (symbol)
701 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
702 is_return ? 'r' : 'p', symbol, offset);
703 else
704 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
705 is_return ? 'r' : 'p', addr);
706 event = buf;
707 }
708 tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
709 is_return);
710 if (IS_ERR(tp)) {
711 pr_info("Failed to allocate trace_probe.(%d)\n",
712 (int)PTR_ERR(tp));
713 return PTR_ERR(tp);
714 }
715
716 /* parse arguments */
717 ret = 0;
718 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
719 /* Parse argument name */
720 arg = strchr(argv[i], '=');
721 if (arg)
722 *arg++ = '\0';
723 else
724 arg = argv[i];
725
726 if (conflict_field_name(argv[i], tp->args, i)) {
727 pr_info("Argument%d name '%s' conflicts with "
728 "another field.\n", i, argv[i]);
729 ret = -EINVAL;
730 goto error;
731 }
732
733 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
734 if (!tp->args[i].name) {
735 pr_info("Failed to allocate argument%d name '%s'.\n",
736 i, argv[i]);
737 ret = -ENOMEM;
738 goto error;
739 }
740
741 /* Parse fetch argument */
742 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
743 if (ret) {
744 pr_info("Parse error at argument%d. (%d)\n", i, ret);
745 kfree(tp->args[i].name);
746 goto error;
747 }
748
749 tp->nr_args++;
750 }
751
752 ret = register_trace_probe(tp);
753 if (ret)
754 goto error;
755 return 0;
756
757error:
758 free_trace_probe(tp);
759 return ret;
760}
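
For orientation, here is a minimal user-space sketch of how the command syntax documented at the top of create_trace_probe() is typically driven. It assumes debugfs is mounted at /sys/kernel/debug and uses x86 register names; the event names, probed symbol and fetch arguments are made-up examples, not part of this patch.

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/kprobe_events", "w");

	if (!f)
		return 1;
	/* p[:EVENT] SYM [FETCHARGS], parsed line by line via probes_write() */
	fprintf(f, "p:myprobe do_sys_open dfd=%%ax flags=+4($stack)\n");
	/* r[:EVENT] SYM $retval, the matching kretprobe form */
	fprintf(f, "r:myretprobe do_sys_open ret=$retval\n");
	fclose(f);
	return 0;
}

Each newline-terminated line is handed to command_trace_probe() and then create_trace_probe(), so several definitions can be written in one go.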
761
762static void cleanup_all_probes(void)
763{
764 struct trace_probe *tp;
765
766 mutex_lock(&probe_lock);
767 /* TODO: Use batch unregistration */
768 while (!list_empty(&probe_list)) {
769 tp = list_entry(probe_list.next, struct trace_probe, list);
770 unregister_trace_probe(tp);
771 free_trace_probe(tp);
772 }
773 mutex_unlock(&probe_lock);
774}
775
776
777/* Probes listing interfaces */
778static void *probes_seq_start(struct seq_file *m, loff_t *pos)
779{
780 mutex_lock(&probe_lock);
781 return seq_list_start(&probe_list, *pos);
782}
783
784static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
785{
786 return seq_list_next(v, &probe_list, pos);
787}
788
789static void probes_seq_stop(struct seq_file *m, void *v)
790{
791 mutex_unlock(&probe_lock);
792}
793
794static int probes_seq_show(struct seq_file *m, void *v)
795{
796 struct trace_probe *tp = v;
797 int i, ret;
798 char buf[MAX_ARGSTR_LEN + 1];
799
800 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
801 seq_printf(m, ":%s/%s", tp->call.system, tp->call.name);
802
803 if (!tp->symbol)
804 seq_printf(m, " 0x%p", tp->rp.kp.addr);
805 else if (tp->rp.kp.offset)
806 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
807 else
808 seq_printf(m, " %s", probe_symbol(tp));
809
810 for (i = 0; i < tp->nr_args; i++) {
811 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
812 if (ret < 0) {
813 pr_warning("Argument%d decoding error(%d).\n", i, ret);
814 return ret;
815 }
816 seq_printf(m, " %s=%s", tp->args[i].name, buf);
817 }
818 seq_printf(m, "\n");
819 return 0;
820}
821
822static const struct seq_operations probes_seq_op = {
823 .start = probes_seq_start,
824 .next = probes_seq_next,
825 .stop = probes_seq_stop,
826 .show = probes_seq_show
827};
828
829static int probes_open(struct inode *inode, struct file *file)
830{
831 if ((file->f_mode & FMODE_WRITE) &&
832 (file->f_flags & O_TRUNC))
833 cleanup_all_probes();
834
835 return seq_open(file, &probes_seq_op);
836}
837
838static int command_trace_probe(const char *buf)
839{
840 char **argv;
841 int argc = 0, ret = 0;
842
843 argv = argv_split(GFP_KERNEL, buf, &argc);
844 if (!argv)
845 return -ENOMEM;
846
847 if (argc)
848 ret = create_trace_probe(argc, argv);
849
850 argv_free(argv);
851 return ret;
852}
853
854#define WRITE_BUFSIZE 128
855
856static ssize_t probes_write(struct file *file, const char __user *buffer,
857 size_t count, loff_t *ppos)
858{
859 char *kbuf, *tmp;
860 int ret;
861 size_t done;
862 size_t size;
863
864 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
865 if (!kbuf)
866 return -ENOMEM;
867
868 ret = done = 0;
869 while (done < count) {
870 size = count - done;
871 if (size >= WRITE_BUFSIZE)
872 size = WRITE_BUFSIZE - 1;
873 if (copy_from_user(kbuf, buffer + done, size)) {
874 ret = -EFAULT;
875 goto out;
876 }
877 kbuf[size] = '\0';
878 tmp = strchr(kbuf, '\n');
879 if (tmp) {
880 *tmp = '\0';
881 size = tmp - kbuf + 1;
882 } else if (done + size < count) {
883 pr_warning("Line is too long: "
884 "should be less than %d.", WRITE_BUFSIZE);
885 ret = -EINVAL;
886 goto out;
887 }
888 done += size;
889 /* Remove comments */
890 tmp = strchr(kbuf, '#');
891 if (tmp)
892 *tmp = '\0';
893
894 ret = command_trace_probe(kbuf);
895 if (ret)
896 goto out;
897 }
898 ret = done;
899out:
900 kfree(kbuf);
901 return ret;
902}
903
904static const struct file_operations kprobe_events_ops = {
905 .owner = THIS_MODULE,
906 .open = probes_open,
907 .read = seq_read,
908 .llseek = seq_lseek,
909 .release = seq_release,
910 .write = probes_write,
911};
912
913/* Probes profiling interfaces */
914static int probes_profile_seq_show(struct seq_file *m, void *v)
915{
916 struct trace_probe *tp = v;
917
918 seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
919 tp->rp.kp.nmissed);
920
921 return 0;
922}
923
924static const struct seq_operations profile_seq_op = {
925 .start = probes_seq_start,
926 .next = probes_seq_next,
927 .stop = probes_seq_stop,
928 .show = probes_profile_seq_show
929};
930
931static int profile_open(struct inode *inode, struct file *file)
932{
933 return seq_open(file, &profile_seq_op);
934}
935
936static const struct file_operations kprobe_profile_ops = {
937 .owner = THIS_MODULE,
938 .open = profile_open,
939 .read = seq_read,
940 .llseek = seq_lseek,
941 .release = seq_release,
942};
943
944/* Kprobe handler */
945static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
946{
947 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
948 struct kprobe_trace_entry *entry;
949 struct ring_buffer_event *event;
950 struct ring_buffer *buffer;
951 int size, i, pc;
952 unsigned long irq_flags;
953 struct ftrace_event_call *call = &tp->call;
954
955 tp->nhit++;
956
957 local_save_flags(irq_flags);
958 pc = preempt_count();
959
960 size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
961
962 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
963 irq_flags, pc);
964 if (!event)
965 return;
966
967 entry = ring_buffer_event_data(event);
968 entry->nargs = tp->nr_args;
969 entry->ip = (unsigned long)kp->addr;
970 for (i = 0; i < tp->nr_args; i++)
971 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
972
973 if (!filter_current_check_discard(buffer, call, entry, event))
974 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
975}
976
977/* Kretprobe handler */
978static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
979 struct pt_regs *regs)
980{
981 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
982 struct kretprobe_trace_entry *entry;
983 struct ring_buffer_event *event;
984 struct ring_buffer *buffer;
985 int size, i, pc;
986 unsigned long irq_flags;
987 struct ftrace_event_call *call = &tp->call;
988
989 local_save_flags(irq_flags);
990 pc = preempt_count();
991
992 size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
993
994 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
995 irq_flags, pc);
996 if (!event)
997 return;
998
999 entry = ring_buffer_event_data(event);
1000 entry->nargs = tp->nr_args;
1001 entry->func = (unsigned long)tp->rp.kp.addr;
1002 entry->ret_ip = (unsigned long)ri->ret_addr;
1003 for (i = 0; i < tp->nr_args; i++)
1004 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1005
1006 if (!filter_current_check_discard(buffer, call, entry, event))
1007 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1008}
1009
1010/* Event entry printers */
1011enum print_line_t
1012print_kprobe_event(struct trace_iterator *iter, int flags)
1013{
1014 struct kprobe_trace_entry *field;
1015 struct trace_seq *s = &iter->seq;
1016 struct trace_event *event;
1017 struct trace_probe *tp;
1018 int i;
1019
1020 field = (struct kprobe_trace_entry *)iter->ent;
1021 event = ftrace_find_event(field->ent.type);
1022 tp = container_of(event, struct trace_probe, event);
1023
1024 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1025 goto partial;
1026
1027 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
1028 goto partial;
1029
1030 if (!trace_seq_puts(s, ")"))
1031 goto partial;
1032
1033 for (i = 0; i < field->nargs; i++)
1034 if (!trace_seq_printf(s, " %s=%lx",
1035 tp->args[i].name, field->args[i]))
1036 goto partial;
1037
1038 if (!trace_seq_puts(s, "\n"))
1039 goto partial;
1040
1041 return TRACE_TYPE_HANDLED;
1042partial:
1043 return TRACE_TYPE_PARTIAL_LINE;
1044}
1045
1046enum print_line_t
1047print_kretprobe_event(struct trace_iterator *iter, int flags)
1048{
1049 struct kretprobe_trace_entry *field;
1050 struct trace_seq *s = &iter->seq;
1051 struct trace_event *event;
1052 struct trace_probe *tp;
1053 int i;
1054
1055 field = (struct kretprobe_trace_entry *)iter->ent;
1056 event = ftrace_find_event(field->ent.type);
1057 tp = container_of(event, struct trace_probe, event);
1058
1059 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1060 goto partial;
1061
1062 if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1063 goto partial;
1064
1065 if (!trace_seq_puts(s, " <- "))
1066 goto partial;
1067
1068 if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1069 goto partial;
1070
1071 if (!trace_seq_puts(s, ")"))
1072 goto partial;
1073
1074 for (i = 0; i < field->nargs; i++)
1075 if (!trace_seq_printf(s, " %s=%lx",
1076 tp->args[i].name, field->args[i]))
1077 goto partial;
1078
1079 if (!trace_seq_puts(s, "\n"))
1080 goto partial;
1081
1082 return TRACE_TYPE_HANDLED;
1083partial:
1084 return TRACE_TYPE_PARTIAL_LINE;
1085}
1086
1087static int probe_event_enable(struct ftrace_event_call *call)
1088{
1089 struct trace_probe *tp = (struct trace_probe *)call->data;
1090
1091 tp->flags |= TP_FLAG_TRACE;
1092 if (probe_is_return(tp))
1093 return enable_kretprobe(&tp->rp);
1094 else
1095 return enable_kprobe(&tp->rp.kp);
1096}
1097
1098static void probe_event_disable(struct ftrace_event_call *call)
1099{
1100 struct trace_probe *tp = (struct trace_probe *)call->data;
1101
1102 tp->flags &= ~TP_FLAG_TRACE;
1103 if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1104 if (probe_is_return(tp))
1105 disable_kretprobe(&tp->rp);
1106 else
1107 disable_kprobe(&tp->rp.kp);
1108 }
1109}
1110
1111static int probe_event_raw_init(struct ftrace_event_call *event_call)
1112{
1113 INIT_LIST_HEAD(&event_call->fields);
1114
1115 return 0;
1116}
1117
1118#undef DEFINE_FIELD
1119#define DEFINE_FIELD(type, item, name, is_signed) \
1120 do { \
1121 ret = trace_define_field(event_call, #type, name, \
1122 offsetof(typeof(field), item), \
1123 sizeof(field.item), is_signed, \
1124 FILTER_OTHER); \
1125 if (ret) \
1126 return ret; \
1127 } while (0)
1128
1129static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1130{
1131 int ret, i;
1132 struct kprobe_trace_entry field;
1133 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1134
1135 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1136 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1137 /* Set argument names as fields */
1138 for (i = 0; i < tp->nr_args; i++)
1139 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1140 return 0;
1141}
1142
1143static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1144{
1145 int ret, i;
1146 struct kretprobe_trace_entry field;
1147 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1148
1149 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1150 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1151 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1152 /* Set argument names as fields */
1153 for (i = 0; i < tp->nr_args; i++)
1154 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1155 return 0;
1156}
1157
1158static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1159{
1160 int i;
1161 int pos = 0;
1162
1163 const char *fmt, *arg;
1164
1165 if (!probe_is_return(tp)) {
1166 fmt = "(%lx)";
1167 arg = "REC->" FIELD_STRING_IP;
1168 } else {
1169 fmt = "(%lx <- %lx)";
1170 arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
1171 }
1172
1173 /* When len=0, we just calculate the needed length */
1174#define LEN_OR_ZERO (len ? len - pos : 0)
1175
1176 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
1177
1178 for (i = 0; i < tp->nr_args; i++) {
1179 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx",
1180 tp->args[i].name);
1181 }
1182
1183 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
1184
1185 for (i = 0; i < tp->nr_args; i++) {
1186 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
1187 tp->args[i].name);
1188 }
1189
1190#undef LEN_OR_ZERO
1191
1192 /* return the length of print_fmt */
1193 return pos;
1194}
1195
1196static int set_print_fmt(struct trace_probe *tp)
1197{
1198 int len;
1199 char *print_fmt;
1200
1201 /* First: called with 0 length to calculate the needed length */
1202 len = __set_print_fmt(tp, NULL, 0);
1203 print_fmt = kmalloc(len + 1, GFP_KERNEL);
1204 if (!print_fmt)
1205 return -ENOMEM;
1206
1207 /* Second: actually write the @print_fmt */
1208 __set_print_fmt(tp, print_fmt, len + 1);
1209 tp->call.print_fmt = print_fmt;
1210
1211 return 0;
1212}
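
As an aside, the calculate-then-fill pattern used by __set_print_fmt() and set_print_fmt() is plain two-pass snprintf(): a first pass with a zero-length buffer reports the required size, a second pass does the real write. A standalone sketch with illustrative names only:

#include <stdio.h>
#include <stdlib.h>

static char *build_fmt(const char *name)
{
	/* First pass: length only (zero-length destination buffer). */
	int len = snprintf(NULL, 0, "\"%%lx %s=%%lx\", REC->%s", name, name);
	char *buf = malloc(len + 1);

	/* Second pass: actually write the format string. */
	if (buf)
		snprintf(buf, len + 1, "\"%%lx %s=%%lx\", REC->%s", name, name);
	return buf;
}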
1213
1214#ifdef CONFIG_PERF_EVENTS
1215
1216/* Kprobe profile handler */
1217static __kprobes void kprobe_perf_func(struct kprobe *kp,
1218 struct pt_regs *regs)
1219{
1220 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1221 struct ftrace_event_call *call = &tp->call;
1222 struct kprobe_trace_entry *entry;
1223 int size, __size, i;
1224 unsigned long irq_flags;
1225 int rctx;
1226
1227 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1228 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1229 size -= sizeof(u32);
1230 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1231 "profile buffer not large enough"))
1232 return;
1233
1234 entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
1235 if (!entry)
1236 return;
1237
1238 entry->nargs = tp->nr_args;
1239 entry->ip = (unsigned long)kp->addr;
1240 for (i = 0; i < tp->nr_args; i++)
1241 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1242
1243 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);
1244}
1245
1246/* Kretprobe profile handler */
1247static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1248 struct pt_regs *regs)
1249{
1250 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1251 struct ftrace_event_call *call = &tp->call;
1252 struct kretprobe_trace_entry *entry;
1253 int size, __size, i;
1254 unsigned long irq_flags;
1255 int rctx;
1256
1257 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1258 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1259 size -= sizeof(u32);
1260 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1261 "profile buffer not large enough"))
1262 return;
1263
1264 entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
1265 if (!entry)
1266 return;
1267
1268 entry->nargs = tp->nr_args;
1269 entry->func = (unsigned long)tp->rp.kp.addr;
1270 entry->ret_ip = (unsigned long)ri->ret_addr;
1271 for (i = 0; i < tp->nr_args; i++)
1272 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1273
1274 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
1275 irq_flags, regs);
1276}
1277
1278static int probe_perf_enable(struct ftrace_event_call *call)
1279{
1280 struct trace_probe *tp = (struct trace_probe *)call->data;
1281
1282 tp->flags |= TP_FLAG_PROFILE;
1283
1284 if (probe_is_return(tp))
1285 return enable_kretprobe(&tp->rp);
1286 else
1287 return enable_kprobe(&tp->rp.kp);
1288}
1289
1290static void probe_perf_disable(struct ftrace_event_call *call)
1291{
1292 struct trace_probe *tp = (struct trace_probe *)call->data;
1293
1294 tp->flags &= ~TP_FLAG_PROFILE;
1295
1296 if (!(tp->flags & TP_FLAG_TRACE)) {
1297 if (probe_is_return(tp))
1298 disable_kretprobe(&tp->rp);
1299 else
1300 disable_kprobe(&tp->rp.kp);
1301 }
1302}
1303#endif /* CONFIG_PERF_EVENTS */
1304
1305
1306static __kprobes
1307int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1308{
1309 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1310
1311 if (tp->flags & TP_FLAG_TRACE)
1312 kprobe_trace_func(kp, regs);
1313#ifdef CONFIG_PERF_EVENTS
1314 if (tp->flags & TP_FLAG_PROFILE)
1315 kprobe_perf_func(kp, regs);
1316#endif
1317 return 0; /* We don't tweak the kernel, so just return 0 */
1318}
1319
1320static __kprobes
1321int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1322{
1323 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1324
1325 if (tp->flags & TP_FLAG_TRACE)
1326 kretprobe_trace_func(ri, regs);
1327#ifdef CONFIG_PERF_EVENTS
1328 if (tp->flags & TP_FLAG_PROFILE)
1329 kretprobe_perf_func(ri, regs);
1330#endif
1331 return 0; /* We don't tweak the kernel, so just return 0 */
1332}
1333
1334static int register_probe_event(struct trace_probe *tp)
1335{
1336 struct ftrace_event_call *call = &tp->call;
1337 int ret;
1338
1339 /* Initialize ftrace_event_call */
1340 if (probe_is_return(tp)) {
1341 tp->event.trace = print_kretprobe_event;
1342 call->raw_init = probe_event_raw_init;
1343 call->define_fields = kretprobe_event_define_fields;
1344 } else {
1345 tp->event.trace = print_kprobe_event;
1346 call->raw_init = probe_event_raw_init;
1347 call->define_fields = kprobe_event_define_fields;
1348 }
1349 if (set_print_fmt(tp) < 0)
1350 return -ENOMEM;
1351 call->event = &tp->event;
1352 call->id = register_ftrace_event(&tp->event);
1353 if (!call->id) {
1354 kfree(call->print_fmt);
1355 return -ENODEV;
1356 }
1357 call->enabled = 0;
1358 call->regfunc = probe_event_enable;
1359 call->unregfunc = probe_event_disable;
1360
1361#ifdef CONFIG_PERF_EVENTS
1362 call->perf_event_enable = probe_perf_enable;
1363 call->perf_event_disable = probe_perf_disable;
1364#endif
1365 call->data = tp;
1366 ret = trace_add_event_call(call);
1367 if (ret) {
1368 pr_info("Failed to register kprobe event: %s\n", call->name);
1369 kfree(call->print_fmt);
1370 unregister_ftrace_event(&tp->event);
1371 }
1372 return ret;
1373}
1374
1375static void unregister_probe_event(struct trace_probe *tp)
1376{
1377 /* tp->event is unregistered in trace_remove_event_call() */
1378 trace_remove_event_call(&tp->call);
1379 kfree(tp->call.print_fmt);
1380}
1381
1382/* Make a debugfs interface for controlling probe points */
1383static __init int init_kprobe_trace(void)
1384{
1385 struct dentry *d_tracer;
1386 struct dentry *entry;
1387
1388 d_tracer = tracing_init_dentry();
1389 if (!d_tracer)
1390 return 0;
1391
1392 entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1393 NULL, &kprobe_events_ops);
1394
1395 /* Event list interface */
1396 if (!entry)
1397 pr_warning("Could not create debugfs "
1398 "'kprobe_events' entry\n");
1399
1400 /* Profile interface */
1401 entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1402 NULL, &kprobe_profile_ops);
1403
1404 if (!entry)
1405 pr_warning("Could not create debugfs "
1406 "'kprobe_profile' entry\n");
1407 return 0;
1408}
1409fs_initcall(init_kprobe_trace);
1410
1411
1412#ifdef CONFIG_FTRACE_STARTUP_TEST
1413
1414static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1415 int a4, int a5, int a6)
1416{
1417 return a1 + a2 + a3 + a4 + a5 + a6;
1418}
1419
1420static __init int kprobe_trace_self_tests_init(void)
1421{
1422 int ret, warn = 0;
1423 int (*target)(int, int, int, int, int, int);
1424 struct trace_probe *tp;
1425
1426 target = kprobe_trace_selftest_target;
1427
1428 pr_info("Testing kprobe tracing: ");
1429
1430 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1431 "$stack $stack0 +0($stack)");
1432 if (WARN_ON_ONCE(ret)) {
1433 pr_warning("error on probing function entry.\n");
1434 warn++;
1435 } else {
1436 /* Enable trace point */
1437 tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
1438 if (WARN_ON_ONCE(tp == NULL)) {
1439 pr_warning("error on getting new probe.\n");
1440 warn++;
1441 } else
1442 probe_event_enable(&tp->call);
1443 }
1444
1445 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1446 "$retval");
1447 if (WARN_ON_ONCE(ret)) {
1448 pr_warning("error on probing function return.\n");
1449 warn++;
1450 } else {
1451 /* Enable trace point */
1452 tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
1453 if (WARN_ON_ONCE(tp == NULL)) {
1454 pr_warning("error on getting new probe.\n");
1455 warn++;
1456 } else
1457 probe_event_enable(&tp->call);
1458 }
1459
1460 if (warn)
1461 goto end;
1462
1463 ret = target(1, 2, 3, 4, 5, 6);
1464
1465 ret = command_trace_probe("-:testprobe");
1466 if (WARN_ON_ONCE(ret)) {
1467 pr_warning("error on deleting a probe.\n");
1468 warn++;
1469 }
1470
1471 ret = command_trace_probe("-:testprobe2");
1472 if (WARN_ON_ONCE(ret)) {
1473 pr_warning("error on deleting a probe.\n");
1474 warn++;
1475 }
1476
1477end:
1478 cleanup_all_probes();
1479 if (warn)
1480 pr_cont("NG: Some tests failed. Please check them.\n");
1481 else
1482 pr_cont("OK\n");
1483 return 0;
1484}
1485
1486late_initcall(kprobe_trace_self_tests_init);
1487
1488#endif
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 000000000000..d59cd6879477
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,520 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/slab.h>
27#include <linux/fs.h>
28
29#include "trace_output.h"
30#include "trace.h"
31
32#include <linux/hw_breakpoint.h>
33#include <asm/hw_breakpoint.h>
34
35#include <asm/atomic.h>
36
37/*
38 * For now, restrict the number of symbols traced simultaneously to the
39 * number of available hardware breakpoint registers.
40 */
41#define KSYM_TRACER_MAX HBP_NUM
42
43#define KSYM_TRACER_OP_LEN 3 /* rw- */
44
45struct trace_ksym {
46 struct perf_event **ksym_hbp;
47 struct perf_event_attr attr;
48#ifdef CONFIG_PROFILE_KSYM_TRACER
49 atomic64_t counter;
50#endif
51 struct hlist_node ksym_hlist;
52};
53
54static struct trace_array *ksym_trace_array;
55
56static unsigned int ksym_filter_entry_count;
57static unsigned int ksym_tracing_enabled;
58
59static HLIST_HEAD(ksym_filter_head);
60
61static DEFINE_MUTEX(ksym_tracer_mutex);
62
63#ifdef CONFIG_PROFILE_KSYM_TRACER
64
65#define MAX_UL_INT 0xffffffff
66
67void ksym_collect_stats(unsigned long hbp_hit_addr)
68{
69 struct hlist_node *node;
70 struct trace_ksym *entry;
71
72 rcu_read_lock();
73 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
74 if (entry->attr.bp_addr == hbp_hit_addr) {
75 atomic64_inc(&entry->counter);
76 break;
77 }
78 }
79 rcu_read_unlock();
80}
81#endif /* CONFIG_PROFILE_KSYM_TRACER */
82
83void ksym_hbp_handler(struct perf_event *hbp, int nmi,
84 struct perf_sample_data *data,
85 struct pt_regs *regs)
86{
87 struct ring_buffer_event *event;
88 struct ksym_trace_entry *entry;
89 struct ring_buffer *buffer;
90 int pc;
91
92 if (!ksym_tracing_enabled)
93 return;
94
95 buffer = ksym_trace_array->buffer;
96
97 pc = preempt_count();
98
99 event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
100 sizeof(*entry), 0, pc);
101 if (!event)
102 return;
103
104 entry = ring_buffer_event_data(event);
105 entry->ip = instruction_pointer(regs);
106 entry->type = hw_breakpoint_type(hbp);
107 entry->addr = hw_breakpoint_addr(hbp);
108 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
109
110#ifdef CONFIG_PROFILE_KSYM_TRACER
111 ksym_collect_stats(hw_breakpoint_addr(hbp));
112#endif /* CONFIG_PROFILE_KSYM_TRACER */
113
114 trace_buffer_unlock_commit(buffer, event, 0, pc);
115}
116
117/* Valid access types are represented as
118 *
119 * rw- : Set Read/Write Access Breakpoint
120 * -w- : Set Write Access Breakpoint
121 * --- : Clear Breakpoints
122 * --x : Set Execution Breakpoint (Not available yet)
123 *
124 */
125static int ksym_trace_get_access_type(char *str)
126{
127 int access = 0;
128
129 if (str[0] == 'r')
130 access |= HW_BREAKPOINT_R;
131
132 if (str[1] == 'w')
133 access |= HW_BREAKPOINT_W;
134
135 if (str[2] == 'x')
136 access |= HW_BREAKPOINT_X;
137
138 switch (access) {
139 case HW_BREAKPOINT_R:
140 case HW_BREAKPOINT_W:
141 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
142 return access;
143 default:
144 return -EINVAL;
145 }
146}
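
A hypothetical user-space sketch of feeding this interface: it requests a read-write breakpoint on a kernel data symbol through the ksym_trace_filter file. The debugfs path and the symbol name are assumptions for illustration, not taken from this patch.

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/ksym_trace_filter", "w");

	if (!f)
		return 1;
	/* <ksym_name>:<op>, with op one of rw-, -w-, --- */
	fprintf(f, "pid_max:rw-\n");
	fclose(f);
	return 0;
}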
147
148/*
149 * There can be several possible malformed requests and we attempt to capture
150 * all of them. We enumerate some of the rules
151 * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
152 * i.e. multiple ':' symbols disallowed. Possible uses are of the form
153 * <module>:<ksym_name>:<op>.
154 * 2. No delimiter symbol ':' in the input string
155 * 3. Spurious operator symbols or symbols not in their respective positions
156 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
157 * 5. Kernel symbol not a part of /proc/kallsyms
158 * 6. Duplicate requests
159 */
160static int parse_ksym_trace_str(char *input_string, char **ksymname,
161 unsigned long *addr)
162{
163 int ret;
164
165 *ksymname = strsep(&input_string, ":");
166 *addr = kallsyms_lookup_name(*ksymname);
167
168 /* Check for malformed request: (2), (1) and (5) */
169 if ((!input_string) ||
170 (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
171 (*addr == 0))
172 return -EINVAL;
173
174 ret = ksym_trace_get_access_type(input_string);
175
176 return ret;
177}
178
179int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
180{
181 struct trace_ksym *entry;
182 int ret = -ENOMEM;
183
184 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
185 printk(KERN_ERR "ksym_tracer: Maximum limit (%d) reached. No"
186 " new requests for tracing can be accepted now.\n",
187 KSYM_TRACER_MAX);
188 return -ENOSPC;
189 }
190
191 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
192 if (!entry)
193 return -ENOMEM;
194
195 hw_breakpoint_init(&entry->attr);
196
197 entry->attr.bp_type = op;
198 entry->attr.bp_addr = addr;
199 entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
200
201 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
202 ksym_hbp_handler);
203
204 if (IS_ERR(entry->ksym_hbp)) {
205 ret = PTR_ERR(entry->ksym_hbp);
206 printk(KERN_INFO "ksym_tracer request failed. Try again"
207 " later!\n");
208 goto err;
209 }
210
211 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
212 ksym_filter_entry_count++;
213
214 return 0;
215
216err:
217 kfree(entry);
218
219 return ret;
220}
221
222static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
223 size_t count, loff_t *ppos)
224{
225 struct trace_ksym *entry;
226 struct hlist_node *node;
227 struct trace_seq *s;
228 ssize_t cnt = 0;
229 int ret;
230
231 s = kmalloc(sizeof(*s), GFP_KERNEL);
232 if (!s)
233 return -ENOMEM;
234 trace_seq_init(s);
235
236 mutex_lock(&ksym_tracer_mutex);
237
238 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
239 ret = trace_seq_printf(s, "%pS:",
240 (void *)(unsigned long)entry->attr.bp_addr);
241 if (entry->attr.bp_type == HW_BREAKPOINT_R)
242 ret = trace_seq_puts(s, "r--\n");
243 else if (entry->attr.bp_type == HW_BREAKPOINT_W)
244 ret = trace_seq_puts(s, "-w-\n");
245 else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
246 ret = trace_seq_puts(s, "rw-\n");
247 WARN_ON_ONCE(!ret);
248 }
249
250 cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
251
252 mutex_unlock(&ksym_tracer_mutex);
253
254 kfree(s);
255
256 return cnt;
257}
258
259static void __ksym_trace_reset(void)
260{
261 struct trace_ksym *entry;
262 struct hlist_node *node, *node1;
263
264 mutex_lock(&ksym_tracer_mutex);
265 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
266 ksym_hlist) {
267 unregister_wide_hw_breakpoint(entry->ksym_hbp);
268 ksym_filter_entry_count--;
269 hlist_del_rcu(&(entry->ksym_hlist));
270 synchronize_rcu();
271 kfree(entry);
272 }
273 mutex_unlock(&ksym_tracer_mutex);
274}
275
276static ssize_t ksym_trace_filter_write(struct file *file,
277 const char __user *buffer,
278 size_t count, loff_t *ppos)
279{
280 struct trace_ksym *entry;
281 struct hlist_node *node;
282 char *buf, *input_string, *ksymname = NULL;
283 unsigned long ksym_addr = 0;
284 int ret, op, changed = 0;
285
286 buf = kzalloc(count + 1, GFP_KERNEL);
287 if (!buf)
288 return -ENOMEM;
289
290 ret = -EFAULT;
291 if (copy_from_user(buf, buffer, count))
292 goto out;
293
294 buf[count] = '\0';
295 input_string = strstrip(buf);
296
297 /*
298 * Clear all breakpoints if:
299 * 1: echo > ksym_trace_filter
300 * 2: echo 0 > ksym_trace_filter
301 * 3: echo "*:---" > ksym_trace_filter
302 */
303 if (!input_string[0] || !strcmp(input_string, "0") ||
304 !strcmp(input_string, "*:---")) {
305 __ksym_trace_reset();
306 ret = 0;
307 goto out;
308 }
309
310 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
311 if (ret < 0)
312 goto out;
313
314 mutex_lock(&ksym_tracer_mutex);
315
316 ret = -EINVAL;
317 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
318 if (entry->attr.bp_addr == ksym_addr) {
319 /* Check for malformed request: (6) */
320 if (entry->attr.bp_type != op)
321 changed = 1;
322 else
323 goto out_unlock;
324 break;
325 }
326 }
327 if (changed) {
328 unregister_wide_hw_breakpoint(entry->ksym_hbp);
329 entry->attr.bp_type = op;
330 ret = 0;
331 if (op > 0) {
332 entry->ksym_hbp =
333 register_wide_hw_breakpoint(&entry->attr,
334 ksym_hbp_handler);
335 if (IS_ERR(entry->ksym_hbp))
336 ret = PTR_ERR(entry->ksym_hbp);
337 else
338 goto out_unlock;
339 }
340 /* Error or "symbol:---" case: drop it */
341 ksym_filter_entry_count--;
342 hlist_del_rcu(&(entry->ksym_hlist));
343 synchronize_rcu();
344 kfree(entry);
345 goto out_unlock;
346 } else {
347 /* Check for malformed request: (4) */
348 if (op)
349 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
350 }
351out_unlock:
352 mutex_unlock(&ksym_tracer_mutex);
353out:
354 kfree(buf);
355 return !ret ? count : ret;
356}
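
And a matching sketch of the clear path handled just above: any of the three forms listed in the comment in ksym_trace_filter_write() drops every registered breakpoint (the debugfs path is again an assumption).

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/ksym_trace_filter", "w");

	if (!f)
		return 1;
	fputs("*:---\n", f);	/* an empty write or "0" has the same effect */
	fclose(f);
	return 0;
}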
357
358static const struct file_operations ksym_tracing_fops = {
359 .open = tracing_open_generic,
360 .read = ksym_trace_filter_read,
361 .write = ksym_trace_filter_write,
362};
363
364static void ksym_trace_reset(struct trace_array *tr)
365{
366 ksym_tracing_enabled = 0;
367 __ksym_trace_reset();
368}
369
370static int ksym_trace_init(struct trace_array *tr)
371{
372 int cpu, ret = 0;
373
374 for_each_online_cpu(cpu)
375 tracing_reset(tr, cpu);
376 ksym_tracing_enabled = 1;
377 ksym_trace_array = tr;
378
379 return ret;
380}
381
382static void ksym_trace_print_header(struct seq_file *m)
383{
384 seq_puts(m,
385 "# TASK-PID CPU# Symbol "
386 "Type Function\n");
387 seq_puts(m,
388 "# | | | "
389 " | |\n");
390}
391
392static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
393{
394 struct trace_entry *entry = iter->ent;
395 struct trace_seq *s = &iter->seq;
396 struct ksym_trace_entry *field;
397 char str[KSYM_SYMBOL_LEN];
398 int ret;
399
400 if (entry->type != TRACE_KSYM)
401 return TRACE_TYPE_UNHANDLED;
402
403 trace_assign_type(field, entry);
404
405 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
406 entry->pid, iter->cpu, (char *)field->addr);
407 if (!ret)
408 return TRACE_TYPE_PARTIAL_LINE;
409
410 switch (field->type) {
411 case HW_BREAKPOINT_R:
412 ret = trace_seq_printf(s, " R ");
413 break;
414 case HW_BREAKPOINT_W:
415 ret = trace_seq_printf(s, " W ");
416 break;
417 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
418 ret = trace_seq_printf(s, " RW ");
419 break;
420 default:
421 return TRACE_TYPE_PARTIAL_LINE;
422 }
423
424 if (!ret)
425 return TRACE_TYPE_PARTIAL_LINE;
426
427 sprint_symbol(str, field->ip);
428 ret = trace_seq_printf(s, "%s\n", str);
429 if (!ret)
430 return TRACE_TYPE_PARTIAL_LINE;
431
432 return TRACE_TYPE_HANDLED;
433}
434
435struct tracer ksym_tracer __read_mostly =
436{
437 .name = "ksym_tracer",
438 .init = ksym_trace_init,
439 .reset = ksym_trace_reset,
440#ifdef CONFIG_FTRACE_SELFTEST
441 .selftest = trace_selftest_startup_ksym,
442#endif
443 .print_header = ksym_trace_print_header,
444 .print_line = ksym_trace_output
445};
446
447#ifdef CONFIG_PROFILE_KSYM_TRACER
448static int ksym_profile_show(struct seq_file *m, void *v)
449{
450 struct hlist_node *node;
451 struct trace_ksym *entry;
452 int access_type = 0;
453 char fn_name[KSYM_NAME_LEN];
454
455 seq_puts(m, " Access Type ");
456 seq_puts(m, " Symbol Counter\n");
457 seq_puts(m, " ----------- ");
458 seq_puts(m, " ------ -------\n");
459
460 rcu_read_lock();
461 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
462
463 access_type = entry->attr.bp_type;
464
465 switch (access_type) {
466 case HW_BREAKPOINT_R:
467 seq_puts(m, " R ");
468 break;
469 case HW_BREAKPOINT_W:
470 seq_puts(m, " W ");
471 break;
472 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
473 seq_puts(m, " RW ");
474 break;
475 default:
476 seq_puts(m, " NA ");
477 }
478
479 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
480 seq_printf(m, " %-36s", fn_name);
481 else
482 seq_printf(m, " %-36s", "<NA>");
483 seq_printf(m, " %15llu\n",
484 (unsigned long long)atomic64_read(&entry->counter));
485 }
486 rcu_read_unlock();
487
488 return 0;
489}
490
491static int ksym_profile_open(struct inode *node, struct file *file)
492{
493 return single_open(file, ksym_profile_show, NULL);
494}
495
496static const struct file_operations ksym_profile_fops = {
497 .open = ksym_profile_open,
498 .read = seq_read,
499 .llseek = seq_lseek,
500 .release = single_release,
501};
502#endif /* CONFIG_PROFILE_KSYM_TRACER */
503
504__init static int init_ksym_trace(void)
505{
506 struct dentry *d_tracer;
507
508 d_tracer = tracing_init_dentry();
509
510 trace_create_file("ksym_trace_filter", 0644, d_tracer,
511 NULL, &ksym_tracing_fops);
512
513#ifdef CONFIG_PROFILE_KSYM_TRACER
514 trace_create_file("ksym_profile", 0444, d_tracer,
515 NULL, &ksym_profile_fops);
516#endif
517
518 return register_tracer(&ksym_tracer);
519}
520device_initcall(init_ksym_trace);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 0acd834659ed..017fa376505d 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,7 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/mmiotrace.h> 10#include <linux/mmiotrace.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <linux/slab.h>
12#include <linux/time.h> 13#include <linux/time.h>
13 14
14#include <asm/atomic.h> 15#include <asm/atomic.h>
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b6c12c6a1bcd..8e46b3323cdc 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -23,13 +23,21 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
23 23
24static int next_event_type = __TRACE_LAST_TYPE + 1; 24static int next_event_type = __TRACE_LAST_TYPE + 1;
25 25
26void trace_print_seq(struct seq_file *m, struct trace_seq *s) 26int trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29 int ret;
30
31 ret = seq_write(m, s->buffer, len);
29 32
30 seq_write(m, s->buffer, len); 33 /*
34 * Only reset this buffer if we successfully wrote to the
35 * seq_file buffer.
36 */
37 if (!ret)
38 trace_seq_init(s);
31 39
32 trace_seq_init(s); 40 return ret;
33} 41}
34 42
35enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) 43enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
@@ -85,7 +93,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
85 va_list ap; 93 va_list ap;
86 int ret; 94 int ret;
87 95
88 if (!len) 96 if (s->full || !len)
89 return 0; 97 return 0;
90 98
91 va_start(ap, fmt); 99 va_start(ap, fmt);
@@ -93,8 +101,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
93 va_end(ap); 101 va_end(ap);
94 102
95 /* If we can't write it all, don't bother writing anything */ 103 /* If we can't write it all, don't bother writing anything */
96 if (ret >= len) 104 if (ret >= len) {
105 s->full = 1;
97 return 0; 106 return 0;
107 }
98 108
99 s->len += ret; 109 s->len += ret;
100 110
@@ -119,14 +129,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
119 int len = (PAGE_SIZE - 1) - s->len; 129 int len = (PAGE_SIZE - 1) - s->len;
120 int ret; 130 int ret;
121 131
122 if (!len) 132 if (s->full || !len)
123 return 0; 133 return 0;
124 134
125 ret = vsnprintf(s->buffer + s->len, len, fmt, args); 135 ret = vsnprintf(s->buffer + s->len, len, fmt, args);
126 136
127 /* If we can't write it all, don't bother writing anything */ 137 /* If we can't write it all, don't bother writing anything */
128 if (ret >= len) 138 if (ret >= len) {
139 s->full = 1;
129 return 0; 140 return 0;
141 }
130 142
131 s->len += ret; 143 s->len += ret;
132 144
@@ -139,14 +151,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
139 int len = (PAGE_SIZE - 1) - s->len; 151 int len = (PAGE_SIZE - 1) - s->len;
140 int ret; 152 int ret;
141 153
142 if (!len) 154 if (s->full || !len)
143 return 0; 155 return 0;
144 156
145 ret = bstr_printf(s->buffer + s->len, len, fmt, binary); 157 ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
146 158
147 /* If we can't write it all, don't bother writing anything */ 159 /* If we can't write it all, don't bother writing anything */
148 if (ret >= len) 160 if (ret >= len) {
161 s->full = 1;
149 return 0; 162 return 0;
163 }
150 164
151 s->len += ret; 165 s->len += ret;
152 166
@@ -167,8 +181,13 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
167{ 181{
168 int len = strlen(str); 182 int len = strlen(str);
169 183
170 if (len > ((PAGE_SIZE - 1) - s->len)) 184 if (s->full)
185 return 0;
186
187 if (len > ((PAGE_SIZE - 1) - s->len)) {
188 s->full = 1;
171 return 0; 189 return 0;
190 }
172 191
173 memcpy(s->buffer + s->len, str, len); 192 memcpy(s->buffer + s->len, str, len);
174 s->len += len; 193 s->len += len;
@@ -178,9 +197,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
178 197
179int trace_seq_putc(struct trace_seq *s, unsigned char c) 198int trace_seq_putc(struct trace_seq *s, unsigned char c)
180{ 199{
181 if (s->len >= (PAGE_SIZE - 1)) 200 if (s->full)
182 return 0; 201 return 0;
183 202
203 if (s->len >= (PAGE_SIZE - 1)) {
204 s->full = 1;
205 return 0;
206 }
207
184 s->buffer[s->len++] = c; 208 s->buffer[s->len++] = c;
185 209
186 return 1; 210 return 1;
@@ -188,9 +212,14 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
188 212
189int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) 213int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
190{ 214{
191 if (len > ((PAGE_SIZE - 1) - s->len)) 215 if (s->full)
192 return 0; 216 return 0;
193 217
218 if (len > ((PAGE_SIZE - 1) - s->len)) {
219 s->full = 1;
220 return 0;
221 }
222
194 memcpy(s->buffer + s->len, mem, len); 223 memcpy(s->buffer + s->len, mem, len);
195 s->len += len; 224 s->len += len;
196 225
@@ -203,6 +232,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
203 const unsigned char *data = mem; 232 const unsigned char *data = mem;
204 int i, j; 233 int i, j;
205 234
235 if (s->full)
236 return 0;
237
206#ifdef __BIG_ENDIAN 238#ifdef __BIG_ENDIAN
207 for (i = 0, j = 0; i < len; i++) { 239 for (i = 0, j = 0; i < len; i++) {
208#else 240#else
@@ -220,8 +252,13 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
220{ 252{
221 void *ret; 253 void *ret;
222 254
223 if (len > ((PAGE_SIZE - 1) - s->len)) 255 if (s->full)
256 return 0;
257
258 if (len > ((PAGE_SIZE - 1) - s->len)) {
259 s->full = 1;
224 return NULL; 260 return NULL;
261 }
225 262
226 ret = s->buffer + s->len; 263 ret = s->buffer + s->len;
227 s->len += len; 264 s->len += len;
@@ -233,8 +270,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
233{ 270{
234 unsigned char *p; 271 unsigned char *p;
235 272
236 if (s->len >= (PAGE_SIZE - 1)) 273 if (s->full)
274 return 0;
275
276 if (s->len >= (PAGE_SIZE - 1)) {
277 s->full = 1;
237 return 0; 278 return 0;
279 }
280
238 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); 281 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
239 if (!IS_ERR(p)) { 282 if (!IS_ERR(p)) {
240 p = mangle_path(s->buffer + s->len, p, "\n"); 283 p = mangle_path(s->buffer + s->len, p, "\n");
@@ -247,6 +290,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
247 return 1; 290 return 1;
248 } 291 }
249 292
293 s->full = 1;
250 return 0; 294 return 0;
251} 295}
252 296
@@ -373,6 +417,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
373 unsigned long vmstart = 0; 417 unsigned long vmstart = 0;
374 int ret = 1; 418 int ret = 1;
375 419
420 if (s->full)
421 return 0;
422
376 if (mm) { 423 if (mm) {
377 const struct vm_area_struct *vma; 424 const struct vm_area_struct *vma;
378 425
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 26185d727676..0271742abb8d 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -28,8 +28,8 @@ static int wakeup_current_cpu;
28static unsigned wakeup_prio = -1; 28static unsigned wakeup_prio = -1;
29static int wakeup_rt; 29static int wakeup_rt;
30 30
31static raw_spinlock_t wakeup_lock = 31static arch_spinlock_t wakeup_lock =
32 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 33
34static void __wakeup_reset(struct trace_array *tr); 34static void __wakeup_reset(struct trace_array *tr);
35 35
@@ -143,7 +143,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
143 goto out; 143 goto out;
144 144
145 local_irq_save(flags); 145 local_irq_save(flags);
146 __raw_spin_lock(&wakeup_lock); 146 arch_spin_lock(&wakeup_lock);
147 147
148 /* We could race with grabbing wakeup_lock */ 148 /* We could race with grabbing wakeup_lock */
149 if (unlikely(!tracer_enabled || next != wakeup_task)) 149 if (unlikely(!tracer_enabled || next != wakeup_task))
@@ -169,7 +169,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
169 169
170out_unlock: 170out_unlock:
171 __wakeup_reset(wakeup_trace); 171 __wakeup_reset(wakeup_trace);
172 __raw_spin_unlock(&wakeup_lock); 172 arch_spin_unlock(&wakeup_lock);
173 local_irq_restore(flags); 173 local_irq_restore(flags);
174out: 174out:
175 atomic_dec(&wakeup_trace->data[cpu]->disabled); 175 atomic_dec(&wakeup_trace->data[cpu]->disabled);
@@ -193,9 +193,9 @@ static void wakeup_reset(struct trace_array *tr)
193 tracing_reset_online_cpus(tr); 193 tracing_reset_online_cpus(tr);
194 194
195 local_irq_save(flags); 195 local_irq_save(flags);
196 __raw_spin_lock(&wakeup_lock); 196 arch_spin_lock(&wakeup_lock);
197 __wakeup_reset(tr); 197 __wakeup_reset(tr);
198 __raw_spin_unlock(&wakeup_lock); 198 arch_spin_unlock(&wakeup_lock);
199 local_irq_restore(flags); 199 local_irq_restore(flags);
200} 200}
201 201
@@ -225,7 +225,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
225 goto out; 225 goto out;
226 226
227 /* interrupts should be off from try_to_wake_up */ 227 /* interrupts should be off from try_to_wake_up */
228 __raw_spin_lock(&wakeup_lock); 228 arch_spin_lock(&wakeup_lock);
229 229
230 /* check for races. */ 230 /* check for races. */
231 if (!tracer_enabled || p->prio >= wakeup_prio) 231 if (!tracer_enabled || p->prio >= wakeup_prio)
@@ -255,7 +255,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
256 256
257out_locked: 257out_locked:
258 __raw_spin_unlock(&wakeup_lock); 258 arch_spin_unlock(&wakeup_lock);
259out: 259out:
260 atomic_dec(&wakeup_trace->data[cpu]->disabled); 260 atomic_dec(&wakeup_trace->data[cpu]->disabled);
261} 261}
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index d2cdbabb4ead..81003b4d617f 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -3,6 +3,7 @@
3#include <linux/stringify.h> 3#include <linux/stringify.h>
4#include <linux/kthread.h> 4#include <linux/kthread.h>
5#include <linux/delay.h> 5#include <linux/delay.h>
6#include <linux/slab.h>
6 7
7static inline int trace_valid_entry(struct trace_entry *entry) 8static inline int trace_valid_entry(struct trace_entry *entry)
8{ 9{
@@ -17,6 +18,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
17 case TRACE_GRAPH_ENT: 18 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 19 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES: 20 case TRACE_HW_BRANCHES:
21 case TRACE_KSYM:
20 return 1; 22 return 1;
21 } 23 }
22 return 0; 24 return 0;
@@ -66,7 +68,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
66 68
67 /* Don't allow flipping of max traces now */ 69 /* Don't allow flipping of max traces now */
68 local_irq_save(flags); 70 local_irq_save(flags);
69 __raw_spin_lock(&ftrace_max_lock); 71 arch_spin_lock(&ftrace_max_lock);
70 72
71 cnt = ring_buffer_entries(tr->buffer); 73 cnt = ring_buffer_entries(tr->buffer);
72 74
@@ -84,7 +86,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
84 break; 86 break;
85 } 87 }
86 tracing_on(); 88 tracing_on();
87 __raw_spin_unlock(&ftrace_max_lock); 89 arch_spin_unlock(&ftrace_max_lock);
88 local_irq_restore(flags); 90 local_irq_restore(flags);
89 91
90 if (count) 92 if (count)
@@ -808,3 +810,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
808 return ret; 810 return ret;
809} 811}
810#endif /* CONFIG_HW_BRANCH_TRACER */ 812#endif /* CONFIG_HW_BRANCH_TRACER */
813
814#ifdef CONFIG_KSYM_TRACER
815static int ksym_selftest_dummy;
816
817int
818trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
819{
820 unsigned long count;
821 int ret;
822
823 /* start the tracing */
824 ret = tracer_init(trace, tr);
825 if (ret) {
826 warn_failed_init_tracer(trace, ret);
827 return ret;
828 }
829
830 ksym_selftest_dummy = 0;
831 /* Register the read-write tracing request */
832
833 ret = process_new_ksym_entry("ksym_selftest_dummy",
834 HW_BREAKPOINT_R | HW_BREAKPOINT_W,
835 (unsigned long)(&ksym_selftest_dummy));
836
837 if (ret < 0) {
838 printk(KERN_CONT "ksym_trace read-write startup test failed\n");
839 goto ret_path;
840 }
841 /* Perform a read and a write operation over the dummy variable to
842 * trigger the tracer
843 */
844 if (ksym_selftest_dummy == 0)
845 ksym_selftest_dummy++;
846
847 /* stop the tracing. */
848 tracing_stop();
849 /* check the trace buffer */
850 ret = trace_test_buffer(tr, &count);
851 trace->reset(tr);
852 tracing_start();
853
854 /* read & write operations - one each is performed on the dummy variable
855 * triggering two entries in the trace buffer
856 */
857 if (!ret && count != 2) {
858 printk(KERN_CONT "Ksym tracer startup test failed");
859 ret = -1;
860 }
861
862ret_path:
863 return ret;
864}
865#endif /* CONFIG_KSYM_TRACER */
866
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 8504ac71e4e8..f4bc9b27de5f 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -27,8 +27,8 @@ static struct stack_trace max_stack_trace = {
27}; 27};
28 28
29static unsigned long max_stack_size; 29static unsigned long max_stack_size;
30static raw_spinlock_t max_stack_lock = 30static arch_spinlock_t max_stack_lock =
31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 31 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
32 32
33static int stack_trace_disabled __read_mostly; 33static int stack_trace_disabled __read_mostly;
34static DEFINE_PER_CPU(int, trace_active); 34static DEFINE_PER_CPU(int, trace_active);
@@ -54,7 +54,7 @@ static inline void check_stack(void)
54 return; 54 return;
55 55
56 local_irq_save(flags); 56 local_irq_save(flags);
57 __raw_spin_lock(&max_stack_lock); 57 arch_spin_lock(&max_stack_lock);
58 58
59 /* a race could have already updated it */ 59 /* a race could have already updated it */
60 if (this_size <= max_stack_size) 60 if (this_size <= max_stack_size)
@@ -103,7 +103,7 @@ static inline void check_stack(void)
103 } 103 }
104 104
105 out: 105 out:
106 __raw_spin_unlock(&max_stack_lock); 106 arch_spin_unlock(&max_stack_lock);
107 local_irq_restore(flags); 107 local_irq_restore(flags);
108} 108}
109 109
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
157 unsigned long val, flags; 157 unsigned long val, flags;
158 char buf[64]; 158 char buf[64];
159 int ret; 159 int ret;
160 int cpu;
160 161
161 if (count >= sizeof(buf)) 162 if (count >= sizeof(buf))
162 return -EINVAL; 163 return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 172 return ret;
172 173
173 local_irq_save(flags); 174 local_irq_save(flags);
174 __raw_spin_lock(&max_stack_lock); 175
176 /*
177 * In case we trace inside arch_spin_lock() or after (NMI),
178 * we will cause circular lock, so we also need to increase
179 * the percpu trace_active here.
180 */
181 cpu = smp_processor_id();
182 per_cpu(trace_active, cpu)++;
183
184 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 185 *ptr = val;
176 __raw_spin_unlock(&max_stack_lock); 186 arch_spin_unlock(&max_stack_lock);
187
188 per_cpu(trace_active, cpu)--;
177 local_irq_restore(flags); 189 local_irq_restore(flags);
178 190
179 return count; 191 return count;
@@ -206,8 +218,14 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
206 218
207static void *t_start(struct seq_file *m, loff_t *pos) 219static void *t_start(struct seq_file *m, loff_t *pos)
208{ 220{
221 int cpu;
222
209 local_irq_disable(); 223 local_irq_disable();
210 __raw_spin_lock(&max_stack_lock); 224
225 cpu = smp_processor_id();
226 per_cpu(trace_active, cpu)++;
227
228 arch_spin_lock(&max_stack_lock);
211 229
212 if (*pos == 0) 230 if (*pos == 0)
213 return SEQ_START_TOKEN; 231 return SEQ_START_TOKEN;
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
217 235
218static void t_stop(struct seq_file *m, void *p) 236static void t_stop(struct seq_file *m, void *p)
219{ 237{
220 __raw_spin_unlock(&max_stack_lock); 238 int cpu;
239
240 arch_spin_unlock(&max_stack_lock);
241
242 cpu = smp_processor_id();
243 per_cpu(trace_active, cpu)--;
244
221 local_irq_enable(); 245 local_irq_enable();
222} 246}
223 247
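
The trace_stack.c hunks above do more than the rename: the comment added in stack_max_size_write() explains that the per-cpu trace_active counter is raised before taking max_stack_lock so that any tracing triggered while the lock is held (or from an NMI) backs off instead of deadlocking on the same lock. A hedged sketch of that guard follows; demo_active, demo_max_lock and demo_update_max are illustrative stand-ins, not identifiers from the patch.

#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/irqflags.h>

static DEFINE_PER_CPU(int, demo_active);
static arch_spinlock_t demo_max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

static void demo_update_max(unsigned long val, unsigned long *ptr)
{
        unsigned long flags;
        int cpu;

        local_irq_save(flags);

        /* Raise the per-cpu flag first: a tracing callback that checks
         * it will refuse to nest, so nothing traced from this section
         * can try to retake demo_max_lock on this CPU. */
        cpu = smp_processor_id();
        per_cpu(demo_active, cpu)++;

        arch_spin_lock(&demo_max_lock);
        *ptr = val;
        arch_spin_unlock(&demo_max_lock);

        per_cpu(demo_active, cpu)--;
        local_irq_restore(flags);
}

The same bump/drop of trace_active brackets the seq_file t_start()/t_stop() pair in the hunk, for the same reason.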
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index a4bb239eb987..96cffb269e73 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -10,6 +10,7 @@
10 10
11 11
12#include <linux/list.h> 12#include <linux/list.h>
13#include <linux/slab.h>
13#include <linux/rbtree.h> 14#include <linux/rbtree.h>
14#include <linux/debugfs.h> 15#include <linux/debugfs.h>
15#include "trace_stat.h" 16#include "trace_stat.h"
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 527e17eae575..4d6d711717f2 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,5 +1,6 @@
1#include <trace/syscall.h> 1#include <trace/syscall.h>
2#include <trace/events/syscalls.h> 2#include <trace/events/syscalls.h>
3#include <linux/slab.h>
3#include <linux/kernel.h> 4#include <linux/kernel.h>
4#include <linux/ftrace.h> 5#include <linux/ftrace.h>
5#include <linux/perf_event.h> 6#include <linux/perf_event.h>
@@ -14,6 +15,43 @@ static int sys_refcount_exit;
14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 15static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 16static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
16 17
18extern unsigned long __start_syscalls_metadata[];
19extern unsigned long __stop_syscalls_metadata[];
20
21static struct syscall_metadata **syscalls_metadata;
22
23static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
24{
25 struct syscall_metadata *start;
26 struct syscall_metadata *stop;
27 char str[KSYM_SYMBOL_LEN];
28
29
30 start = (struct syscall_metadata *)__start_syscalls_metadata;
31 stop = (struct syscall_metadata *)__stop_syscalls_metadata;
32 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
33
34 for ( ; start < stop; start++) {
35 /*
36 * Only compare after the "sys" prefix. Archs that use
37 * syscall wrappers may have syscalls symbols aliases prefixed
38 * with "SyS" instead of "sys", leading to an unwanted
39 * mismatch.
40 */
41 if (start->name && !strcmp(start->name + 3, str + 3))
42 return start;
43 }
44 return NULL;
45}
46
47static struct syscall_metadata *syscall_nr_to_meta(int nr)
48{
49 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
50 return NULL;
51
52 return syscalls_metadata[nr];
53}
54
17enum print_line_t 55enum print_line_t
18print_syscall_enter(struct trace_iterator *iter, int flags) 56print_syscall_enter(struct trace_iterator *iter, int flags)
19{ 57{
@@ -30,7 +68,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
30 if (!entry) 68 if (!entry)
31 goto end; 69 goto end;
32 70
33 if (entry->enter_id != ent->type) { 71 if (entry->enter_event->id != ent->type) {
34 WARN_ON_ONCE(1); 72 WARN_ON_ONCE(1);
35 goto end; 73 goto end;
36 } 74 }
@@ -85,7 +123,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
85 return TRACE_TYPE_HANDLED; 123 return TRACE_TYPE_HANDLED;
86 } 124 }
87 125
88 if (entry->exit_id != ent->type) { 126 if (entry->exit_event->id != ent->type) {
89 WARN_ON_ONCE(1); 127 WARN_ON_ONCE(1);
90 return TRACE_TYPE_UNHANDLED; 128 return TRACE_TYPE_UNHANDLED;
91 } 129 }
@@ -103,92 +141,79 @@ extern char *__bad_type_size(void);
103#define SYSCALL_FIELD(type, name) \ 141#define SYSCALL_FIELD(type, name) \
104 sizeof(type) != sizeof(trace.name) ? \ 142 sizeof(type) != sizeof(trace.name) ? \
105 __bad_type_size() : \ 143 __bad_type_size() : \
106 #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) 144 #type, #name, offsetof(typeof(trace), name), \
145 sizeof(trace.name), is_signed_type(type)
107 146
108int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 147static
148int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
109{ 149{
110 int i; 150 int i;
111 int nr; 151 int pos = 0;
112 int ret;
113 struct syscall_metadata *entry;
114 struct syscall_trace_enter trace;
115 int offset = offsetof(struct syscall_trace_enter, args);
116 152
117 nr = syscall_name_to_nr(call->data); 153 /* When len=0, we just calculate the needed length */
118 entry = syscall_nr_to_meta(nr); 154#define LEN_OR_ZERO (len ? len - pos : 0)
119
120 if (!entry)
121 return 0;
122
123 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
124 SYSCALL_FIELD(int, nr));
125 if (!ret)
126 return 0;
127 155
156 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
128 for (i = 0; i < entry->nb_args; i++) { 157 for (i = 0; i < entry->nb_args; i++) {
129 ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], 158 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
130 entry->args[i]); 159 entry->args[i], sizeof(unsigned long),
131 if (!ret) 160 i == entry->nb_args - 1 ? "" : ", ");
132 return 0;
133 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
134 sizeof(unsigned long));
135 if (!ret)
136 return 0;
137 offset += sizeof(unsigned long);
138 } 161 }
162 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
139 163
140 trace_seq_puts(s, "\nprint fmt: \"");
141 for (i = 0; i < entry->nb_args; i++) { 164 for (i = 0; i < entry->nb_args; i++) {
142 ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i], 165 pos += snprintf(buf + pos, LEN_OR_ZERO,
143 sizeof(unsigned long), 166 ", ((unsigned long)(REC->%s))", entry->args[i]);
144 i == entry->nb_args - 1 ? "" : ", ");
145 if (!ret)
146 return 0;
147 } 167 }
148 trace_seq_putc(s, '"');
149 168
150 for (i = 0; i < entry->nb_args; i++) { 169#undef LEN_OR_ZERO
151 ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
152 entry->args[i]);
153 if (!ret)
154 return 0;
155 }
156 170
157 return trace_seq_putc(s, '\n'); 171 /* return the length of print_fmt */
172 return pos;
158} 173}
159 174
160int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) 175static int set_syscall_print_fmt(struct ftrace_event_call *call)
161{ 176{
162 int ret; 177 char *print_fmt;
163 struct syscall_trace_exit trace; 178 int len;
179 struct syscall_metadata *entry = call->data;
164 180
165 ret = trace_seq_printf(s, 181 if (entry->enter_event != call) {
166 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 182 call->print_fmt = "\"0x%lx\", REC->ret";
167 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
168 SYSCALL_FIELD(int, nr),
169 SYSCALL_FIELD(long, ret));
170 if (!ret)
171 return 0; 183 return 0;
184 }
185
186 /* First: called with 0 length to calculate the needed length */
187 len = __set_enter_print_fmt(entry, NULL, 0);
188
189 print_fmt = kmalloc(len + 1, GFP_KERNEL);
190 if (!print_fmt)
191 return -ENOMEM;
192
193 /* Second: actually write the @print_fmt */
194 __set_enter_print_fmt(entry, print_fmt, len + 1);
195 call->print_fmt = print_fmt;
196
197 return 0;
198}
199
200static void free_syscall_print_fmt(struct ftrace_event_call *call)
201{
202 struct syscall_metadata *entry = call->data;
172 203
173 return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); 204 if (entry->enter_event == call)
205 kfree(call->print_fmt);
174} 206}
175 207
176int syscall_enter_define_fields(struct ftrace_event_call *call) 208int syscall_enter_define_fields(struct ftrace_event_call *call)
177{ 209{
178 struct syscall_trace_enter trace; 210 struct syscall_trace_enter trace;
179 struct syscall_metadata *meta; 211 struct syscall_metadata *meta = call->data;
180 int ret; 212 int ret;
181 int nr;
182 int i; 213 int i;
183 int offset = offsetof(typeof(trace), args); 214 int offset = offsetof(typeof(trace), args);
184 215
185 nr = syscall_name_to_nr(call->data); 216 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
186 meta = syscall_nr_to_meta(nr);
187
188 if (!meta)
189 return 0;
190
191 ret = trace_define_common_fields(call);
192 if (ret) 217 if (ret)
193 return ret; 218 return ret;
194 219
@@ -208,11 +233,11 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
208 struct syscall_trace_exit trace; 233 struct syscall_trace_exit trace;
209 int ret; 234 int ret;
210 235
211 ret = trace_define_common_fields(call); 236 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
212 if (ret) 237 if (ret)
213 return ret; 238 return ret;
214 239
215 ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0, 240 ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
216 FILTER_OTHER); 241 FILTER_OTHER);
217 242
218 return ret; 243 return ret;
@@ -239,8 +264,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
239 264
240 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 265 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
241 266
242 event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, 267 event = trace_current_buffer_lock_reserve(&buffer,
243 size, 0, 0); 268 sys_data->enter_event->id, size, 0, 0);
244 if (!event) 269 if (!event)
245 return; 270 return;
246 271
@@ -271,8 +296,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
271 if (!sys_data) 296 if (!sys_data)
272 return; 297 return;
273 298
274 event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, 299 event = trace_current_buffer_lock_reserve(&buffer,
275 sizeof(*entry), 0, 0); 300 sys_data->exit_event->id, sizeof(*entry), 0, 0);
276 if (!event) 301 if (!event)
277 return; 302 return;
278 303
@@ -285,23 +310,18 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
285 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 310 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
286} 311}
287 312
288int reg_event_syscall_enter(void *ptr) 313int reg_event_syscall_enter(struct ftrace_event_call *call)
289{ 314{
290 int ret = 0; 315 int ret = 0;
291 int num; 316 int num;
292 char *name;
293 317
294 name = (char *)ptr; 318 num = ((struct syscall_metadata *)call->data)->syscall_nr;
295 num = syscall_name_to_nr(name);
296 if (num < 0 || num >= NR_syscalls) 319 if (num < 0 || num >= NR_syscalls)
297 return -ENOSYS; 320 return -ENOSYS;
298 mutex_lock(&syscall_trace_lock); 321 mutex_lock(&syscall_trace_lock);
299 if (!sys_refcount_enter) 322 if (!sys_refcount_enter)
300 ret = register_trace_sys_enter(ftrace_syscall_enter); 323 ret = register_trace_sys_enter(ftrace_syscall_enter);
301 if (ret) { 324 if (!ret) {
302 pr_info("event trace: Could not activate"
303 "syscall entry trace point");
304 } else {
305 set_bit(num, enabled_enter_syscalls); 325 set_bit(num, enabled_enter_syscalls);
306 sys_refcount_enter++; 326 sys_refcount_enter++;
307 } 327 }
@@ -309,13 +329,11 @@ int reg_event_syscall_enter(void *ptr)
309 return ret; 329 return ret;
310} 330}
311 331
312void unreg_event_syscall_enter(void *ptr) 332void unreg_event_syscall_enter(struct ftrace_event_call *call)
313{ 333{
314 int num; 334 int num;
315 char *name;
316 335
317 name = (char *)ptr; 336 num = ((struct syscall_metadata *)call->data)->syscall_nr;
318 num = syscall_name_to_nr(name);
319 if (num < 0 || num >= NR_syscalls) 337 if (num < 0 || num >= NR_syscalls)
320 return; 338 return;
321 mutex_lock(&syscall_trace_lock); 339 mutex_lock(&syscall_trace_lock);
@@ -326,23 +344,18 @@ void unreg_event_syscall_enter(void *ptr)
326 mutex_unlock(&syscall_trace_lock); 344 mutex_unlock(&syscall_trace_lock);
327} 345}
328 346
329int reg_event_syscall_exit(void *ptr) 347int reg_event_syscall_exit(struct ftrace_event_call *call)
330{ 348{
331 int ret = 0; 349 int ret = 0;
332 int num; 350 int num;
333 char *name;
334 351
335 name = (char *)ptr; 352 num = ((struct syscall_metadata *)call->data)->syscall_nr;
336 num = syscall_name_to_nr(name);
337 if (num < 0 || num >= NR_syscalls) 353 if (num < 0 || num >= NR_syscalls)
338 return -ENOSYS; 354 return -ENOSYS;
339 mutex_lock(&syscall_trace_lock); 355 mutex_lock(&syscall_trace_lock);
340 if (!sys_refcount_exit) 356 if (!sys_refcount_exit)
341 ret = register_trace_sys_exit(ftrace_syscall_exit); 357 ret = register_trace_sys_exit(ftrace_syscall_exit);
342 if (ret) { 358 if (!ret) {
343 pr_info("event trace: Could not activate"
344 "syscall exit trace point");
345 } else {
346 set_bit(num, enabled_exit_syscalls); 359 set_bit(num, enabled_exit_syscalls);
347 sys_refcount_exit++; 360 sys_refcount_exit++;
348 } 361 }
@@ -350,13 +363,11 @@ int reg_event_syscall_exit(void *ptr)
350 return ret; 363 return ret;
351} 364}
352 365
353void unreg_event_syscall_exit(void *ptr) 366void unreg_event_syscall_exit(struct ftrace_event_call *call)
354{ 367{
355 int num; 368 int num;
356 char *name;
357 369
358 name = (char *)ptr; 370 num = ((struct syscall_metadata *)call->data)->syscall_nr;
359 num = syscall_name_to_nr(name);
360 if (num < 0 || num >= NR_syscalls) 371 if (num < 0 || num >= NR_syscalls)
361 return; 372 return;
362 mutex_lock(&syscall_trace_lock); 373 mutex_lock(&syscall_trace_lock);
@@ -367,33 +378,73 @@ void unreg_event_syscall_exit(void *ptr)
367 mutex_unlock(&syscall_trace_lock); 378 mutex_unlock(&syscall_trace_lock);
368} 379}
369 380
370struct trace_event event_syscall_enter = { 381int init_syscall_trace(struct ftrace_event_call *call)
371 .trace = print_syscall_enter, 382{
372}; 383 int id;
384
385 if (set_syscall_print_fmt(call) < 0)
386 return -ENOMEM;
373 387
374struct trace_event event_syscall_exit = { 388 id = trace_event_raw_init(call);
375 .trace = print_syscall_exit,
376};
377 389
378#ifdef CONFIG_EVENT_PROFILE 390 if (id < 0) {
391 free_syscall_print_fmt(call);
392 return id;
393 }
379 394
380static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 395 return id;
381static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); 396}
382static int sys_prof_refcount_enter; 397
383static int sys_prof_refcount_exit; 398unsigned long __init arch_syscall_addr(int nr)
399{
400 return (unsigned long)sys_call_table[nr];
401}
384 402
385static void prof_syscall_enter(struct pt_regs *regs, long id) 403int __init init_ftrace_syscalls(void)
404{
405 struct syscall_metadata *meta;
406 unsigned long addr;
407 int i;
408
409 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
410 NR_syscalls, GFP_KERNEL);
411 if (!syscalls_metadata) {
412 WARN_ON(1);
413 return -ENOMEM;
414 }
415
416 for (i = 0; i < NR_syscalls; i++) {
417 addr = arch_syscall_addr(i);
418 meta = find_syscall_meta(addr);
419 if (!meta)
420 continue;
421
422 meta->syscall_nr = i;
423 syscalls_metadata[i] = meta;
424 }
425
426 return 0;
427}
428core_initcall(init_ftrace_syscalls);
429
430#ifdef CONFIG_PERF_EVENTS
431
432static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
433static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
434static int sys_perf_refcount_enter;
435static int sys_perf_refcount_exit;
436
437static void perf_syscall_enter(struct pt_regs *regs, long id)
386{ 438{
387 struct syscall_metadata *sys_data; 439 struct syscall_metadata *sys_data;
388 struct syscall_trace_enter *rec; 440 struct syscall_trace_enter *rec;
389 unsigned long flags; 441 unsigned long flags;
390 char *raw_data;
391 int syscall_nr; 442 int syscall_nr;
443 int rctx;
392 int size; 444 int size;
393 int cpu;
394 445
395 syscall_nr = syscall_get_nr(current, regs); 446 syscall_nr = syscall_get_nr(current, regs);
396 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 447 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
397 return; 448 return;
398 449
399 sys_data = syscall_nr_to_meta(syscall_nr); 450 sys_data = syscall_nr_to_meta(syscall_nr);
@@ -405,91 +456,67 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
405 size = ALIGN(size + sizeof(u32), sizeof(u64)); 456 size = ALIGN(size + sizeof(u32), sizeof(u64));
406 size -= sizeof(u32); 457 size -= sizeof(u32);
407 458
408 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 459 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
409 "profile buffer not large enough")) 460 "perf buffer not large enough"))
410 return; 461 return;
411 462
412 /* Protect the per cpu buffer, begin the rcu read side */ 463 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
413 local_irq_save(flags); 464 sys_data->enter_event->id, &rctx, &flags);
414 465 if (!rec)
415 cpu = smp_processor_id(); 466 return;
416
417 if (in_nmi())
418 raw_data = rcu_dereference(trace_profile_buf_nmi);
419 else
420 raw_data = rcu_dereference(trace_profile_buf);
421
422 if (!raw_data)
423 goto end;
424
425 raw_data = per_cpu_ptr(raw_data, cpu);
426
427 /* zero the dead bytes from align to not leak stack to user */
428 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
429 467
430 rec = (struct syscall_trace_enter *) raw_data;
431 tracing_generic_entry_update(&rec->ent, 0, 0);
432 rec->ent.type = sys_data->enter_id;
433 rec->nr = syscall_nr; 468 rec->nr = syscall_nr;
434 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 469 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
435 (unsigned long *)&rec->args); 470 (unsigned long *)&rec->args);
436 perf_tp_event(sys_data->enter_id, 0, 1, rec, size); 471 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
437
438end:
439 local_irq_restore(flags);
440} 472}
441 473
442int reg_prof_syscall_enter(char *name) 474int perf_sysenter_enable(struct ftrace_event_call *call)
443{ 475{
444 int ret = 0; 476 int ret = 0;
445 int num; 477 int num;
446 478
447 num = syscall_name_to_nr(name); 479 num = ((struct syscall_metadata *)call->data)->syscall_nr;
448 if (num < 0 || num >= NR_syscalls)
449 return -ENOSYS;
450 480
451 mutex_lock(&syscall_trace_lock); 481 mutex_lock(&syscall_trace_lock);
452 if (!sys_prof_refcount_enter) 482 if (!sys_perf_refcount_enter)
453 ret = register_trace_sys_enter(prof_syscall_enter); 483 ret = register_trace_sys_enter(perf_syscall_enter);
454 if (ret) { 484 if (ret) {
455 pr_info("event trace: Could not activate" 485 pr_info("event trace: Could not activate"
456 "syscall entry trace point"); 486 "syscall entry trace point");
457 } else { 487 } else {
458 set_bit(num, enabled_prof_enter_syscalls); 488 set_bit(num, enabled_perf_enter_syscalls);
459 sys_prof_refcount_enter++; 489 sys_perf_refcount_enter++;
460 } 490 }
461 mutex_unlock(&syscall_trace_lock); 491 mutex_unlock(&syscall_trace_lock);
462 return ret; 492 return ret;
463} 493}
464 494
465void unreg_prof_syscall_enter(char *name) 495void perf_sysenter_disable(struct ftrace_event_call *call)
466{ 496{
467 int num; 497 int num;
468 498
469 num = syscall_name_to_nr(name); 499 num = ((struct syscall_metadata *)call->data)->syscall_nr;
470 if (num < 0 || num >= NR_syscalls)
471 return;
472 500
473 mutex_lock(&syscall_trace_lock); 501 mutex_lock(&syscall_trace_lock);
474 sys_prof_refcount_enter--; 502 sys_perf_refcount_enter--;
475 clear_bit(num, enabled_prof_enter_syscalls); 503 clear_bit(num, enabled_perf_enter_syscalls);
476 if (!sys_prof_refcount_enter) 504 if (!sys_perf_refcount_enter)
477 unregister_trace_sys_enter(prof_syscall_enter); 505 unregister_trace_sys_enter(perf_syscall_enter);
478 mutex_unlock(&syscall_trace_lock); 506 mutex_unlock(&syscall_trace_lock);
479} 507}
480 508
481static void prof_syscall_exit(struct pt_regs *regs, long ret) 509static void perf_syscall_exit(struct pt_regs *regs, long ret)
482{ 510{
483 struct syscall_metadata *sys_data; 511 struct syscall_metadata *sys_data;
484 struct syscall_trace_exit *rec; 512 struct syscall_trace_exit *rec;
485 unsigned long flags; 513 unsigned long flags;
486 int syscall_nr; 514 int syscall_nr;
487 char *raw_data; 515 int rctx;
488 int size; 516 int size;
489 int cpu;
490 517
491 syscall_nr = syscall_get_nr(current, regs); 518 syscall_nr = syscall_get_nr(current, regs);
492 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 519 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
493 return; 520 return;
494 521
495 sys_data = syscall_nr_to_meta(syscall_nr); 522 sys_data = syscall_nr_to_meta(syscall_nr);
@@ -504,79 +531,55 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
504 * Impossible, but be paranoid with the future 531 * Impossible, but be paranoid with the future
505 * How to put this check outside runtime? 532 * How to put this check outside runtime?
506 */ 533 */
507 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 534 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
508 "exit event has grown above profile buffer size")) 535 "exit event has grown above perf buffer size"))
509 return; 536 return;
510 537
511 /* Protect the per cpu buffer, begin the rcu read side */ 538 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
512 local_irq_save(flags); 539 sys_data->exit_event->id, &rctx, &flags);
513 cpu = smp_processor_id(); 540 if (!rec)
514 541 return;
515 if (in_nmi())
516 raw_data = rcu_dereference(trace_profile_buf_nmi);
517 else
518 raw_data = rcu_dereference(trace_profile_buf);
519
520 if (!raw_data)
521 goto end;
522
523 raw_data = per_cpu_ptr(raw_data, cpu);
524
525 /* zero the dead bytes from align to not leak stack to user */
526 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
527
528 rec = (struct syscall_trace_exit *)raw_data;
529 542
530 tracing_generic_entry_update(&rec->ent, 0, 0);
531 rec->ent.type = sys_data->exit_id;
532 rec->nr = syscall_nr; 543 rec->nr = syscall_nr;
533 rec->ret = syscall_get_return_value(current, regs); 544 rec->ret = syscall_get_return_value(current, regs);
534 545
535 perf_tp_event(sys_data->exit_id, 0, 1, rec, size); 546 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
536
537end:
538 local_irq_restore(flags);
539} 547}
540 548
541int reg_prof_syscall_exit(char *name) 549int perf_sysexit_enable(struct ftrace_event_call *call)
542{ 550{
543 int ret = 0; 551 int ret = 0;
544 int num; 552 int num;
545 553
546 num = syscall_name_to_nr(name); 554 num = ((struct syscall_metadata *)call->data)->syscall_nr;
547 if (num < 0 || num >= NR_syscalls)
548 return -ENOSYS;
549 555
550 mutex_lock(&syscall_trace_lock); 556 mutex_lock(&syscall_trace_lock);
551 if (!sys_prof_refcount_exit) 557 if (!sys_perf_refcount_exit)
552 ret = register_trace_sys_exit(prof_syscall_exit); 558 ret = register_trace_sys_exit(perf_syscall_exit);
553 if (ret) { 559 if (ret) {
554 pr_info("event trace: Could not activate" 560 pr_info("event trace: Could not activate"
555 "syscall entry trace point"); 561 "syscall exit trace point");
556 } else { 562 } else {
557 set_bit(num, enabled_prof_exit_syscalls); 563 set_bit(num, enabled_perf_exit_syscalls);
558 sys_prof_refcount_exit++; 564 sys_perf_refcount_exit++;
559 } 565 }
560 mutex_unlock(&syscall_trace_lock); 566 mutex_unlock(&syscall_trace_lock);
561 return ret; 567 return ret;
562} 568}
563 569
564void unreg_prof_syscall_exit(char *name) 570void perf_sysexit_disable(struct ftrace_event_call *call)
565{ 571{
566 int num; 572 int num;
567 573
568 num = syscall_name_to_nr(name); 574 num = ((struct syscall_metadata *)call->data)->syscall_nr;
569 if (num < 0 || num >= NR_syscalls)
570 return;
571 575
572 mutex_lock(&syscall_trace_lock); 576 mutex_lock(&syscall_trace_lock);
573 sys_prof_refcount_exit--; 577 sys_perf_refcount_exit--;
574 clear_bit(num, enabled_prof_exit_syscalls); 578 clear_bit(num, enabled_perf_exit_syscalls);
575 if (!sys_prof_refcount_exit) 579 if (!sys_perf_refcount_exit)
576 unregister_trace_sys_exit(prof_syscall_exit); 580 unregister_trace_sys_exit(perf_syscall_exit);
577 mutex_unlock(&syscall_trace_lock); 581 mutex_unlock(&syscall_trace_lock);
578} 582}
579 583
580#endif 584#endif /* CONFIG_PERF_EVENTS */
581
582 585
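
Among the trace_syscalls.c changes, set_syscall_print_fmt() replaces the old trace_seq-based format dump with a two-pass builder: __set_enter_print_fmt() is run once with a zero length to measure the string, the buffer is then allocated with kmalloc(), and the same routine is run again to fill it. The standalone C program below shows only that measure-then-fill pattern in userspace; build_fmt, BUF_OR_NULL and the dummy argument names are illustrative (LEN_OR_ZERO is the macro name the patch itself uses).

#include <stdio.h>
#include <stdlib.h>

static int build_fmt(const char **args, int nb_args, char *buf, int len)
{
        int pos = 0;
        int i;

/* With len == 0 the snprintf() calls only count characters. */
#define LEN_OR_ZERO (len ? len - pos : 0)
#define BUF_OR_NULL (len ? buf + pos : NULL)

        pos += snprintf(BUF_OR_NULL, LEN_OR_ZERO, "\"");
        for (i = 0; i < nb_args; i++)
                pos += snprintf(BUF_OR_NULL, LEN_OR_ZERO, "%s: 0x%%08lx%s",
                                args[i], i == nb_args - 1 ? "" : ", ");
        pos += snprintf(BUF_OR_NULL, LEN_OR_ZERO, "\"");

#undef BUF_OR_NULL
#undef LEN_OR_ZERO

        return pos; /* bytes needed, not counting the trailing NUL */
}

int main(void)
{
        const char *args[] = { "fd", "buf", "count" };
        int len;
        char *fmt;

        /* First pass: measure.  Second pass: allocate and fill. */
        len = build_fmt(args, 3, NULL, 0);
        fmt = malloc(len + 1);
        if (!fmt)
                return 1;
        build_fmt(args, 3, fmt, len + 1);

        printf("%s\n", fmt);
        free(fmt);
        return 0;
}

Compiled on its own, the program prints the quoted argument format for the three dummy names, which is the same shape of string the patch stores in call->print_fmt for syscall enter events.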
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index f6693969287d..a7974a552ca9 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -93,6 +93,7 @@ static const struct stacktrace_ops backtrace_ops = {
93 .warning_symbol = backtrace_warning_symbol, 93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack, 94 .stack = backtrace_stack,
95 .address = backtrace_address, 95 .address = backtrace_address,
96 .walk_stack = print_context_stack,
96}; 97};
97 98
98static int 99static int
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 40cafb07dffd..cc2d2faa7d9e 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -9,6 +9,7 @@
9#include <trace/events/workqueue.h> 9#include <trace/events/workqueue.h>
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include <linux/slab.h>
12#include <linux/kref.h> 13#include <linux/kref.h>
13#include "trace_stat.h" 14#include "trace_stat.h"
14#include "trace.h" 15#include "trace.h"