author		Tejun Heo <tj@kernel.org>	2010-01-04 19:17:33 -0500
committer	Tejun Heo <tj@kernel.org>	2010-01-04 19:17:33 -0500
commit		32032df6c2f6c9c6b2ada2ce42322231824f70c2 (patch)
tree		b1ce838a37044bb38dfc128e2116ca35630e629a /kernel/trace
parent		22b737f4c75197372d64afc6ed1bccd58c00e549 (diff)
parent		c5974b835a909ff15c3b7e6cf6789b5eb919f419 (diff)
Merge branch 'master' into percpu

Conflicts:
	arch/powerpc/platforms/pseries/hvCall.S
	include/linux/percpu.h
Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                 |  140
-rw-r--r--  kernel/trace/Makefile                |    2
-rw-r--r--  kernel/trace/blktrace.c              |   39
-rw-r--r--  kernel/trace/ftrace.c                |  440
-rw-r--r--  kernel/trace/kmemtrace.c             |    2
-rw-r--r--  kernel/trace/power-traces.c          |    2
-rw-r--r--  kernel/trace/ring_buffer.c           |   81
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c |   85
-rw-r--r--  kernel/trace/trace.c                 |  316
-rw-r--r--  kernel/trace/trace.h                 |  105
-rw-r--r--  kernel/trace/trace_branch.c          |    8
-rw-r--r--  kernel/trace/trace_clock.c           |   16
-rw-r--r--  kernel/trace/trace_entries.h         |   16
-rw-r--r--  kernel/trace/trace_event_profile.c   |   56
-rw-r--r--  kernel/trace/trace_events.c          |  228
-rw-r--r--  kernel/trace/trace_events_filter.c   |  426
-rw-r--r--  kernel/trace/trace_export.c          |   54
-rw-r--r--  kernel/trace/trace_functions_graph.c |  165
-rw-r--r--  kernel/trace/trace_hw_branches.c     |    8
-rw-r--r--  kernel/trace/trace_irqsoff.c         |    2
-rw-r--r--  kernel/trace/trace_kprobe.c          | 1553
-rw-r--r--  kernel/trace/trace_ksym.c            |  519
-rw-r--r--  kernel/trace/trace_output.c          |   98
-rw-r--r--  kernel/trace/trace_sched_wakeup.c    |   16
-rw-r--r--  kernel/trace/trace_selftest.c        |   59
-rw-r--r--  kernel/trace/trace_stack.c           |   16
-rw-r--r--  kernel/trace/trace_syscalls.c        |  237
-rw-r--r--  kernel/trace/trace_sysprof.c         |    1
28 files changed, 3787 insertions, 903 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b416512ad17f..6c22d8a2f289 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -12,17 +12,17 @@ config NOP_TRACER
 config HAVE_FTRACE_NMI_ENTER
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_FUNCTION_TRACER
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_FUNCTION_GRAPH_TRACER
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_FUNCTION_GRAPH_FP_TEST
 	bool
@@ -34,17 +34,17 @@ config HAVE_FUNCTION_GRAPH_FP_TEST
 config HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_DYNAMIC_FTRACE
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_FTRACE_MCOUNT_RECORD
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_HW_BRANCH_TRACER
 	bool
@@ -52,7 +52,7 @@ config HAVE_HW_BRANCH_TRACER
 config HAVE_SYSCALL_TRACEPOINTS
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config TRACER_MAX_TRACE
 	bool
@@ -83,7 +83,7 @@ config RING_BUFFER_ALLOW_SWAP
 # This allows those options to appear when no other tracer is selected. But the
 # options do not appear when something else selects it. We need the two options
 # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
-# hidding of the automatic options.
+# hiding of the automatic options.
 
 config TRACING
 	bool
@@ -119,7 +119,7 @@ menuconfig FTRACE
 	bool "Tracers"
 	default y if DEBUG_KERNEL
 	help
-	 Enable the kernel tracing infrastructure.
+	  Enable the kernel tracing infrastructure.
 
 if FTRACE
 
@@ -133,7 +133,7 @@ config FUNCTION_TRACER
 	help
 	  Enable the kernel to trace every kernel function. This is done
 	  by using a compiler feature to insert a small, 5-byte No-Operation
-	  instruction to the beginning of every kernel function, which NOP
+	  instruction at the beginning of every kernel function, which NOP
 	  sequence is then dynamically patched into a tracer call when
 	  tracing is enabled by the administrator. If it's runtime disabled
 	  (the bootup default), then the overhead of the instructions is very
@@ -150,7 +150,7 @@ config FUNCTION_GRAPH_TRACER
 	  and its entry.
 	  Its first purpose is to trace the duration of functions and
 	  draw a call graph for each thread with some information like
-	  the return value. This is done by setting the current return 
-	  address on the current task structure into a stack of calls. 
+	  the return value. This is done by setting the current return
+	  address on the current task structure into a stack of calls.
 
 
@@ -173,7 +173,7 @@ config IRQSOFF_TRACER
 
 	      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 
-	  (Note that kernel size and overhead increases with this option
+	  (Note that kernel size and overhead increase with this option
 	  enabled. This option and the preempt-off timing option can be
 	  used together or separately.)
 
@@ -186,7 +186,7 @@ config PREEMPT_TRACER
 	select TRACER_MAX_TRACE
 	select RING_BUFFER_ALLOW_SWAP
 	help
-	  This option measures the time spent in preemption off critical
+	  This option measures the time spent in preemption-off critical
 	  sections, with microsecond accuracy.
 
 	  The default measurement method is a maximum search, which is
@@ -195,7 +195,7 @@ config PREEMPT_TRACER
 
 	      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 
-	  (Note that kernel size and overhead increases with this option
+	  (Note that kernel size and overhead increase with this option
 	  enabled. This option and the irqs-off timing option can be
 	  used together or separately.)
 
@@ -222,7 +222,7 @@ config ENABLE_DEFAULT_TRACERS
 	depends on !GENERIC_TRACER
 	select TRACING
 	help
-	  This tracer hooks to various trace points in the kernel
+	  This tracer hooks to various trace points in the kernel,
 	  allowing the user to pick and choose which trace point they
 	  want to trace. It also includes the sched_switch tracer plugin.
 
@@ -265,19 +265,19 @@ choice
 	  The likely/unlikely profiler only looks at the conditions that
 	  are annotated with a likely or unlikely macro.
 
-	  The "all branch" profiler will profile every if statement in the
+	  The "all branch" profiler will profile every if-statement in the
 	  kernel. This profiler will also enable the likely/unlikely
-	  profiler as well.
+	  profiler.
 
-	  Either of the above profilers add a bit of overhead to the system.
-	  If unsure choose "No branch profiling".
+	  Either of the above profilers adds a bit of overhead to the system.
+	  If unsure, choose "No branch profiling".
 
 config BRANCH_PROFILE_NONE
 	bool "No branch profiling"
 	help
 	  No branch profiling. Branch profiling adds a bit of overhead.
 	  Only enable it if you want to analyse the branching behavior.
 	  Otherwise keep it disabled.
 
 config PROFILE_ANNOTATED_BRANCHES
 	bool "Trace likely/unlikely profiler"
@@ -288,7 +288,7 @@ config PROFILE_ANNOTATED_BRANCHES
 
 	      /sys/kernel/debug/tracing/profile_annotated_branch
 
-	  Note: this will add a significant overhead, only turn this
+	  Note: this will add a significant overhead; only turn this
 	  on if you need to profile the system's use of these macros.
 
 config PROFILE_ALL_BRANCHES
@@ -305,7 +305,7 @@ config PROFILE_ALL_BRANCHES
 
 	  This configuration, when enabled, will impose a great overhead
 	  on the system. This should only be enabled when the system
-	  is to be analyzed
+	  is to be analyzed in much detail.
 endchoice
 
 config TRACING_BRANCHES
@@ -335,10 +335,31 @@ config POWER_TRACER
 	depends on X86
 	select GENERIC_TRACER
 	help
-	  This tracer helps developers to analyze and optimize the kernels
+	  This tracer helps developers to analyze and optimize the kernel's
 	  power management decisions, specifically the C-state and P-state
 	  behavior.
 
+config KSYM_TRACER
+	bool "Trace read and write access on kernel memory locations"
+	depends on HAVE_HW_BREAKPOINT
+	select TRACING
+	help
+	  This tracer helps find read and write operations on any given kernel
+	  symbol i.e. /proc/kallsyms.
+
+config PROFILE_KSYM_TRACER
+	bool "Profile all kernel memory accesses on 'watched' variables"
+	depends on KSYM_TRACER
+	help
+	  This tracer profiles kernel accesses on variables watched through the
+	  ksym tracer ftrace plugin. Depending upon the hardware, all read
+	  and write operations on kernel variables can be monitored for
+	  accesses.
+
+	  The results will be displayed in:
+	  /debugfs/tracing/profile_ksym
+
+	  Say N if unsure.
 
 config STACK_TRACER
 	bool "Trace max stack"
@@ -370,14 +391,14 @@ config HW_BRANCH_TRACER
 	select GENERIC_TRACER
 	help
 	  This tracer records all branches on the system in a circular
-	  buffer giving access to the last N branches for each cpu.
+	  buffer, giving access to the last N branches for each cpu.
 
 config KMEMTRACE
 	bool "Trace SLAB allocations"
 	select GENERIC_TRACER
 	help
 	  kmemtrace provides tracing for slab allocator functions, such as
-	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
+	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
 	  data is then fed to the userspace application in order to analyse
 	  allocation hotspots, internal fragmentation and so on, making it
 	  possible to see how well an allocator performs, as well as debug
@@ -396,15 +417,15 @@ config WORKQUEUE_TRACER
 	bool "Trace workqueues"
 	select GENERIC_TRACER
 	help
-	  The workqueue tracer provides some statistical informations
+	  The workqueue tracer provides some statistical information
 	  about each cpu workqueue thread such as the number of the
 	  works inserted and executed since their creation. It can help
-	  to evaluate the amount of work each of them have to perform.
+	  to evaluate the amount of work each of them has to perform.
 	  For example it can help a developer to decide whether he should
-	  choose a per cpu workqueue instead of a singlethreaded one.
+	  choose a per-cpu workqueue instead of a singlethreaded one.
 
 config BLK_DEV_IO_TRACE
-	bool "Support for tracing block io actions"
+	bool "Support for tracing block IO actions"
 	depends on SYSFS
 	depends on BLOCK
 	select RELAY
@@ -428,38 +449,55 @@ config BLK_DEV_IO_TRACE
 
 	  If unsure, say N.
 
+config KPROBE_EVENT
+	depends on KPROBES
+	depends on X86
+	bool "Enable kprobes-based dynamic events"
+	select TRACING
+	default y
+	help
+	  This allows the user to add tracing events (similar to tracepoints)
+	  on the fly via the ftrace interface. See
+	  Documentation/trace/kprobetrace.txt for more details.
+
+	  Those events can be inserted wherever kprobes can probe, and record
+	  various register and memory values.
+
+	  This option is also required by perf-probe subcommand of perf tools.
+	  If you want to use perf tools, this option is strongly recommended.
+
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
 	depends on FUNCTION_TRACER
 	depends on HAVE_DYNAMIC_FTRACE
 	default y
 	help
 	  This option will modify all the calls to ftrace dynamically
-	  (will patch them out of the binary image and replaces them
+	  (will patch them out of the binary image and replace them
 	  with a No-Op instruction) as they are called. A table is
 	  created to dynamically enable them again.
 
-	  This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
-	  has native performance as long as no tracing is active.
+	  This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
+	  otherwise has native performance as long as no tracing is active.
 
 	  The changes to the code are done by a kernel thread that
 	  wakes up once a second and checks to see if any ftrace calls
 	  were made. If so, it runs stop_machine (stops all CPUS)
 	  and modifies the code to jump over the call to ftrace.
 
 config FUNCTION_PROFILER
 	bool "Kernel function profiler"
 	depends on FUNCTION_TRACER
 	default n
 	help
 	  This option enables the kernel function profiler. A file is created
 	  in debugfs called function_profile_enabled which defaults to zero.
 	  When a 1 is echoed into this file profiling begins, and when a
-	  zero is entered, profiling stops. A file in the trace_stats
-	  directory called functions, that show the list of functions that
+	  zero is entered, profiling stops. A "functions" file is created in
+	  the trace_stats directory; this file shows the list of functions that
 	  have been hit and their counters.
 
-	  If in doubt, say N
+	  If in doubt, say N.
 
 config FTRACE_MCOUNT_RECORD
 	def_bool y
@@ -518,8 +556,8 @@ config RING_BUFFER_BENCHMARK
 	tristate "Ring buffer benchmark stress tester"
 	depends on RING_BUFFER
 	help
-	  This option creates a test to stress the ring buffer and bench mark it.
-	  It creates its own ring buffer such that it will not interfer with
+	  This option creates a test to stress the ring buffer and benchmark it.
+	  It creates its own ring buffer such that it will not interfere with
 	  any other users of the ring buffer (such as ftrace). It then creates
 	  a producer and consumer that will run for 10 seconds and sleep for
 	  10 seconds. Each interval it will print out the number of events
@@ -528,7 +566,7 @@ config RING_BUFFER_BENCHMARK
 	  It does not disable interrupts or raise its priority, so it may be
 	  affected by processes that are running.
 
-	  If unsure, say N
+	  If unsure, say N.
 
 endif # FTRACE
 
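For reference, the dynamic events enabled by KPROBE_EVENT above are driven entirely through debugfs; a minimal session sketch, following the syntax in Documentation/trace/kprobetrace.txt (the probe name "myprobe" and the target symbol do_sys_open are illustrative only, not part of this commit):

	echo 'p:myprobe do_sys_open' > /sys/kernel/debug/tracing/kprobe_events
	echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable
	cat /sys/kernel/debug/tracing/trace
	echo '-:myprobe' >> /sys/kernel/debug/tracing/kprobe_events

The '-:myprobe' form removes the event again; the tracing core creates the corresponding events/kprobes/ directory on the fly when a probe is defined.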
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 26f03ac07c2b..cd9ecd89ec77 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,6 +53,8 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
+obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
 obj-$(CONFIG_EVENT_TRACING) += power-traces.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 3eb159c277c8..d9d6206e0b14 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -856,6 +856,37 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
 }
 
 /**
+ * blk_add_trace_rq_remap - Add a trace for a request-remap operation
+ * @q:		queue the io is for
+ * @rq:		the source request
+ * @dev:	target device
+ * @from:	source sector
+ *
+ * Description:
+ *     Device mapper remaps request to other devices.
+ *     Add a trace for that action.
+ *
+ **/
+static void blk_add_trace_rq_remap(struct request_queue *q,
+				   struct request *rq, dev_t dev,
+				   sector_t from)
+{
+	struct blk_trace *bt = q->blk_trace;
+	struct blk_io_trace_remap r;
+
+	if (likely(!bt))
+		return;
+
+	r.device_from = cpu_to_be32(dev);
+	r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
+	r.sector_from = cpu_to_be64(from);
+
+	__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
+			rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors,
+			sizeof(r), &r);
+}
+
+/**
  * blk_add_driver_data - Add binary message with driver-specific data
  * @q:		queue the io is for
  * @rq:		io request
@@ -922,10 +953,13 @@ static void blk_register_tracepoints(void)
 	WARN_ON(ret);
 	ret = register_trace_block_remap(blk_add_trace_remap);
 	WARN_ON(ret);
+	ret = register_trace_block_rq_remap(blk_add_trace_rq_remap);
+	WARN_ON(ret);
 }
 
 static void blk_unregister_tracepoints(void)
 {
+	unregister_trace_block_rq_remap(blk_add_trace_rq_remap);
 	unregister_trace_block_remap(blk_add_trace_remap);
 	unregister_trace_block_split(blk_add_trace_split);
 	unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
@@ -1657,6 +1691,11 @@ int blk_trace_init_sysfs(struct device *dev)
 	return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
 }
 
+void blk_trace_remove_sysfs(struct device *dev)
+{
+	sysfs_remove_group(&dev->kobj, &blk_trace_attr_group);
+}
+
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
 #ifdef CONFIG_EVENT_TRACING
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 46592feab5a6..7968762c8167 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -60,6 +60,13 @@ static int last_ftrace_enabled;
 /* Quick disabling of function tracer. */
 int function_trace_stop;
 
+/* List for set_ftrace_pid's pids. */
+LIST_HEAD(ftrace_pids);
+struct ftrace_pid {
+	struct list_head list;
+	struct pid *pid;
+};
+
 /*
  * ftrace_disabled is set when an anomaly is discovered.
  * ftrace_disabled is much stronger than ftrace_enabled.
@@ -78,6 +85,10 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
+#endif
+
 static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
 {
 	struct ftrace_ops *op = ftrace_list;
@@ -155,7 +166,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	else
 		func = ftrace_list_func;
 
-	if (ftrace_pid_trace) {
+	if (!list_empty(&ftrace_pids)) {
 		set_ftrace_pid_function(func);
 		func = ftrace_pid_func;
 	}
@@ -203,7 +214,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 	if (ftrace_list->next == &ftrace_list_end) {
 		ftrace_func_t func = ftrace_list->func;
 
-		if (ftrace_pid_trace) {
+		if (!list_empty(&ftrace_pids)) {
 			set_ftrace_pid_function(func);
 			func = ftrace_pid_func;
 		}
@@ -225,9 +236,13 @@ static void ftrace_update_pid_func(void)
 	if (ftrace_trace_function == ftrace_stub)
 		return;
 
+#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	func = ftrace_trace_function;
+#else
+	func = __ftrace_trace_function;
+#endif
 
-	if (ftrace_pid_trace) {
+	if (!list_empty(&ftrace_pids)) {
 		set_ftrace_pid_function(func);
 		func = ftrace_pid_func;
 	} else {
@@ -736,7 +751,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
  out:
 	mutex_unlock(&ftrace_profile_lock);
 
-	filp->f_pos += cnt;
+	*ppos += cnt;
 
 	return cnt;
 }
@@ -817,8 +832,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
 }
 #endif /* CONFIG_FUNCTION_PROFILER */
 
-/* set when tracing only a pid */
-struct pid *ftrace_pid_trace;
 static struct pid * const ftrace_swapper_pid = &init_struct_pid;
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -1074,14 +1087,9 @@ static void ftrace_replace_code(int enable)
 		failed = __ftrace_replace_code(rec, enable);
 		if (failed) {
 			rec->flags |= FTRACE_FL_FAILED;
-			if ((system_state == SYSTEM_BOOTING) ||
-			    !core_kernel_text(rec->ip)) {
-				ftrace_free_rec(rec);
-			} else {
-				ftrace_bug(failed, rec->ip);
-				/* Stop processing */
-				return;
-			}
+			ftrace_bug(failed, rec->ip);
+			/* Stop processing */
+			return;
 		}
 	} while_for_each_ftrace_rec();
 }
@@ -1262,12 +1270,34 @@ static int ftrace_update_code(struct module *mod)
 		ftrace_new_addrs = p->newlist;
 		p->flags = 0L;
 
-		/* convert record (i.e, patch mcount-call with NOP) */
-		if (ftrace_code_disable(mod, p)) {
-			p->flags |= FTRACE_FL_CONVERTED;
-			ftrace_update_cnt++;
-		} else
-			ftrace_free_rec(p);
+		/*
+		 * Do the initial record convertion from mcount jump
+		 * to the NOP instructions.
+		 */
+		if (!ftrace_code_disable(mod, p)) {
+			ftrace_free_rec(p);
+			continue;
+		}
+
+		p->flags |= FTRACE_FL_CONVERTED;
+		ftrace_update_cnt++;
+
+		/*
+		 * If the tracing is enabled, go ahead and enable the record.
+		 *
+		 * The reason not to enable the record immediatelly is the
+		 * inherent check of ftrace_make_nop/ftrace_make_call for
+		 * correct previous instructions. Making first the NOP
+		 * conversion puts the module to the correct state, thus
+		 * passing the ftrace_make_call check.
+		 */
+		if (ftrace_start_up) {
+			int failed = __ftrace_replace_code(p, 1);
+			if (failed) {
+				ftrace_bug(failed, p->ip);
+				ftrace_free_rec(p);
+			}
+		}
 	}
 
 	stop = ftrace_now(raw_smp_processor_id());
@@ -1657,60 +1687,6 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
 	return ret;
 }
 
-enum {
-	MATCH_FULL,
-	MATCH_FRONT_ONLY,
-	MATCH_MIDDLE_ONLY,
-	MATCH_END_ONLY,
-};
-
-/*
- * (static function - no need for kernel doc)
- *
- * Pass in a buffer containing a glob and this function will
- * set search to point to the search part of the buffer and
- * return the type of search it is (see enum above).
- * This does modify buff.
- *
- * Returns enum type.
- *  search returns the pointer to use for comparison.
- *  not returns 1 if buff started with a '!'
- *     0 otherwise.
- */
-static int
-ftrace_setup_glob(char *buff, int len, char **search, int *not)
-{
-	int type = MATCH_FULL;
-	int i;
-
-	if (buff[0] == '!') {
-		*not = 1;
-		buff++;
-		len--;
-	} else
-		*not = 0;
-
-	*search = buff;
-
-	for (i = 0; i < len; i++) {
-		if (buff[i] == '*') {
-			if (!i) {
-				*search = buff + 1;
-				type = MATCH_END_ONLY;
-			} else {
-				if (type == MATCH_END_ONLY)
-					type = MATCH_MIDDLE_ONLY;
-				else
-					type = MATCH_FRONT_ONLY;
-				buff[i] = 0;
-				break;
-			}
-		}
-	}
-
-	return type;
-}
-
 static int ftrace_match(char *str, char *regex, int len, int type)
 {
 	int matched = 0;
@@ -1748,7 +1724,7 @@ ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
 	return ftrace_match(str, regex, len, type);
 }
 
-static void ftrace_match_records(char *buff, int len, int enable)
+static int ftrace_match_records(char *buff, int len, int enable)
 {
 	unsigned int search_len;
 	struct ftrace_page *pg;
@@ -1757,9 +1733,10 @@ static void ftrace_match_records(char *buff, int len, int enable)
 	char *search;
 	int type;
 	int not;
+	int found = 0;
 
 	flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
-	type = ftrace_setup_glob(buff, len, &search, &not);
+	type = filter_parse_regex(buff, len, &search, &not);
 
 	search_len = strlen(search);
 
@@ -1774,6 +1751,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
 				rec->flags &= ~flag;
 			else
 				rec->flags |= flag;
+			found = 1;
 		}
 		/*
 		 * Only enable filtering if we have a function that
@@ -1783,6 +1761,8 @@ static void ftrace_match_records(char *buff, int len, int enable)
 			ftrace_filtered = 1;
 	} while_for_each_ftrace_rec();
 	mutex_unlock(&ftrace_lock);
+
+	return found;
 }
 
 static int
@@ -1804,7 +1784,7 @@ ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
 	return 1;
 }
 
-static void ftrace_match_module_records(char *buff, char *mod, int enable)
+static int ftrace_match_module_records(char *buff, char *mod, int enable)
 {
 	unsigned search_len = 0;
 	struct ftrace_page *pg;
@@ -1813,6 +1793,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
 	char *search = buff;
 	unsigned long flag;
 	int not = 0;
+	int found = 0;
 
 	flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
 
@@ -1827,7 +1808,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
 	}
 
 	if (strlen(buff)) {
-		type = ftrace_setup_glob(buff, strlen(buff), &search, &not);
+		type = filter_parse_regex(buff, strlen(buff), &search, &not);
 		search_len = strlen(search);
 	}
 
@@ -1843,12 +1824,15 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
 				rec->flags &= ~flag;
 			else
 				rec->flags |= flag;
+			found = 1;
 		}
 		if (enable && (rec->flags & FTRACE_FL_FILTER))
 			ftrace_filtered = 1;
 
 	} while_for_each_ftrace_rec();
 	mutex_unlock(&ftrace_lock);
+
+	return found;
 }
 
 /*
@@ -1877,8 +1861,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
 	if (!strlen(mod))
 		return -EINVAL;
 
-	ftrace_match_module_records(func, mod, enable);
-	return 0;
+	if (ftrace_match_module_records(func, mod, enable))
+		return 0;
+	return -EINVAL;
 }
 
 static struct ftrace_func_command ftrace_mod_cmd = {
@@ -1992,7 +1977,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	int count = 0;
 	char *search;
 
-	type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
+	type = filter_parse_regex(glob, strlen(glob), &search, &not);
 	len = strlen(search);
 
 	/* we do not support '!' for function probes */
@@ -2069,7 +2054,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	else if (glob) {
 		int not;
 
-		type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
+		type = filter_parse_regex(glob, strlen(glob), &search, &not);
 		len = strlen(search);
 
 		/* we do not support '!' for function probes */
@@ -2175,8 +2160,9 @@ static int ftrace_process_regex(char *buff, int len, int enable)
 	func = strsep(&next, ":");
 
 	if (!next) {
-		ftrace_match_records(func, len, enable);
-		return 0;
+		if (ftrace_match_records(func, len, enable))
+			return 0;
+		return ret;
 	}
 
 	/* command found */
@@ -2222,16 +2208,15 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
 		    !trace_parser_cont(parser)) {
 		ret = ftrace_process_regex(parser->buffer,
 					   parser->idx, enable);
-		if (ret)
-			goto out;
-
 		trace_parser_clear(parser);
+		if (ret)
+			goto out_unlock;
 	}
 
 	ret = read;
-
+out_unlock:
 	mutex_unlock(&ftrace_regex_lock);
-out:
+
 	return ret;
 }
 
@@ -2313,6 +2298,32 @@ static int __init set_ftrace_filter(char *str)
 }
 __setup("ftrace_filter=", set_ftrace_filter);
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
+static int __init set_graph_function(char *str)
+{
+	strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
+	return 1;
+}
+__setup("ftrace_graph_filter=", set_graph_function);
+
+static void __init set_ftrace_early_graph(char *buf)
+{
+	int ret;
+	char *func;
+
+	while (buf) {
+		func = strsep(&buf, ",");
+		/* we allow only one expression at a time */
+		ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
+				      func);
+		if (ret)
+			printk(KERN_DEBUG "ftrace: function %s not "
+					  "traceable\n", func);
+	}
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
 static void __init set_ftrace_early_filter(char *buf, int enable)
 {
 	char *func;
@@ -2329,6 +2340,10 @@ static void __init set_ftrace_early_filters(void)
 		set_ftrace_early_filter(ftrace_filter_buf, 1);
 	if (ftrace_notrace_buf[0])
 		set_ftrace_early_filter(ftrace_notrace_buf, 0);
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	if (ftrace_graph_buf[0])
+		set_ftrace_early_graph(ftrace_graph_buf);
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 }
 
 static int
@@ -2514,7 +2529,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 		return -ENODEV;
 
 	/* decode regex */
-	type = ftrace_setup_glob(buffer, strlen(buffer), &search, &not);
+	type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
 	if (not)
 		return -EINVAL;
 
@@ -2537,10 +2552,9 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 				exists = true;
 				break;
 			}
-			if (!exists) {
+			if (!exists)
 				array[(*idx)++] = rec->ip;
-				found = 1;
-			}
+			found = 1;
 		}
 	} while_for_each_ftrace_rec();
 
@@ -2625,7 +2639,7 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 	return 0;
 }
 
-static int ftrace_convert_nops(struct module *mod,
+static int ftrace_process_locs(struct module *mod,
 			       unsigned long *start,
 			       unsigned long *end)
 {
@@ -2658,19 +2672,17 @@ static int ftrace_convert_nops(struct module *mod,
 }
 
 #ifdef CONFIG_MODULES
-void ftrace_release(void *start, void *end)
+void ftrace_release_mod(struct module *mod)
 {
 	struct dyn_ftrace *rec;
 	struct ftrace_page *pg;
-	unsigned long s = (unsigned long)start;
-	unsigned long e = (unsigned long)end;
 
-	if (ftrace_disabled || !start || start == end)
+	if (ftrace_disabled)
 		return;
 
 	mutex_lock(&ftrace_lock);
 	do_for_each_ftrace_rec(pg, rec) {
-		if ((rec->ip >= s) && (rec->ip < e)) {
+		if (within_module_core(rec->ip, mod)) {
 			/*
 			 * rec->ip is changed in ftrace_free_rec()
 			 * It should not between s and e if record was freed.
@@ -2687,7 +2699,7 @@ static void ftrace_init_module(struct module *mod,
 {
 	if (ftrace_disabled || start == end)
 		return;
-	ftrace_convert_nops(mod, start, end);
+	ftrace_process_locs(mod, start, end);
 }
 
 static int ftrace_module_notify(struct notifier_block *self,
@@ -2702,9 +2714,7 @@ static int ftrace_module_notify(struct notifier_block *self,
 				 mod->num_ftrace_callsites);
 		break;
 	case MODULE_STATE_GOING:
-		ftrace_release(mod->ftrace_callsites,
-			       mod->ftrace_callsites +
-				mod->num_ftrace_callsites);
+		ftrace_release_mod(mod);
 		break;
 	}
 
@@ -2750,7 +2760,7 @@ void __init ftrace_init(void)
 
 	last_ftrace_enabled = ftrace_enabled = 1;
 
-	ret = ftrace_convert_nops(NULL,
+	ret = ftrace_process_locs(NULL,
 				  __start_mcount_loc,
 				  __stop_mcount_loc);
 
@@ -2783,23 +2793,6 @@ static inline void ftrace_startup_enable(int command) { }
 # define ftrace_shutdown_sysctl()	do { } while (0)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
-static ssize_t
-ftrace_pid_read(struct file *file, char __user *ubuf,
-		       size_t cnt, loff_t *ppos)
-{
-	char buf[64];
-	int r;
-
-	if (ftrace_pid_trace == ftrace_swapper_pid)
-		r = sprintf(buf, "swapper tasks\n");
-	else if (ftrace_pid_trace)
-		r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace));
-	else
-		r = sprintf(buf, "no pid\n");
-
-	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
-
 static void clear_ftrace_swapper(void)
 {
 	struct task_struct *p;
@@ -2850,14 +2843,12 @@ static void set_ftrace_pid(struct pid *pid)
 	rcu_read_unlock();
 }
 
-static void clear_ftrace_pid_task(struct pid **pid)
+static void clear_ftrace_pid_task(struct pid *pid)
 {
-	if (*pid == ftrace_swapper_pid)
+	if (pid == ftrace_swapper_pid)
 		clear_ftrace_swapper();
 	else
-		clear_ftrace_pid(*pid);
-
-	*pid = NULL;
+		clear_ftrace_pid(pid);
 }
 
 static void set_ftrace_pid_task(struct pid *pid)
@@ -2868,74 +2859,184 @@ static void set_ftrace_pid_task(struct pid *pid)
 	set_ftrace_pid(pid);
 }
 
-static ssize_t
-ftrace_pid_write(struct file *filp, const char __user *ubuf,
-		   size_t cnt, loff_t *ppos)
+static int ftrace_pid_add(int p)
 {
 	struct pid *pid;
-	char buf[64];
-	long val;
-	int ret;
+	struct ftrace_pid *fpid;
+	int ret = -EINVAL;
 
-	if (cnt >= sizeof(buf))
-		return -EINVAL;
+	mutex_lock(&ftrace_lock);
 
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
+	if (!p)
+		pid = ftrace_swapper_pid;
+	else
+		pid = find_get_pid(p);
 
-	buf[cnt] = 0;
+	if (!pid)
+		goto out;
 
-	ret = strict_strtol(buf, 10, &val);
-	if (ret < 0)
-		return ret;
+	ret = 0;
 
-	mutex_lock(&ftrace_lock);
-	if (val < 0) {
-		/* disable pid tracing */
-		if (!ftrace_pid_trace)
-			goto out;
+	list_for_each_entry(fpid, &ftrace_pids, list)
+		if (fpid->pid == pid)
+			goto out_put;
 
-		clear_ftrace_pid_task(&ftrace_pid_trace);
+	ret = -ENOMEM;
 
-	} else {
-		/* swapper task is special */
-		if (!val) {
-			pid = ftrace_swapper_pid;
-			if (pid == ftrace_pid_trace)
-				goto out;
-		} else {
-			pid = find_get_pid(val);
+	fpid = kmalloc(sizeof(*fpid), GFP_KERNEL);
+	if (!fpid)
+		goto out_put;
 
-			if (pid == ftrace_pid_trace) {
-				put_pid(pid);
-				goto out;
-			}
-		}
+	list_add(&fpid->list, &ftrace_pids);
+	fpid->pid = pid;
 
-		if (ftrace_pid_trace)
-			clear_ftrace_pid_task(&ftrace_pid_trace);
+	set_ftrace_pid_task(pid);
 
-		if (!pid)
-			goto out;
+	ftrace_update_pid_func();
+	ftrace_startup_enable(0);
+
+	mutex_unlock(&ftrace_lock);
+	return 0;
 
-		ftrace_pid_trace = pid;
+out_put:
+	if (pid != ftrace_swapper_pid)
+		put_pid(pid);
+
+out:
+	mutex_unlock(&ftrace_lock);
+	return ret;
+}
+
+static void ftrace_pid_reset(void)
+{
+	struct ftrace_pid *fpid, *safe;
+
+	mutex_lock(&ftrace_lock);
+	list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) {
+		struct pid *pid = fpid->pid;
 
-		set_ftrace_pid_task(ftrace_pid_trace);
+		clear_ftrace_pid_task(pid);
+
+		list_del(&fpid->list);
+		kfree(fpid);
 	}
 
-	/* update the function call */
 	ftrace_update_pid_func();
 	ftrace_startup_enable(0);
 
- out:
 	mutex_unlock(&ftrace_lock);
+}
 
-	return cnt;
+static void *fpid_start(struct seq_file *m, loff_t *pos)
+{
+	mutex_lock(&ftrace_lock);
+
+	if (list_empty(&ftrace_pids) && (!*pos))
+		return (void *) 1;
+
+	return seq_list_start(&ftrace_pids, *pos);
+}
+
+static void *fpid_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	if (v == (void *)1)
+		return NULL;
+
+	return seq_list_next(v, &ftrace_pids, pos);
+}
+
+static void fpid_stop(struct seq_file *m, void *p)
+{
+	mutex_unlock(&ftrace_lock);
+}
+
+static int fpid_show(struct seq_file *m, void *v)
+{
+	const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list);
+
+	if (v == (void *)1) {
+		seq_printf(m, "no pid\n");
+		return 0;
+	}
+
+	if (fpid->pid == ftrace_swapper_pid)
+		seq_printf(m, "swapper tasks\n");
+	else
+		seq_printf(m, "%u\n", pid_vnr(fpid->pid));
+
+	return 0;
+}
+
+static const struct seq_operations ftrace_pid_sops = {
+	.start = fpid_start,
+	.next = fpid_next,
+	.stop = fpid_stop,
+	.show = fpid_show,
+};
+
+static int
+ftrace_pid_open(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+
+	if ((file->f_mode & FMODE_WRITE) &&
+	    (file->f_flags & O_TRUNC))
+		ftrace_pid_reset();
+
+	if (file->f_mode & FMODE_READ)
+		ret = seq_open(file, &ftrace_pid_sops);
+
+	return ret;
+}
+
+static ssize_t
+ftrace_pid_write(struct file *filp, const char __user *ubuf,
+		   size_t cnt, loff_t *ppos)
+{
+	char buf[64], *tmp;
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	/*
+	 * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid"
+	 * to clean the filter quietly.
+	 */
+	tmp = strstrip(buf);
+	if (strlen(tmp) == 0)
+		return 1;
+
+	ret = strict_strtol(tmp, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	ret = ftrace_pid_add(val);
+
+	return ret ? ret : cnt;
+}
+
+static int
+ftrace_pid_release(struct inode *inode, struct file *file)
+{
+	if (file->f_mode & FMODE_READ)
+		seq_release(inode, file);
+
+	return 0;
 }
 
 static const struct file_operations ftrace_pid_fops = {
-	.read = ftrace_pid_read,
+	.open = ftrace_pid_open,
 	.write = ftrace_pid_write,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = ftrace_pid_release,
 };
 
 static __init int ftrace_init_debugfs(void)
@@ -3298,4 +3399,3 @@ void ftrace_graph_stop(void)
 	ftrace_stop();
 }
 #endif
-
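The set_ftrace_pid rework above replaces the single ftrace_pid_trace pointer with the ftrace_pids list, so the file now accepts several pids. A sketch of the resulting usage, assuming pids 123 and 456 exist (the semantics follow from ftrace_pid_open/ftrace_pid_write in the diff: O_TRUNC resets the list, each numeric write adds one pid, and an empty write clears quietly):

	echo 123 > /sys/kernel/debug/tracing/set_ftrace_pid
	echo 456 >> /sys/kernel/debug/tracing/set_ftrace_pid
	cat /sys/kernel/debug/tracing/set_ftrace_pid
	echo > /sys/kernel/debug/tracing/set_ftrace_pid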
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 81b1645c8549..a91da69f153a 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -501,7 +501,7 @@ static int __init init_kmem_tracer(void)
 		return 1;
 	}
 
-	if (!register_tracer(&kmem_tracer)) {
+	if (register_tracer(&kmem_tracer) != 0) {
 		pr_warning("Warning: could not register the kmem tracer\n");
 		return 1;
 	}
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index e06c6e3d56a3..9f4f565b01e6 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -14,7 +14,5 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/power.h>
 
-EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
-EXPORT_TRACEPOINT_SYMBOL_GPL(power_end);
 EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index d4ff01970547..2326b04c95c4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -397,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s)
 	int ret;
 
 	ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
-			       "offset:0;\tsize:%u;\n",
-			       (unsigned int)sizeof(field.time_stamp));
+			       "offset:0;\tsize:%u;\tsigned:%u;\n",
+			       (unsigned int)sizeof(field.time_stamp),
+			       (unsigned int)is_signed_type(u64));
 
 	ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
-			       "offset:%u;\tsize:%u;\n",
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			       (unsigned int)offsetof(typeof(field), commit),
-			       (unsigned int)sizeof(field.commit));
+			       (unsigned int)sizeof(field.commit),
+			       (unsigned int)is_signed_type(long));
 
 	ret = trace_seq_printf(s, "\tfield: char data;\t"
-			       "offset:%u;\tsize:%u;\n",
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			       (unsigned int)offsetof(typeof(field), data),
-			       (unsigned int)BUF_PAGE_SIZE);
+			       (unsigned int)BUF_PAGE_SIZE,
+			       (unsigned int)is_signed_type(char));
 
 	return ret;
 }
@@ -420,7 +423,7 @@ struct ring_buffer_per_cpu {
 	int				cpu;
 	struct ring_buffer		*buffer;
 	spinlock_t			reader_lock;	/* serialize readers */
-	raw_spinlock_t			lock;
+	arch_spinlock_t			lock;
 	struct lock_class_key		lock_key;
 	struct list_head		*pages;
 	struct buffer_page		*head_page;	/* read from head */
@@ -483,7 +486,7 @@ struct ring_buffer_iter {
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
-static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu)
+static inline u64 rb_time_stamp(struct ring_buffer *buffer)
 {
 	/* shift to debug/test normalization and TIME_EXTENTS */
 	return buffer->clock() << DEBUG_SHIFT;
@@ -494,7 +497,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
 	u64 time;
 
 	preempt_disable_notrace();
-	time = rb_time_stamp(buffer, cpu);
+	time = rb_time_stamp(buffer);
 	preempt_enable_no_resched_notrace();
 
 	return time;
@@ -599,7 +602,7 @@ static struct list_head *rb_list_head(struct list_head *list)
 }
 
 /*
- * rb_is_head_page - test if the give page is the head page
+ * rb_is_head_page - test if the given page is the head page
  *
  * Because the reader may move the head_page pointer, we can
  * not trust what the head page is (it may be pointing to
@@ -995,7 +998,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	cpu_buffer->buffer = buffer;
 	spin_lock_init(&cpu_buffer->reader_lock);
 	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
-	cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
 	bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
 			    GFP_KERNEL, cpu_to_node(cpu));
@@ -1190,9 +1193,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 	struct list_head *p;
 	unsigned i;
 
-	atomic_inc(&cpu_buffer->record_disabled);
-	synchronize_sched();
-
+	spin_lock_irq(&cpu_buffer->reader_lock);
 	rb_head_page_deactivate(cpu_buffer);
 
 	for (i = 0; i < nr_pages; i++) {
@@ -1207,11 +1208,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 		return;
 
 	rb_reset_cpu(cpu_buffer);
-
 	rb_check_pages(cpu_buffer);
 
-	atomic_dec(&cpu_buffer->record_disabled);
-
+	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
 static void
@@ -1222,9 +1221,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1222 struct list_head *p; 1221 struct list_head *p;
1223 unsigned i; 1222 unsigned i;
1224 1223
1225 atomic_inc(&cpu_buffer->record_disabled);
1226 synchronize_sched();
1227
1228 spin_lock_irq(&cpu_buffer->reader_lock); 1224 spin_lock_irq(&cpu_buffer->reader_lock);
1229 rb_head_page_deactivate(cpu_buffer); 1225 rb_head_page_deactivate(cpu_buffer);
1230 1226
@@ -1237,11 +1233,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1237 list_add_tail(&bpage->list, cpu_buffer->pages); 1233 list_add_tail(&bpage->list, cpu_buffer->pages);
1238 } 1234 }
1239 rb_reset_cpu(cpu_buffer); 1235 rb_reset_cpu(cpu_buffer);
1240 spin_unlock_irq(&cpu_buffer->reader_lock);
1241
1242 rb_check_pages(cpu_buffer); 1236 rb_check_pages(cpu_buffer);
1243 1237
1244 atomic_dec(&cpu_buffer->record_disabled); 1238 spin_unlock_irq(&cpu_buffer->reader_lock);
1245} 1239}
1246 1240
1247/** 1241/**
@@ -1249,11 +1243,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1249 * @buffer: the buffer to resize. 1243 * @buffer: the buffer to resize.
1250 * @size: the new size. 1244 * @size: the new size.
1251 * 1245 *
1252 * The tracer is responsible for making sure that the buffer is
1253 * not being used while changing the size.
1254 * Note: We may be able to change the above requirement by using
1255 * RCU synchronizations.
1256 *
1257 * Minimum size is 2 * BUF_PAGE_SIZE. 1246 * Minimum size is 2 * BUF_PAGE_SIZE.
1258 * 1247 *
1259 * Returns -1 on failure. 1248 * Returns -1 on failure.
@@ -1285,6 +1274,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1285 if (size == buffer_size) 1274 if (size == buffer_size)
1286 return size; 1275 return size;
1287 1276
1277 atomic_inc(&buffer->record_disabled);
1278
1279 /* Make sure all writers are done with this buffer. */
1280 synchronize_sched();
1281
1288 mutex_lock(&buffer->mutex); 1282 mutex_lock(&buffer->mutex);
1289 get_online_cpus(); 1283 get_online_cpus();
1290 1284
@@ -1347,6 +1341,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1347 put_online_cpus(); 1341 put_online_cpus();
1348 mutex_unlock(&buffer->mutex); 1342 mutex_unlock(&buffer->mutex);
1349 1343
1344 atomic_dec(&buffer->record_disabled);
1345
1350 return size; 1346 return size;
1351 1347
1352 free_pages: 1348 free_pages:
@@ -1356,6 +1352,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1356 } 1352 }
1357 put_online_cpus(); 1353 put_online_cpus();
1358 mutex_unlock(&buffer->mutex); 1354 mutex_unlock(&buffer->mutex);
1355 atomic_dec(&buffer->record_disabled);
1359 return -ENOMEM; 1356 return -ENOMEM;
1360 1357
1361 /* 1358 /*
@@ -1365,6 +1362,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1365 out_fail: 1362 out_fail:
1366 put_online_cpus(); 1363 put_online_cpus();
1367 mutex_unlock(&buffer->mutex); 1364 mutex_unlock(&buffer->mutex);
1365 atomic_dec(&buffer->record_disabled);
1368 return -1; 1366 return -1;
1369} 1367}
1370EXPORT_SYMBOL_GPL(ring_buffer_resize); 1368EXPORT_SYMBOL_GPL(ring_buffer_resize);
@@ -1785,9 +1783,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1785static struct ring_buffer_event * 1783static struct ring_buffer_event *
1786rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, 1784rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1787 unsigned long length, unsigned long tail, 1785 unsigned long length, unsigned long tail,
1788 struct buffer_page *commit_page,
1789 struct buffer_page *tail_page, u64 *ts) 1786 struct buffer_page *tail_page, u64 *ts)
1790{ 1787{
1788 struct buffer_page *commit_page = cpu_buffer->commit_page;
1791 struct ring_buffer *buffer = cpu_buffer->buffer; 1789 struct ring_buffer *buffer = cpu_buffer->buffer;
1792 struct buffer_page *next_page; 1790 struct buffer_page *next_page;
1793 int ret; 1791 int ret;
@@ -1868,7 +1866,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1868 * Nested commits always have zero deltas, so 1866 * Nested commits always have zero deltas, so
1869 * just reread the time stamp 1867 * just reread the time stamp
1870 */ 1868 */
1871 *ts = rb_time_stamp(buffer, cpu_buffer->cpu); 1869 *ts = rb_time_stamp(buffer);
1872 next_page->page->time_stamp = *ts; 1870 next_page->page->time_stamp = *ts;
1873 } 1871 }
1874 1872
@@ -1890,13 +1888,10 @@ static struct ring_buffer_event *
1890__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 1888__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1891 unsigned type, unsigned long length, u64 *ts) 1889 unsigned type, unsigned long length, u64 *ts)
1892{ 1890{
1893 struct buffer_page *tail_page, *commit_page; 1891 struct buffer_page *tail_page;
1894 struct ring_buffer_event *event; 1892 struct ring_buffer_event *event;
1895 unsigned long tail, write; 1893 unsigned long tail, write;
1896 1894
1897 commit_page = cpu_buffer->commit_page;
1898 /* we just need to protect against interrupts */
1899 barrier();
1900 tail_page = cpu_buffer->tail_page; 1895 tail_page = cpu_buffer->tail_page;
1901 write = local_add_return(length, &tail_page->write); 1896 write = local_add_return(length, &tail_page->write);
1902 1897
@@ -1907,7 +1902,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1907 /* See if we shot past the end of this buffer page */ 1902 /* See if we shot past the end of this buffer page */
1908 if (write > BUF_PAGE_SIZE) 1903 if (write > BUF_PAGE_SIZE)
1909 return rb_move_tail(cpu_buffer, length, tail, 1904 return rb_move_tail(cpu_buffer, length, tail,
1910 commit_page, tail_page, ts); 1905 tail_page, ts);
1911 1906
1912 /* We reserved something on the buffer */ 1907 /* We reserved something on the buffer */
1913 1908
@@ -2111,7 +2106,7 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2111 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) 2106 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
2112 goto out_fail; 2107 goto out_fail;
2113 2108
2114 ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); 2109 ts = rb_time_stamp(cpu_buffer->buffer);
2115 2110
2116 /* 2111 /*
2117 * Only the first commit can update the timestamp. 2112 * Only the first commit can update the timestamp.
@@ -2681,7 +2676,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2681EXPORT_SYMBOL_GPL(ring_buffer_entries); 2676EXPORT_SYMBOL_GPL(ring_buffer_entries);
2682 2677
2683/** 2678/**
2684 * ring_buffer_overrun_cpu - get the number of overruns in buffer 2679 * ring_buffer_overruns - get the number of overruns in buffer
2685 * @buffer: The ring buffer 2680 * @buffer: The ring buffer
2686 * 2681 *
2687 * Returns the total number of overruns in the ring buffer 2682 * Returns the total number of overruns in the ring buffer
@@ -2832,7 +2827,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2832 int ret; 2827 int ret;
2833 2828
2834 local_irq_save(flags); 2829 local_irq_save(flags);
2835 __raw_spin_lock(&cpu_buffer->lock); 2830 arch_spin_lock(&cpu_buffer->lock);
2836 2831
2837 again: 2832 again:
2838 /* 2833 /*
@@ -2921,7 +2916,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2921 goto again; 2916 goto again;
2922 2917
2923 out: 2918 out:
2924 __raw_spin_unlock(&cpu_buffer->lock); 2919 arch_spin_unlock(&cpu_buffer->lock);
2925 local_irq_restore(flags); 2920 local_irq_restore(flags);
2926 2921
2927 return reader; 2922 return reader;
@@ -3284,9 +3279,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
3284 synchronize_sched(); 3279 synchronize_sched();
3285 3280
3286 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3281 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3287 __raw_spin_lock(&cpu_buffer->lock); 3282 arch_spin_lock(&cpu_buffer->lock);
3288 rb_iter_reset(iter); 3283 rb_iter_reset(iter);
3289 __raw_spin_unlock(&cpu_buffer->lock); 3284 arch_spin_unlock(&cpu_buffer->lock);
3290 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3285 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3291 3286
3292 return iter; 3287 return iter;
@@ -3406,11 +3401,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
3406 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) 3401 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3407 goto out; 3402 goto out;
3408 3403
3409 __raw_spin_lock(&cpu_buffer->lock); 3404 arch_spin_lock(&cpu_buffer->lock);
3410 3405
3411 rb_reset_cpu(cpu_buffer); 3406 rb_reset_cpu(cpu_buffer);
3412 3407
3413 __raw_spin_unlock(&cpu_buffer->lock); 3408 arch_spin_unlock(&cpu_buffer->lock);
3414 3409
3415 out: 3410 out:
3416 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3411 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
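The hunks above move the writer quiescing out of rb_remove_pages()/rb_insert_pages()
(which now only take cpu_buffer->reader_lock) and up into ring_buffer_resize(), which
disables recording once for the whole buffer. A minimal sketch of the resulting shape,
assuming the field and helper names from the hunks; the body is illustrative, not the
full implementation:

	int resize_sketch(struct ring_buffer *buffer, unsigned long size)
	{
		/* stop all writers once, for the entire resize */
		atomic_inc(&buffer->record_disabled);

		/* wait until every in-flight writer has left the buffer */
		synchronize_sched();

		mutex_lock(&buffer->mutex);
		/* add or remove pages per CPU, under each reader_lock */
		mutex_unlock(&buffer->mutex);

		atomic_dec(&buffer->record_disabled);
		return size;
	}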
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 573d3cc762c3..b2477caf09c2 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -35,6 +35,28 @@ static int disable_reader;
35module_param(disable_reader, uint, 0644); 35module_param(disable_reader, uint, 0644);
36MODULE_PARM_DESC(disable_reader, "only run producer"); 36MODULE_PARM_DESC(disable_reader, "only run producer");
37 37
38static int write_iteration = 50;
39module_param(write_iteration, uint, 0644);
40MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings");
41
42static int producer_nice = 19;
43static int consumer_nice = 19;
44
45static int producer_fifo = -1;
46static int consumer_fifo = -1;
47
48module_param(producer_nice, uint, 0644);
49MODULE_PARM_DESC(producer_nice, "nice prio for producer");
50
51module_param(consumer_nice, uint, 0644);
52MODULE_PARM_DESC(consumer_nice, "nice prio for consumer");
53
54module_param(producer_fifo, int, 0644);
55MODULE_PARM_DESC(producer_fifo, "fifo prio for producer");
56
57module_param(consumer_fifo, int, 0644);
58MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer");
59
38static int read_events; 60static int read_events;
39 61
40static int kill_test; 62static int kill_test;
@@ -208,15 +230,18 @@ static void ring_buffer_producer(void)
208 do { 230 do {
209 struct ring_buffer_event *event; 231 struct ring_buffer_event *event;
210 int *entry; 232 int *entry;
211 233 int i;
212 event = ring_buffer_lock_reserve(buffer, 10); 234
213 if (!event) { 235 for (i = 0; i < write_iteration; i++) {
214 missed++; 236 event = ring_buffer_lock_reserve(buffer, 10);
215 } else { 237 if (!event) {
216 hit++; 238 missed++;
217 entry = ring_buffer_event_data(event); 239 } else {
218 *entry = smp_processor_id(); 240 hit++;
219 ring_buffer_unlock_commit(buffer, event); 241 entry = ring_buffer_event_data(event);
242 *entry = smp_processor_id();
243 ring_buffer_unlock_commit(buffer, event);
244 }
220 } 245 }
221 do_gettimeofday(&end_tv); 246 do_gettimeofday(&end_tv);
222 247
@@ -263,6 +288,27 @@ static void ring_buffer_producer(void)
263 288
264 if (kill_test) 289 if (kill_test)
265 trace_printk("ERROR!\n"); 290 trace_printk("ERROR!\n");
291
292 if (!disable_reader) {
293 if (consumer_fifo < 0)
294 trace_printk("Running Consumer at nice: %d\n",
295 consumer_nice);
296 else
297 trace_printk("Running Consumer at SCHED_FIFO %d\n",
298 consumer_fifo);
299 }
300 if (producer_fifo < 0)
301 trace_printk("Running Producer at nice: %d\n",
302 producer_nice);
303 else
304 trace_printk("Running Producer at SCHED_FIFO %d\n",
305 producer_fifo);
306
307 /* Let the user know that the test is running at low priority */
308 if (producer_fifo < 0 && consumer_fifo < 0 &&
309 producer_nice == 19 && consumer_nice == 19)
310 trace_printk("WARNING!!! This test is running at lowest priority.\n");
311
266 trace_printk("Time: %lld (usecs)\n", time); 312 trace_printk("Time: %lld (usecs)\n", time);
267 trace_printk("Overruns: %lld\n", overruns); 313 trace_printk("Overruns: %lld\n", overruns);
268 if (disable_reader) 314 if (disable_reader)
@@ -392,6 +438,27 @@ static int __init ring_buffer_benchmark_init(void)
392 if (IS_ERR(producer)) 438 if (IS_ERR(producer))
393 goto out_kill; 439 goto out_kill;
394 440
441 /*
442 * Run them as low-prio background tasks by default:
443 */
444 if (!disable_reader) {
445 if (consumer_fifo >= 0) {
446 struct sched_param param = {
447 .sched_priority = consumer_fifo
448 };
449 sched_setscheduler(consumer, SCHED_FIFO, &param);
450 } else
451 set_user_nice(consumer, consumer_nice);
452 }
453
454 if (producer_fifo >= 0) {
455 struct sched_param param = {
456 .sched_priority = producer_fifo
457 };
458 sched_setscheduler(producer, SCHED_FIFO, &param);
459 } else
460 set_user_nice(producer, producer_nice);
461
395 return 0; 462 return 0;
396 463
397 out_kill: 464 out_kill:
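The kthread priority setup added above repeats one if/else for the producer and the
consumer. A hypothetical helper showing the intended pattern (sched_setscheduler()
and set_user_nice() are the real kernel APIs; the helper itself is illustrative):

	static void set_bench_prio(struct task_struct *task, int fifo, int nice)
	{
		if (fifo >= 0) {
			/* real-time: run the kthread at the given FIFO priority */
			struct sched_param param = { .sched_priority = fifo };
			sched_setscheduler(task, SCHED_FIFO, &param);
		} else {
			/* otherwise fall back to a (typically very low) nice level */
			set_user_nice(task, nice);
		}
	}

It would be called as set_bench_prio(producer, producer_fifo, producer_nice) and,
when the reader is enabled, set_bench_prio(consumer, consumer_fifo, consumer_nice).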
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b808177af816..ab2bbb0e9429 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -12,7 +12,7 @@
12 * Copyright (C) 2004 William Lee Irwin III 12 * Copyright (C) 2004 William Lee Irwin III
13 */ 13 */
14#include <linux/ring_buffer.h> 14#include <linux/ring_buffer.h>
15#include <linux/utsrelease.h> 15#include <generated/utsrelease.h>
16#include <linux/stacktrace.h> 16#include <linux/stacktrace.h>
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
@@ -129,7 +129,7 @@ static int tracing_set_tracer(const char *buf);
129static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; 129static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
130static char *default_bootup_tracer; 130static char *default_bootup_tracer;
131 131
132static int __init set_ftrace(char *str) 132static int __init set_cmdline_ftrace(char *str)
133{ 133{
134 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); 134 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
135 default_bootup_tracer = bootup_tracer_buf; 135 default_bootup_tracer = bootup_tracer_buf;
@@ -137,7 +137,7 @@ static int __init set_ftrace(char *str)
137 ring_buffer_expanded = 1; 137 ring_buffer_expanded = 1;
138 return 1; 138 return 1;
139} 139}
140__setup("ftrace=", set_ftrace); 140__setup("ftrace=", set_cmdline_ftrace);
141 141
142static int __init set_ftrace_dump_on_oops(char *str) 142static int __init set_ftrace_dump_on_oops(char *str)
143{ 143{
@@ -313,7 +313,6 @@ static const char *trace_options[] = {
313 "bin", 313 "bin",
314 "block", 314 "block",
315 "stacktrace", 315 "stacktrace",
316 "sched-tree",
317 "trace_printk", 316 "trace_printk",
318 "ftrace_preempt", 317 "ftrace_preempt",
319 "branch", 318 "branch",
@@ -493,15 +492,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
493 * protected by per_cpu spinlocks. But the action of the swap 492 * protected by per_cpu spinlocks. But the action of the swap
494 * needs its own lock. 493 * needs its own lock.
495 * 494 *
496 * This is defined as a raw_spinlock_t in order to help 495 * This is defined as an arch_spinlock_t in order to help
497 * with performance when lockdep debugging is enabled. 496 * with performance when lockdep debugging is enabled.
498 * 497 *
499 * It is also used in other places outside the update_max_tr 498 * It is also used in other places outside the update_max_tr
500 * so it needs to be defined outside of the 499 * so it needs to be defined outside of the
501 * CONFIG_TRACER_MAX_TRACE. 500 * CONFIG_TRACER_MAX_TRACE.
502 */ 501 */
503static raw_spinlock_t ftrace_max_lock = 502static arch_spinlock_t ftrace_max_lock =
504 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 503 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
505 504
506#ifdef CONFIG_TRACER_MAX_TRACE 505#ifdef CONFIG_TRACER_MAX_TRACE
507unsigned long __read_mostly tracing_max_latency; 506unsigned long __read_mostly tracing_max_latency;
@@ -555,13 +554,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
555 return; 554 return;
556 555
557 WARN_ON_ONCE(!irqs_disabled()); 556 WARN_ON_ONCE(!irqs_disabled());
558 __raw_spin_lock(&ftrace_max_lock); 557 arch_spin_lock(&ftrace_max_lock);
559 558
560 tr->buffer = max_tr.buffer; 559 tr->buffer = max_tr.buffer;
561 max_tr.buffer = buf; 560 max_tr.buffer = buf;
562 561
563 __update_max_tr(tr, tsk, cpu); 562 __update_max_tr(tr, tsk, cpu);
564 __raw_spin_unlock(&ftrace_max_lock); 563 arch_spin_unlock(&ftrace_max_lock);
565} 564}
566 565
567/** 566/**
@@ -581,7 +580,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
581 return; 580 return;
582 581
583 WARN_ON_ONCE(!irqs_disabled()); 582 WARN_ON_ONCE(!irqs_disabled());
584 __raw_spin_lock(&ftrace_max_lock); 583 arch_spin_lock(&ftrace_max_lock);
585 584
586 ftrace_disable_cpu(); 585 ftrace_disable_cpu();
587 586
@@ -603,7 +602,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
603 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 602 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
604 603
605 __update_max_tr(tr, tsk, cpu); 604 __update_max_tr(tr, tsk, cpu);
606 __raw_spin_unlock(&ftrace_max_lock); 605 arch_spin_unlock(&ftrace_max_lock);
607} 606}
608#endif /* CONFIG_TRACER_MAX_TRACE */ 607#endif /* CONFIG_TRACER_MAX_TRACE */
609 608
@@ -802,7 +801,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
802static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 801static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
803static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; 802static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
804static int cmdline_idx; 803static int cmdline_idx;
805static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; 804static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
806 805
807/* temporary disable recording */ 806/* temporary disable recording */
808static atomic_t trace_record_cmdline_disabled __read_mostly; 807static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -915,7 +914,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
915 * nor do we want to disable interrupts, 914 * nor do we want to disable interrupts,
916 * so if we miss here, then better luck next time. 915 * so if we miss here, then better luck next time.
917 */ 916 */
918 if (!__raw_spin_trylock(&trace_cmdline_lock)) 917 if (!arch_spin_trylock(&trace_cmdline_lock))
919 return; 918 return;
920 919
921 idx = map_pid_to_cmdline[tsk->pid]; 920 idx = map_pid_to_cmdline[tsk->pid];
@@ -940,7 +939,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
940 939
941 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); 940 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
942 941
943 __raw_spin_unlock(&trace_cmdline_lock); 942 arch_spin_unlock(&trace_cmdline_lock);
944} 943}
945 944
946void trace_find_cmdline(int pid, char comm[]) 945void trace_find_cmdline(int pid, char comm[])
@@ -958,14 +957,14 @@ void trace_find_cmdline(int pid, char comm[])
958 } 957 }
959 958
960 preempt_disable(); 959 preempt_disable();
961 __raw_spin_lock(&trace_cmdline_lock); 960 arch_spin_lock(&trace_cmdline_lock);
962 map = map_pid_to_cmdline[pid]; 961 map = map_pid_to_cmdline[pid];
963 if (map != NO_CMDLINE_MAP) 962 if (map != NO_CMDLINE_MAP)
964 strcpy(comm, saved_cmdlines[map]); 963 strcpy(comm, saved_cmdlines[map]);
965 else 964 else
966 strcpy(comm, "<...>"); 965 strcpy(comm, "<...>");
967 966
968 __raw_spin_unlock(&trace_cmdline_lock); 967 arch_spin_unlock(&trace_cmdline_lock);
969 preempt_enable(); 968 preempt_enable();
970} 969}
971 970
@@ -1151,6 +1150,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1151 __ftrace_trace_stack(tr->buffer, flags, skip, pc); 1150 __ftrace_trace_stack(tr->buffer, flags, skip, pc);
1152} 1151}
1153 1152
1153/**
1154 * trace_dump_stack - record a stack back trace in the trace buffer
1155 */
1156void trace_dump_stack(void)
1157{
1158 unsigned long flags;
1159
1160 if (tracing_disabled || tracing_selftest_running)
1161 return;
1162
1163 local_save_flags(flags);
1164
1165 /* skipping 3 traces seems to get us to the caller of this function */
1166 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
1167}
1168
1154void 1169void
1155ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) 1170ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1156{ 1171{
@@ -1251,8 +1266,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1251 */ 1266 */
1252int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 1267int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1253{ 1268{
1254 static raw_spinlock_t trace_buf_lock = 1269 static arch_spinlock_t trace_buf_lock =
1255 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1270 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1256 static u32 trace_buf[TRACE_BUF_SIZE]; 1271 static u32 trace_buf[TRACE_BUF_SIZE];
1257 1272
1258 struct ftrace_event_call *call = &event_bprint; 1273 struct ftrace_event_call *call = &event_bprint;
@@ -1283,7 +1298,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1283 1298
1284 /* Lockdep uses trace_printk for lock tracing */ 1299 /* Lockdep uses trace_printk for lock tracing */
1285 local_irq_save(flags); 1300 local_irq_save(flags);
1286 __raw_spin_lock(&trace_buf_lock); 1301 arch_spin_lock(&trace_buf_lock);
1287 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1302 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1288 1303
1289 if (len > TRACE_BUF_SIZE || len < 0) 1304 if (len > TRACE_BUF_SIZE || len < 0)
@@ -1304,7 +1319,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1304 ring_buffer_unlock_commit(buffer, event); 1319 ring_buffer_unlock_commit(buffer, event);
1305 1320
1306out_unlock: 1321out_unlock:
1307 __raw_spin_unlock(&trace_buf_lock); 1322 arch_spin_unlock(&trace_buf_lock);
1308 local_irq_restore(flags); 1323 local_irq_restore(flags);
1309 1324
1310out: 1325out:
@@ -1334,7 +1349,7 @@ int trace_array_printk(struct trace_array *tr,
1334int trace_array_vprintk(struct trace_array *tr, 1349int trace_array_vprintk(struct trace_array *tr,
1335 unsigned long ip, const char *fmt, va_list args) 1350 unsigned long ip, const char *fmt, va_list args)
1336{ 1351{
1337 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1352 static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1338 static char trace_buf[TRACE_BUF_SIZE]; 1353 static char trace_buf[TRACE_BUF_SIZE];
1339 1354
1340 struct ftrace_event_call *call = &event_print; 1355 struct ftrace_event_call *call = &event_print;
@@ -1360,12 +1375,9 @@ int trace_array_vprintk(struct trace_array *tr,
1360 1375
1361 pause_graph_tracing(); 1376 pause_graph_tracing();
1362 raw_local_irq_save(irq_flags); 1377 raw_local_irq_save(irq_flags);
1363 __raw_spin_lock(&trace_buf_lock); 1378 arch_spin_lock(&trace_buf_lock);
1364 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1379 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1365 1380
1366 len = min(len, TRACE_BUF_SIZE-1);
1367 trace_buf[len] = 0;
1368
1369 size = sizeof(*entry) + len + 1; 1381 size = sizeof(*entry) + len + 1;
1370 buffer = tr->buffer; 1382 buffer = tr->buffer;
1371 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 1383 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
@@ -1373,15 +1385,15 @@ int trace_array_vprintk(struct trace_array *tr,
1373 if (!event) 1385 if (!event)
1374 goto out_unlock; 1386 goto out_unlock;
1375 entry = ring_buffer_event_data(event); 1387 entry = ring_buffer_event_data(event);
1376 entry->ip = ip; 1388 entry->ip = ip;
1377 1389
1378 memcpy(&entry->buf, trace_buf, len); 1390 memcpy(&entry->buf, trace_buf, len);
1379 entry->buf[len] = 0; 1391 entry->buf[len] = '\0';
1380 if (!filter_check_discard(call, entry, buffer, event)) 1392 if (!filter_check_discard(call, entry, buffer, event))
1381 ring_buffer_unlock_commit(buffer, event); 1393 ring_buffer_unlock_commit(buffer, event);
1382 1394
1383 out_unlock: 1395 out_unlock:
1384 __raw_spin_unlock(&trace_buf_lock); 1396 arch_spin_unlock(&trace_buf_lock);
1385 raw_local_irq_restore(irq_flags); 1397 raw_local_irq_restore(irq_flags);
1386 unpause_graph_tracing(); 1398 unpause_graph_tracing();
1387 out: 1399 out:
@@ -1393,7 +1405,7 @@ int trace_array_vprintk(struct trace_array *tr,
1393 1405
1394int trace_vprintk(unsigned long ip, const char *fmt, va_list args) 1406int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1395{ 1407{
1396 return trace_array_printk(&global_trace, ip, fmt, args); 1408 return trace_array_vprintk(&global_trace, ip, fmt, args);
1397} 1409}
1398EXPORT_SYMBOL_GPL(trace_vprintk); 1410EXPORT_SYMBOL_GPL(trace_vprintk);
1399 1411
@@ -1515,6 +1527,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1515 int i = (int)*pos; 1527 int i = (int)*pos;
1516 void *ent; 1528 void *ent;
1517 1529
1530 WARN_ON_ONCE(iter->leftover);
1531
1518 (*pos)++; 1532 (*pos)++;
1519 1533
1520 /* can't go backwards */ 1534 /* can't go backwards */
@@ -1613,8 +1627,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1613 ; 1627 ;
1614 1628
1615 } else { 1629 } else {
1616 l = *pos - 1; 1630 /*
1617 p = s_next(m, p, &l); 1631 * If we overflowed the seq_file before, then we want
1632 * to just reuse the trace_seq buffer again.
1633 */
1634 if (iter->leftover)
1635 p = iter;
1636 else {
1637 l = *pos - 1;
1638 p = s_next(m, p, &l);
1639 }
1618 } 1640 }
1619 1641
1620 trace_event_read_lock(); 1642 trace_event_read_lock();
@@ -1922,6 +1944,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1922static int s_show(struct seq_file *m, void *v) 1944static int s_show(struct seq_file *m, void *v)
1923{ 1945{
1924 struct trace_iterator *iter = v; 1946 struct trace_iterator *iter = v;
1947 int ret;
1925 1948
1926 if (iter->ent == NULL) { 1949 if (iter->ent == NULL) {
1927 if (iter->tr) { 1950 if (iter->tr) {
@@ -1941,9 +1964,27 @@ static int s_show(struct seq_file *m, void *v)
1941 if (!(trace_flags & TRACE_ITER_VERBOSE)) 1964 if (!(trace_flags & TRACE_ITER_VERBOSE))
1942 print_func_help_header(m); 1965 print_func_help_header(m);
1943 } 1966 }
1967 } else if (iter->leftover) {
1968 /*
1969 * If we filled the seq_file buffer earlier, we
1970 * want to just show it now.
1971 */
1972 ret = trace_print_seq(m, &iter->seq);
1973
1974 /* ret should this time be zero, but you never know */
1975 iter->leftover = ret;
1976
1944 } else { 1977 } else {
1945 print_trace_line(iter); 1978 print_trace_line(iter);
1946 trace_print_seq(m, &iter->seq); 1979 ret = trace_print_seq(m, &iter->seq);
1980 /*
1981 * If we overflow the seq_file buffer, then it will
1982 * ask us for this data again at start up.
1983 * Use that instead.
1984 * ret is 0 if seq_file write succeeded.
1985 * -1 otherwise.
1986 */
1987 iter->leftover = ret;
1947 } 1988 }
1948 1989
1949 return 0; 1990 return 0;
@@ -2253,7 +2294,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2253 mutex_lock(&tracing_cpumask_update_lock); 2294 mutex_lock(&tracing_cpumask_update_lock);
2254 2295
2255 local_irq_disable(); 2296 local_irq_disable();
2256 __raw_spin_lock(&ftrace_max_lock); 2297 arch_spin_lock(&ftrace_max_lock);
2257 for_each_tracing_cpu(cpu) { 2298 for_each_tracing_cpu(cpu) {
2258 /* 2299 /*
2259 * Increase/decrease the disabled counter if we are 2300 * Increase/decrease the disabled counter if we are
@@ -2268,7 +2309,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2268 atomic_dec(&global_trace.data[cpu]->disabled); 2309 atomic_dec(&global_trace.data[cpu]->disabled);
2269 } 2310 }
2270 } 2311 }
2271 __raw_spin_unlock(&ftrace_max_lock); 2312 arch_spin_unlock(&ftrace_max_lock);
2272 local_irq_enable(); 2313 local_irq_enable();
2273 2314
2274 cpumask_copy(tracing_cpumask, tracing_cpumask_new); 2315 cpumask_copy(tracing_cpumask, tracing_cpumask_new);
@@ -2290,67 +2331,49 @@ static const struct file_operations tracing_cpumask_fops = {
2290 .write = tracing_cpumask_write, 2331 .write = tracing_cpumask_write,
2291}; 2332};
2292 2333
2293static ssize_t 2334static int tracing_trace_options_show(struct seq_file *m, void *v)
2294tracing_trace_options_read(struct file *filp, char __user *ubuf,
2295 size_t cnt, loff_t *ppos)
2296{ 2335{
2297 struct tracer_opt *trace_opts; 2336 struct tracer_opt *trace_opts;
2298 u32 tracer_flags; 2337 u32 tracer_flags;
2299 int len = 0;
2300 char *buf;
2301 int r = 0;
2302 int i; 2338 int i;
2303 2339
2304
2305 /* calculate max size */
2306 for (i = 0; trace_options[i]; i++) {
2307 len += strlen(trace_options[i]);
2308 len += 3; /* "no" and newline */
2309 }
2310
2311 mutex_lock(&trace_types_lock); 2340 mutex_lock(&trace_types_lock);
2312 tracer_flags = current_trace->flags->val; 2341 tracer_flags = current_trace->flags->val;
2313 trace_opts = current_trace->flags->opts; 2342 trace_opts = current_trace->flags->opts;
2314 2343
2315 /*
2316 * Increase the size with names of options specific
2317 * of the current tracer.
2318 */
2319 for (i = 0; trace_opts[i].name; i++) {
2320 len += strlen(trace_opts[i].name);
2321 len += 3; /* "no" and newline */
2322 }
2323
2324 /* +1 for \0 */
2325 buf = kmalloc(len + 1, GFP_KERNEL);
2326 if (!buf) {
2327 mutex_unlock(&trace_types_lock);
2328 return -ENOMEM;
2329 }
2330
2331 for (i = 0; trace_options[i]; i++) { 2344 for (i = 0; trace_options[i]; i++) {
2332 if (trace_flags & (1 << i)) 2345 if (trace_flags & (1 << i))
2333 r += sprintf(buf + r, "%s\n", trace_options[i]); 2346 seq_printf(m, "%s\n", trace_options[i]);
2334 else 2347 else
2335 r += sprintf(buf + r, "no%s\n", trace_options[i]); 2348 seq_printf(m, "no%s\n", trace_options[i]);
2336 } 2349 }
2337 2350
2338 for (i = 0; trace_opts[i].name; i++) { 2351 for (i = 0; trace_opts[i].name; i++) {
2339 if (tracer_flags & trace_opts[i].bit) 2352 if (tracer_flags & trace_opts[i].bit)
2340 r += sprintf(buf + r, "%s\n", 2353 seq_printf(m, "%s\n", trace_opts[i].name);
2341 trace_opts[i].name);
2342 else 2354 else
2343 r += sprintf(buf + r, "no%s\n", 2355 seq_printf(m, "no%s\n", trace_opts[i].name);
2344 trace_opts[i].name);
2345 } 2356 }
2346 mutex_unlock(&trace_types_lock); 2357 mutex_unlock(&trace_types_lock);
2347 2358
2348 WARN_ON(r >= len + 1); 2359 return 0;
2360}
2349 2361
2350 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2362static int __set_tracer_option(struct tracer *trace,
2363 struct tracer_flags *tracer_flags,
2364 struct tracer_opt *opts, int neg)
2365{
2366 int ret;
2351 2367
2352 kfree(buf); 2368 ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
2353 return r; 2369 if (ret)
2370 return ret;
2371
2372 if (neg)
2373 tracer_flags->val &= ~opts->bit;
2374 else
2375 tracer_flags->val |= opts->bit;
2376 return 0;
2354} 2377}
2355 2378
2356/* Try to assign a tracer specific option */ 2379/* Try to assign a tracer specific option */
@@ -2358,33 +2381,17 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2358{ 2381{
2359 struct tracer_flags *tracer_flags = trace->flags; 2382 struct tracer_flags *tracer_flags = trace->flags;
2360 struct tracer_opt *opts = NULL; 2383 struct tracer_opt *opts = NULL;
2361 int ret = 0, i = 0; 2384 int i;
2362 int len;
2363 2385
2364 for (i = 0; tracer_flags->opts[i].name; i++) { 2386 for (i = 0; tracer_flags->opts[i].name; i++) {
2365 opts = &tracer_flags->opts[i]; 2387 opts = &tracer_flags->opts[i];
2366 len = strlen(opts->name);
2367 2388
2368 if (strncmp(cmp, opts->name, len) == 0) { 2389 if (strcmp(cmp, opts->name) == 0)
2369 ret = trace->set_flag(tracer_flags->val, 2390 return __set_tracer_option(trace, trace->flags,
2370 opts->bit, !neg); 2391 opts, neg);
2371 break;
2372 }
2373 } 2392 }
2374 /* Not found */
2375 if (!tracer_flags->opts[i].name)
2376 return -EINVAL;
2377
2378 /* Refused to handle */
2379 if (ret)
2380 return ret;
2381
2382 if (neg)
2383 tracer_flags->val &= ~opts->bit;
2384 else
2385 tracer_flags->val |= opts->bit;
2386 2393
2387 return 0; 2394 return -EINVAL;
2388} 2395}
2389 2396
2390static void set_tracer_flags(unsigned int mask, int enabled) 2397static void set_tracer_flags(unsigned int mask, int enabled)
@@ -2404,7 +2411,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2404 size_t cnt, loff_t *ppos) 2411 size_t cnt, loff_t *ppos)
2405{ 2412{
2406 char buf[64]; 2413 char buf[64];
2407 char *cmp = buf; 2414 char *cmp;
2408 int neg = 0; 2415 int neg = 0;
2409 int ret; 2416 int ret;
2410 int i; 2417 int i;
@@ -2416,16 +2423,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2416 return -EFAULT; 2423 return -EFAULT;
2417 2424
2418 buf[cnt] = 0; 2425 buf[cnt] = 0;
2426 cmp = strstrip(buf);
2419 2427
2420 if (strncmp(buf, "no", 2) == 0) { 2428 if (strncmp(cmp, "no", 2) == 0) {
2421 neg = 1; 2429 neg = 1;
2422 cmp += 2; 2430 cmp += 2;
2423 } 2431 }
2424 2432
2425 for (i = 0; trace_options[i]; i++) { 2433 for (i = 0; trace_options[i]; i++) {
2426 int len = strlen(trace_options[i]); 2434 if (strcmp(cmp, trace_options[i]) == 0) {
2427
2428 if (strncmp(cmp, trace_options[i], len) == 0) {
2429 set_tracer_flags(1 << i, !neg); 2435 set_tracer_flags(1 << i, !neg);
2430 break; 2436 break;
2431 } 2437 }
@@ -2440,14 +2446,23 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2440 return ret; 2446 return ret;
2441 } 2447 }
2442 2448
2443 filp->f_pos += cnt; 2449 *ppos += cnt;
2444 2450
2445 return cnt; 2451 return cnt;
2446} 2452}
2447 2453
2454static int tracing_trace_options_open(struct inode *inode, struct file *file)
2455{
2456 if (tracing_disabled)
2457 return -ENODEV;
2458 return single_open(file, tracing_trace_options_show, NULL);
2459}
2460
2448static const struct file_operations tracing_iter_fops = { 2461static const struct file_operations tracing_iter_fops = {
2449 .open = tracing_open_generic, 2462 .open = tracing_trace_options_open,
2450 .read = tracing_trace_options_read, 2463 .read = seq_read,
2464 .llseek = seq_lseek,
2465 .release = single_release,
2451 .write = tracing_trace_options_write, 2466 .write = tracing_trace_options_write,
2452}; 2467};
2453 2468
@@ -2582,7 +2597,7 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2582 } 2597 }
2583 mutex_unlock(&trace_types_lock); 2598 mutex_unlock(&trace_types_lock);
2584 2599
2585 filp->f_pos += cnt; 2600 *ppos += cnt;
2586 2601
2587 return cnt; 2602 return cnt;
2588} 2603}
@@ -2764,7 +2779,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2764 if (err) 2779 if (err)
2765 return err; 2780 return err;
2766 2781
2767 filp->f_pos += ret; 2782 *ppos += ret;
2768 2783
2769 return ret; 2784 return ret;
2770} 2785}
@@ -2897,6 +2912,10 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
2897 else 2912 else
2898 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); 2913 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2899 2914
2915
2916 if (iter->trace->pipe_close)
2917 iter->trace->pipe_close(iter);
2918
2900 mutex_unlock(&trace_types_lock); 2919 mutex_unlock(&trace_types_lock);
2901 2920
2902 free_cpumask_var(iter->started); 2921 free_cpumask_var(iter->started);
@@ -3103,7 +3122,7 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3103 __free_page(spd->pages[idx]); 3122 __free_page(spd->pages[idx]);
3104} 3123}
3105 3124
3106static struct pipe_buf_operations tracing_pipe_buf_ops = { 3125static const struct pipe_buf_operations tracing_pipe_buf_ops = {
3107 .can_merge = 0, 3126 .can_merge = 0,
3108 .map = generic_pipe_buf_map, 3127 .map = generic_pipe_buf_map,
3109 .unmap = generic_pipe_buf_unmap, 3128 .unmap = generic_pipe_buf_unmap,
@@ -3299,7 +3318,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3299 } 3318 }
3300 } 3319 }
3301 3320
3302 filp->f_pos += cnt; 3321 *ppos += cnt;
3303 3322
3304 /* If check pages failed, return ENOMEM */ 3323 /* If check pages failed, return ENOMEM */
3305 if (tracing_disabled) 3324 if (tracing_disabled)
@@ -3334,7 +3353,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3334 size_t cnt, loff_t *fpos) 3353 size_t cnt, loff_t *fpos)
3335{ 3354{
3336 char *buf; 3355 char *buf;
3337 char *end;
3338 3356
3339 if (tracing_disabled) 3357 if (tracing_disabled)
3340 return -EINVAL; 3358 return -EINVAL;
@@ -3342,7 +3360,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3342 if (cnt > TRACE_BUF_SIZE) 3360 if (cnt > TRACE_BUF_SIZE)
3343 cnt = TRACE_BUF_SIZE; 3361 cnt = TRACE_BUF_SIZE;
3344 3362
3345 buf = kmalloc(cnt + 1, GFP_KERNEL); 3363 buf = kmalloc(cnt + 2, GFP_KERNEL);
3346 if (buf == NULL) 3364 if (buf == NULL)
3347 return -ENOMEM; 3365 return -ENOMEM;
3348 3366
@@ -3350,35 +3368,31 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3350 kfree(buf); 3368 kfree(buf);
3351 return -EFAULT; 3369 return -EFAULT;
3352 } 3370 }
3371 if (buf[cnt-1] != '\n') {
3372 buf[cnt] = '\n';
3373 buf[cnt+1] = '\0';
3374 } else
3375 buf[cnt] = '\0';
3353 3376
3354 /* Cut from the first nil or newline. */ 3377 cnt = mark_printk("%s", buf);
3355 buf[cnt] = '\0';
3356 end = strchr(buf, '\n');
3357 if (end)
3358 *end = '\0';
3359
3360 cnt = mark_printk("%s\n", buf);
3361 kfree(buf); 3378 kfree(buf);
3362 *fpos += cnt; 3379 *fpos += cnt;
3363 3380
3364 return cnt; 3381 return cnt;
3365} 3382}
3366 3383
3367static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf, 3384static int tracing_clock_show(struct seq_file *m, void *v)
3368 size_t cnt, loff_t *ppos)
3369{ 3385{
3370 char buf[64];
3371 int bufiter = 0;
3372 int i; 3386 int i;
3373 3387
3374 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 3388 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3375 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, 3389 seq_printf(m,
3376 "%s%s%s%s", i ? " " : "", 3390 "%s%s%s%s", i ? " " : "",
3377 i == trace_clock_id ? "[" : "", trace_clocks[i].name, 3391 i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3378 i == trace_clock_id ? "]" : ""); 3392 i == trace_clock_id ? "]" : "");
3379 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n"); 3393 seq_putc(m, '\n');
3380 3394
3381 return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter); 3395 return 0;
3382} 3396}
3383 3397
3384static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 3398static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
@@ -3420,6 +3434,13 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3420 return cnt; 3434 return cnt;
3421} 3435}
3422 3436
3437static int tracing_clock_open(struct inode *inode, struct file *file)
3438{
3439 if (tracing_disabled)
3440 return -ENODEV;
3441 return single_open(file, tracing_clock_show, NULL);
3442}
3443
3423static const struct file_operations tracing_max_lat_fops = { 3444static const struct file_operations tracing_max_lat_fops = {
3424 .open = tracing_open_generic, 3445 .open = tracing_open_generic,
3425 .read = tracing_max_lat_read, 3446 .read = tracing_max_lat_read,
@@ -3458,8 +3479,10 @@ static const struct file_operations tracing_mark_fops = {
3458}; 3479};
3459 3480
3460static const struct file_operations trace_clock_fops = { 3481static const struct file_operations trace_clock_fops = {
3461 .open = tracing_open_generic, 3482 .open = tracing_clock_open,
3462 .read = tracing_clock_read, 3483 .read = seq_read,
3484 .llseek = seq_lseek,
3485 .release = single_release,
3463 .write = tracing_clock_write, 3486 .write = tracing_clock_write,
3464}; 3487};
3465 3488
@@ -3589,7 +3612,7 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3589} 3612}
3590 3613
3591/* Pipe buffer operations for a buffer. */ 3614/* Pipe buffer operations for a buffer. */
3592static struct pipe_buf_operations buffer_pipe_buf_ops = { 3615static const struct pipe_buf_operations buffer_pipe_buf_ops = {
3593 .can_merge = 0, 3616 .can_merge = 0,
3594 .map = generic_pipe_buf_map, 3617 .map = generic_pipe_buf_map,
3595 .unmap = generic_pipe_buf_unmap, 3618 .unmap = generic_pipe_buf_unmap,
@@ -3730,7 +3753,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3730 3753
3731 s = kmalloc(sizeof(*s), GFP_KERNEL); 3754 s = kmalloc(sizeof(*s), GFP_KERNEL);
3732 if (!s) 3755 if (!s)
3733 return ENOMEM; 3756 return -ENOMEM;
3734 3757
3735 trace_seq_init(s); 3758 trace_seq_init(s);
3736 3759
@@ -3920,39 +3943,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3920 if (ret < 0) 3943 if (ret < 0)
3921 return ret; 3944 return ret;
3922 3945
3923 ret = 0; 3946 if (val != 0 && val != 1)
3924 switch (val) { 3947 return -EINVAL;
3925 case 0:
3926 /* do nothing if already cleared */
3927 if (!(topt->flags->val & topt->opt->bit))
3928 break;
3929
3930 mutex_lock(&trace_types_lock);
3931 if (current_trace->set_flag)
3932 ret = current_trace->set_flag(topt->flags->val,
3933 topt->opt->bit, 0);
3934 mutex_unlock(&trace_types_lock);
3935 if (ret)
3936 return ret;
3937 topt->flags->val &= ~topt->opt->bit;
3938 break;
3939 case 1:
3940 /* do nothing if already set */
3941 if (topt->flags->val & topt->opt->bit)
3942 break;
3943 3948
3949 if (!!(topt->flags->val & topt->opt->bit) != val) {
3944 mutex_lock(&trace_types_lock); 3950 mutex_lock(&trace_types_lock);
3945 if (current_trace->set_flag) 3951 ret = __set_tracer_option(current_trace, topt->flags,
3946 ret = current_trace->set_flag(topt->flags->val, 3952 topt->opt, !val);
3947 topt->opt->bit, 1);
3948 mutex_unlock(&trace_types_lock); 3953 mutex_unlock(&trace_types_lock);
3949 if (ret) 3954 if (ret)
3950 return ret; 3955 return ret;
3951 topt->flags->val |= topt->opt->bit;
3952 break;
3953
3954 default:
3955 return -EINVAL;
3956 } 3956 }
3957 3957
3958 *ppos += cnt; 3958 *ppos += cnt;
@@ -4279,8 +4279,8 @@ trace_printk_seq(struct trace_seq *s)
4279 4279
4280static void __ftrace_dump(bool disable_tracing) 4280static void __ftrace_dump(bool disable_tracing)
4281{ 4281{
4282 static raw_spinlock_t ftrace_dump_lock = 4282 static arch_spinlock_t ftrace_dump_lock =
4283 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 4283 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
4284 /* use static because iter can be a bit big for the stack */ 4284 /* use static because iter can be a bit big for the stack */
4285 static struct trace_iterator iter; 4285 static struct trace_iterator iter;
4286 unsigned int old_userobj; 4286 unsigned int old_userobj;
@@ -4290,7 +4290,7 @@ static void __ftrace_dump(bool disable_tracing)
4290 4290
4291 /* only one dump */ 4291 /* only one dump */
4292 local_irq_save(flags); 4292 local_irq_save(flags);
4293 __raw_spin_lock(&ftrace_dump_lock); 4293 arch_spin_lock(&ftrace_dump_lock);
4294 if (dump_ran) 4294 if (dump_ran)
4295 goto out; 4295 goto out;
4296 4296
@@ -4365,7 +4365,7 @@ static void __ftrace_dump(bool disable_tracing)
4365 } 4365 }
4366 4366
4367 out: 4367 out:
4368 __raw_spin_unlock(&ftrace_dump_lock); 4368 arch_spin_unlock(&ftrace_dump_lock);
4369 local_irq_restore(flags); 4369 local_irq_restore(flags);
4370} 4370}
4371 4371
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 542f45554883..4df6a77eb196 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h> 12#include <trace/boot.h>
13#include <linux/kmemtrace.h> 13#include <linux/kmemtrace.h>
14#include <linux/hw_breakpoint.h>
14 15
15#include <linux/trace_seq.h> 16#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 17#include <linux/ftrace_event.h>
@@ -37,6 +38,7 @@ enum trace_type {
37 TRACE_KMEM_ALLOC, 38 TRACE_KMEM_ALLOC,
38 TRACE_KMEM_FREE, 39 TRACE_KMEM_FREE,
39 TRACE_BLK, 40 TRACE_BLK,
41 TRACE_KSYM,
40 42
41 __TRACE_LAST_TYPE, 43 __TRACE_LAST_TYPE,
42}; 44};
@@ -98,9 +100,32 @@ struct syscall_trace_enter {
98struct syscall_trace_exit { 100struct syscall_trace_exit {
99 struct trace_entry ent; 101 struct trace_entry ent;
100 int nr; 102 int nr;
101 unsigned long ret; 103 long ret;
102}; 104};
103 105
106struct kprobe_trace_entry {
107 struct trace_entry ent;
108 unsigned long ip;
109 int nargs;
110 unsigned long args[];
111};
112
113#define SIZEOF_KPROBE_TRACE_ENTRY(n) \
114 (offsetof(struct kprobe_trace_entry, args) + \
115 (sizeof(unsigned long) * (n)))
116
117struct kretprobe_trace_entry {
118 struct trace_entry ent;
119 unsigned long func;
120 unsigned long ret_ip;
121 int nargs;
122 unsigned long args[];
123};
124
125#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
126 (offsetof(struct kretprobe_trace_entry, args) + \
127 (sizeof(unsigned long) * (n)))
128
104/* 129/*
105 * trace_flag_type is an enumeration that holds different 130 * trace_flag_type is an enumeration that holds different
106 * states when a trace occurs. These are: 131 * states when a trace occurs. These are:
@@ -209,6 +234,7 @@ extern void __ftrace_bad_type(void);
209 TRACE_KMEM_ALLOC); \ 234 TRACE_KMEM_ALLOC); \
210 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 235 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
211 TRACE_KMEM_FREE); \ 236 TRACE_KMEM_FREE); \
237 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
212 __ftrace_bad_type(); \ 238 __ftrace_bad_type(); \
213 } while (0) 239 } while (0)
214 240
@@ -246,6 +272,7 @@ struct tracer_flags {
246 * @pipe_open: called when the trace_pipe file is opened 272 * @pipe_open: called when the trace_pipe file is opened
247 * @wait_pipe: override how the user waits for traces on trace_pipe 273 * @wait_pipe: override how the user waits for traces on trace_pipe
248 * @close: called when the trace file is released 274 * @close: called when the trace file is released
275 * @pipe_close: called when the trace_pipe file is released
249 * @read: override the default read callback on trace_pipe 276 * @read: override the default read callback on trace_pipe
250 * @splice_read: override the default splice_read callback on trace_pipe 277 * @splice_read: override the default splice_read callback on trace_pipe
251 * @selftest: selftest to run on boot (see trace_selftest.c) 278 * @selftest: selftest to run on boot (see trace_selftest.c)
@@ -264,6 +291,7 @@ struct tracer {
264 void (*pipe_open)(struct trace_iterator *iter); 291 void (*pipe_open)(struct trace_iterator *iter);
265 void (*wait_pipe)(struct trace_iterator *iter); 292 void (*wait_pipe)(struct trace_iterator *iter);
266 void (*close)(struct trace_iterator *iter); 293 void (*close)(struct trace_iterator *iter);
294 void (*pipe_close)(struct trace_iterator *iter);
267 ssize_t (*read)(struct trace_iterator *iter, 295 ssize_t (*read)(struct trace_iterator *iter,
268 struct file *filp, char __user *ubuf, 296 struct file *filp, char __user *ubuf,
269 size_t cnt, loff_t *ppos); 297 size_t cnt, loff_t *ppos);
@@ -364,6 +392,8 @@ int register_tracer(struct tracer *type);
364void unregister_tracer(struct tracer *type); 392void unregister_tracer(struct tracer *type);
365int is_tracing_stopped(void); 393int is_tracing_stopped(void);
366 394
395extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
396
367extern unsigned long nsecs_to_usecs(unsigned long nsecs); 397extern unsigned long nsecs_to_usecs(unsigned long nsecs);
368 398
369#ifdef CONFIG_TRACER_MAX_TRACE 399#ifdef CONFIG_TRACER_MAX_TRACE
@@ -438,6 +468,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
438 struct trace_array *tr); 468 struct trace_array *tr);
439extern int trace_selftest_startup_hw_branches(struct tracer *trace, 469extern int trace_selftest_startup_hw_branches(struct tracer *trace,
440 struct trace_array *tr); 470 struct trace_array *tr);
471extern int trace_selftest_startup_ksym(struct tracer *trace,
472 struct trace_array *tr);
441#endif /* CONFIG_FTRACE_STARTUP_TEST */ 473#endif /* CONFIG_FTRACE_STARTUP_TEST */
442 474
443extern void *head_page(struct trace_array_cpu *data); 475extern void *head_page(struct trace_array_cpu *data);
@@ -483,10 +515,6 @@ static inline int ftrace_graph_addr(unsigned long addr)
483 return 0; 515 return 0;
484} 516}
485#else 517#else
486static inline int ftrace_trace_addr(unsigned long addr)
487{
488 return 1;
489}
490static inline int ftrace_graph_addr(unsigned long addr) 518static inline int ftrace_graph_addr(unsigned long addr)
491{ 519{
492 return 1; 520 return 1;
@@ -500,12 +528,12 @@ print_graph_function(struct trace_iterator *iter)
500} 528}
501#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 529#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
502 530
503extern struct pid *ftrace_pid_trace; 531extern struct list_head ftrace_pids;
504 532
505#ifdef CONFIG_FUNCTION_TRACER 533#ifdef CONFIG_FUNCTION_TRACER
506static inline int ftrace_trace_task(struct task_struct *task) 534static inline int ftrace_trace_task(struct task_struct *task)
507{ 535{
508 if (!ftrace_pid_trace) 536 if (list_empty(&ftrace_pids))
509 return 1; 537 return 1;
510 538
511 return test_tsk_trace_trace(task); 539 return test_tsk_trace_trace(task);
@@ -569,18 +597,17 @@ enum trace_iterator_flags {
569 TRACE_ITER_BIN = 0x40, 597 TRACE_ITER_BIN = 0x40,
570 TRACE_ITER_BLOCK = 0x80, 598 TRACE_ITER_BLOCK = 0x80,
571 TRACE_ITER_STACKTRACE = 0x100, 599 TRACE_ITER_STACKTRACE = 0x100,
572 TRACE_ITER_SCHED_TREE = 0x200, 600 TRACE_ITER_PRINTK = 0x200,
573 TRACE_ITER_PRINTK = 0x400, 601 TRACE_ITER_PREEMPTONLY = 0x400,
574 TRACE_ITER_PREEMPTONLY = 0x800, 602 TRACE_ITER_BRANCH = 0x800,
575 TRACE_ITER_BRANCH = 0x1000, 603 TRACE_ITER_ANNOTATE = 0x1000,
576 TRACE_ITER_ANNOTATE = 0x2000, 604 TRACE_ITER_USERSTACKTRACE = 0x2000,
577 TRACE_ITER_USERSTACKTRACE = 0x4000, 605 TRACE_ITER_SYM_USEROBJ = 0x4000,
578 TRACE_ITER_SYM_USEROBJ = 0x8000, 606 TRACE_ITER_PRINTK_MSGONLY = 0x8000,
579 TRACE_ITER_PRINTK_MSGONLY = 0x10000, 607 TRACE_ITER_CONTEXT_INFO = 0x10000, /* Print pid/cpu/time */
580 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ 608 TRACE_ITER_LATENCY_FMT = 0x20000,
581 TRACE_ITER_LATENCY_FMT = 0x40000, 609 TRACE_ITER_SLEEP_TIME = 0x40000,
582 TRACE_ITER_SLEEP_TIME = 0x80000, 610 TRACE_ITER_GRAPH_TIME = 0x80000,
583 TRACE_ITER_GRAPH_TIME = 0x100000,
584}; 611};
585 612
586/* 613/*
@@ -687,7 +714,6 @@ struct event_filter {
687 int n_preds; 714 int n_preds;
688 struct filter_pred **preds; 715 struct filter_pred **preds;
689 char *filter_string; 716 char *filter_string;
690 bool no_reset;
691}; 717};
692 718
693struct event_subsystem { 719struct event_subsystem {
@@ -699,22 +725,40 @@ struct event_subsystem {
699}; 725};
700 726
701struct filter_pred; 727struct filter_pred;
728struct regex;
702 729
703typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, 730typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
704 int val1, int val2); 731 int val1, int val2);
705 732
733typedef int (*regex_match_func)(char *str, struct regex *r, int len);
734
735enum regex_type {
736 MATCH_FULL = 0,
737 MATCH_FRONT_ONLY,
738 MATCH_MIDDLE_ONLY,
739 MATCH_END_ONLY,
740};
741
742struct regex {
743 char pattern[MAX_FILTER_STR_VAL];
744 int len;
745 int field_len;
746 regex_match_func match;
747};
748
706struct filter_pred { 749struct filter_pred {
707 filter_pred_fn_t fn; 750 filter_pred_fn_t fn;
708 u64 val; 751 u64 val;
709 char str_val[MAX_FILTER_STR_VAL]; 752 struct regex regex;
710 int str_len; 753 char *field_name;
711 char *field_name; 754 int offset;
712 int offset; 755 int not;
713 int not; 756 int op;
714 int op; 757 int pop_n;
715 int pop_n;
716}; 758};
717 759
760extern enum regex_type
761filter_parse_regex(char *buff, int len, char **search, int *not);
718extern void print_event_filter(struct ftrace_event_call *call, 762extern void print_event_filter(struct ftrace_event_call *call,
719 struct trace_seq *s); 763 struct trace_seq *s);
720extern int apply_event_filter(struct ftrace_event_call *call, 764extern int apply_event_filter(struct ftrace_event_call *call,
@@ -730,7 +774,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
730 struct ring_buffer *buffer, 774 struct ring_buffer *buffer,
731 struct ring_buffer_event *event) 775 struct ring_buffer_event *event)
732{ 776{
733 if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { 777 if (unlikely(call->filter_active) &&
778 !filter_match_preds(call->filter, rec)) {
734 ring_buffer_discard_commit(buffer, event); 779 ring_buffer_discard_commit(buffer, event);
735 return 1; 780 return 1;
736 } 781 }
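The struct regex / enum regex_type additions above back glob-style event filters.
Roughly — the authoritative parsing is filter_parse_regex() in trace_events_filter.c —
a bare string is MATCH_FULL, a trailing '*' gives MATCH_FRONT_ONLY, a leading '*'
gives MATCH_END_ONLY, and a '*' on both ends gives MATCH_MIDDLE_ONLY. An illustrative
matcher with the regex_match_func signature, for the front-anchored case:

	static int match_front_only(char *str, struct regex *r, int len)
	{
		/* compare only the first r->len bytes of the event string */
		return strncmp(str, r->pattern, r->len) == 0;
	}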
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 7a7a9fd249a9..4a194f08f88c 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -34,6 +34,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
34 struct trace_array *tr = branch_tracer; 34 struct trace_array *tr = branch_tracer;
35 struct ring_buffer_event *event; 35 struct ring_buffer_event *event;
36 struct trace_branch *entry; 36 struct trace_branch *entry;
37 struct ring_buffer *buffer;
37 unsigned long flags; 38 unsigned long flags;
38 int cpu, pc; 39 int cpu, pc;
39 const char *p; 40 const char *p;
@@ -54,7 +55,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
54 goto out; 55 goto out;
55 56
56 pc = preempt_count(); 57 pc = preempt_count();
57 event = trace_buffer_lock_reserve(tr, TRACE_BRANCH, 58 buffer = tr->buffer;
59 event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH,
58 sizeof(*entry), flags, pc); 60 sizeof(*entry), flags, pc);
59 if (!event) 61 if (!event)
60 goto out; 62 goto out;
@@ -74,8 +76,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
74 entry->line = f->line; 76 entry->line = f->line;
75 entry->correct = val == expect; 77 entry->correct = val == expect;
76 78
77 if (!filter_check_discard(call, entry, tr->buffer, event)) 79 if (!filter_check_discard(call, entry, buffer, event))
78 ring_buffer_unlock_commit(tr->buffer, event); 80 ring_buffer_unlock_commit(buffer, event);
79 81
80 out: 82 out:
81 atomic_dec(&tr->data[cpu]->disabled); 83 atomic_dec(&tr->data[cpu]->disabled);
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 20c5f92e28a8..84a3a7ba072a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -20,6 +20,8 @@
20#include <linux/ktime.h> 20#include <linux/ktime.h>
21#include <linux/trace_clock.h> 21#include <linux/trace_clock.h>
22 22
23#include "trace.h"
24
23/* 25/*
24 * trace_clock_local(): the simplest and least coherent tracing clock. 26 * trace_clock_local(): the simplest and least coherent tracing clock.
25 * 27 *
@@ -28,17 +30,17 @@
28 */ 30 */
29u64 notrace trace_clock_local(void) 31u64 notrace trace_clock_local(void)
30{ 32{
31 unsigned long flags;
32 u64 clock; 33 u64 clock;
34 int resched;
33 35
34 /* 36 /*
35 * sched_clock() is an architecture implemented, fast, scalable, 37 * sched_clock() is an architecture implemented, fast, scalable,
36 * lockless clock. It is not guaranteed to be coherent across 38 * lockless clock. It is not guaranteed to be coherent across
37 * CPUs, nor across CPU idle events. 39 * CPUs, nor across CPU idle events.
38 */ 40 */
39 raw_local_irq_save(flags); 41 resched = ftrace_preempt_disable();
40 clock = sched_clock(); 42 clock = sched_clock();
41 raw_local_irq_restore(flags); 43 ftrace_preempt_enable(resched);
42 44
43 return clock; 45 return clock;
44} 46}
@@ -69,10 +71,10 @@ u64 notrace trace_clock(void)
69/* keep prev_time and lock in the same cacheline. */ 71/* keep prev_time and lock in the same cacheline. */
70static struct { 72static struct {
71 u64 prev_time; 73 u64 prev_time;
72 raw_spinlock_t lock; 74 arch_spinlock_t lock;
73} trace_clock_struct ____cacheline_aligned_in_smp = 75} trace_clock_struct ____cacheline_aligned_in_smp =
74 { 76 {
75 .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED, 77 .lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
76 }; 78 };
77 79
78u64 notrace trace_clock_global(void) 80u64 notrace trace_clock_global(void)
@@ -92,7 +94,7 @@ u64 notrace trace_clock_global(void)
92 if (unlikely(in_nmi())) 94 if (unlikely(in_nmi()))
93 goto out; 95 goto out;
94 96
95 __raw_spin_lock(&trace_clock_struct.lock); 97 arch_spin_lock(&trace_clock_struct.lock);
96 98
97 /* 99 /*
98 * TODO: if this happens often then maybe we should reset 100 * TODO: if this happens often then maybe we should reset
@@ -104,7 +106,7 @@ u64 notrace trace_clock_global(void)
104 106
105 trace_clock_struct.prev_time = now; 107 trace_clock_struct.prev_time = now;
106 108
107 __raw_spin_unlock(&trace_clock_struct.lock); 109 arch_spin_unlock(&trace_clock_struct.lock);
108 110
109 out: 111 out:
110 raw_local_irq_restore(flags); 112 raw_local_irq_restore(flags);
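trace_clock_local() above trades raw_local_irq_save() for the ftrace preempt-disable
helpers: sched_clock() only needs to stay on one CPU, it does not need interrupts off.
A sketch of the underlying pattern using the plain notrace preemption API (the
ftrace_preempt_* wrappers used in the hunk additionally track whether a reschedule
was already pending):

	u64 read_local_clock(void)
	{
		u64 clock;

		preempt_disable_notrace();	/* pin to this CPU, cheaply */
		clock = sched_clock();
		preempt_enable_notrace();

		return clock;
	}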
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ead3d724599d..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
364 F_printk("type:%u call_site:%lx ptr:%p", 364 F_printk("type:%u call_site:%lx ptr:%p",
365 __entry->type_id, __entry->call_site, __entry->ptr) 365 __entry->type_id, __entry->call_site, __entry->ptr)
366); 366);
367
368FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
369
370 TRACE_KSYM,
371
372 F_STRUCT(
373 __field( unsigned long, ip )
374 __field( unsigned char, type )
375 __array( char , cmd, TASK_COMM_LEN )
376 __field( unsigned long, addr )
377 ),
378
379 F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
380 (void *)__entry->ip, (unsigned int)__entry->type,
381 (void *)__entry->addr, __entry->cmd)
382);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index dd44b8768867..9e25573242cf 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,17 +8,14 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include "trace.h" 9#include "trace.h"
10 10
11/*
12 * We can't use a size but a type in alloc_percpu()
13 * So let's create a dummy type that matches the desired size
14 */
15typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
16 11
17char *trace_profile_buf; 12char *perf_trace_buf;
18EXPORT_SYMBOL_GPL(trace_profile_buf); 13EXPORT_SYMBOL_GPL(perf_trace_buf);
14
15char *perf_trace_buf_nmi;
16EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
19 17
20char *trace_profile_buf_nmi; 18typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t;
21EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
22 19
23/* Count the events in use (per event id, not per instance) */ 20/* Count the events in use (per event id, not per instance) */
24static int total_profile_count; 21static int total_profile_count;
@@ -28,33 +25,38 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
28 char *buf; 25 char *buf;
29 int ret = -ENOMEM; 26 int ret = -ENOMEM;
30 27
31 if (atomic_inc_return(&event->profile_count)) 28 if (event->profile_count++ > 0)
32 return 0; 29 return 0;
33 30
34 if (!total_profile_count++) { 31 if (!total_profile_count) {
35 buf = (char *)alloc_percpu(profile_buf_t); 32 buf = (char *)alloc_percpu(perf_trace_t);
36 if (!buf) 33 if (!buf)
37 goto fail_buf; 34 goto fail_buf;
38 35
39 rcu_assign_pointer(trace_profile_buf, buf); 36 rcu_assign_pointer(perf_trace_buf, buf);
40 37
41 buf = (char *)alloc_percpu(profile_buf_t); 38 buf = (char *)alloc_percpu(perf_trace_t);
42 if (!buf) 39 if (!buf)
43 goto fail_buf_nmi; 40 goto fail_buf_nmi;
44 41
45 rcu_assign_pointer(trace_profile_buf_nmi, buf); 42 rcu_assign_pointer(perf_trace_buf_nmi, buf);
46 } 43 }
47 44
48 ret = event->profile_enable(); 45 ret = event->profile_enable(event);
49 if (!ret) 46 if (!ret) {
47 total_profile_count++;
50 return 0; 48 return 0;
49 }
51 50
52 kfree(trace_profile_buf_nmi);
53fail_buf_nmi: 51fail_buf_nmi:
54 kfree(trace_profile_buf); 52 if (!total_profile_count) {
53 free_percpu(perf_trace_buf_nmi);
54 free_percpu(perf_trace_buf);
55 perf_trace_buf_nmi = NULL;
56 perf_trace_buf = NULL;
57 }
55fail_buf: 58fail_buf:
56 total_profile_count--; 59 event->profile_count--;
57 atomic_dec(&event->profile_count);
58 60
59 return ret; 61 return ret;
60} 62}
@@ -81,17 +83,17 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
81{ 83{
82 char *buf, *nmi_buf; 84 char *buf, *nmi_buf;
83 85
84 if (!atomic_add_negative(-1, &event->profile_count)) 86 if (--event->profile_count > 0)
85 return; 87 return;
86 88
87 event->profile_disable(); 89 event->profile_disable(event);
88 90
89 if (!--total_profile_count) { 91 if (!--total_profile_count) {
90 buf = trace_profile_buf; 92 buf = perf_trace_buf;
91 rcu_assign_pointer(trace_profile_buf, NULL); 93 rcu_assign_pointer(perf_trace_buf, NULL);
92 94
93 nmi_buf = trace_profile_buf_nmi; 95 nmi_buf = perf_trace_buf_nmi;
94 rcu_assign_pointer(trace_profile_buf_nmi, NULL); 96 rcu_assign_pointer(perf_trace_buf_nmi, NULL);
95 97
96 /* 98 /*
 97 * Ensure every events in profiling have finished before 99 * Ensure every events in profiling have finished before
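Two things happen in trace_event_profile.c besides the s/trace_profile/perf_trace/ rename. First, event->profile_count drops from an atomic_t to a plain int, which works because the enable/disable paths are serialized under event_mutex. Second, total_profile_count is now only bumped once profile_enable() succeeds, so the error path can tell whether the per-cpu buffers are really unused, and it releases them with free_percpu() instead of the earlier (incorrect) kfree(). The perf_trace_t typedef keeps the old trick: alloc_percpu() takes a type, not a byte count, so a sized char-array type stands in for "FTRACE_MAX_PROFILE_SIZE scratch bytes per CPU". A sketch of that idiom:

/* Sketch: giving alloc_percpu() a type that means "N scratch bytes".
 * FTRACE_MAX_PROFILE_SIZE is defined elsewhere in the tracer headers.
 */
typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t;

static char *alloc_trace_buf(void)
{
	/* one FTRACE_MAX_PROFILE_SIZE buffer per possible CPU */
	char *buf = (char *)alloc_percpu(perf_trace_t);

	if (!buf)
		return NULL;	/* caller unwinds exactly as in the hunk */
	return buf;
}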
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index d128f65778e6..189b09baf4fb 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
78 if (ret) \ 78 if (ret) \
79 return ret; 79 return ret;
80 80
81int trace_define_common_fields(struct ftrace_event_call *call) 81static int trace_define_common_fields(struct ftrace_event_call *call)
82{ 82{
83 int ret; 83 int ret;
84 struct trace_entry ent; 84 struct trace_entry ent;
@@ -91,11 +91,8 @@ int trace_define_common_fields(struct ftrace_event_call *call)
91 91
92 return ret; 92 return ret;
93} 93}
94EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 94
96#ifdef CONFIG_MODULES 95void trace_destroy_fields(struct ftrace_event_call *call)
97
98static void trace_destroy_fields(struct ftrace_event_call *call)
99{ 96{
100 struct ftrace_event_field *field, *next; 97 struct ftrace_event_field *field, *next;
101 98
@@ -107,27 +104,49 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
107 } 104 }
108} 105}
109 106
110#endif /* CONFIG_MODULES */ 107int trace_event_raw_init(struct ftrace_event_call *call)
108{
109 int id;
110
111 id = register_ftrace_event(call->event);
112 if (!id)
113 return -ENODEV;
114 call->id = id;
115 INIT_LIST_HEAD(&call->fields);
116
117 return 0;
118}
119EXPORT_SYMBOL_GPL(trace_event_raw_init);
111 120
112static void ftrace_event_enable_disable(struct ftrace_event_call *call, 121static int ftrace_event_enable_disable(struct ftrace_event_call *call,
113 int enable) 122 int enable)
114{ 123{
124 int ret = 0;
125
115 switch (enable) { 126 switch (enable) {
116 case 0: 127 case 0:
117 if (call->enabled) { 128 if (call->enabled) {
118 call->enabled = 0; 129 call->enabled = 0;
119 tracing_stop_cmdline_record(); 130 tracing_stop_cmdline_record();
120 call->unregfunc(call->data); 131 call->unregfunc(call);
121 } 132 }
122 break; 133 break;
123 case 1: 134 case 1:
124 if (!call->enabled) { 135 if (!call->enabled) {
125 call->enabled = 1;
126 tracing_start_cmdline_record(); 136 tracing_start_cmdline_record();
127 call->regfunc(call->data); 137 ret = call->regfunc(call);
138 if (ret) {
139 tracing_stop_cmdline_record();
140 pr_info("event trace: Could not enable event "
141 "%s\n", call->name);
142 break;
143 }
144 call->enabled = 1;
128 } 145 }
129 break; 146 break;
130 } 147 }
148
149 return ret;
131} 150}
132 151
133static void ftrace_clear_events(void) 152static void ftrace_clear_events(void)
@@ -406,7 +425,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
406 case 0: 425 case 0:
407 case 1: 426 case 1:
408 mutex_lock(&event_mutex); 427 mutex_lock(&event_mutex);
409 ftrace_event_enable_disable(call, val); 428 ret = ftrace_event_enable_disable(call, val);
410 mutex_unlock(&event_mutex); 429 mutex_unlock(&event_mutex);
411 break; 430 break;
412 431
@@ -416,7 +435,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
416 435
417 *ppos += cnt; 436 *ppos += cnt;
418 437
419 return cnt; 438 return ret ? ret : cnt;
420} 439}
421 440
422static ssize_t 441static ssize_t
@@ -507,7 +526,7 @@ extern char *__bad_type_size(void);
507#define FIELD(type, name) \ 526#define FIELD(type, name) \
508 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ 527 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
509 #type, "common_" #name, offsetof(typeof(field), name), \ 528 #type, "common_" #name, offsetof(typeof(field), name), \
510 sizeof(field.name) 529 sizeof(field.name), is_signed_type(type)
511 530
512static int trace_write_header(struct trace_seq *s) 531static int trace_write_header(struct trace_seq *s)
513{ 532{
@@ -515,17 +534,17 @@ static int trace_write_header(struct trace_seq *s)
515 534
516 /* struct trace_entry */ 535 /* struct trace_entry */
517 return trace_seq_printf(s, 536 return trace_seq_printf(s,
518 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 537 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
519 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 538 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
520 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 539 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
521 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 540 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
522 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 541 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
523 "\n", 542 "\n",
524 FIELD(unsigned short, type), 543 FIELD(unsigned short, type),
525 FIELD(unsigned char, flags), 544 FIELD(unsigned char, flags),
526 FIELD(unsigned char, preempt_count), 545 FIELD(unsigned char, preempt_count),
527 FIELD(int, pid), 546 FIELD(int, pid),
528 FIELD(int, lock_depth)); 547 FIELD(int, lock_depth));
529} 548}
530 549
531static ssize_t 550static ssize_t
@@ -878,9 +897,9 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
878 "'%s/filter' entry\n", name); 897 "'%s/filter' entry\n", name);
879 } 898 }
880 899
881 entry = trace_create_file("enable", 0644, system->entry, 900 trace_create_file("enable", 0644, system->entry,
882 (void *)system->name, 901 (void *)system->name,
883 &ftrace_system_enable_fops); 902 &ftrace_system_enable_fops);
884 903
885 return system->entry; 904 return system->entry;
886} 905}
@@ -892,7 +911,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
892 const struct file_operations *filter, 911 const struct file_operations *filter,
893 const struct file_operations *format) 912 const struct file_operations *format)
894{ 913{
895 struct dentry *entry;
896 int ret; 914 int ret;
897 915
898 /* 916 /*
@@ -910,55 +928,76 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
910 } 928 }
911 929
912 if (call->regfunc) 930 if (call->regfunc)
913 entry = trace_create_file("enable", 0644, call->dir, call, 931 trace_create_file("enable", 0644, call->dir, call,
914 enable); 932 enable);
915 933
916 if (call->id && call->profile_enable) 934 if (call->id && call->profile_enable)
917 entry = trace_create_file("id", 0444, call->dir, call, 935 trace_create_file("id", 0444, call->dir, call,
918 id); 936 id);
919 937
920 if (call->define_fields) { 938 if (call->define_fields) {
921 ret = call->define_fields(call); 939 ret = trace_define_common_fields(call);
940 if (!ret)
941 ret = call->define_fields(call);
922 if (ret < 0) { 942 if (ret < 0) {
923 pr_warning("Could not initialize trace point" 943 pr_warning("Could not initialize trace point"
924 " events/%s\n", call->name); 944 " events/%s\n", call->name);
925 return ret; 945 return ret;
926 } 946 }
927 entry = trace_create_file("filter", 0644, call->dir, call, 947 trace_create_file("filter", 0644, call->dir, call,
928 filter); 948 filter);
929 } 949 }
930 950
931 /* A trace may not want to export its format */ 951 /* A trace may not want to export its format */
932 if (!call->show_format) 952 if (!call->show_format)
933 return 0; 953 return 0;
934 954
935 entry = trace_create_file("format", 0444, call->dir, call, 955 trace_create_file("format", 0444, call->dir, call,
936 format); 956 format);
937 957
938 return 0; 958 return 0;
939} 959}
940 960
941#define for_each_event(event, start, end) \ 961static int __trace_add_event_call(struct ftrace_event_call *call)
942 for (event = start; \ 962{
943 (unsigned long)event < (unsigned long)end; \ 963 struct dentry *d_events;
944 event++) 964 int ret;
945 965
946#ifdef CONFIG_MODULES 966 if (!call->name)
967 return -EINVAL;
947 968
948static LIST_HEAD(ftrace_module_file_list); 969 if (call->raw_init) {
970 ret = call->raw_init(call);
971 if (ret < 0) {
972 if (ret != -ENOSYS)
973 pr_warning("Could not initialize trace "
974 "events/%s\n", call->name);
975 return ret;
976 }
977 }
949 978
950/* 979 d_events = event_trace_events_dir();
951 * Modules must own their file_operations to keep up with 980 if (!d_events)
952 * reference counting. 981 return -ENOENT;
953 */ 982
954struct ftrace_module_file_ops { 983 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
955 struct list_head list; 984 &ftrace_enable_fops, &ftrace_event_filter_fops,
956 struct module *mod; 985 &ftrace_event_format_fops);
957 struct file_operations id; 986 if (!ret)
958 struct file_operations enable; 987 list_add(&call->list, &ftrace_events);
959 struct file_operations format; 988
960 struct file_operations filter; 989 return ret;
961}; 990}
991
992/* Add an additional event_call dynamically */
993int trace_add_event_call(struct ftrace_event_call *call)
994{
995 int ret;
996 mutex_lock(&event_mutex);
997 ret = __trace_add_event_call(call);
998 mutex_unlock(&event_mutex);
999 return ret;
1000}
962 1001
963static void remove_subsystem_dir(const char *name) 1002static void remove_subsystem_dir(const char *name)
964{ 1003{
@@ -986,6 +1025,53 @@ static void remove_subsystem_dir(const char *name)
986 } 1025 }
987} 1026}
988 1027
1028/*
1029 * Must be called under locking both of event_mutex and trace_event_mutex.
1030 */
1031static void __trace_remove_event_call(struct ftrace_event_call *call)
1032{
1033 ftrace_event_enable_disable(call, 0);
1034 if (call->event)
1035 __unregister_ftrace_event(call->event);
1036 debugfs_remove_recursive(call->dir);
1037 list_del(&call->list);
1038 trace_destroy_fields(call);
1039 destroy_preds(call);
1040 remove_subsystem_dir(call->system);
1041}
1042
1043/* Remove an event_call */
1044void trace_remove_event_call(struct ftrace_event_call *call)
1045{
1046 mutex_lock(&event_mutex);
1047 down_write(&trace_event_mutex);
1048 __trace_remove_event_call(call);
1049 up_write(&trace_event_mutex);
1050 mutex_unlock(&event_mutex);
1051}
1052
1053#define for_each_event(event, start, end) \
1054 for (event = start; \
1055 (unsigned long)event < (unsigned long)end; \
1056 event++)
1057
1058#ifdef CONFIG_MODULES
1059
1060static LIST_HEAD(ftrace_module_file_list);
1061
1062/*
1063 * Modules must own their file_operations to keep up with
1064 * reference counting.
1065 */
1066struct ftrace_module_file_ops {
1067 struct list_head list;
1068 struct module *mod;
1069 struct file_operations id;
1070 struct file_operations enable;
1071 struct file_operations format;
1072 struct file_operations filter;
1073};
1074
989static struct ftrace_module_file_ops * 1075static struct ftrace_module_file_ops *
990trace_create_file_ops(struct module *mod) 1076trace_create_file_ops(struct module *mod)
991{ 1077{
@@ -1043,7 +1129,7 @@ static void trace_module_add_events(struct module *mod)
1043 if (!call->name) 1129 if (!call->name)
1044 continue; 1130 continue;
1045 if (call->raw_init) { 1131 if (call->raw_init) {
1046 ret = call->raw_init(); 1132 ret = call->raw_init(call);
1047 if (ret < 0) { 1133 if (ret < 0) {
1048 if (ret != -ENOSYS) 1134 if (ret != -ENOSYS)
1049 pr_warning("Could not initialize trace " 1135 pr_warning("Could not initialize trace "
@@ -1061,10 +1147,11 @@ static void trace_module_add_events(struct module *mod)
1061 return; 1147 return;
1062 } 1148 }
1063 call->mod = mod; 1149 call->mod = mod;
1064 list_add(&call->list, &ftrace_events); 1150 ret = event_create_dir(call, d_events,
1065 event_create_dir(call, d_events, 1151 &file_ops->id, &file_ops->enable,
1066 &file_ops->id, &file_ops->enable, 1152 &file_ops->filter, &file_ops->format);
1067 &file_ops->filter, &file_ops->format); 1153 if (!ret)
1154 list_add(&call->list, &ftrace_events);
1068 } 1155 }
1069} 1156}
1070 1157
@@ -1078,14 +1165,7 @@ static void trace_module_remove_events(struct module *mod)
1078 list_for_each_entry_safe(call, p, &ftrace_events, list) { 1165 list_for_each_entry_safe(call, p, &ftrace_events, list) {
1079 if (call->mod == mod) { 1166 if (call->mod == mod) {
1080 found = true; 1167 found = true;
1081 ftrace_event_enable_disable(call, 0); 1168 __trace_remove_event_call(call);
1082 if (call->event)
1083 __unregister_ftrace_event(call->event);
1084 debugfs_remove_recursive(call->dir);
1085 list_del(&call->list);
1086 trace_destroy_fields(call);
1087 destroy_preds(call);
1088 remove_subsystem_dir(call->system);
1089 } 1169 }
1090 } 1170 }
1091 1171
@@ -1203,7 +1283,7 @@ static __init int event_trace_init(void)
1203 if (!call->name) 1283 if (!call->name)
1204 continue; 1284 continue;
1205 if (call->raw_init) { 1285 if (call->raw_init) {
1206 ret = call->raw_init(); 1286 ret = call->raw_init(call);
1207 if (ret < 0) { 1287 if (ret < 0) {
1208 if (ret != -ENOSYS) 1288 if (ret != -ENOSYS)
1209 pr_warning("Could not initialize trace " 1289 pr_warning("Could not initialize trace "
@@ -1211,10 +1291,12 @@ static __init int event_trace_init(void)
1211 continue; 1291 continue;
1212 } 1292 }
1213 } 1293 }
1214 list_add(&call->list, &ftrace_events); 1294 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1215 event_create_dir(call, d_events, &ftrace_event_id_fops, 1295 &ftrace_enable_fops,
1216 &ftrace_enable_fops, &ftrace_event_filter_fops, 1296 &ftrace_event_filter_fops,
1217 &ftrace_event_format_fops); 1297 &ftrace_event_format_fops);
1298 if (!ret)
1299 list_add(&call->list, &ftrace_events);
1218 } 1300 }
1219 1301
1220 while (true) { 1302 while (true) {
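Most of the trace_events.c churn serves the new kprobe tracer further down: raw_init/regfunc/unregfunc now take the ftrace_event_call itself (so shared callbacks replace one generated stub per event), enable paths propagate errors back to whoever wrote the 'enable' file, and an event only lands on ftrace_events once its debugfs directory was actually created. trace_add_event_call()/trace_remove_event_call() expose all of this for events created at runtime. A hedged sketch of a dynamic registration, with hypothetical callbacks (the real consumer is trace_kprobe.c below):

/* Sketch only: callback signatures follow the new convention above;
 * every name here is made up for illustration.
 */
static int my_reg(struct ftrace_event_call *call)
{
	return 0;		/* hook the probe up; 0 on success */
}

static void my_unreg(struct ftrace_event_call *call)
{
	/* tear the probe down */
}

static struct ftrace_event_call my_call = {
	.name		= "my_dynamic_event",
	.system		= "examples",
	.regfunc	= my_reg,
	.unregfunc	= my_unreg,
	/* .raw_init, .event and the field callbacks are omitted here;
	 * trace_kprobe.c below is a complete consumer of the interface. */
};

static int __init my_init(void)
{
	return trace_add_event_call(&my_call);	/* takes event_mutex itself */
}

static void __exit my_exit(void)
{
	trace_remove_event_call(&my_call);
}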
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 23245785927f..50504cb228de 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -18,11 +18,10 @@
18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> 18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
19 */ 19 */
20 20
21#include <linux/debugfs.h>
22#include <linux/uaccess.h>
23#include <linux/module.h> 21#include <linux/module.h>
24#include <linux/ctype.h> 22#include <linux/ctype.h>
25#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/perf_event.h>
26 25
27#include "trace.h" 26#include "trace.h"
28#include "trace_output.h" 27#include "trace_output.h"
@@ -31,6 +30,7 @@ enum filter_op_ids
31{ 30{
32 OP_OR, 31 OP_OR,
33 OP_AND, 32 OP_AND,
33 OP_GLOB,
34 OP_NE, 34 OP_NE,
35 OP_EQ, 35 OP_EQ,
36 OP_LT, 36 OP_LT,
@@ -48,16 +48,17 @@ struct filter_op {
48}; 48};
49 49
50static struct filter_op filter_ops[] = { 50static struct filter_op filter_ops[] = {
51 { OP_OR, "||", 1 }, 51 { OP_OR, "||", 1 },
52 { OP_AND, "&&", 2 }, 52 { OP_AND, "&&", 2 },
53 { OP_NE, "!=", 4 }, 53 { OP_GLOB, "~", 4 },
54 { OP_EQ, "==", 4 }, 54 { OP_NE, "!=", 4 },
55 { OP_LT, "<", 5 }, 55 { OP_EQ, "==", 4 },
56 { OP_LE, "<=", 5 }, 56 { OP_LT, "<", 5 },
57 { OP_GT, ">", 5 }, 57 { OP_LE, "<=", 5 },
58 { OP_GE, ">=", 5 }, 58 { OP_GT, ">", 5 },
59 { OP_NONE, "OP_NONE", 0 }, 59 { OP_GE, ">=", 5 },
60 { OP_OPEN_PAREN, "(", 0 }, 60 { OP_NONE, "OP_NONE", 0 },
61 { OP_OPEN_PAREN, "(", 0 },
61}; 62};
62 63
63enum { 64enum {
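OP_GLOB adds a '~' operator to the filter grammar, slotted in at the same precedence as '!=' and '==', and per is_legal_op() further down it is legal only on string fields (and globbing is the only extra operator strings get). From userspace it reads like any other filter expression; a sketch (event, field and pattern are illustrative, path assumes the usual debugfs mount):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/events/irq/"
			"irq_handler_entry/filter", "w");

	if (!f)
		return 1;
	/* '~' does glob matching: here, any handler whose name starts "eth" */
	fprintf(f, "name ~ \"eth*\"\n");
	return fclose(f) ? 1 : 0;
}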
@@ -197,9 +198,9 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
197 char *addr = (char *)(event + pred->offset); 198 char *addr = (char *)(event + pred->offset);
198 int cmp, match; 199 int cmp, match;
199 200
200 cmp = strncmp(addr, pred->str_val, pred->str_len); 201 cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len);
201 202
202 match = (!cmp) ^ pred->not; 203 match = cmp ^ pred->not;
203 204
204 return match; 205 return match;
205} 206}
@@ -211,9 +212,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
211 char **addr = (char **)(event + pred->offset); 212 char **addr = (char **)(event + pred->offset);
212 int cmp, match; 213 int cmp, match;
213 214
214 cmp = strncmp(*addr, pred->str_val, pred->str_len); 215 cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len);
215 216
216 match = (!cmp) ^ pred->not; 217 match = cmp ^ pred->not;
217 218
218 return match; 219 return match;
219} 220}
@@ -237,9 +238,9 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event,
237 char *addr = (char *)(event + str_loc); 238 char *addr = (char *)(event + str_loc);
238 int cmp, match; 239 int cmp, match;
239 240
240 cmp = strncmp(addr, pred->str_val, str_len); 241 cmp = pred->regex.match(addr, &pred->regex, str_len);
241 242
242 match = (!cmp) ^ pred->not; 243 match = cmp ^ pred->not;
243 244
244 return match; 245 return match;
245} 246}
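The three string predicates above change shape identically: the hard-coded strncmp() becomes an indirect call through pred->regex.match, and the result inversion simplifies from (!cmp) ^ pred->not to cmp ^ pred->not because strncmp() signals a match with 0 while the new callbacks signal with 1. Side by side (sketch, types condensed):

/* Old: zero from strncmp() means "matched", so invert first. */
static int match_old(const char *addr, const char *pat, int len, int not)
{
	return (!strncmp(addr, pat, len)) ^ not;
}

/* New: regex.match() already returns 1 on a match. */
static int match_new(struct filter_pred *pred, char *addr, int len)
{
	return pred->regex.match(addr, &pred->regex, len) ^ pred->not;
}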
@@ -250,10 +251,121 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
250 return 0; 251 return 0;
251} 252}
252 253
254/* Basic regex callbacks */
255static int regex_match_full(char *str, struct regex *r, int len)
256{
257 if (strncmp(str, r->pattern, len) == 0)
258 return 1;
259 return 0;
260}
261
262static int regex_match_front(char *str, struct regex *r, int len)
263{
264 if (strncmp(str, r->pattern, len) == 0)
265 return 1;
266 return 0;
267}
268
269static int regex_match_middle(char *str, struct regex *r, int len)
270{
271 if (strstr(str, r->pattern))
272 return 1;
273 return 0;
274}
275
276static int regex_match_end(char *str, struct regex *r, int len)
277{
278 char *ptr = strstr(str, r->pattern);
279
280 if (ptr && (ptr[r->len] == 0))
281 return 1;
282 return 0;
283}
284
285/**
286 * filter_parse_regex - parse a basic regex
287 * @buff: the raw regex
288 * @len: length of the regex
289 * @search: will point to the beginning of the string to compare
290 * @not: tell whether the match will have to be inverted
291 *
292 * This passes in a buffer containing a regex and this function will
293 * set search to point to the search part of the buffer and
294 * return the type of search it is (see enum above).
295 * This does modify buff.
296 *
297 * Returns enum type.
298 * search returns the pointer to use for comparison.
299 * not returns 1 if buff started with a '!'
300 * 0 otherwise.
301 */
302enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
303{
304 int type = MATCH_FULL;
305 int i;
306
307 if (buff[0] == '!') {
308 *not = 1;
309 buff++;
310 len--;
311 } else
312 *not = 0;
313
314 *search = buff;
315
316 for (i = 0; i < len; i++) {
317 if (buff[i] == '*') {
318 if (!i) {
319 *search = buff + 1;
320 type = MATCH_END_ONLY;
321 } else {
322 if (type == MATCH_END_ONLY)
323 type = MATCH_MIDDLE_ONLY;
324 else
325 type = MATCH_FRONT_ONLY;
326 buff[i] = 0;
327 break;
328 }
329 }
330 }
331
332 return type;
333}
334
335static void filter_build_regex(struct filter_pred *pred)
336{
337 struct regex *r = &pred->regex;
338 char *search;
339 enum regex_type type = MATCH_FULL;
340 int not = 0;
341
342 if (pred->op == OP_GLOB) {
343 type = filter_parse_regex(r->pattern, r->len, &search, &not);
344 r->len = strlen(search);
345 memmove(r->pattern, search, r->len+1);
346 }
347
348 switch (type) {
349 case MATCH_FULL:
350 r->match = regex_match_full;
351 break;
352 case MATCH_FRONT_ONLY:
353 r->match = regex_match_front;
354 break;
355 case MATCH_MIDDLE_ONLY:
356 r->match = regex_match_middle;
357 break;
358 case MATCH_END_ONLY:
359 r->match = regex_match_end;
360 break;
361 }
362
363 pred->not ^= not;
364}
365
253/* return 1 if event matches, 0 otherwise (discard) */ 366/* return 1 if event matches, 0 otherwise (discard) */
254int filter_match_preds(struct ftrace_event_call *call, void *rec) 367int filter_match_preds(struct event_filter *filter, void *rec)
255{ 368{
256 struct event_filter *filter = call->filter;
257 int match, top = 0, val1 = 0, val2 = 0; 369 int match, top = 0, val1 = 0, val2 = 0;
258 int stack[MAX_FILTER_PRED]; 370 int stack[MAX_FILTER_PRED];
259 struct filter_pred *pred; 371 struct filter_pred *pred;
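filter_parse_regex() only recognizes a leading '!' plus a leading and/or trailing '*', which is exactly enough to pick one of the four strncmp()/strstr() matchers above. How patterns classify, per the code:

/* Classification performed by filter_parse_regex() (sketch):
 *
 *   "linux"    -> MATCH_FULL          (regex_match_full)
 *   "linux*"   -> MATCH_FRONT_ONLY    (regex_match_front)
 *   "*linux"   -> MATCH_END_ONLY      (regex_match_end)
 *   "*linux*"  -> MATCH_MIDDLE_ONLY   (regex_match_middle)
 *   "!linux"   -> MATCH_FULL, *not = 1 (result inverted by the caller)
 *
 * The buffer is modified in place: a trailing '*' is overwritten with
 * '\0' so the remaining bytes are the literal search string.
 */
static void classify_example(void)
{
	char buf[] = "linux*";
	char *search;
	int not;
	enum regex_type type;

	type = filter_parse_regex(buf, strlen(buf), &search, &not);
	/* here: type == MATCH_FRONT_ONLY, search == "linux", not == 0 */
}

Worth noting: as merged, regex_match_front() is byte-for-byte identical to regex_match_full(), so prefix semantics only fall out where the compare length the caller passes equals the pattern length (as in filter_pred_pchar()); later kernels reworked these helpers.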
@@ -396,7 +508,7 @@ static void filter_clear_pred(struct filter_pred *pred)
396{ 508{
397 kfree(pred->field_name); 509 kfree(pred->field_name);
398 pred->field_name = NULL; 510 pred->field_name = NULL;
399 pred->str_len = 0; 511 pred->regex.len = 0;
400} 512}
401 513
402static int filter_set_pred(struct filter_pred *dest, 514static int filter_set_pred(struct filter_pred *dest,
@@ -426,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call)
426 filter->preds[i]->fn = filter_pred_none; 538 filter->preds[i]->fn = filter_pred_none;
427} 539}
428 540
429void destroy_preds(struct ftrace_event_call *call) 541static void __free_preds(struct event_filter *filter)
430{ 542{
431 struct event_filter *filter = call->filter;
432 int i; 543 int i;
433 544
434 if (!filter) 545 if (!filter)
@@ -441,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call)
441 kfree(filter->preds); 552 kfree(filter->preds);
442 kfree(filter->filter_string); 553 kfree(filter->filter_string);
443 kfree(filter); 554 kfree(filter);
555}
556
557void destroy_preds(struct ftrace_event_call *call)
558{
559 __free_preds(call->filter);
444 call->filter = NULL; 560 call->filter = NULL;
561 call->filter_active = 0;
445} 562}
446 563
447static int init_preds(struct ftrace_event_call *call) 564static struct event_filter *__alloc_preds(void)
448{ 565{
449 struct event_filter *filter; 566 struct event_filter *filter;
450 struct filter_pred *pred; 567 struct filter_pred *pred;
451 int i; 568 int i;
452 569
453 if (call->filter) 570 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
454 return 0; 571 if (!filter)
455 572 return ERR_PTR(-ENOMEM);
456 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
457 if (!call->filter)
458 return -ENOMEM;
459 573
460 filter->n_preds = 0; 574 filter->n_preds = 0;
461 575
@@ -471,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call)
471 filter->preds[i] = pred; 585 filter->preds[i] = pred;
472 } 586 }
473 587
474 return 0; 588 return filter;
475 589
476oom: 590oom:
477 destroy_preds(call); 591 __free_preds(filter);
592 return ERR_PTR(-ENOMEM);
593}
594
595static int init_preds(struct ftrace_event_call *call)
596{
597 if (call->filter)
598 return 0;
599
600 call->filter_active = 0;
601 call->filter = __alloc_preds();
602 if (IS_ERR(call->filter))
603 return PTR_ERR(call->filter);
478 604
479 return -ENOMEM; 605 return 0;
480} 606}
481 607
482static int init_subsystem_preds(struct event_subsystem *system) 608static int init_subsystem_preds(struct event_subsystem *system)
@@ -499,14 +625,7 @@ static int init_subsystem_preds(struct event_subsystem *system)
499 return 0; 625 return 0;
500} 626}
501 627
502enum { 628static void filter_free_subsystem_preds(struct event_subsystem *system)
503 FILTER_DISABLE_ALL,
504 FILTER_INIT_NO_RESET,
505 FILTER_SKIP_NO_RESET,
506};
507
508static void filter_free_subsystem_preds(struct event_subsystem *system,
509 int flag)
510{ 629{
511 struct ftrace_event_call *call; 630 struct ftrace_event_call *call;
512 631
@@ -517,14 +636,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
517 if (strcmp(call->system, system->name) != 0) 636 if (strcmp(call->system, system->name) != 0)
518 continue; 637 continue;
519 638
520 if (flag == FILTER_INIT_NO_RESET) {
521 call->filter->no_reset = false;
522 continue;
523 }
524
525 if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
526 continue;
527
528 filter_disable_preds(call); 639 filter_disable_preds(call);
529 remove_filter_string(call->filter); 640 remove_filter_string(call->filter);
530 } 641 }
@@ -532,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
532 643
533static int filter_add_pred_fn(struct filter_parse_state *ps, 644static int filter_add_pred_fn(struct filter_parse_state *ps,
534 struct ftrace_event_call *call, 645 struct ftrace_event_call *call,
646 struct event_filter *filter,
535 struct filter_pred *pred, 647 struct filter_pred *pred,
536 filter_pred_fn_t fn) 648 filter_pred_fn_t fn)
537{ 649{
538 struct event_filter *filter = call->filter;
539 int idx, err; 650 int idx, err;
540 651
541 if (filter->n_preds == MAX_FILTER_PRED) { 652 if (filter->n_preds == MAX_FILTER_PRED) {
@@ -550,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
550 return err; 661 return err;
551 662
552 filter->n_preds++; 663 filter->n_preds++;
553 call->filter_active = 1;
554 664
555 return 0; 665 return 0;
556} 666}
@@ -575,7 +685,10 @@ static bool is_string_field(struct ftrace_event_field *field)
575 685
576static int is_legal_op(struct ftrace_event_field *field, int op) 686static int is_legal_op(struct ftrace_event_field *field, int op)
577{ 687{
578 if (is_string_field(field) && (op != OP_EQ && op != OP_NE)) 688 if (is_string_field(field) &&
689 (op != OP_EQ && op != OP_NE && op != OP_GLOB))
690 return 0;
691 if (!is_string_field(field) && op == OP_GLOB)
579 return 0; 692 return 0;
580 693
581 return 1; 694 return 1;
@@ -626,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
626 739
627static int filter_add_pred(struct filter_parse_state *ps, 740static int filter_add_pred(struct filter_parse_state *ps,
628 struct ftrace_event_call *call, 741 struct ftrace_event_call *call,
742 struct event_filter *filter,
629 struct filter_pred *pred, 743 struct filter_pred *pred,
630 bool dry_run) 744 bool dry_run)
631{ 745{
@@ -660,21 +774,22 @@ static int filter_add_pred(struct filter_parse_state *ps,
660 } 774 }
661 775
662 if (is_string_field(field)) { 776 if (is_string_field(field)) {
663 pred->str_len = field->size; 777 filter_build_regex(pred);
664 778
665 if (field->filter_type == FILTER_STATIC_STRING) 779 if (field->filter_type == FILTER_STATIC_STRING) {
666 fn = filter_pred_string; 780 fn = filter_pred_string;
667 else if (field->filter_type == FILTER_DYN_STRING) 781 pred->regex.field_len = field->size;
782 } else if (field->filter_type == FILTER_DYN_STRING)
668 fn = filter_pred_strloc; 783 fn = filter_pred_strloc;
669 else { 784 else {
670 fn = filter_pred_pchar; 785 fn = filter_pred_pchar;
671 pred->str_len = strlen(pred->str_val); 786 pred->regex.field_len = strlen(pred->regex.pattern);
672 } 787 }
673 } else { 788 } else {
674 if (field->is_signed) 789 if (field->is_signed)
675 ret = strict_strtoll(pred->str_val, 0, &val); 790 ret = strict_strtoll(pred->regex.pattern, 0, &val);
676 else 791 else
677 ret = strict_strtoull(pred->str_val, 0, &val); 792 ret = strict_strtoull(pred->regex.pattern, 0, &val);
678 if (ret) { 793 if (ret) {
679 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); 794 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
680 return -EINVAL; 795 return -EINVAL;
@@ -694,45 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
694 809
695add_pred_fn: 810add_pred_fn:
696 if (!dry_run) 811 if (!dry_run)
697 return filter_add_pred_fn(ps, call, pred, fn); 812 return filter_add_pred_fn(ps, call, filter, pred, fn);
698 return 0;
699}
700
701static int filter_add_subsystem_pred(struct filter_parse_state *ps,
702 struct event_subsystem *system,
703 struct filter_pred *pred,
704 char *filter_string,
705 bool dry_run)
706{
707 struct ftrace_event_call *call;
708 int err = 0;
709 bool fail = true;
710
711 list_for_each_entry(call, &ftrace_events, list) {
712
713 if (!call->define_fields)
714 continue;
715
716 if (strcmp(call->system, system->name))
717 continue;
718
719 if (call->filter->no_reset)
720 continue;
721
722 err = filter_add_pred(ps, call, pred, dry_run);
723 if (err)
724 call->filter->no_reset = true;
725 else
726 fail = false;
727
728 if (!dry_run)
729 replace_filter_string(call->filter, filter_string);
730 }
731
732 if (fail) {
733 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
734 return err;
735 }
736 return 0; 813 return 0;
737} 814}
738 815
@@ -933,8 +1010,9 @@ static void postfix_clear(struct filter_parse_state *ps)
933 1010
934 while (!list_empty(&ps->postfix)) { 1011 while (!list_empty(&ps->postfix)) {
935 elt = list_first_entry(&ps->postfix, struct postfix_elt, list); 1012 elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
936 kfree(elt->operand);
937 list_del(&elt->list); 1013 list_del(&elt->list);
1014 kfree(elt->operand);
1015 kfree(elt);
938 } 1016 }
939} 1017}
940 1018
@@ -1044,8 +1122,8 @@ static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
1044 return NULL; 1122 return NULL;
1045 } 1123 }
1046 1124
1047 strcpy(pred->str_val, operand2); 1125 strcpy(pred->regex.pattern, operand2);
1048 pred->str_len = strlen(operand2); 1126 pred->regex.len = strlen(pred->regex.pattern);
1049 1127
1050 pred->op = op; 1128 pred->op = op;
1051 1129
@@ -1089,8 +1167,8 @@ static int check_preds(struct filter_parse_state *ps)
1089 return 0; 1167 return 0;
1090} 1168}
1091 1169
1092static int replace_preds(struct event_subsystem *system, 1170static int replace_preds(struct ftrace_event_call *call,
1093 struct ftrace_event_call *call, 1171 struct event_filter *filter,
1094 struct filter_parse_state *ps, 1172 struct filter_parse_state *ps,
1095 char *filter_string, 1173 char *filter_string,
1096 bool dry_run) 1174 bool dry_run)
@@ -1137,11 +1215,7 @@ static int replace_preds(struct event_subsystem *system,
1137add_pred: 1215add_pred:
1138 if (!pred) 1216 if (!pred)
1139 return -ENOMEM; 1217 return -ENOMEM;
1140 if (call) 1218 err = filter_add_pred(ps, call, filter, pred, dry_run);
1141 err = filter_add_pred(ps, call, pred, false);
1142 else
1143 err = filter_add_subsystem_pred(ps, system, pred,
1144 filter_string, dry_run);
1145 filter_free_pred(pred); 1219 filter_free_pred(pred);
1146 if (err) 1220 if (err)
1147 return err; 1221 return err;
@@ -1152,10 +1226,50 @@ add_pred:
1152 return 0; 1226 return 0;
1153} 1227}
1154 1228
1155int apply_event_filter(struct ftrace_event_call *call, char *filter_string) 1229static int replace_system_preds(struct event_subsystem *system,
1230 struct filter_parse_state *ps,
1231 char *filter_string)
1156{ 1232{
1233 struct ftrace_event_call *call;
1234 bool fail = true;
1157 int err; 1235 int err;
1158 1236
1237 list_for_each_entry(call, &ftrace_events, list) {
1238 struct event_filter *filter = call->filter;
1239
1240 if (!call->define_fields)
1241 continue;
1242
1243 if (strcmp(call->system, system->name) != 0)
1244 continue;
1245
1246 /* try to see if the filter can be applied */
1247 err = replace_preds(call, filter, ps, filter_string, true);
1248 if (err)
1249 continue;
1250
1251 /* really apply the filter */
1252 filter_disable_preds(call);
1253 err = replace_preds(call, filter, ps, filter_string, false);
1254 if (err)
1255 filter_disable_preds(call);
1256 else {
1257 call->filter_active = 1;
1258 replace_filter_string(filter, filter_string);
1259 }
1260 fail = false;
1261 }
1262
1263 if (fail) {
1264 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
1265 return -EINVAL;
1266 }
1267 return 0;
1268}
1269
1270int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1271{
1272 int err;
1159 struct filter_parse_state *ps; 1273 struct filter_parse_state *ps;
1160 1274
 1161 mutex_lock(&event_mutex); 1275 mutex_lock(&event_mutex);
@@ -1167,8 +1281,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1167 if (!strcmp(strstrip(filter_string), "0")) { 1281 if (!strcmp(strstrip(filter_string), "0")) {
1168 filter_disable_preds(call); 1282 filter_disable_preds(call);
1169 remove_filter_string(call->filter); 1283 remove_filter_string(call->filter);
1170 mutex_unlock(&event_mutex); 1284 goto out_unlock;
1171 return 0;
1172 } 1285 }
1173 1286
1174 err = -ENOMEM; 1287 err = -ENOMEM;
@@ -1186,10 +1299,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1186 goto out; 1299 goto out;
1187 } 1300 }
1188 1301
1189 err = replace_preds(NULL, call, ps, filter_string, false); 1302 err = replace_preds(call, call->filter, ps, filter_string, false);
1190 if (err) 1303 if (err)
1191 append_filter_err(ps, call->filter); 1304 append_filter_err(ps, call->filter);
1192 1305 else
1306 call->filter_active = 1;
1193out: 1307out:
1194 filter_opstack_clear(ps); 1308 filter_opstack_clear(ps);
1195 postfix_clear(ps); 1309 postfix_clear(ps);
@@ -1204,7 +1318,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1204 char *filter_string) 1318 char *filter_string)
1205{ 1319{
1206 int err; 1320 int err;
1207
1208 struct filter_parse_state *ps; 1321 struct filter_parse_state *ps;
1209 1322
1210 mutex_lock(&event_mutex); 1323 mutex_lock(&event_mutex);
@@ -1214,10 +1327,9 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1214 goto out_unlock; 1327 goto out_unlock;
1215 1328
1216 if (!strcmp(strstrip(filter_string), "0")) { 1329 if (!strcmp(strstrip(filter_string), "0")) {
1217 filter_free_subsystem_preds(system, FILTER_DISABLE_ALL); 1330 filter_free_subsystem_preds(system);
1218 remove_filter_string(system->filter); 1331 remove_filter_string(system->filter);
1219 mutex_unlock(&event_mutex); 1332 goto out_unlock;
1220 return 0;
1221 } 1333 }
1222 1334
1223 err = -ENOMEM; 1335 err = -ENOMEM;
@@ -1234,31 +1346,87 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1234 goto out; 1346 goto out;
1235 } 1347 }
1236 1348
1237 filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET); 1349 err = replace_system_preds(system, ps, filter_string);
1238 1350 if (err)
1239 /* try to see the filter can be applied to which events */
1240 err = replace_preds(system, NULL, ps, filter_string, true);
1241 if (err) {
1242 append_filter_err(ps, system->filter); 1351 append_filter_err(ps, system->filter);
1243 goto out; 1352
1353out:
1354 filter_opstack_clear(ps);
1355 postfix_clear(ps);
1356 kfree(ps);
1357out_unlock:
1358 mutex_unlock(&event_mutex);
1359
1360 return err;
1361}
1362
1363#ifdef CONFIG_EVENT_PROFILE
1364
1365void ftrace_profile_free_filter(struct perf_event *event)
1366{
1367 struct event_filter *filter = event->filter;
1368
1369 event->filter = NULL;
1370 __free_preds(filter);
1371}
1372
1373int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1374 char *filter_str)
1375{
1376 int err;
1377 struct event_filter *filter;
1378 struct filter_parse_state *ps;
1379 struct ftrace_event_call *call = NULL;
1380
1381 mutex_lock(&event_mutex);
1382
1383 list_for_each_entry(call, &ftrace_events, list) {
1384 if (call->id == event_id)
1385 break;
1244 } 1386 }
1245 1387
1246 filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET); 1388 err = -EINVAL;
1389 if (!call)
1390 goto out_unlock;
1247 1391
1248 /* really apply the filter to the events */ 1392 err = -EEXIST;
1249 err = replace_preds(system, NULL, ps, filter_string, false); 1393 if (event->filter)
1250 if (err) { 1394 goto out_unlock;
1251 append_filter_err(ps, system->filter); 1395
1252 filter_free_subsystem_preds(system, 2); 1396 filter = __alloc_preds();
1397 if (IS_ERR(filter)) {
1398 err = PTR_ERR(filter);
1399 goto out_unlock;
1253 } 1400 }
1254 1401
1255out: 1402 err = -ENOMEM;
1403 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1404 if (!ps)
1405 goto free_preds;
1406
1407 parse_init(ps, filter_ops, filter_str);
1408 err = filter_parse(ps);
1409 if (err)
1410 goto free_ps;
1411
1412 err = replace_preds(call, filter, ps, filter_str, false);
1413 if (!err)
1414 event->filter = filter;
1415
1416free_ps:
1256 filter_opstack_clear(ps); 1417 filter_opstack_clear(ps);
1257 postfix_clear(ps); 1418 postfix_clear(ps);
1258 kfree(ps); 1419 kfree(ps);
1420
1421free_preds:
1422 if (err)
1423 __free_preds(filter);
1424
1259out_unlock: 1425out_unlock:
1260 mutex_unlock(&event_mutex); 1426 mutex_unlock(&event_mutex);
1261 1427
1262 return err; 1428 return err;
1263} 1429}
1264 1430
1431#endif /* CONFIG_EVENT_PROFILE */
1432
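The new CONFIG_EVENT_PROFILE block is what the filter refactoring was for: filter_match_preds() now takes a bare struct event_filter instead of digging it out of the ftrace_event_call, so a perf event can own a private filter (event->filter), built by ftrace_profile_set_filter() and torn down by ftrace_profile_free_filter(). Userspace reaches this through the perf fd; a sketch of the user side (the ioctl is the established perf ABI, the filter text is illustrative):

#include <sys/ioctl.h>
#include <linux/perf_event.h>

/* fd: from perf_event_open() on a tracepoint event, i.e.
 * PERF_TYPE_TRACEPOINT with attr.config = the id read from the
 * event's debugfs 'id' file.
 */
static int set_filter(int fd, const char *expr)
{
	/* routed by the perf core into ftrace_profile_set_filter() */
	return ioctl(fd, PERF_EVENT_IOC_SET_FILTER, expr);
}

/* e.g.: set_filter(fd, "bytes_req > 256"); */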
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 9753fcc61bc5..d4fa5dc1ee4e 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -48,11 +48,11 @@
48struct ____ftrace_##name { \ 48struct ____ftrace_##name { \
49 tstruct \ 49 tstruct \
50}; \ 50}; \
51static void __used ____ftrace_check_##name(void) \ 51static void __always_unused ____ftrace_check_##name(void) \
52{ \ 52{ \
53 struct ____ftrace_##name *__entry = NULL; \ 53 struct ____ftrace_##name *__entry = NULL; \
54 \ 54 \
55 /* force cmpile-time check on F_printk() */ \ 55 /* force compile-time check on F_printk() */ \
56 printk(print); \ 56 printk(print); \
57} 57}
58 58
@@ -66,44 +66,47 @@ static void __used ____ftrace_check_##name(void) \
66#undef __field 66#undef __field
67#define __field(type, item) \ 67#define __field(type, item) \
68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
69 "offset:%zu;\tsize:%zu;\n", \ 69 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
70 offsetof(typeof(field), item), \ 70 offsetof(typeof(field), item), \
71 sizeof(field.item)); \ 71 sizeof(field.item), is_signed_type(type)); \
72 if (!ret) \ 72 if (!ret) \
73 return 0; 73 return 0;
74 74
75#undef __field_desc 75#undef __field_desc
76#define __field_desc(type, container, item) \ 76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\n", \ 78 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
79 offsetof(typeof(field), container.item), \ 79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item)); \ 80 sizeof(field.container.item), \
81 is_signed_type(type)); \
81 if (!ret) \ 82 if (!ret) \
82 return 0; 83 return 0;
83 84
84#undef __array 85#undef __array
85#define __array(type, item, len) \ 86#define __array(type, item, len) \
86 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ 87 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
87 "offset:%zu;\tsize:%zu;\n", \ 88 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
88 offsetof(typeof(field), item), \ 89 offsetof(typeof(field), item), \
89 sizeof(field.item)); \ 90 sizeof(field.item), is_signed_type(type)); \
90 if (!ret) \ 91 if (!ret) \
91 return 0; 92 return 0;
92 93
93#undef __array_desc 94#undef __array_desc
94#define __array_desc(type, container, item, len) \ 95#define __array_desc(type, container, item, len) \
95 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ 96 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
96 "offset:%zu;\tsize:%zu;\n", \ 97 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
97 offsetof(typeof(field), container.item), \ 98 offsetof(typeof(field), container.item), \
98 sizeof(field.container.item)); \ 99 sizeof(field.container.item), \
100 is_signed_type(type)); \
99 if (!ret) \ 101 if (!ret) \
100 return 0; 102 return 0;
101 103
102#undef __dynamic_array 104#undef __dynamic_array
103#define __dynamic_array(type, item) \ 105#define __dynamic_array(type, item) \
104 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 106 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
105 "offset:%zu;\tsize:0;\n", \ 107 "offset:%zu;\tsize:0;\tsigned:%u;\n", \
106 offsetof(typeof(field), item)); \ 108 offsetof(typeof(field), item), \
109 is_signed_type(type)); \
107 if (!ret) \ 110 if (!ret) \
108 return 0; 111 return 0;
109 112
@@ -131,7 +134,6 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
131 134
132#include "trace_entries.h" 135#include "trace_entries.h"
133 136
134
135#undef __field 137#undef __field
136#define __field(type, item) \ 138#define __field(type, item) \
137 ret = trace_define_field(event_call, #type, #item, \ 139 ret = trace_define_field(event_call, #type, #item, \
@@ -156,7 +158,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
156 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 158 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
157 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 159 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
158 offsetof(typeof(field), item), \ 160 offsetof(typeof(field), item), \
159 sizeof(field.item), 0, FILTER_OTHER); \ 161 sizeof(field.item), \
162 is_signed_type(type), FILTER_OTHER); \
160 if (ret) \ 163 if (ret) \
161 return ret; 164 return ret;
162 165
@@ -166,8 +169,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
166 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 169 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
167 offsetof(typeof(field), \ 170 offsetof(typeof(field), \
168 container.item), \ 171 container.item), \
169 sizeof(field.container.item), 0, \ 172 sizeof(field.container.item), \
170 FILTER_OTHER); \ 173 is_signed_type(type), FILTER_OTHER); \
171 if (ret) \ 174 if (ret) \
172 return ret; 175 return ret;
173 176
@@ -182,10 +185,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
182 struct struct_name field; \ 185 struct struct_name field; \
183 int ret; \ 186 int ret; \
184 \ 187 \
185 ret = trace_define_common_fields(event_call); \
186 if (ret) \
187 return ret; \
188 \
189 tstruct; \ 188 tstruct; \
190 \ 189 \
191 return ret; \ 190 return ret; \
@@ -193,6 +192,11 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
193 192
194#include "trace_entries.h" 193#include "trace_entries.h"
195 194
195static int ftrace_raw_init_event(struct ftrace_event_call *call)
196{
197 INIT_LIST_HEAD(&call->fields);
198 return 0;
199}
196 200
197#undef __field 201#undef __field
198#define __field(type, item) 202#define __field(type, item)
@@ -211,7 +215,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
211 215
212#undef FTRACE_ENTRY 216#undef FTRACE_ENTRY
213#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 217#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
214static int ftrace_raw_init_event_##call(void); \
215 \ 218 \
216struct ftrace_event_call __used \ 219struct ftrace_event_call __used \
217__attribute__((__aligned__(4))) \ 220__attribute__((__aligned__(4))) \
@@ -219,14 +222,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
219 .name = #call, \ 222 .name = #call, \
220 .id = type, \ 223 .id = type, \
221 .system = __stringify(TRACE_SYSTEM), \ 224 .system = __stringify(TRACE_SYSTEM), \
222 .raw_init = ftrace_raw_init_event_##call, \ 225 .raw_init = ftrace_raw_init_event, \
223 .show_format = ftrace_format_##call, \ 226 .show_format = ftrace_format_##call, \
224 .define_fields = ftrace_define_fields_##call, \ 227 .define_fields = ftrace_define_fields_##call, \
225}; \ 228}; \
226static int ftrace_raw_init_event_##call(void) \
227{ \
228 INIT_LIST_HEAD(&event_##call.fields); \
229 return 0; \
230} \
231 229
232#include "trace_entries.h" 230#include "trace_entries.h"
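trace_export.c now emits a signed:%u attribute for every field in the format files, so userspace parsers know how to interpret the raw bytes, and trace_define_common_fields() is no longer called from each generated define_fields callback (event_create_dir() does it once, as seen in trace_events.c above). is_signed_type() is a compile-time one-liner; a sketch of the idea (the kernel's exact definition may differ in detail):

/* Sketch: 1 if `type` is signed, 0 otherwise, decided at compile time;
 * casting -1 to an unsigned type yields its huge maximum value.
 */
#define is_signed_type(type)	(((type)(-1)) < (type)1)

/* is_signed_type(int) == 1, is_signed_type(unsigned char) == 0, and a
 * format line then reads e.g.:
 *   field:int pid;	offset:4;	size:4;	signed:1;
 */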
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 8614e3241ff8..9d976f3249a3 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,9 +14,20 @@
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h" 15#include "trace_output.h"
16 16
17struct fgraph_data { 17struct fgraph_cpu_data {
18 pid_t last_pid; 18 pid_t last_pid;
19 int depth; 19 int depth;
20 int ignore;
21};
22
23struct fgraph_data {
24 struct fgraph_cpu_data *cpu_data;
25
26 /* Place to preserve last processed entry. */
27 struct ftrace_graph_ent_entry ent;
28 struct ftrace_graph_ret_entry ret;
29 int failed;
30 int cpu;
20}; 31};
21 32
22#define TRACE_GRAPH_INDENT 2 33#define TRACE_GRAPH_INDENT 2
@@ -384,7 +395,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
384 if (!data) 395 if (!data)
385 return TRACE_TYPE_HANDLED; 396 return TRACE_TYPE_HANDLED;
386 397
387 last_pid = &(per_cpu_ptr(data, cpu)->last_pid); 398 last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
388 399
389 if (*last_pid == pid) 400 if (*last_pid == pid)
390 return TRACE_TYPE_HANDLED; 401 return TRACE_TYPE_HANDLED;
@@ -435,26 +446,49 @@ static struct ftrace_graph_ret_entry *
435get_return_for_leaf(struct trace_iterator *iter, 446get_return_for_leaf(struct trace_iterator *iter,
436 struct ftrace_graph_ent_entry *curr) 447 struct ftrace_graph_ent_entry *curr)
437{ 448{
438 struct ring_buffer_iter *ring_iter; 449 struct fgraph_data *data = iter->private;
450 struct ring_buffer_iter *ring_iter = NULL;
439 struct ring_buffer_event *event; 451 struct ring_buffer_event *event;
440 struct ftrace_graph_ret_entry *next; 452 struct ftrace_graph_ret_entry *next;
441 453
442 ring_iter = iter->buffer_iter[iter->cpu]; 454 /*
455 * If the previous output failed to write to the seq buffer,
456 * then we just reuse the data from before.
457 */
458 if (data && data->failed) {
459 curr = &data->ent;
460 next = &data->ret;
461 } else {
443 462
444 /* First peek to compare current entry and the next one */ 463 ring_iter = iter->buffer_iter[iter->cpu];
445 if (ring_iter) 464
446 event = ring_buffer_iter_peek(ring_iter, NULL); 465 /* First peek to compare current entry and the next one */
447 else { 466 if (ring_iter)
448 /* We need to consume the current entry to see the next one */ 467 event = ring_buffer_iter_peek(ring_iter, NULL);
449 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); 468 else {
450 event = ring_buffer_peek(iter->tr->buffer, iter->cpu, 469 /*
451 NULL); 470 * We need to consume the current entry to see
452 } 471 * the next one.
472 */
473 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
474 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
475 NULL);
476 }
453 477
454 if (!event) 478 if (!event)
455 return NULL; 479 return NULL;
480
481 next = ring_buffer_event_data(event);
456 482
457 next = ring_buffer_event_data(event); 483 if (data) {
484 /*
485 * Save current and next entries for later reference
486 * if the output fails.
487 */
488 data->ent = *curr;
489 data->ret = *next;
490 }
491 }
458 492
459 if (next->ent.type != TRACE_GRAPH_RET) 493 if (next->ent.type != TRACE_GRAPH_RET)
460 return NULL; 494 return NULL;
@@ -640,7 +674,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
640 674
641 if (data) { 675 if (data) {
642 int cpu = iter->cpu; 676 int cpu = iter->cpu;
643 int *depth = &(per_cpu_ptr(data, cpu)->depth); 677 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
644 678
645 /* 679 /*
646 * Comments display at + 1 to depth. Since 680 * Comments display at + 1 to depth. Since
@@ -688,7 +722,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
688 722
689 if (data) { 723 if (data) {
690 int cpu = iter->cpu; 724 int cpu = iter->cpu;
691 int *depth = &(per_cpu_ptr(data, cpu)->depth); 725 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
692 726
693 *depth = call->depth; 727 *depth = call->depth;
694 } 728 }
@@ -782,19 +816,34 @@ static enum print_line_t
782print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 816print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
783 struct trace_iterator *iter) 817 struct trace_iterator *iter)
784{ 818{
785 int cpu = iter->cpu; 819 struct fgraph_data *data = iter->private;
786 struct ftrace_graph_ent *call = &field->graph_ent; 820 struct ftrace_graph_ent *call = &field->graph_ent;
787 struct ftrace_graph_ret_entry *leaf_ret; 821 struct ftrace_graph_ret_entry *leaf_ret;
822 static enum print_line_t ret;
823 int cpu = iter->cpu;
788 824
789 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) 825 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
790 return TRACE_TYPE_PARTIAL_LINE; 826 return TRACE_TYPE_PARTIAL_LINE;
791 827
792 leaf_ret = get_return_for_leaf(iter, field); 828 leaf_ret = get_return_for_leaf(iter, field);
793 if (leaf_ret) 829 if (leaf_ret)
794 return print_graph_entry_leaf(iter, field, leaf_ret, s); 830 ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
795 else 831 else
796 return print_graph_entry_nested(iter, field, s, cpu); 832 ret = print_graph_entry_nested(iter, field, s, cpu);
797 833
834 if (data) {
835 /*
836 * If we failed to write our output, then we need to make
837 * note of it. Because we already consumed our entry.
838 */
839 if (s->full) {
840 data->failed = 1;
841 data->cpu = cpu;
842 } else
843 data->failed = 0;
844 }
845
846 return ret;
798} 847}
799 848
800static enum print_line_t 849static enum print_line_t
@@ -810,7 +859,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
810 859
811 if (data) { 860 if (data) {
812 int cpu = iter->cpu; 861 int cpu = iter->cpu;
813 int *depth = &(per_cpu_ptr(data, cpu)->depth); 862 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
814 863
815 /* 864 /*
816 * Comments display at + 1 to depth. This is the 865 * Comments display at + 1 to depth. This is the
@@ -873,7 +922,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
873 int i; 922 int i;
874 923
875 if (data) 924 if (data)
876 depth = per_cpu_ptr(data, iter->cpu)->depth; 925 depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
877 926
878 if (print_graph_prologue(iter, s, 0, 0)) 927 if (print_graph_prologue(iter, s, 0, 0))
879 return TRACE_TYPE_PARTIAL_LINE; 928 return TRACE_TYPE_PARTIAL_LINE;
@@ -941,8 +990,33 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
941enum print_line_t 990enum print_line_t
942print_graph_function(struct trace_iterator *iter) 991print_graph_function(struct trace_iterator *iter)
943{ 992{
993 struct ftrace_graph_ent_entry *field;
994 struct fgraph_data *data = iter->private;
944 struct trace_entry *entry = iter->ent; 995 struct trace_entry *entry = iter->ent;
945 struct trace_seq *s = &iter->seq; 996 struct trace_seq *s = &iter->seq;
997 int cpu = iter->cpu;
998 int ret;
999
1000 if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
1001 per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
1002 return TRACE_TYPE_HANDLED;
1003 }
1004
1005 /*
1006 * If the last output failed, there's a possibility we need
1007 * to print out the missing entry which would never go out.
1008 */
1009 if (data && data->failed) {
1010 field = &data->ent;
1011 iter->cpu = data->cpu;
1012 ret = print_graph_entry(field, s, iter);
1013 if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
1014 per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
1015 ret = TRACE_TYPE_NO_CONSUME;
1016 }
1017 iter->cpu = cpu;
1018 return ret;
1019 }
946 1020
947 switch (entry->type) { 1021 switch (entry->type) {
948 case TRACE_GRAPH_ENT: { 1022 case TRACE_GRAPH_ENT: {
@@ -952,7 +1026,7 @@ print_graph_function(struct trace_iterator *iter)
952 * sizeof(struct ftrace_graph_ent_entry) is very small, 1026 * sizeof(struct ftrace_graph_ent_entry) is very small,
953 * it can be safely saved at the stack. 1027 * it can be safely saved at the stack.
954 */ 1028 */
955 struct ftrace_graph_ent_entry *field, saved; 1029 struct ftrace_graph_ent_entry saved;
956 trace_assign_type(field, entry); 1030 trace_assign_type(field, entry);
957 saved = *field; 1031 saved = *field;
958 return print_graph_entry(&saved, s, iter); 1032 return print_graph_entry(&saved, s, iter);
@@ -1030,31 +1104,54 @@ static void print_graph_headers(struct seq_file *s)
1030static void graph_trace_open(struct trace_iterator *iter) 1104static void graph_trace_open(struct trace_iterator *iter)
1031{ 1105{
1032 /* pid and depth on the last trace processed */ 1106 /* pid and depth on the last trace processed */
1033 struct fgraph_data *data = alloc_percpu(struct fgraph_data); 1107 struct fgraph_data *data;
1034 int cpu; 1108 int cpu;
1035 1109
1110 iter->private = NULL;
1111
1112 data = kzalloc(sizeof(*data), GFP_KERNEL);
1036 if (!data) 1113 if (!data)
1037 pr_warning("function graph tracer: not enough memory\n"); 1114 goto out_err;
1038 else 1115
1039 for_each_possible_cpu(cpu) { 1116 data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
1040 pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid); 1117 if (!data->cpu_data)
1041 int *depth = &(per_cpu_ptr(data, cpu)->depth); 1118 goto out_err_free;
1042 *pid = -1; 1119
1043 *depth = 0; 1120 for_each_possible_cpu(cpu) {
1044 } 1121 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1122 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1123 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1124 *pid = -1;
1125 *depth = 0;
1126 *ignore = 0;
1127 }
1045 1128
1046 iter->private = data; 1129 iter->private = data;
1130
1131 return;
1132
1133 out_err_free:
1134 kfree(data);
1135 out_err:
1136 pr_warning("function graph tracer: not enough memory\n");
1047} 1137}
1048 1138
1049static void graph_trace_close(struct trace_iterator *iter) 1139static void graph_trace_close(struct trace_iterator *iter)
1050{ 1140{
1051 free_percpu(iter->private); 1141 struct fgraph_data *data = iter->private;
1142
1143 if (data) {
1144 free_percpu(data->cpu_data);
1145 kfree(data);
1146 }
1052} 1147}
1053 1148
1054static struct tracer graph_trace __read_mostly = { 1149static struct tracer graph_trace __read_mostly = {
1055 .name = "function_graph", 1150 .name = "function_graph",
1056 .open = graph_trace_open, 1151 .open = graph_trace_open,
1152 .pipe_open = graph_trace_open,
1057 .close = graph_trace_close, 1153 .close = graph_trace_close,
1154 .pipe_close = graph_trace_close,
1058 .wait_pipe = poll_wait_pipe, 1155 .wait_pipe = poll_wait_pipe,
1059 .init = graph_trace_init, 1156 .init = graph_trace_init,
1060 .reset = graph_trace_reset, 1157 .reset = graph_trace_reset,
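The function_graph rework fixes a lose-an-entry race on trace_pipe: get_return_for_leaf() consumes ring-buffer events while deciding whether an entry is a leaf, so if the seq buffer then fills up (s->full) the half-printed pair would be gone. The tracer now keeps a copy of the current ent/ret pair in fgraph_data and replays it on the next print_graph_function() call, with a per-cpu ignore flag to avoid double output when the replay runs on behalf of another CPU; .pipe_open/.pipe_close reuse the same hooks so pipe readers allocate and free the per-cpu state too. (The `static` on the local `ret` in print_graph_entry() looks accidental; later kernels removed it.) The core pattern, reduced:

/* Sketch: copy ring-buffer data aside *before* printing, so a failed
 * write can be retried after the underlying event was consumed.
 */
struct replay_state {
	struct ftrace_graph_ent_entry	ent;	/* struct copies, not   */
	struct ftrace_graph_ret_entry	ret;	/* pointers: the buffer */
	int				failed;	/* slots may be reused  */
};

static void save_pair(struct replay_state *st,
		      struct ftrace_graph_ent_entry *e,
		      struct ftrace_graph_ret_entry *r)
{
	st->ent = *e;
	st->ret = *r;
}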
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index adaf7a39d0dc..7b97000745f5 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -166,6 +166,7 @@ void trace_hw_branch(u64 from, u64 to)
166 struct ftrace_event_call *call = &event_hw_branch; 166 struct ftrace_event_call *call = &event_hw_branch;
167 struct trace_array *tr = hw_branch_trace; 167 struct trace_array *tr = hw_branch_trace;
168 struct ring_buffer_event *event; 168 struct ring_buffer_event *event;
169 struct ring_buffer *buf;
169 struct hw_branch_entry *entry; 170 struct hw_branch_entry *entry;
170 unsigned long irq1; 171 unsigned long irq1;
171 int cpu; 172 int cpu;
@@ -181,7 +182,8 @@ void trace_hw_branch(u64 from, u64 to)
181 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) 182 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
182 goto out; 183 goto out;
183 184
184 event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES, 185 buf = tr->buffer;
186 event = trace_buffer_lock_reserve(buf, TRACE_HW_BRANCHES,
185 sizeof(*entry), 0, 0); 187 sizeof(*entry), 0, 0);
186 if (!event) 188 if (!event)
187 goto out; 189 goto out;
@@ -190,8 +192,8 @@ void trace_hw_branch(u64 from, u64 to)
190 entry->ent.type = TRACE_HW_BRANCHES; 192 entry->ent.type = TRACE_HW_BRANCHES;
191 entry->from = from; 193 entry->from = from;
192 entry->to = to; 194 entry->to = to;
193 if (!filter_check_discard(call, entry, tr->buffer, event)) 195 if (!filter_check_discard(call, entry, buf, event))
194 trace_buffer_unlock_commit(tr, event, 0, 0); 196 trace_buffer_unlock_commit(buf, event, 0, 0);
195 197
196 out: 198 out:
197 atomic_dec(&tr->data[cpu]->disabled); 199 atomic_dec(&tr->data[cpu]->disabled);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 3aa7eaa2114c..2974bc7538c7 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -151,6 +151,8 @@ check_critical_timing(struct trace_array *tr,
151 goto out_unlock; 151 goto out_unlock;
152 152
153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
154 /* Skip 5 functions to get to the irq/preempt enable function */
155 __trace_stack(tr, flags, 5, pc);
154 156
155 if (data->critical_sequence != max_sequence) 157 if (data->critical_sequence != max_sequence)
156 goto out_unlock; 158 goto out_unlock;
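The irqsoff change records a stack dump at the moment the critical section ends. The skip count of 5 steps over the tracer's own frames so the dump begins at the function that actually re-enabled interrupts or preemption; the count is a fixed property of this call chain, roughly:

/*
 * __trace_stack(tr, flags, skip = 5, pc) drops the top frames:
 *
 *   0: __trace_stack()
 *   1: check_critical_timing()
 *   2: time_hardirqs_on() / equivalent wrapper
 *   3: trace_hardirqs_on_caller()
 *   4: trace_hardirqs_on()
 *   5: the irq/preempt enable site   <- first frame reported
 *
 * Frames 2-4 are illustrative of the chain, not verified here.
 */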
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
new file mode 100644
index 000000000000..6ea90c0e2c96
--- /dev/null
+++ b/kernel/trace/trace_kprobe.c
@@ -0,0 +1,1553 @@
1/*
2 * Kprobes-based tracing events
3 *
4 * Created by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/module.h>
21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32
33#include "trace.h"
34#include "trace_output.h"
35
36#define MAX_TRACE_ARGS 128
37#define MAX_ARGSTR_LEN 63
38#define MAX_EVENT_NAME_LEN 64
39#define KPROBE_EVENT_SYSTEM "kprobes"
40
41/* Reserved field names */
42#define FIELD_STRING_IP "__probe_ip"
43#define FIELD_STRING_NARGS "__probe_nargs"
44#define FIELD_STRING_RETIP "__probe_ret_ip"
45#define FIELD_STRING_FUNC "__probe_func"
46
47const char *reserved_field_names[] = {
48 "common_type",
49 "common_flags",
50 "common_preempt_count",
51 "common_pid",
52 "common_tgid",
53 "common_lock_depth",
54 FIELD_STRING_IP,
55 FIELD_STRING_NARGS,
56 FIELD_STRING_RETIP,
57 FIELD_STRING_FUNC,
58};
59
60struct fetch_func {
61 unsigned long (*func)(struct pt_regs *, void *);
62 void *data;
63};
64
65static __kprobes unsigned long call_fetch(struct fetch_func *f,
66 struct pt_regs *regs)
67{
68 return f->func(regs, f->data);
69}
70
71/* fetch handlers */
72static __kprobes unsigned long fetch_register(struct pt_regs *regs,
73 void *offset)
74{
75 return regs_get_register(regs, (unsigned int)((unsigned long)offset));
76}
77
78static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
79 void *num)
80{
81 return regs_get_kernel_stack_nth(regs,
82 (unsigned int)((unsigned long)num));
83}
84
85static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
86{
87 unsigned long retval;
88
89 if (probe_kernel_address(addr, retval))
90 return 0;
91 return retval;
92}
93
94static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
95{
96 return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
97}
98
99static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
100 void *dummy)
101{
102 return regs_return_value(regs);
103}
104
105static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
106 void *dummy)
107{
108 return kernel_stack_pointer(regs);
109}
110
111/* Memory fetching by symbol */
112struct symbol_cache {
113 char *symbol;
114 long offset;
115 unsigned long addr;
116};
117
118static unsigned long update_symbol_cache(struct symbol_cache *sc)
119{
120 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
121 if (sc->addr)
122 sc->addr += sc->offset;
123 return sc->addr;
124}
125
126static void free_symbol_cache(struct symbol_cache *sc)
127{
128 kfree(sc->symbol);
129 kfree(sc);
130}
131
132static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
133{
134 struct symbol_cache *sc;
135
136 if (!sym || strlen(sym) == 0)
137 return NULL;
138 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
139 if (!sc)
140 return NULL;
141
142 sc->symbol = kstrdup(sym, GFP_KERNEL);
143 if (!sc->symbol) {
144 kfree(sc);
145 return NULL;
146 }
147 sc->offset = offset;
148
149 update_symbol_cache(sc);
150 return sc;
151}
152
153static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
154{
155 struct symbol_cache *sc = data;
156
157 if (sc->addr)
158 return fetch_memory(regs, (void *)sc->addr);
159 else
160 return 0;
161}
162
163/* Special indirect memory access interface */
164struct indirect_fetch_data {
165 struct fetch_func orig;
166 long offset;
167};
168
169static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
170{
171 struct indirect_fetch_data *ind = data;
172 unsigned long addr;
173
174 addr = call_fetch(&ind->orig, regs);
175 if (addr) {
176 addr += ind->offset;
177 return fetch_memory(regs, (void *)addr);
178 } else
179 return 0;
180}
181
182static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
183{
184 if (data->orig.func == fetch_indirect)
185 free_indirect_fetch_data(data->orig.data);
186 else if (data->orig.func == fetch_symbol)
187 free_symbol_cache(data->orig.data);
188 kfree(data);
189}
190
 191/*
192 * Kprobe event core functions
193 */
194
195struct probe_arg {
196 struct fetch_func fetch;
197 const char *name;
198};
199
200/* Flags for trace_probe */
201#define TP_FLAG_TRACE 1
202#define TP_FLAG_PROFILE 2
203
204struct trace_probe {
205 struct list_head list;
206 struct kretprobe rp; /* Use rp.kp for kprobe use */
207 unsigned long nhit;
208 unsigned int flags; /* For TP_FLAG_* */
209 const char *symbol; /* symbol name */
210 struct ftrace_event_call call;
211 struct trace_event event;
212 unsigned int nr_args;
213 struct probe_arg args[];
214};
215
216#define SIZEOF_TRACE_PROBE(n) \
217 (offsetof(struct trace_probe, args) + \
218 (sizeof(struct probe_arg) * (n)))
219
220static __kprobes int probe_is_return(struct trace_probe *tp)
221{
222 return tp->rp.handler != NULL;
223}
224
225static __kprobes const char *probe_symbol(struct trace_probe *tp)
226{
227 return tp->symbol ? tp->symbol : "unknown";
228}
229
230static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
231{
232 int ret = -EINVAL;
233
234 if (ff->func == fetch_argument)
235 ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
236 else if (ff->func == fetch_register) {
237 const char *name;
238 name = regs_query_register_name((unsigned int)((long)ff->data));
239 ret = snprintf(buf, n, "%%%s", name);
240 } else if (ff->func == fetch_stack)
241 ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data);
242 else if (ff->func == fetch_memory)
243 ret = snprintf(buf, n, "@0x%p", ff->data);
244 else if (ff->func == fetch_symbol) {
245 struct symbol_cache *sc = ff->data;
246 if (sc->offset)
247 ret = snprintf(buf, n, "@%s%+ld", sc->symbol,
248 sc->offset);
249 else
250 ret = snprintf(buf, n, "@%s", sc->symbol);
251 } else if (ff->func == fetch_retvalue)
252 ret = snprintf(buf, n, "$retval");
253 else if (ff->func == fetch_stack_address)
254 ret = snprintf(buf, n, "$stack");
255 else if (ff->func == fetch_indirect) {
256 struct indirect_fetch_data *id = ff->data;
257 size_t l = 0;
258 ret = snprintf(buf, n, "%+ld(", id->offset);
259 if (ret >= n)
260 goto end;
261 l += ret;
262 ret = probe_arg_string(buf + l, n - l, &id->orig);
263 if (ret < 0)
264 goto end;
265 l += ret;
266 ret = snprintf(buf + l, n - l, ")");
267 ret += l;
268 }
269end:
270 if (ret >= n)
271 return -ENOSPC;
272 return ret;
273}
274
275static int register_probe_event(struct trace_probe *tp);
276static void unregister_probe_event(struct trace_probe *tp);
277
278static DEFINE_MUTEX(probe_lock);
279static LIST_HEAD(probe_list);
280
281static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
282static int kretprobe_dispatcher(struct kretprobe_instance *ri,
283 struct pt_regs *regs);
284
285/* Check the name is good for event/group */
286static int check_event_name(const char *name)
287{
288 if (!isalpha(*name) && *name != '_')
289 return 0;
290 while (*++name != '\0') {
291 if (!isalpha(*name) && !isdigit(*name) && *name != '_')
292 return 0;
293 }
294 return 1;
295}
296
297/*
298 * Allocate new trace_probe and initialize it (including kprobes).
299 */
300static struct trace_probe *alloc_trace_probe(const char *group,
301 const char *event,
302 void *addr,
303 const char *symbol,
304 unsigned long offs,
305 int nargs, int is_return)
306{
307 struct trace_probe *tp;
308 int ret = -ENOMEM;
309
310 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
311 if (!tp)
312 return ERR_PTR(ret);
313
314 if (symbol) {
315 tp->symbol = kstrdup(symbol, GFP_KERNEL);
316 if (!tp->symbol)
317 goto error;
318 tp->rp.kp.symbol_name = tp->symbol;
319 tp->rp.kp.offset = offs;
320 } else
321 tp->rp.kp.addr = addr;
322
323 if (is_return)
324 tp->rp.handler = kretprobe_dispatcher;
325 else
326 tp->rp.kp.pre_handler = kprobe_dispatcher;
327
328 if (!event || !check_event_name(event)) {
329 ret = -EINVAL;
330 goto error;
331 }
332
333 tp->call.name = kstrdup(event, GFP_KERNEL);
334 if (!tp->call.name)
335 goto error;
336
337 if (!group || !check_event_name(group)) {
338 ret = -EINVAL;
339 goto error;
340 }
341
342 tp->call.system = kstrdup(group, GFP_KERNEL);
343 if (!tp->call.system)
344 goto error;
345
346 INIT_LIST_HEAD(&tp->list);
347 return tp;
348error:
349 kfree(tp->call.name);
350 kfree(tp->symbol);
351 kfree(tp);
352 return ERR_PTR(ret);
353}
354
355static void free_probe_arg(struct probe_arg *arg)
356{
357 if (arg->fetch.func == fetch_symbol)
358 free_symbol_cache(arg->fetch.data);
359 else if (arg->fetch.func == fetch_indirect)
360 free_indirect_fetch_data(arg->fetch.data);
361 kfree(arg->name);
362}
363
364static void free_trace_probe(struct trace_probe *tp)
365{
366 int i;
367
368 for (i = 0; i < tp->nr_args; i++)
369 free_probe_arg(&tp->args[i]);
370
371 kfree(tp->call.system);
372 kfree(tp->call.name);
373 kfree(tp->symbol);
374 kfree(tp);
375}
376
377static struct trace_probe *find_probe_event(const char *event,
378 const char *group)
379{
380 struct trace_probe *tp;
381
382 list_for_each_entry(tp, &probe_list, list)
383 if (strcmp(tp->call.name, event) == 0 &&
384 strcmp(tp->call.system, group) == 0)
385 return tp;
386 return NULL;
387}
388
 389/* Unregister a trace_probe and probe_event: call with probe_lock held */
390static void unregister_trace_probe(struct trace_probe *tp)
391{
392 if (probe_is_return(tp))
393 unregister_kretprobe(&tp->rp);
394 else
395 unregister_kprobe(&tp->rp.kp);
396 list_del(&tp->list);
397 unregister_probe_event(tp);
398}
399
400/* Register a trace_probe and probe_event */
401static int register_trace_probe(struct trace_probe *tp)
402{
403 struct trace_probe *old_tp;
404 int ret;
405
406 mutex_lock(&probe_lock);
407
408 /* register as an event */
409 old_tp = find_probe_event(tp->call.name, tp->call.system);
410 if (old_tp) {
411 /* delete old event */
412 unregister_trace_probe(old_tp);
413 free_trace_probe(old_tp);
414 }
415 ret = register_probe_event(tp);
416 if (ret) {
 417 pr_warning("Failed to register probe event (%d)\n", ret);
418 goto end;
419 }
420
421 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
422 if (probe_is_return(tp))
423 ret = register_kretprobe(&tp->rp);
424 else
425 ret = register_kprobe(&tp->rp.kp);
426
427 if (ret) {
428 pr_warning("Could not insert probe(%d)\n", ret);
429 if (ret == -EILSEQ) {
430 pr_warning("Probing address(0x%p) is not an "
431 "instruction boundary.\n",
432 tp->rp.kp.addr);
433 ret = -EINVAL;
434 }
435 unregister_probe_event(tp);
436 } else
437 list_add_tail(&tp->list, &probe_list);
438end:
439 mutex_unlock(&probe_lock);
440 return ret;
441}
442
443/* Split symbol and offset. */
444static int split_symbol_offset(char *symbol, unsigned long *offset)
445{
446 char *tmp;
447 int ret;
448
449 if (!offset)
450 return -EINVAL;
451
452 tmp = strchr(symbol, '+');
453 if (tmp) {
 454 /* skip the '+' sign because strict_strtoul doesn't accept it */
455 ret = strict_strtoul(tmp + 1, 0, offset);
456 if (ret)
457 return ret;
458 *tmp = '\0';
459 } else
460 *offset = 0;
461 return 0;
462}
463
464#define PARAM_MAX_ARGS 16
465#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
466
467static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
468{
469 int ret = 0;
470 unsigned long param;
471
472 if (strcmp(arg, "retval") == 0) {
473 if (is_return) {
474 ff->func = fetch_retvalue;
475 ff->data = NULL;
476 } else
477 ret = -EINVAL;
478 } else if (strncmp(arg, "stack", 5) == 0) {
479 if (arg[5] == '\0') {
480 ff->func = fetch_stack_address;
481 ff->data = NULL;
482 } else if (isdigit(arg[5])) {
483 ret = strict_strtoul(arg + 5, 10, &param);
484 if (ret || param > PARAM_MAX_STACK)
485 ret = -EINVAL;
486 else {
487 ff->func = fetch_stack;
488 ff->data = (void *)param;
489 }
490 } else
491 ret = -EINVAL;
492 } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
493 ret = strict_strtoul(arg + 3, 10, &param);
494 if (ret || param > PARAM_MAX_ARGS)
495 ret = -EINVAL;
496 else {
497 ff->func = fetch_argument;
498 ff->data = (void *)param;
499 }
500 } else
501 ret = -EINVAL;
502 return ret;
503}
504
505/* Recursive argument parser */
506static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
507{
508 int ret = 0;
509 unsigned long param;
510 long offset;
511 char *tmp;
512
513 switch (arg[0]) {
514 case '$':
515 ret = parse_probe_vars(arg + 1, ff, is_return);
516 break;
517 case '%': /* named register */
518 ret = regs_query_register_offset(arg + 1);
519 if (ret >= 0) {
520 ff->func = fetch_register;
521 ff->data = (void *)(unsigned long)ret;
522 ret = 0;
523 }
524 break;
525 case '@': /* memory or symbol */
526 if (isdigit(arg[1])) {
527 ret = strict_strtoul(arg + 1, 0, &param);
528 if (ret)
529 break;
530 ff->func = fetch_memory;
531 ff->data = (void *)param;
532 } else {
533 ret = split_symbol_offset(arg + 1, &offset);
534 if (ret)
535 break;
536 ff->data = alloc_symbol_cache(arg + 1, offset);
537 if (ff->data)
538 ff->func = fetch_symbol;
539 else
540 ret = -EINVAL;
541 }
542 break;
543 case '+': /* indirect memory */
544 case '-':
545 tmp = strchr(arg, '(');
546 if (!tmp) {
547 ret = -EINVAL;
548 break;
549 }
550 *tmp = '\0';
551 ret = strict_strtol(arg + 1, 0, &offset);
552 if (ret)
553 break;
554 if (arg[0] == '-')
555 offset = -offset;
556 arg = tmp + 1;
557 tmp = strrchr(arg, ')');
558 if (tmp) {
559 struct indirect_fetch_data *id;
560 *tmp = '\0';
561 id = kzalloc(sizeof(struct indirect_fetch_data),
562 GFP_KERNEL);
563 if (!id)
564 return -ENOMEM;
565 id->offset = offset;
566 ret = __parse_probe_arg(arg, &id->orig, is_return);
567 if (ret)
568 kfree(id);
569 else {
570 ff->func = fetch_indirect;
571 ff->data = (void *)id;
572 }
573 } else
574 ret = -EINVAL;
575 break;
576 default:
577 /* TODO: support custom handler */
578 ret = -EINVAL;
579 }
580 return ret;
581}
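/*
 * A worked example (sketch; "%ax" assumes an x86 register name): an
 * argument such as "+8(%ax)" enters the '+'/'-' case above, which strips
 * the offset (+8), recurses on "%ax" to build a fetch_register, and wraps
 * the result in an indirect_fetch_data, so that fetch_indirect() later
 * reads memory at (value of %ax) + 8.
 */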
582
583/* String length checking wrapper */
584static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
585{
586 if (strlen(arg) > MAX_ARGSTR_LEN) {
 587 pr_info("Argument is too long: %s\n", arg);
588 return -ENOSPC;
589 }
590 return __parse_probe_arg(arg, ff, is_return);
591}
592
593/* Return 1 if name is reserved or already used by another argument */
594static int conflict_field_name(const char *name,
595 struct probe_arg *args, int narg)
596{
597 int i;
598 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
599 if (strcmp(reserved_field_names[i], name) == 0)
600 return 1;
601 for (i = 0; i < narg; i++)
602 if (strcmp(args[i].name, name) == 0)
603 return 1;
604 return 0;
605}
606
607static int create_trace_probe(int argc, char **argv)
608{
609 /*
610 * Argument syntax:
611 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
612 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
613 * Fetch args:
 614 * $argN : fetch the Nth function argument. (N:0-)
615 * $retval : fetch return value
616 * $stack : fetch stack address
 617 * $stackN : fetch the Nth entry of the stack. (N:0-)
618 * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
619 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
620 * %REG : fetch register REG
621 * Indirect memory fetch:
 622 * +|-offs(ARG) : fetch memory at the address ARG +|- offs.
623 * Alias name of args:
624 * NAME=FETCHARG : set NAME as alias of FETCHARG.
625 */
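	/*
	 * Examples (a sketch; the event and symbol names are only
	 * illustrative):
	 *   p:myprobe do_sys_open $arg1 $arg2   - probe the function entry
	 *   r:myretprobe do_sys_open $retval    - probe the function return
	 *   -:myprobe                           - delete the event again
	 */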
626 struct trace_probe *tp;
627 int i, ret = 0;
628 int is_return = 0, is_delete = 0;
629 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
630 unsigned long offset = 0;
631 void *addr = NULL;
632 char buf[MAX_EVENT_NAME_LEN];
633
634 /* argc must be >= 1 */
635 if (argv[0][0] == 'p')
636 is_return = 0;
637 else if (argv[0][0] == 'r')
638 is_return = 1;
639 else if (argv[0][0] == '-')
640 is_delete = 1;
641 else {
 642 pr_info("Probe definition must start with 'p', 'r' or"
643 " '-'.\n");
644 return -EINVAL;
645 }
646
647 if (argv[0][1] == ':') {
648 event = &argv[0][2];
649 if (strchr(event, '/')) {
650 group = event;
651 event = strchr(group, '/') + 1;
652 event[-1] = '\0';
653 if (strlen(group) == 0) {
 654 pr_info("Group name is not specified\n");
655 return -EINVAL;
656 }
657 }
658 if (strlen(event) == 0) {
 659 pr_info("Event name is not specified\n");
660 return -EINVAL;
661 }
662 }
663 if (!group)
664 group = KPROBE_EVENT_SYSTEM;
665
666 if (is_delete) {
667 if (!event) {
668 pr_info("Delete command needs an event name.\n");
669 return -EINVAL;
670 }
671 tp = find_probe_event(event, group);
672 if (!tp) {
673 pr_info("Event %s/%s doesn't exist.\n", group, event);
674 return -ENOENT;
675 }
676 /* delete an event */
677 unregister_trace_probe(tp);
678 free_trace_probe(tp);
679 return 0;
680 }
681
682 if (argc < 2) {
683 pr_info("Probe point is not specified.\n");
684 return -EINVAL;
685 }
686 if (isdigit(argv[1][0])) {
687 if (is_return) {
688 pr_info("Return probe point must be a symbol.\n");
689 return -EINVAL;
690 }
691 /* an address specified */
 692 ret = strict_strtoul(argv[1], 0, (unsigned long *)&addr);
693 if (ret) {
694 pr_info("Failed to parse address.\n");
695 return ret;
696 }
697 } else {
698 /* a symbol specified */
699 symbol = argv[1];
700 /* TODO: support .init module functions */
701 ret = split_symbol_offset(symbol, &offset);
702 if (ret) {
703 pr_info("Failed to parse symbol.\n");
704 return ret;
705 }
706 if (offset && is_return) {
707 pr_info("Return probe must be used without offset.\n");
708 return -EINVAL;
709 }
710 }
711 argc -= 2; argv += 2;
712
713 /* setup a probe */
714 if (!event) {
715 /* Make a new event name */
716 if (symbol)
717 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
718 is_return ? 'r' : 'p', symbol, offset);
719 else
720 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
721 is_return ? 'r' : 'p', addr);
722 event = buf;
723 }
724 tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
725 is_return);
726 if (IS_ERR(tp)) {
 727 pr_info("Failed to allocate trace_probe (%d)\n",
728 (int)PTR_ERR(tp));
729 return PTR_ERR(tp);
730 }
731
732 /* parse arguments */
733 ret = 0;
734 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
735 /* Parse argument name */
736 arg = strchr(argv[i], '=');
737 if (arg)
738 *arg++ = '\0';
739 else
740 arg = argv[i];
741
742 if (conflict_field_name(argv[i], tp->args, i)) {
743 pr_info("Argument%d name '%s' conflicts with "
744 "another field.\n", i, argv[i]);
745 ret = -EINVAL;
746 goto error;
747 }
748
749 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
750 if (!tp->args[i].name) {
751 pr_info("Failed to allocate argument%d name '%s'.\n",
752 i, argv[i]);
753 ret = -ENOMEM;
754 goto error;
755 }
756
757 /* Parse fetch argument */
758 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
759 if (ret) {
760 pr_info("Parse error at argument%d. (%d)\n", i, ret);
761 kfree(tp->args[i].name);
762 goto error;
763 }
764
765 tp->nr_args++;
766 }
767
768 ret = register_trace_probe(tp);
769 if (ret)
770 goto error;
771 return 0;
772
773error:
774 free_trace_probe(tp);
775 return ret;
776}
777
778static void cleanup_all_probes(void)
779{
780 struct trace_probe *tp;
781
782 mutex_lock(&probe_lock);
783 /* TODO: Use batch unregistration */
784 while (!list_empty(&probe_list)) {
785 tp = list_entry(probe_list.next, struct trace_probe, list);
786 unregister_trace_probe(tp);
787 free_trace_probe(tp);
788 }
789 mutex_unlock(&probe_lock);
790}
791
792
793/* Probes listing interfaces */
794static void *probes_seq_start(struct seq_file *m, loff_t *pos)
795{
796 mutex_lock(&probe_lock);
797 return seq_list_start(&probe_list, *pos);
798}
799
800static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
801{
802 return seq_list_next(v, &probe_list, pos);
803}
804
805static void probes_seq_stop(struct seq_file *m, void *v)
806{
807 mutex_unlock(&probe_lock);
808}
809
810static int probes_seq_show(struct seq_file *m, void *v)
811{
812 struct trace_probe *tp = v;
813 int i, ret;
814 char buf[MAX_ARGSTR_LEN + 1];
815
816 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
817 seq_printf(m, ":%s/%s", tp->call.system, tp->call.name);
818
819 if (!tp->symbol)
820 seq_printf(m, " 0x%p", tp->rp.kp.addr);
821 else if (tp->rp.kp.offset)
822 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
823 else
824 seq_printf(m, " %s", probe_symbol(tp));
825
826 for (i = 0; i < tp->nr_args; i++) {
827 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
828 if (ret < 0) {
829 pr_warning("Argument%d decoding error(%d).\n", i, ret);
830 return ret;
831 }
832 seq_printf(m, " %s=%s", tp->args[i].name, buf);
833 }
834 seq_printf(m, "\n");
835 return 0;
836}
837
838static const struct seq_operations probes_seq_op = {
839 .start = probes_seq_start,
840 .next = probes_seq_next,
841 .stop = probes_seq_stop,
842 .show = probes_seq_show
843};
844
845static int probes_open(struct inode *inode, struct file *file)
846{
847 if ((file->f_mode & FMODE_WRITE) &&
848 (file->f_flags & O_TRUNC))
849 cleanup_all_probes();
850
851 return seq_open(file, &probes_seq_op);
852}
853
854static int command_trace_probe(const char *buf)
855{
856 char **argv;
857 int argc = 0, ret = 0;
858
859 argv = argv_split(GFP_KERNEL, buf, &argc);
860 if (!argv)
861 return -ENOMEM;
862
863 if (argc)
864 ret = create_trace_probe(argc, argv);
865
866 argv_free(argv);
867 return ret;
868}
869
870#define WRITE_BUFSIZE 128
871
872static ssize_t probes_write(struct file *file, const char __user *buffer,
873 size_t count, loff_t *ppos)
874{
875 char *kbuf, *tmp;
876 int ret;
877 size_t done;
878 size_t size;
879
880 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
881 if (!kbuf)
882 return -ENOMEM;
883
884 ret = done = 0;
885 while (done < count) {
886 size = count - done;
887 if (size >= WRITE_BUFSIZE)
888 size = WRITE_BUFSIZE - 1;
889 if (copy_from_user(kbuf, buffer + done, size)) {
890 ret = -EFAULT;
891 goto out;
892 }
893 kbuf[size] = '\0';
894 tmp = strchr(kbuf, '\n');
895 if (tmp) {
896 *tmp = '\0';
897 size = tmp - kbuf + 1;
898 } else if (done + size < count) {
 899 pr_warning("Line is too long: "
 900 "should be less than %d characters.", WRITE_BUFSIZE);
901 ret = -EINVAL;
902 goto out;
903 }
904 done += size;
905 /* Remove comments */
906 tmp = strchr(kbuf, '#');
907 if (tmp)
908 *tmp = '\0';
909
910 ret = command_trace_probe(kbuf);
911 if (ret)
912 goto out;
913 }
914 ret = done;
915out:
916 kfree(kbuf);
917 return ret;
918}
919
920static const struct file_operations kprobe_events_ops = {
921 .owner = THIS_MODULE,
922 .open = probes_open,
923 .read = seq_read,
924 .llseek = seq_lseek,
925 .release = seq_release,
926 .write = probes_write,
927};
928
929/* Probes profiling interfaces */
930static int probes_profile_seq_show(struct seq_file *m, void *v)
931{
932 struct trace_probe *tp = v;
933
934 seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
935 tp->rp.kp.nmissed);
936
937 return 0;
938}
939
940static const struct seq_operations profile_seq_op = {
941 .start = probes_seq_start,
942 .next = probes_seq_next,
943 .stop = probes_seq_stop,
944 .show = probes_profile_seq_show
945};
946
947static int profile_open(struct inode *inode, struct file *file)
948{
949 return seq_open(file, &profile_seq_op);
950}
951
952static const struct file_operations kprobe_profile_ops = {
953 .owner = THIS_MODULE,
954 .open = profile_open,
955 .read = seq_read,
956 .llseek = seq_lseek,
957 .release = seq_release,
958};
959
960/* Kprobe handler */
961static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
962{
963 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
964 struct kprobe_trace_entry *entry;
965 struct ring_buffer_event *event;
966 struct ring_buffer *buffer;
967 int size, i, pc;
968 unsigned long irq_flags;
969 struct ftrace_event_call *call = &tp->call;
970
971 tp->nhit++;
972
973 local_save_flags(irq_flags);
974 pc = preempt_count();
975
976 size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
977
978 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
979 irq_flags, pc);
980 if (!event)
981 return 0;
982
983 entry = ring_buffer_event_data(event);
984 entry->nargs = tp->nr_args;
985 entry->ip = (unsigned long)kp->addr;
986 for (i = 0; i < tp->nr_args; i++)
987 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
988
989 if (!filter_current_check_discard(buffer, call, entry, event))
990 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
991 return 0;
992}
993
994/* Kretprobe handler */
995static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
996 struct pt_regs *regs)
997{
998 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
999 struct kretprobe_trace_entry *entry;
1000 struct ring_buffer_event *event;
1001 struct ring_buffer *buffer;
1002 int size, i, pc;
1003 unsigned long irq_flags;
1004 struct ftrace_event_call *call = &tp->call;
1005
1006 local_save_flags(irq_flags);
1007 pc = preempt_count();
1008
1009 size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1010
1011 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
1012 irq_flags, pc);
1013 if (!event)
1014 return 0;
1015
1016 entry = ring_buffer_event_data(event);
1017 entry->nargs = tp->nr_args;
1018 entry->func = (unsigned long)tp->rp.kp.addr;
1019 entry->ret_ip = (unsigned long)ri->ret_addr;
1020 for (i = 0; i < tp->nr_args; i++)
1021 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1022
1023 if (!filter_current_check_discard(buffer, call, entry, event))
1024 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1025
1026 return 0;
1027}
1028
1029/* Event entry printers */
1030enum print_line_t
1031print_kprobe_event(struct trace_iterator *iter, int flags)
1032{
1033 struct kprobe_trace_entry *field;
1034 struct trace_seq *s = &iter->seq;
1035 struct trace_event *event;
1036 struct trace_probe *tp;
1037 int i;
1038
1039 field = (struct kprobe_trace_entry *)iter->ent;
1040 event = ftrace_find_event(field->ent.type);
1041 tp = container_of(event, struct trace_probe, event);
1042
1043 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1044 goto partial;
1045
1046 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
1047 goto partial;
1048
1049 if (!trace_seq_puts(s, ")"))
1050 goto partial;
1051
1052 for (i = 0; i < field->nargs; i++)
1053 if (!trace_seq_printf(s, " %s=%lx",
1054 tp->args[i].name, field->args[i]))
1055 goto partial;
1056
1057 if (!trace_seq_puts(s, "\n"))
1058 goto partial;
1059
1060 return TRACE_TYPE_HANDLED;
1061partial:
1062 return TRACE_TYPE_PARTIAL_LINE;
1063}
1064
1065enum print_line_t
1066print_kretprobe_event(struct trace_iterator *iter, int flags)
1067{
1068 struct kretprobe_trace_entry *field;
1069 struct trace_seq *s = &iter->seq;
1070 struct trace_event *event;
1071 struct trace_probe *tp;
1072 int i;
1073
1074 field = (struct kretprobe_trace_entry *)iter->ent;
1075 event = ftrace_find_event(field->ent.type);
1076 tp = container_of(event, struct trace_probe, event);
1077
1078 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1079 goto partial;
1080
1081 if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1082 goto partial;
1083
1084 if (!trace_seq_puts(s, " <- "))
1085 goto partial;
1086
1087 if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1088 goto partial;
1089
1090 if (!trace_seq_puts(s, ")"))
1091 goto partial;
1092
1093 for (i = 0; i < field->nargs; i++)
1094 if (!trace_seq_printf(s, " %s=%lx",
1095 tp->args[i].name, field->args[i]))
1096 goto partial;
1097
1098 if (!trace_seq_puts(s, "\n"))
1099 goto partial;
1100
1101 return TRACE_TYPE_HANDLED;
1102partial:
1103 return TRACE_TYPE_PARTIAL_LINE;
1104}
1105
1106static int probe_event_enable(struct ftrace_event_call *call)
1107{
1108 struct trace_probe *tp = (struct trace_probe *)call->data;
1109
1110 tp->flags |= TP_FLAG_TRACE;
1111 if (probe_is_return(tp))
1112 return enable_kretprobe(&tp->rp);
1113 else
1114 return enable_kprobe(&tp->rp.kp);
1115}
1116
1117static void probe_event_disable(struct ftrace_event_call *call)
1118{
1119 struct trace_probe *tp = (struct trace_probe *)call->data;
1120
1121 tp->flags &= ~TP_FLAG_TRACE;
1122 if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1123 if (probe_is_return(tp))
1124 disable_kretprobe(&tp->rp);
1125 else
1126 disable_kprobe(&tp->rp.kp);
1127 }
1128}
1129
1130static int probe_event_raw_init(struct ftrace_event_call *event_call)
1131{
1132 INIT_LIST_HEAD(&event_call->fields);
1133
1134 return 0;
1135}
1136
1137#undef DEFINE_FIELD
1138#define DEFINE_FIELD(type, item, name, is_signed) \
1139 do { \
1140 ret = trace_define_field(event_call, #type, name, \
1141 offsetof(typeof(field), item), \
1142 sizeof(field.item), is_signed, \
1143 FILTER_OTHER); \
1144 if (ret) \
1145 return ret; \
1146 } while (0)
1147
1148static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1149{
1150 int ret, i;
1151 struct kprobe_trace_entry field;
1152 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1153
1154 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1155 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1156 /* Set argument names as fields */
1157 for (i = 0; i < tp->nr_args; i++)
1158 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1159 return 0;
1160}
1161
1162static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1163{
1164 int ret, i;
1165 struct kretprobe_trace_entry field;
1166 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1167
1168 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1169 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1170 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1171 /* Set argument names as fields */
1172 for (i = 0; i < tp->nr_args; i++)
1173 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1174 return 0;
1175}
1176
1177static int __probe_event_show_format(struct trace_seq *s,
1178 struct trace_probe *tp, const char *fmt,
1179 const char *arg)
1180{
1181 int i;
1182
1183 /* Show format */
1184 if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1185 return 0;
1186
1187 for (i = 0; i < tp->nr_args; i++)
1188 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
1189 return 0;
1190
1191 if (!trace_seq_printf(s, "\", %s", arg))
1192 return 0;
1193
1194 for (i = 0; i < tp->nr_args; i++)
1195 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1196 return 0;
1197
1198 return trace_seq_puts(s, "\n");
1199}
1200
1201#undef SHOW_FIELD
1202#define SHOW_FIELD(type, item, name) \
1203 do { \
1204 ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \
1205 "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\
1206 (unsigned int)offsetof(typeof(field), item),\
1207 (unsigned int)sizeof(type), \
1208 is_signed_type(type)); \
1209 if (!ret) \
1210 return 0; \
1211 } while (0)
1212
1213static int kprobe_event_show_format(struct ftrace_event_call *call,
1214 struct trace_seq *s)
1215{
1216 struct kprobe_trace_entry field __attribute__((unused));
1217 int ret, i;
1218 struct trace_probe *tp = (struct trace_probe *)call->data;
1219
1220 SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP);
1221 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1222
1223 /* Show fields */
1224 for (i = 0; i < tp->nr_args; i++)
1225 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1226 trace_seq_puts(s, "\n");
1227
1228 return __probe_event_show_format(s, tp, "(%lx)",
1229 "REC->" FIELD_STRING_IP);
1230}
1231
1232static int kretprobe_event_show_format(struct ftrace_event_call *call,
1233 struct trace_seq *s)
1234{
1235 struct kretprobe_trace_entry field __attribute__((unused));
1236 int ret, i;
1237 struct trace_probe *tp = (struct trace_probe *)call->data;
1238
1239 SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC);
1240 SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP);
1241 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1242
1243 /* Show fields */
1244 for (i = 0; i < tp->nr_args; i++)
1245 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1246 trace_seq_puts(s, "\n");
1247
1248 return __probe_event_show_format(s, tp, "(%lx <- %lx)",
1249 "REC->" FIELD_STRING_FUNC
1250 ", REC->" FIELD_STRING_RETIP);
1251}
1252
1253#ifdef CONFIG_EVENT_PROFILE
1254
1255/* Kprobe profile handler */
1256static __kprobes int kprobe_profile_func(struct kprobe *kp,
1257 struct pt_regs *regs)
1258{
1259 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1260 struct ftrace_event_call *call = &tp->call;
1261 struct kprobe_trace_entry *entry;
1262 struct trace_entry *ent;
1263 int size, __size, i, pc, __cpu;
1264 unsigned long irq_flags;
1265 char *trace_buf;
1266 char *raw_data;
1267 int rctx;
1268
1269 pc = preempt_count();
1270 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1271 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1272 size -= sizeof(u32);
1273 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1274 "profile buffer not large enough"))
1275 return 0;
1276
1277 /*
1278 * Protect the non nmi buffer
1279 * This also protects the rcu read side
1280 */
1281 local_irq_save(irq_flags);
1282
1283 rctx = perf_swevent_get_recursion_context();
1284 if (rctx < 0)
1285 goto end_recursion;
1286
1287 __cpu = smp_processor_id();
1288
1289 if (in_nmi())
1290 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1291 else
1292 trace_buf = rcu_dereference(perf_trace_buf);
1293
1294 if (!trace_buf)
1295 goto end;
1296
1297 raw_data = per_cpu_ptr(trace_buf, __cpu);
1298
1299 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1300 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1301 entry = (struct kprobe_trace_entry *)raw_data;
1302 ent = &entry->ent;
1303
1304 tracing_generic_entry_update(ent, irq_flags, pc);
1305 ent->type = call->id;
1306 entry->nargs = tp->nr_args;
1307 entry->ip = (unsigned long)kp->addr;
1308 for (i = 0; i < tp->nr_args; i++)
1309 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1310 perf_tp_event(call->id, entry->ip, 1, entry, size);
1311
1312end:
1313 perf_swevent_put_recursion_context(rctx);
1314end_recursion:
1315 local_irq_restore(irq_flags);
1316
1317 return 0;
1318}
1319
1320/* Kretprobe profile handler */
1321static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
1322 struct pt_regs *regs)
1323{
1324 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1325 struct ftrace_event_call *call = &tp->call;
1326 struct kretprobe_trace_entry *entry;
1327 struct trace_entry *ent;
1328 int size, __size, i, pc, __cpu;
1329 unsigned long irq_flags;
1330 char *trace_buf;
1331 char *raw_data;
1332 int rctx;
1333
1334 pc = preempt_count();
1335 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1336 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1337 size -= sizeof(u32);
1338 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1339 "profile buffer not large enough"))
1340 return 0;
1341
1342 /*
1343 * Protect the non nmi buffer
1344 * This also protects the rcu read side
1345 */
1346 local_irq_save(irq_flags);
1347
1348 rctx = perf_swevent_get_recursion_context();
1349 if (rctx < 0)
1350 goto end_recursion;
1351
1352 __cpu = smp_processor_id();
1353
1354 if (in_nmi())
1355 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1356 else
1357 trace_buf = rcu_dereference(perf_trace_buf);
1358
1359 if (!trace_buf)
1360 goto end;
1361
1362 raw_data = per_cpu_ptr(trace_buf, __cpu);
1363
1364 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1365 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1366 entry = (struct kretprobe_trace_entry *)raw_data;
1367 ent = &entry->ent;
1368
1369 tracing_generic_entry_update(ent, irq_flags, pc);
1370 ent->type = call->id;
1371 entry->nargs = tp->nr_args;
1372 entry->func = (unsigned long)tp->rp.kp.addr;
1373 entry->ret_ip = (unsigned long)ri->ret_addr;
1374 for (i = 0; i < tp->nr_args; i++)
1375 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1376 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1377
1378end:
1379 perf_swevent_put_recursion_context(rctx);
1380end_recursion:
1381 local_irq_restore(irq_flags);
1382
1383 return 0;
1384}
1385
1386static int probe_profile_enable(struct ftrace_event_call *call)
1387{
1388 struct trace_probe *tp = (struct trace_probe *)call->data;
1389
1390 tp->flags |= TP_FLAG_PROFILE;
1391
1392 if (probe_is_return(tp))
1393 return enable_kretprobe(&tp->rp);
1394 else
1395 return enable_kprobe(&tp->rp.kp);
1396}
1397
1398static void probe_profile_disable(struct ftrace_event_call *call)
1399{
1400 struct trace_probe *tp = (struct trace_probe *)call->data;
1401
1402 tp->flags &= ~TP_FLAG_PROFILE;
1403
1404 if (!(tp->flags & TP_FLAG_TRACE)) {
1405 if (probe_is_return(tp))
1406 disable_kretprobe(&tp->rp);
1407 else
1408 disable_kprobe(&tp->rp.kp);
1409 }
1410}
1411#endif /* CONFIG_EVENT_PROFILE */
1412
1413
1414static __kprobes
1415int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1416{
1417 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1418
1419 if (tp->flags & TP_FLAG_TRACE)
1420 kprobe_trace_func(kp, regs);
1421#ifdef CONFIG_EVENT_PROFILE
1422 if (tp->flags & TP_FLAG_PROFILE)
1423 kprobe_profile_func(kp, regs);
1424#endif /* CONFIG_EVENT_PROFILE */
 425 return 0; /* We don't tweak the kernel, so just return 0 */
1426}
1427
1428static __kprobes
1429int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1430{
1431 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1432
1433 if (tp->flags & TP_FLAG_TRACE)
1434 kretprobe_trace_func(ri, regs);
1435#ifdef CONFIG_EVENT_PROFILE
1436 if (tp->flags & TP_FLAG_PROFILE)
1437 kretprobe_profile_func(ri, regs);
1438#endif /* CONFIG_EVENT_PROFILE */
 1439 return 0; /* We don't tweak the kernel, so just return 0 */
1440}
1441
1442static int register_probe_event(struct trace_probe *tp)
1443{
1444 struct ftrace_event_call *call = &tp->call;
1445 int ret;
1446
1447 /* Initialize ftrace_event_call */
1448 if (probe_is_return(tp)) {
1449 tp->event.trace = print_kretprobe_event;
1450 call->raw_init = probe_event_raw_init;
1451 call->show_format = kretprobe_event_show_format;
1452 call->define_fields = kretprobe_event_define_fields;
1453 } else {
1454 tp->event.trace = print_kprobe_event;
1455 call->raw_init = probe_event_raw_init;
1456 call->show_format = kprobe_event_show_format;
1457 call->define_fields = kprobe_event_define_fields;
1458 }
1459 call->event = &tp->event;
1460 call->id = register_ftrace_event(&tp->event);
1461 if (!call->id)
1462 return -ENODEV;
1463 call->enabled = 0;
1464 call->regfunc = probe_event_enable;
1465 call->unregfunc = probe_event_disable;
1466
1467#ifdef CONFIG_EVENT_PROFILE
1468 call->profile_enable = probe_profile_enable;
1469 call->profile_disable = probe_profile_disable;
1470#endif
1471 call->data = tp;
1472 ret = trace_add_event_call(call);
1473 if (ret) {
1474 pr_info("Failed to register kprobe event: %s\n", call->name);
1475 unregister_ftrace_event(&tp->event);
1476 }
1477 return ret;
1478}
1479
1480static void unregister_probe_event(struct trace_probe *tp)
1481{
1482 /* tp->event is unregistered in trace_remove_event_call() */
1483 trace_remove_event_call(&tp->call);
1484}
1485
 1486/* Make a debugfs interface for controlling probe points */
1487static __init int init_kprobe_trace(void)
1488{
1489 struct dentry *d_tracer;
1490 struct dentry *entry;
1491
1492 d_tracer = tracing_init_dentry();
1493 if (!d_tracer)
1494 return 0;
1495
1496 entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1497 NULL, &kprobe_events_ops);
1498
1499 /* Event list interface */
1500 if (!entry)
1501 pr_warning("Could not create debugfs "
1502 "'kprobe_events' entry\n");
1503
1504 /* Profile interface */
1505 entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1506 NULL, &kprobe_profile_ops);
1507
1508 if (!entry)
1509 pr_warning("Could not create debugfs "
1510 "'kprobe_profile' entry\n");
1511 return 0;
1512}
1513fs_initcall(init_kprobe_trace);
1514
1515
1516#ifdef CONFIG_FTRACE_STARTUP_TEST
1517
1518static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1519 int a4, int a5, int a6)
1520{
1521 return a1 + a2 + a3 + a4 + a5 + a6;
1522}
1523
1524static __init int kprobe_trace_self_tests_init(void)
1525{
1526 int ret;
1527 int (*target)(int, int, int, int, int, int);
1528
1529 target = kprobe_trace_selftest_target;
1530
1531 pr_info("Testing kprobe tracing: ");
1532
1533 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1534 "$arg1 $arg2 $arg3 $arg4 $stack $stack0");
1535 if (WARN_ON_ONCE(ret))
1536 pr_warning("error enabling function entry\n");
1537
1538 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1539 "$retval");
1540 if (WARN_ON_ONCE(ret))
1541 pr_warning("error enabling function return\n");
1542
1543 ret = target(1, 2, 3, 4, 5, 6);
1544
1545 cleanup_all_probes();
1546
1547 pr_cont("OK\n");
1548 return 0;
1549}
1550
1551late_initcall(kprobe_trace_self_tests_init);
1552
1553#endif
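
The kprobe_events file created above is the whole user interface: probes_write() consumes one definition per line and create_trace_probe() parses it. A minimal userspace sketch of driving that interface (assuming debugfs is mounted at /sys/kernel/debug; do_sys_open and the event names are only illustrative):

	#include <stdio.h>

	int main(void)
	{
		/* probes_write() parses one probe definition per line */
		FILE *f = fopen("/sys/kernel/debug/tracing/kprobe_events", "a");

		if (!f) {
			perror("kprobe_events");
			return 1;
		}
		fprintf(f, "p:kprobes/myopen do_sys_open $arg1 $arg2\n");
		fprintf(f, "r:kprobes/myopen_ret do_sys_open $retval\n");
		return fclose(f) ? 1 : 0;
	}

Reading the same file back goes through probes_seq_show(), which reconstructs each definition via probe_arg_string().
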
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 000000000000..94103cdcf9d8
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,519 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/fs.h>
27
28#include "trace_output.h"
29#include "trace.h"
30
31#include <linux/hw_breakpoint.h>
32#include <asm/hw_breakpoint.h>
33
34#include <asm/atomic.h>
35
36/*
 37 * For now, restrict the number of symbols traced simultaneously to the
 38 * number of available hardware breakpoint registers.
39 */
40#define KSYM_TRACER_MAX HBP_NUM
41
42#define KSYM_TRACER_OP_LEN 3 /* rw- */
43
44struct trace_ksym {
45 struct perf_event **ksym_hbp;
46 struct perf_event_attr attr;
47#ifdef CONFIG_PROFILE_KSYM_TRACER
48 atomic64_t counter;
49#endif
50 struct hlist_node ksym_hlist;
51};
52
53static struct trace_array *ksym_trace_array;
54
55static unsigned int ksym_filter_entry_count;
56static unsigned int ksym_tracing_enabled;
57
58static HLIST_HEAD(ksym_filter_head);
59
60static DEFINE_MUTEX(ksym_tracer_mutex);
61
62#ifdef CONFIG_PROFILE_KSYM_TRACER
63
64#define MAX_UL_INT 0xffffffff
65
66void ksym_collect_stats(unsigned long hbp_hit_addr)
67{
68 struct hlist_node *node;
69 struct trace_ksym *entry;
70
71 rcu_read_lock();
72 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
73 if (entry->attr.bp_addr == hbp_hit_addr) {
74 atomic64_inc(&entry->counter);
75 break;
76 }
77 }
78 rcu_read_unlock();
79}
80#endif /* CONFIG_PROFILE_KSYM_TRACER */
81
82void ksym_hbp_handler(struct perf_event *hbp, int nmi,
83 struct perf_sample_data *data,
84 struct pt_regs *regs)
85{
86 struct ring_buffer_event *event;
87 struct ksym_trace_entry *entry;
88 struct ring_buffer *buffer;
89 int pc;
90
91 if (!ksym_tracing_enabled)
92 return;
93
94 buffer = ksym_trace_array->buffer;
95
96 pc = preempt_count();
97
98 event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
99 sizeof(*entry), 0, pc);
100 if (!event)
101 return;
102
103 entry = ring_buffer_event_data(event);
104 entry->ip = instruction_pointer(regs);
105 entry->type = hw_breakpoint_type(hbp);
106 entry->addr = hw_breakpoint_addr(hbp);
107 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
108
109#ifdef CONFIG_PROFILE_KSYM_TRACER
110 ksym_collect_stats(hw_breakpoint_addr(hbp));
111#endif /* CONFIG_PROFILE_KSYM_TRACER */
112
113 trace_buffer_unlock_commit(buffer, event, 0, pc);
114}
115
116/* Valid access types are represented as
117 *
118 * rw- : Set Read/Write Access Breakpoint
119 * -w- : Set Write Access Breakpoint
120 * --- : Clear Breakpoints
 121 * --x : Set Execution Breakpoint (not available yet)
122 *
123 */
124static int ksym_trace_get_access_type(char *str)
125{
126 int access = 0;
127
128 if (str[0] == 'r')
129 access |= HW_BREAKPOINT_R;
130
131 if (str[1] == 'w')
132 access |= HW_BREAKPOINT_W;
133
134 if (str[2] == 'x')
135 access |= HW_BREAKPOINT_X;
136
137 switch (access) {
138 case HW_BREAKPOINT_R:
139 case HW_BREAKPOINT_W:
140 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
141 return access;
142 default:
143 return -EINVAL;
144 }
145}
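/*
 * Mapping examples (sketch): "r--" yields HW_BREAKPOINT_R, "-w-" yields
 * HW_BREAKPOINT_W, and "rw-" yields HW_BREAKPOINT_R | HW_BREAKPOINT_W;
 * any other combination, including the not-yet-supported "--x", falls
 * through to -EINVAL.
 */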
146
147/*
 148 * A request can be malformed in several ways, and we attempt to catch
 149 * all of them. Some of the rules are enumerated below:
 150 * 1. Kernel symbols containing ':' are not allowed, since ':' is used as
 151 * the delimiter; i.e. multiple ':' symbols are disallowed. Possible uses
 152 * are of the form <module>:<ksym_name>:<op>.
153 * 2. No delimiter symbol ':' in the input string
154 * 3. Spurious operator symbols or symbols not in their respective positions
155 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
156 * 5. Kernel symbol not a part of /proc/kallsyms
157 * 6. Duplicate requests
158 */
159static int parse_ksym_trace_str(char *input_string, char **ksymname,
160 unsigned long *addr)
161{
162 int ret;
163
164 *ksymname = strsep(&input_string, ":");
165 *addr = kallsyms_lookup_name(*ksymname);
166
167 /* Check for malformed request: (2), (1) and (5) */
168 if ((!input_string) ||
169 (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
170 (*addr == 0))
 171 return -EINVAL;
172
173 ret = ksym_trace_get_access_type(input_string);
174
175 return ret;
176}
177
178int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
179{
180 struct trace_ksym *entry;
181 int ret = -ENOMEM;
182
183 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
 184 printk(KERN_ERR "ksym_tracer: Maximum limit (%d) reached. No"
185 " new requests for tracing can be accepted now.\n",
186 KSYM_TRACER_MAX);
187 return -ENOSPC;
188 }
189
190 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
191 if (!entry)
192 return -ENOMEM;
193
194 hw_breakpoint_init(&entry->attr);
195
196 entry->attr.bp_type = op;
197 entry->attr.bp_addr = addr;
198 entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
199
200 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
201 ksym_hbp_handler);
202
203 if (IS_ERR(entry->ksym_hbp)) {
204 ret = PTR_ERR(entry->ksym_hbp);
205 printk(KERN_INFO "ksym_tracer request failed. Try again"
206 " later!!\n");
207 goto err;
208 }
209
210 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
211 ksym_filter_entry_count++;
212
213 return 0;
214
215err:
216 kfree(entry);
217
218 return ret;
219}
220
221static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
222 size_t count, loff_t *ppos)
223{
224 struct trace_ksym *entry;
225 struct hlist_node *node;
226 struct trace_seq *s;
227 ssize_t cnt = 0;
228 int ret;
229
230 s = kmalloc(sizeof(*s), GFP_KERNEL);
231 if (!s)
232 return -ENOMEM;
233 trace_seq_init(s);
234
235 mutex_lock(&ksym_tracer_mutex);
236
237 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
238 ret = trace_seq_printf(s, "%pS:",
239 (void *)(unsigned long)entry->attr.bp_addr);
240 if (entry->attr.bp_type == HW_BREAKPOINT_R)
241 ret = trace_seq_puts(s, "r--\n");
242 else if (entry->attr.bp_type == HW_BREAKPOINT_W)
243 ret = trace_seq_puts(s, "-w-\n");
244 else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
245 ret = trace_seq_puts(s, "rw-\n");
246 WARN_ON_ONCE(!ret);
247 }
248
249 cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
250
251 mutex_unlock(&ksym_tracer_mutex);
252
253 kfree(s);
254
255 return cnt;
256}
257
258static void __ksym_trace_reset(void)
259{
260 struct trace_ksym *entry;
261 struct hlist_node *node, *node1;
262
263 mutex_lock(&ksym_tracer_mutex);
264 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
265 ksym_hlist) {
266 unregister_wide_hw_breakpoint(entry->ksym_hbp);
267 ksym_filter_entry_count--;
268 hlist_del_rcu(&(entry->ksym_hlist));
269 synchronize_rcu();
270 kfree(entry);
271 }
272 mutex_unlock(&ksym_tracer_mutex);
273}
274
275static ssize_t ksym_trace_filter_write(struct file *file,
276 const char __user *buffer,
277 size_t count, loff_t *ppos)
278{
279 struct trace_ksym *entry;
280 struct hlist_node *node;
281 char *buf, *input_string, *ksymname = NULL;
282 unsigned long ksym_addr = 0;
283 int ret, op, changed = 0;
284
285 buf = kzalloc(count + 1, GFP_KERNEL);
286 if (!buf)
287 return -ENOMEM;
288
289 ret = -EFAULT;
290 if (copy_from_user(buf, buffer, count))
291 goto out;
292
293 buf[count] = '\0';
294 input_string = strstrip(buf);
295
296 /*
297 * Clear all breakpoints if:
298 * 1: echo > ksym_trace_filter
299 * 2: echo 0 > ksym_trace_filter
300 * 3: echo "*:---" > ksym_trace_filter
301 */
302 if (!input_string[0] || !strcmp(input_string, "0") ||
303 !strcmp(input_string, "*:---")) {
304 __ksym_trace_reset();
305 ret = 0;
306 goto out;
307 }
308
309 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
310 if (ret < 0)
311 goto out;
312
313 mutex_lock(&ksym_tracer_mutex);
314
315 ret = -EINVAL;
316 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
317 if (entry->attr.bp_addr == ksym_addr) {
318 /* Check for malformed request: (6) */
319 if (entry->attr.bp_type != op)
320 changed = 1;
321 else
322 goto out_unlock;
323 break;
324 }
325 }
326 if (changed) {
327 unregister_wide_hw_breakpoint(entry->ksym_hbp);
328 entry->attr.bp_type = op;
329 ret = 0;
330 if (op > 0) {
331 entry->ksym_hbp =
332 register_wide_hw_breakpoint(&entry->attr,
333 ksym_hbp_handler);
334 if (IS_ERR(entry->ksym_hbp))
335 ret = PTR_ERR(entry->ksym_hbp);
336 else
337 goto out_unlock;
338 }
339 /* Error or "symbol:---" case: drop it */
340 ksym_filter_entry_count--;
341 hlist_del_rcu(&(entry->ksym_hlist));
342 synchronize_rcu();
343 kfree(entry);
344 goto out_unlock;
345 } else {
346 /* Check for malformed request: (4) */
347 if (op)
348 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
349 }
350out_unlock:
351 mutex_unlock(&ksym_tracer_mutex);
352out:
353 kfree(buf);
354 return !ret ? count : ret;
355}
356
357static const struct file_operations ksym_tracing_fops = {
358 .open = tracing_open_generic,
359 .read = ksym_trace_filter_read,
360 .write = ksym_trace_filter_write,
361};
362
363static void ksym_trace_reset(struct trace_array *tr)
364{
365 ksym_tracing_enabled = 0;
366 __ksym_trace_reset();
367}
368
369static int ksym_trace_init(struct trace_array *tr)
370{
371 int cpu, ret = 0;
372
373 for_each_online_cpu(cpu)
374 tracing_reset(tr, cpu);
375 ksym_tracing_enabled = 1;
376 ksym_trace_array = tr;
377
378 return ret;
379}
380
381static void ksym_trace_print_header(struct seq_file *m)
382{
383 seq_puts(m,
384 "# TASK-PID CPU# Symbol "
385 "Type Function\n");
386 seq_puts(m,
387 "# | | | "
388 " | |\n");
389}
390
391static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
392{
393 struct trace_entry *entry = iter->ent;
394 struct trace_seq *s = &iter->seq;
395 struct ksym_trace_entry *field;
396 char str[KSYM_SYMBOL_LEN];
397 int ret;
398
399 if (entry->type != TRACE_KSYM)
400 return TRACE_TYPE_UNHANDLED;
401
402 trace_assign_type(field, entry);
403
404 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
405 entry->pid, iter->cpu, (char *)field->addr);
406 if (!ret)
407 return TRACE_TYPE_PARTIAL_LINE;
408
409 switch (field->type) {
410 case HW_BREAKPOINT_R:
411 ret = trace_seq_printf(s, " R ");
412 break;
413 case HW_BREAKPOINT_W:
414 ret = trace_seq_printf(s, " W ");
415 break;
416 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
417 ret = trace_seq_printf(s, " RW ");
418 break;
419 default:
420 return TRACE_TYPE_PARTIAL_LINE;
421 }
422
423 if (!ret)
424 return TRACE_TYPE_PARTIAL_LINE;
425
426 sprint_symbol(str, field->ip);
427 ret = trace_seq_printf(s, "%s\n", str);
428 if (!ret)
429 return TRACE_TYPE_PARTIAL_LINE;
430
431 return TRACE_TYPE_HANDLED;
432}
433
434struct tracer ksym_tracer __read_mostly =
435{
436 .name = "ksym_tracer",
437 .init = ksym_trace_init,
438 .reset = ksym_trace_reset,
439#ifdef CONFIG_FTRACE_SELFTEST
440 .selftest = trace_selftest_startup_ksym,
441#endif
442 .print_header = ksym_trace_print_header,
443 .print_line = ksym_trace_output
444};
445
446#ifdef CONFIG_PROFILE_KSYM_TRACER
447static int ksym_profile_show(struct seq_file *m, void *v)
448{
449 struct hlist_node *node;
450 struct trace_ksym *entry;
451 int access_type = 0;
452 char fn_name[KSYM_NAME_LEN];
453
454 seq_puts(m, " Access Type ");
455 seq_puts(m, " Symbol Counter\n");
456 seq_puts(m, " ----------- ");
457 seq_puts(m, " ------ -------\n");
458
459 rcu_read_lock();
460 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
461
462 access_type = entry->attr.bp_type;
463
464 switch (access_type) {
465 case HW_BREAKPOINT_R:
466 seq_puts(m, " R ");
467 break;
468 case HW_BREAKPOINT_W:
469 seq_puts(m, " W ");
470 break;
471 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
472 seq_puts(m, " RW ");
473 break;
474 default:
475 seq_puts(m, " NA ");
476 }
477
478 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
479 seq_printf(m, " %-36s", fn_name);
480 else
481 seq_printf(m, " %-36s", "<NA>");
482 seq_printf(m, " %15llu\n",
483 (unsigned long long)atomic64_read(&entry->counter));
484 }
485 rcu_read_unlock();
486
487 return 0;
488}
489
490static int ksym_profile_open(struct inode *node, struct file *file)
491{
492 return single_open(file, ksym_profile_show, NULL);
493}
494
495static const struct file_operations ksym_profile_fops = {
496 .open = ksym_profile_open,
497 .read = seq_read,
498 .llseek = seq_lseek,
499 .release = single_release,
500};
501#endif /* CONFIG_PROFILE_KSYM_TRACER */
502
503__init static int init_ksym_trace(void)
504{
505 struct dentry *d_tracer;
506
507 d_tracer = tracing_init_dentry();
508
509 trace_create_file("ksym_trace_filter", 0644, d_tracer,
510 NULL, &ksym_tracing_fops);
511
512#ifdef CONFIG_PROFILE_KSYM_TRACER
513 trace_create_file("ksym_profile", 0444, d_tracer,
514 NULL, &ksym_profile_fops);
515#endif
516
517 return register_tracer(&ksym_tracer);
518}
519device_initcall(init_ksym_trace);
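
The ksym_trace_filter file is this tracer's only control knob: each write is parsed by parse_ksym_trace_str() as <ksym_name>:<op>. A minimal userspace sketch (assuming the usual debugfs mount point; pid_max is just an example of a kernel data symbol that appears in kallsyms):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/kernel/debug/tracing/ksym_trace_filter", "w");

		if (!f) {
			perror("ksym_trace_filter");
			return 1;
		}
		/* <ksym_name>:<op>; "rw-" breaks on both reads and writes */
		fprintf(f, "pid_max:rw-\n");
		return fclose(f) ? 1 : 0;
	}

Writing an empty string, "0", or "*:---" to the same file clears all breakpoints, per the comment in ksym_trace_filter_write().
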
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index f572f44c6e1e..8e46b3323cdc 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -23,13 +23,21 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
23 23
24static int next_event_type = __TRACE_LAST_TYPE + 1; 24static int next_event_type = __TRACE_LAST_TYPE + 1;
25 25
26void trace_print_seq(struct seq_file *m, struct trace_seq *s) 26int trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29 int ret;
30
31 ret = seq_write(m, s->buffer, len);
29 32
30 seq_write(m, s->buffer, len); 33 /*
34 * Only reset this buffer if we successfully wrote to the
35 * seq_file buffer.
36 */
37 if (!ret)
38 trace_seq_init(s);
31 39
32 trace_seq_init(s); 40 return ret;
33} 41}
34 42
35enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) 43enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
@@ -69,6 +77,9 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
69 * @s: trace sequence descriptor 77 * @s: trace sequence descriptor
70 * @fmt: printf format string 78 * @fmt: printf format string
71 * 79 *
 80 * Returns 0 if the trace would overflow the buffer's free
 81 * space, and 1 otherwise.
82 *
72 * The tracer may use either sequence operations or its own 83 * The tracer may use either sequence operations or its own
73 * copy to user routines. To simplify formating of a trace 84 * copy to user routines. To simplify formating of a trace
74 * trace_seq_printf is used to store strings into a special 85 * trace_seq_printf is used to store strings into a special
@@ -82,7 +93,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
82 va_list ap; 93 va_list ap;
83 int ret; 94 int ret;
84 95
85 if (!len) 96 if (s->full || !len)
86 return 0; 97 return 0;
87 98
88 va_start(ap, fmt); 99 va_start(ap, fmt);
@@ -90,12 +101,14 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
90 va_end(ap); 101 va_end(ap);
91 102
92 /* If we can't write it all, don't bother writing anything */ 103 /* If we can't write it all, don't bother writing anything */
93 if (ret >= len) 104 if (ret >= len) {
105 s->full = 1;
94 return 0; 106 return 0;
107 }
95 108
96 s->len += ret; 109 s->len += ret;
97 110
98 return len; 111 return 1;
99} 112}
100EXPORT_SYMBOL_GPL(trace_seq_printf); 113EXPORT_SYMBOL_GPL(trace_seq_printf);
101 114
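Every trace_seq_* writer in the hunks that follow gains the same guard: a new s->full flag, latched the first time a write does not fit, which short-circuits all later writes so a page never ends up holding a truncated record. The latch in isolation (struct sticky_buf is hypothetical; the logic mirrors the trace_seq_puts() hunk further down):

/*
 * Sticky-overflow sketch: the first write that does not fit sets
 * ->full; every later writer sees the flag and bails out at once.
 */
#include <linux/string.h>

struct sticky_buf {
        char    buf[PAGE_SIZE];
        int     len;
        int     full;           /* latched on first overflow */
};

static int sticky_puts(struct sticky_buf *s, const char *str)
{
        int len = strlen(str);

        if (s->full)
                return 0;

        if (len > (int)(PAGE_SIZE - 1) - s->len) {
                s->full = 1;
                return 0;
        }

        memcpy(s->buf + s->len, str, len);
        s->len += len;
        return 1;
}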
@@ -116,14 +129,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
116 int len = (PAGE_SIZE - 1) - s->len; 129 int len = (PAGE_SIZE - 1) - s->len;
117 int ret; 130 int ret;
118 131
119 if (!len) 132 if (s->full || !len)
120 return 0; 133 return 0;
121 134
122 ret = vsnprintf(s->buffer + s->len, len, fmt, args); 135 ret = vsnprintf(s->buffer + s->len, len, fmt, args);
123 136
124 /* If we can't write it all, don't bother writing anything */ 137 /* If we can't write it all, don't bother writing anything */
125 if (ret >= len) 138 if (ret >= len) {
139 s->full = 1;
126 return 0; 140 return 0;
141 }
127 142
128 s->len += ret; 143 s->len += ret;
129 144
@@ -136,14 +151,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
136 int len = (PAGE_SIZE - 1) - s->len; 151 int len = (PAGE_SIZE - 1) - s->len;
137 int ret; 152 int ret;
138 153
139 if (!len) 154 if (s->full || !len)
140 return 0; 155 return 0;
141 156
142 ret = bstr_printf(s->buffer + s->len, len, fmt, binary); 157 ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
143 158
144 /* If we can't write it all, don't bother writing anything */ 159 /* If we can't write it all, don't bother writing anything */
145 if (ret >= len) 160 if (ret >= len) {
161 s->full = 1;
146 return 0; 162 return 0;
163 }
147 164
148 s->len += ret; 165 s->len += ret;
149 166
@@ -164,9 +181,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
164{ 181{
165 int len = strlen(str); 182 int len = strlen(str);
166 183
167 if (len > ((PAGE_SIZE - 1) - s->len)) 184 if (s->full)
168 return 0; 185 return 0;
169 186
187 if (len > ((PAGE_SIZE - 1) - s->len)) {
188 s->full = 1;
189 return 0;
190 }
191
170 memcpy(s->buffer + s->len, str, len); 192 memcpy(s->buffer + s->len, str, len);
171 s->len += len; 193 s->len += len;
172 194
@@ -175,9 +197,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
175 197
176int trace_seq_putc(struct trace_seq *s, unsigned char c) 198int trace_seq_putc(struct trace_seq *s, unsigned char c)
177{ 199{
178 if (s->len >= (PAGE_SIZE - 1)) 200 if (s->full)
179 return 0; 201 return 0;
180 202
203 if (s->len >= (PAGE_SIZE - 1)) {
204 s->full = 1;
205 return 0;
206 }
207
181 s->buffer[s->len++] = c; 208 s->buffer[s->len++] = c;
182 209
183 return 1; 210 return 1;
@@ -185,8 +212,13 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
185 212
186int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) 213int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
187{ 214{
188 if (len > ((PAGE_SIZE - 1) - s->len)) 215 if (s->full)
216 return 0;
217
218 if (len > ((PAGE_SIZE - 1) - s->len)) {
219 s->full = 1;
189 return 0; 220 return 0;
221 }
190 222
191 memcpy(s->buffer + s->len, mem, len); 223 memcpy(s->buffer + s->len, mem, len);
192 s->len += len; 224 s->len += len;
@@ -200,6 +232,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
200 const unsigned char *data = mem; 232 const unsigned char *data = mem;
201 int i, j; 233 int i, j;
202 234
235 if (s->full)
236 return 0;
237
203#ifdef __BIG_ENDIAN 238#ifdef __BIG_ENDIAN
204 for (i = 0, j = 0; i < len; i++) { 239 for (i = 0, j = 0; i < len; i++) {
205#else 240#else
@@ -217,8 +252,13 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
217{ 252{
218 void *ret; 253 void *ret;
219 254
220 if (len > ((PAGE_SIZE - 1) - s->len)) 255 if (s->full)
256 return 0;
257
258 if (len > ((PAGE_SIZE - 1) - s->len)) {
259 s->full = 1;
221 return NULL; 260 return NULL;
261 }
222 262
223 ret = s->buffer + s->len; 263 ret = s->buffer + s->len;
224 s->len += len; 264 s->len += len;
@@ -230,8 +270,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
230{ 270{
231 unsigned char *p; 271 unsigned char *p;
232 272
233 if (s->len >= (PAGE_SIZE - 1)) 273 if (s->full)
274 return 0;
275
276 if (s->len >= (PAGE_SIZE - 1)) {
277 s->full = 1;
234 return 0; 278 return 0;
279 }
280
235 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); 281 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
236 if (!IS_ERR(p)) { 282 if (!IS_ERR(p)) {
237 p = mangle_path(s->buffer + s->len, p, "\n"); 283 p = mangle_path(s->buffer + s->len, p, "\n");
@@ -244,6 +290,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
244 return 1; 290 return 1;
245 } 291 }
246 292
293 s->full = 1;
247 return 0; 294 return 0;
248} 295}
249 296
@@ -370,6 +417,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
370 unsigned long vmstart = 0; 417 unsigned long vmstart = 0;
371 int ret = 1; 418 int ret = 1;
372 419
420 if (s->full)
421 return 0;
422
373 if (mm) { 423 if (mm) {
374 const struct vm_area_struct *vma; 424 const struct vm_area_struct *vma;
375 425
@@ -486,16 +536,18 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
486 hardirq ? 'h' : softirq ? 's' : '.')) 536 hardirq ? 'h' : softirq ? 's' : '.'))
487 return 0; 537 return 0;
488 538
489 if (entry->lock_depth < 0) 539 if (entry->preempt_count)
490 ret = trace_seq_putc(s, '.'); 540 ret = trace_seq_printf(s, "%x", entry->preempt_count);
491 else 541 else
492 ret = trace_seq_printf(s, "%d", entry->lock_depth); 542 ret = trace_seq_putc(s, '.');
543
493 if (!ret) 544 if (!ret)
494 return 0; 545 return 0;
495 546
496 if (entry->preempt_count) 547 if (entry->lock_depth < 0)
497 return trace_seq_printf(s, "%x", entry->preempt_count); 548 return trace_seq_putc(s, '.');
498 return trace_seq_putc(s, '.'); 549
550 return trace_seq_printf(s, "%d", entry->lock_depth);
499} 551}
500 552
501static int 553static int
@@ -883,7 +935,7 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
883 trace_assign_type(field, iter->ent); 935 trace_assign_type(field, iter->ent);
884 936
885 if (!S) 937 if (!S)
886 task_state_char(field->prev_state); 938 S = task_state_char(field->prev_state);
887 T = task_state_char(field->next_state); 939 T = task_state_char(field->next_state);
888 if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", 940 if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
889 field->prev_pid, 941 field->prev_pid,
@@ -918,7 +970,7 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
918 trace_assign_type(field, iter->ent); 970 trace_assign_type(field, iter->ent);
919 971
920 if (!S) 972 if (!S)
921 task_state_char(field->prev_state); 973 S = task_state_char(field->prev_state);
922 T = task_state_char(field->next_state); 974 T = task_state_char(field->next_state);
923 975
924 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); 976 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
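The final two hunks in this file are a genuine bug fix rather than a refactor: trace_ctxwake_raw() and trace_ctxwake_hex() called task_state_char() but discarded the result, so S silently kept its caller-supplied value. Marking such pure helpers __must_check lets the compiler catch the missing assignment; a hedged illustration with hypothetical names:

/*
 * The bug class fixed above: a pure helper whose result must be
 * consumed. With __must_check, the broken "state_char(prev_state);"
 * statement draws a compiler warning.
 */
static __must_check char state_char(long state)
{
        return state ? 'S' : 'R';       /* toy mapping, not the real table */
}

static char demo(long prev_state, char S)
{
        if (!S)
                S = state_char(prev_state);     /* fixed: result assigned */

        return S;
}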
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 26185d727676..0271742abb8d 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -28,8 +28,8 @@ static int wakeup_current_cpu;
28static unsigned wakeup_prio = -1; 28static unsigned wakeup_prio = -1;
29static int wakeup_rt; 29static int wakeup_rt;
30 30
31static raw_spinlock_t wakeup_lock = 31static arch_spinlock_t wakeup_lock =
32 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 33
34static void __wakeup_reset(struct trace_array *tr); 34static void __wakeup_reset(struct trace_array *tr);
35 35
@@ -143,7 +143,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
143 goto out; 143 goto out;
144 144
145 local_irq_save(flags); 145 local_irq_save(flags);
146 __raw_spin_lock(&wakeup_lock); 146 arch_spin_lock(&wakeup_lock);
147 147
148 /* We could race with grabbing wakeup_lock */ 148 /* We could race with grabbing wakeup_lock */
149 if (unlikely(!tracer_enabled || next != wakeup_task)) 149 if (unlikely(!tracer_enabled || next != wakeup_task))
@@ -169,7 +169,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
169 169
170out_unlock: 170out_unlock:
171 __wakeup_reset(wakeup_trace); 171 __wakeup_reset(wakeup_trace);
172 __raw_spin_unlock(&wakeup_lock); 172 arch_spin_unlock(&wakeup_lock);
173 local_irq_restore(flags); 173 local_irq_restore(flags);
174out: 174out:
175 atomic_dec(&wakeup_trace->data[cpu]->disabled); 175 atomic_dec(&wakeup_trace->data[cpu]->disabled);
@@ -193,9 +193,9 @@ static void wakeup_reset(struct trace_array *tr)
193 tracing_reset_online_cpus(tr); 193 tracing_reset_online_cpus(tr);
194 194
195 local_irq_save(flags); 195 local_irq_save(flags);
196 __raw_spin_lock(&wakeup_lock); 196 arch_spin_lock(&wakeup_lock);
197 __wakeup_reset(tr); 197 __wakeup_reset(tr);
198 __raw_spin_unlock(&wakeup_lock); 198 arch_spin_unlock(&wakeup_lock);
199 local_irq_restore(flags); 199 local_irq_restore(flags);
200} 200}
201 201
@@ -225,7 +225,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
225 goto out; 225 goto out;
226 226
227 /* interrupts should be off from try_to_wake_up */ 227 /* interrupts should be off from try_to_wake_up */
228 __raw_spin_lock(&wakeup_lock); 228 arch_spin_lock(&wakeup_lock);
229 229
230 /* check for races. */ 230 /* check for races. */
231 if (!tracer_enabled || p->prio >= wakeup_prio) 231 if (!tracer_enabled || p->prio >= wakeup_prio)
@@ -255,7 +255,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
256 256
257out_locked: 257out_locked:
258 __raw_spin_unlock(&wakeup_lock); 258 arch_spin_unlock(&wakeup_lock);
259out: 259out:
260 atomic_dec(&wakeup_trace->data[cpu]->disabled); 260 atomic_dec(&wakeup_trace->data[cpu]->disabled);
261} 261}
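The wakeup tracer's conversion is mechanical, but it belongs to a tree-wide rename carried by this merge: the lowest-level raw_spinlock_t/__raw_spin_*() primitives become arch_spinlock_t/arch_spin_*(), freeing the raw_* names for the newer non-sleeping spinlock layer. The resulting usage pattern, exactly as in the hunks above (demo names hypothetical):

/*
 * arch_spinlock_t usage after the rename. These locks sit below
 * lockdep and preemption accounting, so interrupts are disabled
 * explicitly around the critical section, as with wakeup_lock above.
 */
static arch_spinlock_t demo_lock =
        (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

static void demo_critical_section(void)
{
        unsigned long flags;

        local_irq_save(flags);
        arch_spin_lock(&demo_lock);
        /* ... work that must not race with other CPUs ... */
        arch_spin_unlock(&demo_lock);
        local_irq_restore(flags);
}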
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index d2cdbabb4ead..280fea470d67 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
17 case TRACE_GRAPH_ENT: 17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 18 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES: 19 case TRACE_HW_BRANCHES:
20 case TRACE_KSYM:
20 return 1; 21 return 1;
21 } 22 }
22 return 0; 23 return 0;
@@ -66,7 +67,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
66 67
67 /* Don't allow flipping of max traces now */ 68 /* Don't allow flipping of max traces now */
68 local_irq_save(flags); 69 local_irq_save(flags);
69 __raw_spin_lock(&ftrace_max_lock); 70 arch_spin_lock(&ftrace_max_lock);
70 71
71 cnt = ring_buffer_entries(tr->buffer); 72 cnt = ring_buffer_entries(tr->buffer);
72 73
@@ -84,7 +85,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
84 break; 85 break;
85 } 86 }
86 tracing_on(); 87 tracing_on();
87 __raw_spin_unlock(&ftrace_max_lock); 88 arch_spin_unlock(&ftrace_max_lock);
88 local_irq_restore(flags); 89 local_irq_restore(flags);
89 90
90 if (count) 91 if (count)
@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
808 return ret; 809 return ret;
809} 810}
810#endif /* CONFIG_HW_BRANCH_TRACER */ 811#endif /* CONFIG_HW_BRANCH_TRACER */
812
813#ifdef CONFIG_KSYM_TRACER
814static int ksym_selftest_dummy;
815
816int
817trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
818{
819 unsigned long count;
820 int ret;
821
822 /* start the tracing */
823 ret = tracer_init(trace, tr);
824 if (ret) {
825 warn_failed_init_tracer(trace, ret);
826 return ret;
827 }
828
829 ksym_selftest_dummy = 0;
830 /* Register the read-write tracing request */
831
832 ret = process_new_ksym_entry("ksym_selftest_dummy",
833 HW_BREAKPOINT_R | HW_BREAKPOINT_W,
834 (unsigned long)(&ksym_selftest_dummy));
835
836 if (ret < 0) {
837 printk(KERN_CONT "ksym_trace read-write startup test failed\n");
838 goto ret_path;
839 }
840 /* Perform a read and a write operation over the dummy variable to
841 * trigger the tracer
842 */
843 if (ksym_selftest_dummy == 0)
844 ksym_selftest_dummy++;
845
846 /* stop the tracing. */
847 tracing_stop();
848 /* check the trace buffer */
849 ret = trace_test_buffer(tr, &count);
850 trace->reset(tr);
851 tracing_start();
852
853 /* read & write operations - one each is performed on the dummy variable
854 * triggering two entries in the trace buffer
855 */
856 if (!ret && count != 2) {
857 printk(KERN_CONT "Ksym tracer startup test failed");
858 ret = -1;
859 }
860
861ret_path:
862 return ret;
863}
864#endif /* CONFIG_KSYM_TRACER */
865
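The new ksym selftest reuses the skeleton every startup test in this file follows: initialize the tracer, provoke a known number of events, freeze tracing, count what reached the ring buffer, then reset and resume. The shared shape, sketched with the event generation elided:

/*
 * Common shape of a tracer startup selftest, as used above; the ksym
 * test expects exactly two entries (one read, one write hit on
 * ksym_selftest_dummy). startup_selftest_shape() is illustrative.
 */
static int startup_selftest_shape(struct tracer *trace,
                                  struct trace_array *tr,
                                  unsigned long expected)
{
        unsigned long count;
        int ret;

        ret = tracer_init(trace, tr);           /* start the tracing */
        if (ret)
                return ret;

        /* ... trigger "expected" events here ... */

        tracing_stop();                         /* freeze the buffer */
        ret = trace_test_buffer(tr, &count);    /* validate and count */
        trace->reset(tr);
        tracing_start();

        if (!ret && count != expected)
                ret = -1;

        return ret;
}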
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 8504ac71e4e8..678a5120ee30 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -27,8 +27,8 @@ static struct stack_trace max_stack_trace = {
27}; 27};
28 28
29static unsigned long max_stack_size; 29static unsigned long max_stack_size;
30static raw_spinlock_t max_stack_lock = 30static arch_spinlock_t max_stack_lock =
31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 31 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
32 32
33static int stack_trace_disabled __read_mostly; 33static int stack_trace_disabled __read_mostly;
34static DEFINE_PER_CPU(int, trace_active); 34static DEFINE_PER_CPU(int, trace_active);
@@ -54,7 +54,7 @@ static inline void check_stack(void)
54 return; 54 return;
55 55
56 local_irq_save(flags); 56 local_irq_save(flags);
57 __raw_spin_lock(&max_stack_lock); 57 arch_spin_lock(&max_stack_lock);
58 58
59 /* a race could have already updated it */ 59 /* a race could have already updated it */
60 if (this_size <= max_stack_size) 60 if (this_size <= max_stack_size)
@@ -103,7 +103,7 @@ static inline void check_stack(void)
103 } 103 }
104 104
105 out: 105 out:
106 __raw_spin_unlock(&max_stack_lock); 106 arch_spin_unlock(&max_stack_lock);
107 local_irq_restore(flags); 107 local_irq_restore(flags);
108} 108}
109 109
@@ -171,9 +171,9 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 171 return ret;
172 172
173 local_irq_save(flags); 173 local_irq_save(flags);
174 __raw_spin_lock(&max_stack_lock); 174 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 175 *ptr = val;
176 __raw_spin_unlock(&max_stack_lock); 176 arch_spin_unlock(&max_stack_lock);
177 local_irq_restore(flags); 177 local_irq_restore(flags);
178 178
179 return count; 179 return count;
@@ -207,7 +207,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
207static void *t_start(struct seq_file *m, loff_t *pos) 207static void *t_start(struct seq_file *m, loff_t *pos)
208{ 208{
209 local_irq_disable(); 209 local_irq_disable();
210 __raw_spin_lock(&max_stack_lock); 210 arch_spin_lock(&max_stack_lock);
211 211
212 if (*pos == 0) 212 if (*pos == 0)
213 return SEQ_START_TOKEN; 213 return SEQ_START_TOKEN;
@@ -217,7 +217,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
217 217
218static void t_stop(struct seq_file *m, void *p) 218static void t_stop(struct seq_file *m, void *p)
219{ 219{
220 __raw_spin_unlock(&max_stack_lock); 220 arch_spin_unlock(&max_stack_lock);
221 local_irq_enable(); 221 local_irq_enable();
222} 222}
223 223
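Note how this file's conversion preserves the seq_file iterator contract: max_stack_lock is taken in t_start() and dropped in t_stop(), so it is held across the entire ->start/->next/->show/->stop cycle. The pairing in isolation (demo names hypothetical):

/*
 * seq_file iteration locking sketch: a lock taken in ->start stays
 * held for every ->next/->show until ->stop runs, mirroring
 * t_start()/t_stop() above.
 */
static arch_spinlock_t demo_iter_lock =
        (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

static void *demo_start(struct seq_file *m, loff_t *pos)
{
        local_irq_disable();
        arch_spin_lock(&demo_iter_lock);

        return *pos == 0 ? SEQ_START_TOKEN : NULL;
}

static void demo_stop(struct seq_file *m, void *p)
{
        arch_spin_unlock(&demo_iter_lock);
        local_irq_enable();
}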
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 9fbce6c9d2e1..75289f372dd2 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -14,6 +14,43 @@ static int sys_refcount_exit;
14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
16 16
17extern unsigned long __start_syscalls_metadata[];
18extern unsigned long __stop_syscalls_metadata[];
19
20static struct syscall_metadata **syscalls_metadata;
21
22static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
23{
24 struct syscall_metadata *start;
25 struct syscall_metadata *stop;
26 char str[KSYM_SYMBOL_LEN];
27
28
29 start = (struct syscall_metadata *)__start_syscalls_metadata;
30 stop = (struct syscall_metadata *)__stop_syscalls_metadata;
31 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
32
33 for ( ; start < stop; start++) {
34 /*
35 * Only compare after the "sys" prefix. Archs that use
 36 * syscall wrappers may have syscall symbol aliases prefixed
37 * with "SyS" instead of "sys", leading to an unwanted
38 * mismatch.
39 */
40 if (start->name && !strcmp(start->name + 3, str + 3))
41 return start;
42 }
43 return NULL;
44}
45
46static struct syscall_metadata *syscall_nr_to_meta(int nr)
47{
48 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
49 return NULL;
50
51 return syscalls_metadata[nr];
52}
53
17enum print_line_t 54enum print_line_t
18print_syscall_enter(struct trace_iterator *iter, int flags) 55print_syscall_enter(struct trace_iterator *iter, int flags)
19{ 56{
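find_syscall_meta() compares kallsyms names against the compiled-in metadata while skipping the first three characters, as the in-code comment explains, so a wrapper alias like "SyS_read" still matches "sys_read". The comparison as a worked one-liner (function name hypothetical):

/*
 * Prefix-skipping match from find_syscall_meta() above:
 * "sys_read" + 3 and "SyS_read" + 3 both yield "_read", so the
 * strcmp() succeeds despite the wrapper's "SyS" alias.
 */
static int syscall_names_match(const char *meta_name, const char *ksym_name)
{
        return !strcmp(meta_name + 3, ksym_name + 3);
}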
@@ -30,7 +67,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
30 if (!entry) 67 if (!entry)
31 goto end; 68 goto end;
32 69
33 if (entry->enter_id != ent->type) { 70 if (entry->enter_event->id != ent->type) {
34 WARN_ON_ONCE(1); 71 WARN_ON_ONCE(1);
35 goto end; 72 goto end;
36 } 73 }
@@ -85,7 +122,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
85 return TRACE_TYPE_HANDLED; 122 return TRACE_TYPE_HANDLED;
86 } 123 }
87 124
88 if (entry->exit_id != ent->type) { 125 if (entry->exit_event->id != ent->type) {
89 WARN_ON_ONCE(1); 126 WARN_ON_ONCE(1);
90 return TRACE_TYPE_UNHANDLED; 127 return TRACE_TYPE_UNHANDLED;
91 } 128 }
@@ -103,24 +140,19 @@ extern char *__bad_type_size(void);
103#define SYSCALL_FIELD(type, name) \ 140#define SYSCALL_FIELD(type, name) \
104 sizeof(type) != sizeof(trace.name) ? \ 141 sizeof(type) != sizeof(trace.name) ? \
105 __bad_type_size() : \ 142 __bad_type_size() : \
106 #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) 143 #type, #name, offsetof(typeof(trace), name), \
144 sizeof(trace.name), is_signed_type(type)
107 145
108int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 146int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
109{ 147{
110 int i; 148 int i;
111 int nr;
112 int ret; 149 int ret;
113 struct syscall_metadata *entry; 150 struct syscall_metadata *entry = call->data;
114 struct syscall_trace_enter trace; 151 struct syscall_trace_enter trace;
115 int offset = offsetof(struct syscall_trace_enter, args); 152 int offset = offsetof(struct syscall_trace_enter, args);
116 153
117 nr = syscall_name_to_nr(call->data); 154 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
118 entry = syscall_nr_to_meta(nr); 155 "\tsigned:%u;\n",
119
120 if (!entry)
121 return 0;
122
123 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
124 SYSCALL_FIELD(int, nr)); 156 SYSCALL_FIELD(int, nr));
125 if (!ret) 157 if (!ret)
126 return 0; 158 return 0;
@@ -130,8 +162,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
130 entry->args[i]); 162 entry->args[i]);
131 if (!ret) 163 if (!ret)
132 return 0; 164 return 0;
133 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset, 165 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
134 sizeof(unsigned long)); 166 "\tsigned:%u;\n", offset,
167 sizeof(unsigned long),
168 is_signed_type(unsigned long));
135 if (!ret) 169 if (!ret)
136 return 0; 170 return 0;
137 offset += sizeof(unsigned long); 171 offset += sizeof(unsigned long);
@@ -163,10 +197,12 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
163 struct syscall_trace_exit trace; 197 struct syscall_trace_exit trace;
164 198
165 ret = trace_seq_printf(s, 199 ret = trace_seq_printf(s,
166 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 200 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
167 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", 201 "\tsigned:%u;\n"
202 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
203 "\tsigned:%u;\n",
168 SYSCALL_FIELD(int, nr), 204 SYSCALL_FIELD(int, nr),
169 SYSCALL_FIELD(unsigned long, ret)); 205 SYSCALL_FIELD(long, ret));
170 if (!ret) 206 if (!ret)
171 return 0; 207 return 0;
172 208
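Both format callbacks now also emit a signed: attribute per field via the is_signed_type() helper, and the exit event's ret field changes type from unsigned long to long so that attribute comes out right. Judging from its usage here, the helper is a compile-time sign probe, roughly:

/*
 * Assumed shape of is_signed_type(): cast -1 to the type and test
 * whether it compares below zero.
 *
 *   is_signed_type(long)          -> 1  (the corrected 'ret' field)
 *   is_signed_type(unsigned long) -> 0  (why the old type misreported)
 */
#define is_signed_type(type)    (((type)(-1)) < 0)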
@@ -176,19 +212,12 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
176int syscall_enter_define_fields(struct ftrace_event_call *call) 212int syscall_enter_define_fields(struct ftrace_event_call *call)
177{ 213{
178 struct syscall_trace_enter trace; 214 struct syscall_trace_enter trace;
179 struct syscall_metadata *meta; 215 struct syscall_metadata *meta = call->data;
180 int ret; 216 int ret;
181 int nr;
182 int i; 217 int i;
183 int offset = offsetof(typeof(trace), args); 218 int offset = offsetof(typeof(trace), args);
184 219
185 nr = syscall_name_to_nr(call->data); 220 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
186 meta = syscall_nr_to_meta(nr);
187
188 if (!meta)
189 return 0;
190
191 ret = trace_define_common_fields(call);
192 if (ret) 221 if (ret)
193 return ret; 222 return ret;
194 223
@@ -208,11 +237,11 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
208 struct syscall_trace_exit trace; 237 struct syscall_trace_exit trace;
209 int ret; 238 int ret;
210 239
211 ret = trace_define_common_fields(call); 240 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
212 if (ret) 241 if (ret)
213 return ret; 242 return ret;
214 243
215 ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0, 244 ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
216 FILTER_OTHER); 245 FILTER_OTHER);
217 246
218 return ret; 247 return ret;
@@ -239,8 +268,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
239 268
240 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 269 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
241 270
242 event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, 271 event = trace_current_buffer_lock_reserve(&buffer,
243 size, 0, 0); 272 sys_data->enter_event->id, size, 0, 0);
244 if (!event) 273 if (!event)
245 return; 274 return;
246 275
@@ -271,8 +300,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
271 if (!sys_data) 300 if (!sys_data)
272 return; 301 return;
273 302
274 event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, 303 event = trace_current_buffer_lock_reserve(&buffer,
275 sizeof(*entry), 0, 0); 304 sys_data->exit_event->id, sizeof(*entry), 0, 0);
276 if (!event) 305 if (!event)
277 return; 306 return;
278 307
@@ -285,23 +314,18 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
285 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 314 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
286} 315}
287 316
288int reg_event_syscall_enter(void *ptr) 317int reg_event_syscall_enter(struct ftrace_event_call *call)
289{ 318{
290 int ret = 0; 319 int ret = 0;
291 int num; 320 int num;
292 char *name;
293 321
294 name = (char *)ptr; 322 num = ((struct syscall_metadata *)call->data)->syscall_nr;
295 num = syscall_name_to_nr(name);
296 if (num < 0 || num >= NR_syscalls) 323 if (num < 0 || num >= NR_syscalls)
297 return -ENOSYS; 324 return -ENOSYS;
298 mutex_lock(&syscall_trace_lock); 325 mutex_lock(&syscall_trace_lock);
299 if (!sys_refcount_enter) 326 if (!sys_refcount_enter)
300 ret = register_trace_sys_enter(ftrace_syscall_enter); 327 ret = register_trace_sys_enter(ftrace_syscall_enter);
301 if (ret) { 328 if (!ret) {
302 pr_info("event trace: Could not activate"
303 "syscall entry trace point");
304 } else {
305 set_bit(num, enabled_enter_syscalls); 329 set_bit(num, enabled_enter_syscalls);
306 sys_refcount_enter++; 330 sys_refcount_enter++;
307 } 331 }
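The registration helpers now take the ftrace_event_call itself and read the syscall number from its metadata; they also fix an inverted error check, registering the shared tracepoint probe only for the first user and updating the per-syscall bit and refcount only when registration succeeded (if (!ret) where the old code had if (ret)). The pattern, reduced (demo_reg_enter is illustrative):

/*
 * Register-once sketch matching reg_event_syscall_enter() above: one
 * probe serves every syscall, a bitmap records which syscall numbers
 * are live, and a refcount tracks how many users need the probe.
 */
static int demo_reg_enter(int num)
{
        int ret = 0;

        mutex_lock(&syscall_trace_lock);
        if (!sys_refcount_enter)                /* first user registers */
                ret = register_trace_sys_enter(ftrace_syscall_enter);
        if (!ret) {                             /* count only on success */
                set_bit(num, enabled_enter_syscalls);
                sys_refcount_enter++;
        }
        mutex_unlock(&syscall_trace_lock);

        return ret;
}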
@@ -309,13 +333,11 @@ int reg_event_syscall_enter(void *ptr)
309 return ret; 333 return ret;
310} 334}
311 335
312void unreg_event_syscall_enter(void *ptr) 336void unreg_event_syscall_enter(struct ftrace_event_call *call)
313{ 337{
314 int num; 338 int num;
315 char *name;
316 339
317 name = (char *)ptr; 340 num = ((struct syscall_metadata *)call->data)->syscall_nr;
318 num = syscall_name_to_nr(name);
319 if (num < 0 || num >= NR_syscalls) 341 if (num < 0 || num >= NR_syscalls)
320 return; 342 return;
321 mutex_lock(&syscall_trace_lock); 343 mutex_lock(&syscall_trace_lock);
@@ -326,23 +348,18 @@ void unreg_event_syscall_enter(void *ptr)
326 mutex_unlock(&syscall_trace_lock); 348 mutex_unlock(&syscall_trace_lock);
327} 349}
328 350
329int reg_event_syscall_exit(void *ptr) 351int reg_event_syscall_exit(struct ftrace_event_call *call)
330{ 352{
331 int ret = 0; 353 int ret = 0;
332 int num; 354 int num;
333 char *name;
334 355
335 name = (char *)ptr; 356 num = ((struct syscall_metadata *)call->data)->syscall_nr;
336 num = syscall_name_to_nr(name);
337 if (num < 0 || num >= NR_syscalls) 357 if (num < 0 || num >= NR_syscalls)
338 return -ENOSYS; 358 return -ENOSYS;
339 mutex_lock(&syscall_trace_lock); 359 mutex_lock(&syscall_trace_lock);
340 if (!sys_refcount_exit) 360 if (!sys_refcount_exit)
341 ret = register_trace_sys_exit(ftrace_syscall_exit); 361 ret = register_trace_sys_exit(ftrace_syscall_exit);
342 if (ret) { 362 if (!ret) {
343 pr_info("event trace: Could not activate"
344 "syscall exit trace point");
345 } else {
346 set_bit(num, enabled_exit_syscalls); 363 set_bit(num, enabled_exit_syscalls);
347 sys_refcount_exit++; 364 sys_refcount_exit++;
348 } 365 }
@@ -350,13 +367,11 @@ int reg_event_syscall_exit(void *ptr)
350 return ret; 367 return ret;
351} 368}
352 369
353void unreg_event_syscall_exit(void *ptr) 370void unreg_event_syscall_exit(struct ftrace_event_call *call)
354{ 371{
355 int num; 372 int num;
356 char *name;
357 373
358 name = (char *)ptr; 374 num = ((struct syscall_metadata *)call->data)->syscall_nr;
359 num = syscall_name_to_nr(name);
360 if (num < 0 || num >= NR_syscalls) 375 if (num < 0 || num >= NR_syscalls)
361 return; 376 return;
362 mutex_lock(&syscall_trace_lock); 377 mutex_lock(&syscall_trace_lock);
@@ -367,13 +382,44 @@ void unreg_event_syscall_exit(void *ptr)
367 mutex_unlock(&syscall_trace_lock); 382 mutex_unlock(&syscall_trace_lock);
368} 383}
369 384
370struct trace_event event_syscall_enter = { 385int init_syscall_trace(struct ftrace_event_call *call)
371 .trace = print_syscall_enter, 386{
372}; 387 int id;
388
389 id = register_ftrace_event(call->event);
390 if (!id)
391 return -ENODEV;
392 call->id = id;
393 INIT_LIST_HEAD(&call->fields);
394 return 0;
395}
396
397int __init init_ftrace_syscalls(void)
398{
399 struct syscall_metadata *meta;
400 unsigned long addr;
401 int i;
402
403 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
404 NR_syscalls, GFP_KERNEL);
405 if (!syscalls_metadata) {
406 WARN_ON(1);
407 return -ENOMEM;
408 }
409
410 for (i = 0; i < NR_syscalls; i++) {
411 addr = arch_syscall_addr(i);
412 meta = find_syscall_meta(addr);
413 if (!meta)
414 continue;
415
416 meta->syscall_nr = i;
417 syscalls_metadata[i] = meta;
418 }
373 419
374struct trace_event event_syscall_exit = { 420 return 0;
375 .trace = print_syscall_exit, 421}
376}; 422core_initcall(init_ftrace_syscalls);
377 423
378#ifdef CONFIG_EVENT_PROFILE 424#ifdef CONFIG_EVENT_PROFILE
379 425
@@ -387,8 +433,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
387 struct syscall_metadata *sys_data; 433 struct syscall_metadata *sys_data;
388 struct syscall_trace_enter *rec; 434 struct syscall_trace_enter *rec;
389 unsigned long flags; 435 unsigned long flags;
436 char *trace_buf;
390 char *raw_data; 437 char *raw_data;
391 int syscall_nr; 438 int syscall_nr;
439 int rctx;
392 int size; 440 int size;
393 int cpu; 441 int cpu;
394 442
@@ -412,41 +460,42 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
412 /* Protect the per cpu buffer, begin the rcu read side */ 460 /* Protect the per cpu buffer, begin the rcu read side */
413 local_irq_save(flags); 461 local_irq_save(flags);
414 462
463 rctx = perf_swevent_get_recursion_context();
464 if (rctx < 0)
465 goto end_recursion;
466
415 cpu = smp_processor_id(); 467 cpu = smp_processor_id();
416 468
417 if (in_nmi()) 469 trace_buf = rcu_dereference(perf_trace_buf);
418 raw_data = rcu_dereference(trace_profile_buf_nmi);
419 else
420 raw_data = rcu_dereference(trace_profile_buf);
421 470
422 if (!raw_data) 471 if (!trace_buf)
423 goto end; 472 goto end;
424 473
425 raw_data = per_cpu_ptr(raw_data, cpu); 474 raw_data = per_cpu_ptr(trace_buf, cpu);
426 475
427 /* zero the dead bytes from align to not leak stack to user */ 476 /* zero the dead bytes from align to not leak stack to user */
428 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 477 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
429 478
430 rec = (struct syscall_trace_enter *) raw_data; 479 rec = (struct syscall_trace_enter *) raw_data;
431 tracing_generic_entry_update(&rec->ent, 0, 0); 480 tracing_generic_entry_update(&rec->ent, 0, 0);
432 rec->ent.type = sys_data->enter_id; 481 rec->ent.type = sys_data->enter_event->id;
433 rec->nr = syscall_nr; 482 rec->nr = syscall_nr;
434 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 483 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
435 (unsigned long *)&rec->args); 484 (unsigned long *)&rec->args);
436 perf_tp_event(sys_data->enter_id, 0, 1, rec, size); 485 perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
437 486
438end: 487end:
488 perf_swevent_put_recursion_context(rctx);
489end_recursion:
439 local_irq_restore(flags); 490 local_irq_restore(flags);
440} 491}
441 492
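Both profile handlers gain the same bracket: perf_swevent_get_recursion_context() claims a recursion slot before the handler touches the shared perf_trace_buf (replacing the old in_nmi() buffer switch), and perf_swevent_put_recursion_context() releases it on every exit path. The control flow reduced to its shape (demo_prof_handler is illustrative):

/*
 * Recursion-guard shape shared by prof_syscall_enter/exit above: take
 * the context before touching the per-cpu buffer, release it on every
 * exit path, including the early-out gotos.
 */
static void demo_prof_handler(void)
{
        unsigned long flags;
        char *trace_buf;
        int rctx;

        local_irq_save(flags);

        rctx = perf_swevent_get_recursion_context();
        if (rctx < 0)
                goto end_recursion;     /* re-entered: drop the event */

        trace_buf = rcu_dereference(perf_trace_buf);
        if (!trace_buf)
                goto end;

        /* ... build the record in per_cpu_ptr(trace_buf, cpu) and
         * hand it to perf_tp_event() ... */

end:
        perf_swevent_put_recursion_context(rctx);
end_recursion:
        local_irq_restore(flags);
}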
442int reg_prof_syscall_enter(char *name) 493int prof_sysenter_enable(struct ftrace_event_call *call)
443{ 494{
444 int ret = 0; 495 int ret = 0;
445 int num; 496 int num;
446 497
447 num = syscall_name_to_nr(name); 498 num = ((struct syscall_metadata *)call->data)->syscall_nr;
448 if (num < 0 || num >= NR_syscalls)
449 return -ENOSYS;
450 499
451 mutex_lock(&syscall_trace_lock); 500 mutex_lock(&syscall_trace_lock);
452 if (!sys_prof_refcount_enter) 501 if (!sys_prof_refcount_enter)
@@ -462,13 +511,11 @@ int reg_prof_syscall_enter(char *name)
462 return ret; 511 return ret;
463} 512}
464 513
465void unreg_prof_syscall_enter(char *name) 514void prof_sysenter_disable(struct ftrace_event_call *call)
466{ 515{
467 int num; 516 int num;
468 517
469 num = syscall_name_to_nr(name); 518 num = ((struct syscall_metadata *)call->data)->syscall_nr;
470 if (num < 0 || num >= NR_syscalls)
471 return;
472 519
473 mutex_lock(&syscall_trace_lock); 520 mutex_lock(&syscall_trace_lock);
474 sys_prof_refcount_enter--; 521 sys_prof_refcount_enter--;
@@ -484,7 +531,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
484 struct syscall_trace_exit *rec; 531 struct syscall_trace_exit *rec;
485 unsigned long flags; 532 unsigned long flags;
486 int syscall_nr; 533 int syscall_nr;
534 char *trace_buf;
487 char *raw_data; 535 char *raw_data;
536 int rctx;
488 int size; 537 int size;
489 int cpu; 538 int cpu;
490 539
@@ -510,17 +559,19 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
510 559
511 /* Protect the per cpu buffer, begin the rcu read side */ 560 /* Protect the per cpu buffer, begin the rcu read side */
512 local_irq_save(flags); 561 local_irq_save(flags);
562
563 rctx = perf_swevent_get_recursion_context();
564 if (rctx < 0)
565 goto end_recursion;
566
513 cpu = smp_processor_id(); 567 cpu = smp_processor_id();
514 568
515 if (in_nmi()) 569 trace_buf = rcu_dereference(perf_trace_buf);
516 raw_data = rcu_dereference(trace_profile_buf_nmi);
517 else
518 raw_data = rcu_dereference(trace_profile_buf);
519 570
520 if (!raw_data) 571 if (!trace_buf)
521 goto end; 572 goto end;
522 573
523 raw_data = per_cpu_ptr(raw_data, cpu); 574 raw_data = per_cpu_ptr(trace_buf, cpu);
524 575
525 /* zero the dead bytes from align to not leak stack to user */ 576 /* zero the dead bytes from align to not leak stack to user */
526 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 577 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -528,24 +579,24 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
528 rec = (struct syscall_trace_exit *)raw_data; 579 rec = (struct syscall_trace_exit *)raw_data;
529 580
530 tracing_generic_entry_update(&rec->ent, 0, 0); 581 tracing_generic_entry_update(&rec->ent, 0, 0);
531 rec->ent.type = sys_data->exit_id; 582 rec->ent.type = sys_data->exit_event->id;
532 rec->nr = syscall_nr; 583 rec->nr = syscall_nr;
533 rec->ret = syscall_get_return_value(current, regs); 584 rec->ret = syscall_get_return_value(current, regs);
534 585
535 perf_tp_event(sys_data->exit_id, 0, 1, rec, size); 586 perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
536 587
537end: 588end:
589 perf_swevent_put_recursion_context(rctx);
590end_recursion:
538 local_irq_restore(flags); 591 local_irq_restore(flags);
539} 592}
540 593
541int reg_prof_syscall_exit(char *name) 594int prof_sysexit_enable(struct ftrace_event_call *call)
542{ 595{
543 int ret = 0; 596 int ret = 0;
544 int num; 597 int num;
545 598
546 num = syscall_name_to_nr(name); 599 num = ((struct syscall_metadata *)call->data)->syscall_nr;
547 if (num < 0 || num >= NR_syscalls)
548 return -ENOSYS;
549 600
550 mutex_lock(&syscall_trace_lock); 601 mutex_lock(&syscall_trace_lock);
551 if (!sys_prof_refcount_exit) 602 if (!sys_prof_refcount_exit)
@@ -561,13 +612,11 @@ int reg_prof_syscall_exit(char *name)
561 return ret; 612 return ret;
562} 613}
563 614
564void unreg_prof_syscall_exit(char *name) 615void prof_sysexit_disable(struct ftrace_event_call *call)
565{ 616{
566 int num; 617 int num;
567 618
568 num = syscall_name_to_nr(name); 619 num = ((struct syscall_metadata *)call->data)->syscall_nr;
569 if (num < 0 || num >= NR_syscalls)
570 return;
571 620
572 mutex_lock(&syscall_trace_lock); 621 mutex_lock(&syscall_trace_lock);
573 sys_prof_refcount_exit--; 622 sys_prof_refcount_exit--;
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index f6693969287d..a7974a552ca9 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -93,6 +93,7 @@ static const struct stacktrace_ops backtrace_ops = {
93 .warning_symbol = backtrace_warning_symbol, 93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack, 94 .stack = backtrace_stack,
95 .address = backtrace_address, 95 .address = backtrace_address,
96 .walk_stack = print_context_stack,
96}; 97};
97 98
98static int 99static int
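The lone sysprof hunk tracks an x86 stacktrace API change elsewhere in this merge: struct stacktrace_ops apparently grew a ->walk_stack callback that selects the frame walker, with print_context_stack as the conventional choice. Under that assumption, the completed table for this file reads:

/*
 * stacktrace_ops after the API change: ->walk_stack picks the frame
 * walker; the print callbacks are this file's existing helpers (the
 * .warning line sits just above the hunk's context and is assumed).
 */
static const struct stacktrace_ops backtrace_ops = {
        .warning        = backtrace_warning,
        .warning_symbol = backtrace_warning_symbol,
        .stack          = backtrace_stack,
        .address        = backtrace_address,
        .walk_stack     = print_context_stack,
};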