Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                    |  147
-rw-r--r--  kernel/trace/Makefile                   |    6
-rw-r--r--  kernel/trace/blktrace.c                 |    5
-rw-r--r--  kernel/trace/ftrace.c                   |  510
-rw-r--r--  kernel/trace/power-traces.c             |    2
-rw-r--r--  kernel/trace/ring_buffer.c              |   98
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c    |   86
-rw-r--r--  kernel/trace/trace.c                    |  463
-rw-r--r--  kernel/trace/trace.h                    |  113
-rw-r--r--  kernel/trace/trace_branch.c             |   19
-rw-r--r--  kernel/trace/trace_clock.c              |   16
-rw-r--r--  kernel/trace/trace_entries.h            |   16
-rw-r--r--  kernel/trace/trace_event_profile.c      |   91
-rw-r--r--  kernel/trace/trace_events.c             |  285
-rw-r--r--  kernel/trace/trace_events_filter.c      |  438
-rw-r--r--  kernel/trace/trace_export.c             |  113
-rw-r--r--  kernel/trace/trace_functions_graph.c    |  241
-rw-r--r--  kernel/trace/trace_hw_branches.c        |   51
-rw-r--r--  kernel/trace/trace_irqsoff.c            |    2
-rw-r--r--  kernel/trace/trace_kprobe.c             | 1487
-rw-r--r--  kernel/trace/trace_ksym.c               |  519
-rw-r--r--  kernel/trace/trace_output.c             |   75
-rw-r--r--  kernel/trace/trace_sched_wakeup.c       |   16
-rw-r--r--  kernel/trace/trace_selftest.c           |   59
-rw-r--r--  kernel/trace/trace_stack.c              |   40
-rw-r--r--  kernel/trace/trace_syscalls.c           |  340
-rw-r--r--  kernel/trace/trace_sysprof.c            |    1
27 files changed, 4046 insertions, 1193 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b416512ad17f..13e13d428cd3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -12,39 +12,37 @@ config NOP_TRACER
12config HAVE_FTRACE_NMI_ENTER 12config HAVE_FTRACE_NMI_ENTER
13 bool 13 bool
14 help 14 help
15 See Documentation/trace/ftrace-implementation.txt 15 See Documentation/trace/ftrace-design.txt
16 16
17config HAVE_FUNCTION_TRACER 17config HAVE_FUNCTION_TRACER
18 bool 18 bool
19 help 19 help
20 See Documentation/trace/ftrace-implementation.txt 20 See Documentation/trace/ftrace-design.txt
21 21
22config HAVE_FUNCTION_GRAPH_TRACER 22config HAVE_FUNCTION_GRAPH_TRACER
23 bool 23 bool
24 help 24 help
25 See Documentation/trace/ftrace-implementation.txt 25 See Documentation/trace/ftrace-design.txt
26 26
27config HAVE_FUNCTION_GRAPH_FP_TEST 27config HAVE_FUNCTION_GRAPH_FP_TEST
28 bool 28 bool
29 help 29 help
30 An arch may pass in a unique value (frame pointer) to both the 30 See Documentation/trace/ftrace-design.txt
31 entering and exiting of a function. On exit, the value is compared
32 and if it does not match, then it will panic the kernel.
33 31
34config HAVE_FUNCTION_TRACE_MCOUNT_TEST 32config HAVE_FUNCTION_TRACE_MCOUNT_TEST
35 bool 33 bool
36 help 34 help
37 See Documentation/trace/ftrace-implementation.txt 35 See Documentation/trace/ftrace-design.txt
38 36
39config HAVE_DYNAMIC_FTRACE 37config HAVE_DYNAMIC_FTRACE
40 bool 38 bool
41 help 39 help
42 See Documentation/trace/ftrace-implementation.txt 40 See Documentation/trace/ftrace-design.txt
43 41
44config HAVE_FTRACE_MCOUNT_RECORD 42config HAVE_FTRACE_MCOUNT_RECORD
45 bool 43 bool
46 help 44 help
47 See Documentation/trace/ftrace-implementation.txt 45 See Documentation/trace/ftrace-design.txt
48 46
49config HAVE_HW_BRANCH_TRACER 47config HAVE_HW_BRANCH_TRACER
50 bool 48 bool
@@ -52,7 +50,7 @@ config HAVE_HW_BRANCH_TRACER
52config HAVE_SYSCALL_TRACEPOINTS 50config HAVE_SYSCALL_TRACEPOINTS
53 bool 51 bool
54 help 52 help
55 See Documentation/trace/ftrace-implementation.txt 53 See Documentation/trace/ftrace-design.txt
56 54
57config TRACER_MAX_TRACE 55config TRACER_MAX_TRACE
58 bool 56 bool
@@ -83,7 +81,7 @@ config RING_BUFFER_ALLOW_SWAP
83# This allows those options to appear when no other tracer is selected. But the 81# This allows those options to appear when no other tracer is selected. But the
84# options do not appear when something else selects it. We need the two options 82# options do not appear when something else selects it. We need the two options
85# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the 83# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
86# hidding of the automatic options. 84# hiding of the automatic options.
87 85
88config TRACING 86config TRACING
89 bool 87 bool
@@ -119,7 +117,7 @@ menuconfig FTRACE
119 bool "Tracers" 117 bool "Tracers"
120 default y if DEBUG_KERNEL 118 default y if DEBUG_KERNEL
121 help 119 help
122 Enable the kernel tracing infrastructure. 120 Enable the kernel tracing infrastructure.
123 121
124if FTRACE 122if FTRACE
125 123
@@ -133,7 +131,7 @@ config FUNCTION_TRACER
133 help 131 help
134 Enable the kernel to trace every kernel function. This is done 132 Enable the kernel to trace every kernel function. This is done
135 by using a compiler feature to insert a small, 5-byte No-Operation 133 by using a compiler feature to insert a small, 5-byte No-Operation
136 instruction to the beginning of every kernel function, which NOP 134 instruction at the beginning of every kernel function, which NOP
137 sequence is then dynamically patched into a tracer call when 135 sequence is then dynamically patched into a tracer call when
138 tracing is enabled by the administrator. If it's runtime disabled 136 tracing is enabled by the administrator. If it's runtime disabled
139 (the bootup default), then the overhead of the instructions is very 137 (the bootup default), then the overhead of the instructions is very
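
For illustration only (not part of this patch): the tracer call that the NOP
sites are patched into ultimately dispatches to callbacks registered through
register_ftrace_function(). A minimal kernel-module sketch, assuming the
two-argument ftrace_func_t callback signature of this kernel generation and a
hypothetical module:

/* Illustrative sketch: register a function-trace callback. */
#include <linux/ftrace.h>
#include <linux/module.h>

/* Runs at every traced function entry: ip is the called function,
 * parent_ip is its caller.  Keep it tiny -- it fires extremely often. */
static void my_trace_func(unsigned long ip, unsigned long parent_ip)
{
	/* e.g. count calls or filter on ip */
}

static struct ftrace_ops my_ops = {
	.func = my_trace_func,
};

static int __init my_trace_init(void)
{
	return register_ftrace_function(&my_ops);
}

static void __exit my_trace_exit(void)
{
	unregister_ftrace_function(&my_ops);
}

module_init(my_trace_init);
module_exit(my_trace_exit);
MODULE_LICENSE("GPL");
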
@@ -150,7 +148,7 @@ config FUNCTION_GRAPH_TRACER
150 and its entry. 148 and its entry.
151 Its first purpose is to trace the duration of functions and 149 Its first purpose is to trace the duration of functions and
152 draw a call graph for each thread with some information like 150 draw a call graph for each thread with some information like
153 the return value. This is done by setting the current return 151 the return value. This is done by setting the current return
154 address on the current task structure into a stack of calls. 152 address on the current task structure into a stack of calls.
155 153
156 154
@@ -173,7 +171,7 @@ config IRQSOFF_TRACER
173 171
174 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency 172 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
175 173
176 (Note that kernel size and overhead increases with this option 174 (Note that kernel size and overhead increase with this option
177 enabled. This option and the preempt-off timing option can be 175 enabled. This option and the preempt-off timing option can be
178 used together or separately.) 176 used together or separately.)
179 177
@@ -186,7 +184,7 @@ config PREEMPT_TRACER
186 select TRACER_MAX_TRACE 184 select TRACER_MAX_TRACE
187 select RING_BUFFER_ALLOW_SWAP 185 select RING_BUFFER_ALLOW_SWAP
188 help 186 help
189 This option measures the time spent in preemption off critical 187 This option measures the time spent in preemption-off critical
190 sections, with microsecond accuracy. 188 sections, with microsecond accuracy.
191 189
192 The default measurement method is a maximum search, which is 190 The default measurement method is a maximum search, which is
@@ -195,7 +193,7 @@ config PREEMPT_TRACER
195 193
196 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency 194 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
197 195
198 (Note that kernel size and overhead increases with this option 196 (Note that kernel size and overhead increase with this option
199 enabled. This option and the irqs-off timing option can be 197 enabled. This option and the irqs-off timing option can be
200 used together or separately.) 198 used together or separately.)
201 199
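
Both latency-tracer help texts mention resetting tracing_max_latency between
measurement runs. A hypothetical user-space sketch of that workflow, assuming
debugfs is mounted at /sys/kernel/debug:

/* Illustrative only: select a latency tracer and reset the stored maximum. */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, val, strlen(val));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	/* "preemptoff" or "preemptirqsoff" work the same way */
	write_str("/sys/kernel/debug/tracing/current_tracer", "irqsoff");
	/* reset the recorded maximum before a new measurement run */
	write_str("/sys/kernel/debug/tracing/tracing_max_latency", "0");
	return 0;
}
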
@@ -222,7 +220,7 @@ config ENABLE_DEFAULT_TRACERS
222 depends on !GENERIC_TRACER 220 depends on !GENERIC_TRACER
223 select TRACING 221 select TRACING
224 help 222 help
225 This tracer hooks to various trace points in the kernel 223 This tracer hooks to various trace points in the kernel,
226 allowing the user to pick and choose which trace point they 224 allowing the user to pick and choose which trace point they
227 want to trace. It also includes the sched_switch tracer plugin. 225 want to trace. It also includes the sched_switch tracer plugin.
228 226
@@ -265,19 +263,19 @@ choice
265 The likely/unlikely profiler only looks at the conditions that 263 The likely/unlikely profiler only looks at the conditions that
266 are annotated with a likely or unlikely macro. 264 are annotated with a likely or unlikely macro.
267 265
268 The "all branch" profiler will profile every if statement in the 266 The "all branch" profiler will profile every if-statement in the
269 kernel. This profiler will also enable the likely/unlikely 267 kernel. This profiler will also enable the likely/unlikely
270 profiler as well. 268 profiler.
271 269
272 Either of the above profilers add a bit of overhead to the system. 270 Either of the above profilers adds a bit of overhead to the system.
273 If unsure choose "No branch profiling". 271 If unsure, choose "No branch profiling".
274 272
275config BRANCH_PROFILE_NONE 273config BRANCH_PROFILE_NONE
276 bool "No branch profiling" 274 bool "No branch profiling"
277 help 275 help
278 No branch profiling. Branch profiling adds a bit of overhead. 276 No branch profiling. Branch profiling adds a bit of overhead.
279 Only enable it if you want to analyse the branching behavior. 277 Only enable it if you want to analyse the branching behavior.
280 Otherwise keep it disabled. 278 Otherwise keep it disabled.
281 279
282config PROFILE_ANNOTATED_BRANCHES 280config PROFILE_ANNOTATED_BRANCHES
283 bool "Trace likely/unlikely profiler" 281 bool "Trace likely/unlikely profiler"
@@ -288,7 +286,7 @@ config PROFILE_ANNOTATED_BRANCHES
288 286
289 /sys/kernel/debug/tracing/profile_annotated_branch 287 /sys/kernel/debug/tracing/profile_annotated_branch
290 288
291 Note: this will add a significant overhead, only turn this 289 Note: this will add a significant overhead; only turn this
292 on if you need to profile the system's use of these macros. 290 on if you need to profile the system's use of these macros.
293 291
294config PROFILE_ALL_BRANCHES 292config PROFILE_ALL_BRANCHES
@@ -305,7 +303,7 @@ config PROFILE_ALL_BRANCHES
305 303
306 This configuration, when enabled, will impose a great overhead 304 This configuration, when enabled, will impose a great overhead
307 on the system. This should only be enabled when the system 305 on the system. This should only be enabled when the system
308 is to be analyzed 306 is to be analyzed in much detail.
309endchoice 307endchoice
310 308
311config TRACING_BRANCHES 309config TRACING_BRANCHES
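
For context on what the two profilers in the choice above actually instrument
(an illustrative, self-contained snippet, not part of this patch): only
branches wrapped in the kernel's likely()/unlikely() hint macros are counted
by the annotated profiler, while the "all branch" profiler also covers plain
if-statements.

#include <linux/kernel.h>	/* pulls in likely()/unlikely() */
#include <linux/errno.h>

struct ring {			/* hypothetical example structure */
	int space;
	int tail;
	void *slots[16];
};

static int ring_add(struct ring *r, void *it)
{
	if (unlikely(!it))		/* annotated: likely/unlikely profiler */
		return -EINVAL;

	if (likely(r->space > 0)) {	/* annotated */
		r->slots[r->tail++] = it;
		r->space--;
		return 0;
	}

	if (r->tail == 0)		/* unannotated: "all branch" profiler only */
		return -ENOSPC;

	return -EBUSY;
}
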
@@ -330,15 +328,27 @@ config BRANCH_TRACER
330 328
331 Say N if unsure. 329 Say N if unsure.
332 330
333config POWER_TRACER 331config KSYM_TRACER
334 bool "Trace power consumption behavior" 332 bool "Trace read and write access on kernel memory locations"
335 depends on X86 333 depends on HAVE_HW_BREAKPOINT
336 select GENERIC_TRACER 334 select TRACING
335 help
336 This tracer helps find read and write operations on any given kernel
337 symbol i.e. /proc/kallsyms.
338
339config PROFILE_KSYM_TRACER
340 bool "Profile all kernel memory accesses on 'watched' variables"
341 depends on KSYM_TRACER
337 help 342 help
338 This tracer helps developers to analyze and optimize the kernels 343 This tracer profiles kernel accesses on variables watched through the
339 power management decisions, specifically the C-state and P-state 344 ksym tracer ftrace plugin. Depending upon the hardware, all read
340 behavior. 345 and write operations on kernel variables can be monitored for
346 accesses.
347
348 The results will be displayed in:
349 /debugfs/tracing/profile_ksym
341 350
351 Say N if unsure.
342 352
343config STACK_TRACER 353config STACK_TRACER
344 bool "Trace max stack" 354 bool "Trace max stack"
@@ -370,14 +380,14 @@ config HW_BRANCH_TRACER
370 select GENERIC_TRACER 380 select GENERIC_TRACER
371 help 381 help
372 This tracer records all branches on the system in a circular 382 This tracer records all branches on the system in a circular
373 buffer giving access to the last N branches for each cpu. 383 buffer, giving access to the last N branches for each cpu.
374 384
375config KMEMTRACE 385config KMEMTRACE
376 bool "Trace SLAB allocations" 386 bool "Trace SLAB allocations"
377 select GENERIC_TRACER 387 select GENERIC_TRACER
378 help 388 help
379 kmemtrace provides tracing for slab allocator functions, such as 389 kmemtrace provides tracing for slab allocator functions, such as
380 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected 390 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
381 data is then fed to the userspace application in order to analyse 391 data is then fed to the userspace application in order to analyse
382 allocation hotspots, internal fragmentation and so on, making it 392 allocation hotspots, internal fragmentation and so on, making it
383 possible to see how well an allocator performs, as well as debug 393 possible to see how well an allocator performs, as well as debug
@@ -396,15 +406,15 @@ config WORKQUEUE_TRACER
396 bool "Trace workqueues" 406 bool "Trace workqueues"
397 select GENERIC_TRACER 407 select GENERIC_TRACER
398 help 408 help
399 The workqueue tracer provides some statistical informations 409 The workqueue tracer provides some statistical information
400 about each cpu workqueue thread such as the number of the 410 about each cpu workqueue thread such as the number of the
401 works inserted and executed since their creation. It can help 411 works inserted and executed since their creation. It can help
402 to evaluate the amount of work each of them have to perform. 412 to evaluate the amount of work each of them has to perform.
403 For example it can help a developer to decide whether he should 413 For example it can help a developer to decide whether he should
404 choose a per cpu workqueue instead of a singlethreaded one. 414 choose a per-cpu workqueue instead of a singlethreaded one.
405 415
406config BLK_DEV_IO_TRACE 416config BLK_DEV_IO_TRACE
407 bool "Support for tracing block io actions" 417 bool "Support for tracing block IO actions"
408 depends on SYSFS 418 depends on SYSFS
409 depends on BLOCK 419 depends on BLOCK
410 select RELAY 420 select RELAY
@@ -428,38 +438,55 @@ config BLK_DEV_IO_TRACE
428 438
429 If unsure, say N. 439 If unsure, say N.
430 440
441config KPROBE_EVENT
442 depends on KPROBES
443 depends on HAVE_REGS_AND_STACK_ACCESS_API
444 bool "Enable kprobes-based dynamic events"
445 select TRACING
446 default y
447 help
448 This allows the user to add tracing events (similar to tracepoints)
449 on the fly via the ftrace interface. See
450 Documentation/trace/kprobetrace.txt for more details.
451
452 Those events can be inserted wherever kprobes can probe, and record
453 various register and memory values.
454
455 This option is also required by perf-probe subcommand of perf tools.
456 If you want to use perf tools, this option is strongly recommended.
457
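
As a user-space illustration of the new dynamic-event interface (the probe
name and target symbol are examples only; see
Documentation/trace/kprobetrace.txt for the full syntax), a probe could be
added and enabled like this, assuming debugfs at /sys/kernel/debug:

/* Illustrative sketch: define a kprobe event on do_sys_open and enable it.
 * Equivalent to:  echo 'p:myprobe do_sys_open' >> kprobe_events
 *                 echo 1 > events/kprobes/myprobe/enable            */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, val, strlen(val));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	if (write_str("/sys/kernel/debug/tracing/kprobe_events",
		      "p:myprobe do_sys_open\n"))
		return 1;
	return write_str(
		"/sys/kernel/debug/tracing/events/kprobes/myprobe/enable",
		"1") ? 1 : 0;
}
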
431config DYNAMIC_FTRACE 458config DYNAMIC_FTRACE
432 bool "enable/disable ftrace tracepoints dynamically" 459 bool "enable/disable ftrace tracepoints dynamically"
433 depends on FUNCTION_TRACER 460 depends on FUNCTION_TRACER
434 depends on HAVE_DYNAMIC_FTRACE 461 depends on HAVE_DYNAMIC_FTRACE
435 default y 462 default y
436 help 463 help
437 This option will modify all the calls to ftrace dynamically 464 This option will modify all the calls to ftrace dynamically
438 (will patch them out of the binary image and replaces them 465 (will patch them out of the binary image and replace them
439 with a No-Op instruction) as they are called. A table is 466 with a No-Op instruction) as they are called. A table is
440 created to dynamically enable them again. 467 created to dynamically enable them again.
441 468
442 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise 469 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
443 has native performance as long as no tracing is active. 470 otherwise has native performance as long as no tracing is active.
444 471
445 The changes to the code are done by a kernel thread that 472 The changes to the code are done by a kernel thread that
446 wakes up once a second and checks to see if any ftrace calls 473 wakes up once a second and checks to see if any ftrace calls
447 were made. If so, it runs stop_machine (stops all CPUS) 474 were made. If so, it runs stop_machine (stops all CPUS)
448 and modifies the code to jump over the call to ftrace. 475 and modifies the code to jump over the call to ftrace.
449 476
450config FUNCTION_PROFILER 477config FUNCTION_PROFILER
451 bool "Kernel function profiler" 478 bool "Kernel function profiler"
452 depends on FUNCTION_TRACER 479 depends on FUNCTION_TRACER
453 default n 480 default n
454 help 481 help
455 This option enables the kernel function profiler. A file is created 482 This option enables the kernel function profiler. A file is created
456 in debugfs called function_profile_enabled which defaults to zero. 483 in debugfs called function_profile_enabled which defaults to zero.
457 When a 1 is echoed into this file profiling begins, and when a 484 When a 1 is echoed into this file profiling begins, and when a
458 zero is entered, profiling stops. A file in the trace_stats 485 zero is entered, profiling stops. A "functions" file is created in
459 directory called functions, that show the list of functions that 486 the trace_stats directory; this file shows the list of functions that
460 have been hit and their counters. 487 have been hit and their counters.
461 488
462 If in doubt, say N 489 If in doubt, say N.
463 490
464config FTRACE_MCOUNT_RECORD 491config FTRACE_MCOUNT_RECORD
465 def_bool y 492 def_bool y
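
Tying the FUNCTION_PROFILER help text to a concrete sequence (illustrative
sketch; the control-file path assumes debugfs at /sys/kernel/debug, and the
per-function counters are read back from the stats file described above):

/* Illustrative only: start the function profiler, run a workload, stop it. */
#include <fcntl.h>
#include <unistd.h>

#define PROFILE_CTL "/sys/kernel/debug/tracing/function_profile_enabled"

static void set_profiling(int on)
{
	int fd = open(PROFILE_CTL, O_WRONLY);

	if (fd < 0)
		return;
	if (write(fd, on ? "1" : "0", 1) != 1)
		/* best effort; nothing to do on failure */;
	close(fd);
}

int main(void)
{
	set_profiling(1);
	sleep(5);		/* run the workload of interest here */
	set_profiling(0);
	return 0;
}
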
@@ -518,8 +545,8 @@ config RING_BUFFER_BENCHMARK
518 tristate "Ring buffer benchmark stress tester" 545 tristate "Ring buffer benchmark stress tester"
519 depends on RING_BUFFER 546 depends on RING_BUFFER
520 help 547 help
521 This option creates a test to stress the ring buffer and bench mark it. 548 This option creates a test to stress the ring buffer and benchmark it.
522 It creates its own ring buffer such that it will not interfer with 549 It creates its own ring buffer such that it will not interfere with
523 any other users of the ring buffer (such as ftrace). It then creates 550 any other users of the ring buffer (such as ftrace). It then creates
524 a producer and consumer that will run for 10 seconds and sleep for 551 a producer and consumer that will run for 10 seconds and sleep for
525 10 seconds. Each interval it will print out the number of events 552 10 seconds. Each interval it will print out the number of events
@@ -528,7 +555,7 @@ config RING_BUFFER_BENCHMARK
528 It does not disable interrupts or raise its priority, so it may be 555 It does not disable interrupts or raise its priority, so it may be
529 affected by processes that are running. 556 affected by processes that are running.
530 557
531 If unsure, say N 558 If unsure, say N.
532 559
533endif # FTRACE 560endif # FTRACE
534 561
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 26f03ac07c2b..d00c6fe23f54 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -51,8 +51,12 @@ endif
51obj-$(CONFIG_EVENT_TRACING) += trace_events.o 51obj-$(CONFIG_EVENT_TRACING) += trace_events.o
52obj-$(CONFIG_EVENT_TRACING) += trace_export.o 52obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54ifeq ($(CONFIG_PERF_EVENTS),y)
55obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o
56endif
55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 57obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
58obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
59obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
56obj-$(CONFIG_EVENT_TRACING) += power-traces.o 60obj-$(CONFIG_EVENT_TRACING) += power-traces.o
57 61
58libftrace-y := ftrace.o 62libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d9d6206e0b14..07f945a99430 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -540,9 +540,10 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
540 if (ret) 540 if (ret)
541 return ret; 541 return ret;
542 542
543 if (copy_to_user(arg, &buts, sizeof(buts))) 543 if (copy_to_user(arg, &buts, sizeof(buts))) {
544 blk_trace_remove(q);
544 return -EFAULT; 545 return -EFAULT;
545 546 }
546 return 0; 547 return 0;
547} 548}
548EXPORT_SYMBOL_GPL(blk_trace_setup); 549EXPORT_SYMBOL_GPL(blk_trace_setup);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6dc4e5ef7a01..83783579378f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,7 +22,6 @@
22#include <linux/hardirq.h> 22#include <linux/hardirq.h>
23#include <linux/kthread.h> 23#include <linux/kthread.h>
24#include <linux/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/kprobes.h>
26#include <linux/ftrace.h> 25#include <linux/ftrace.h>
27#include <linux/sysctl.h> 26#include <linux/sysctl.h>
28#include <linux/ctype.h> 27#include <linux/ctype.h>
@@ -60,6 +59,13 @@ static int last_ftrace_enabled;
60/* Quick disabling of function tracer. */ 59/* Quick disabling of function tracer. */
61int function_trace_stop; 60int function_trace_stop;
62 61
62/* List for set_ftrace_pid's pids. */
63LIST_HEAD(ftrace_pids);
64struct ftrace_pid {
65 struct list_head list;
66 struct pid *pid;
67};
68
63/* 69/*
64 * ftrace_disabled is set when an anomaly is discovered. 70 * ftrace_disabled is set when an anomaly is discovered.
65 * ftrace_disabled is much stronger than ftrace_enabled. 71 * ftrace_disabled is much stronger than ftrace_enabled.
@@ -78,6 +84,10 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
78ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; 84ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
79ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 85ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
80 86
87#ifdef CONFIG_FUNCTION_GRAPH_TRACER
88static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
89#endif
90
81static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 91static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
82{ 92{
83 struct ftrace_ops *op = ftrace_list; 93 struct ftrace_ops *op = ftrace_list;
@@ -155,7 +165,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
155 else 165 else
156 func = ftrace_list_func; 166 func = ftrace_list_func;
157 167
158 if (ftrace_pid_trace) { 168 if (!list_empty(&ftrace_pids)) {
159 set_ftrace_pid_function(func); 169 set_ftrace_pid_function(func);
160 func = ftrace_pid_func; 170 func = ftrace_pid_func;
161 } 171 }
@@ -203,7 +213,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
203 if (ftrace_list->next == &ftrace_list_end) { 213 if (ftrace_list->next == &ftrace_list_end) {
204 ftrace_func_t func = ftrace_list->func; 214 ftrace_func_t func = ftrace_list->func;
205 215
206 if (ftrace_pid_trace) { 216 if (!list_empty(&ftrace_pids)) {
207 set_ftrace_pid_function(func); 217 set_ftrace_pid_function(func);
208 func = ftrace_pid_func; 218 func = ftrace_pid_func;
209 } 219 }
@@ -231,7 +241,7 @@ static void ftrace_update_pid_func(void)
231 func = __ftrace_trace_function; 241 func = __ftrace_trace_function;
232#endif 242#endif
233 243
234 if (ftrace_pid_trace) { 244 if (!list_empty(&ftrace_pids)) {
235 set_ftrace_pid_function(func); 245 set_ftrace_pid_function(func);
236 func = ftrace_pid_func; 246 func = ftrace_pid_func;
237 } else { 247 } else {
@@ -821,8 +831,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
821} 831}
822#endif /* CONFIG_FUNCTION_PROFILER */ 832#endif /* CONFIG_FUNCTION_PROFILER */
823 833
824/* set when tracing only a pid */
825struct pid *ftrace_pid_trace;
826static struct pid * const ftrace_swapper_pid = &init_struct_pid; 834static struct pid * const ftrace_swapper_pid = &init_struct_pid;
827 835
828#ifdef CONFIG_DYNAMIC_FTRACE 836#ifdef CONFIG_DYNAMIC_FTRACE
@@ -889,36 +897,6 @@ static struct dyn_ftrace *ftrace_free_records;
889 } \ 897 } \
890 } 898 }
891 899
892#ifdef CONFIG_KPROBES
893
894static int frozen_record_count;
895
896static inline void freeze_record(struct dyn_ftrace *rec)
897{
898 if (!(rec->flags & FTRACE_FL_FROZEN)) {
899 rec->flags |= FTRACE_FL_FROZEN;
900 frozen_record_count++;
901 }
902}
903
904static inline void unfreeze_record(struct dyn_ftrace *rec)
905{
906 if (rec->flags & FTRACE_FL_FROZEN) {
907 rec->flags &= ~FTRACE_FL_FROZEN;
908 frozen_record_count--;
909 }
910}
911
912static inline int record_frozen(struct dyn_ftrace *rec)
913{
914 return rec->flags & FTRACE_FL_FROZEN;
915}
916#else
917# define freeze_record(rec) ({ 0; })
918# define unfreeze_record(rec) ({ 0; })
919# define record_frozen(rec) ({ 0; })
920#endif /* CONFIG_KPROBES */
921
922static void ftrace_free_rec(struct dyn_ftrace *rec) 900static void ftrace_free_rec(struct dyn_ftrace *rec)
923{ 901{
924 rec->freelist = ftrace_free_records; 902 rec->freelist = ftrace_free_records;
@@ -1016,6 +994,21 @@ static void ftrace_bug(int failed, unsigned long ip)
1016} 994}
1017 995
1018 996
997/* Return 1 if the address range is reserved for ftrace */
998int ftrace_text_reserved(void *start, void *end)
999{
1000 struct dyn_ftrace *rec;
1001 struct ftrace_page *pg;
1002
1003 do_for_each_ftrace_rec(pg, rec) {
1004 if (rec->ip <= (unsigned long)end &&
1005 rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
1006 return 1;
1007 } while_for_each_ftrace_rec();
1008 return 0;
1009}
1010
1011
1019static int 1012static int
1020__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1013__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1021{ 1014{
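
The new ftrace_text_reserved() helper above lets other text-patching code
refuse to touch an mcount call site that ftrace owns. A hedged sketch of a
hypothetical caller (the surrounding function is illustrative, not from this
patch):

#include <linux/errno.h>
#include <linux/ftrace.h>
#include <linux/types.h>

/* Before modifying code at addr..addr+len-1, make sure the range does not
 * overlap a call site managed by ftrace. */
static int my_patch_text(unsigned long addr, size_t len)
{
	if (ftrace_text_reserved((void *)addr, (void *)(addr + len - 1)))
		return -EBUSY;

	/* ...proceed with the actual code modification... */
	return 0;
}
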
@@ -1067,14 +1060,6 @@ static void ftrace_replace_code(int enable)
1067 !(rec->flags & FTRACE_FL_CONVERTED)) 1060 !(rec->flags & FTRACE_FL_CONVERTED))
1068 continue; 1061 continue;
1069 1062
1070 /* ignore updates to this record's mcount site */
1071 if (get_kprobe((void *)rec->ip)) {
1072 freeze_record(rec);
1073 continue;
1074 } else {
1075 unfreeze_record(rec);
1076 }
1077
1078 failed = __ftrace_replace_code(rec, enable); 1063 failed = __ftrace_replace_code(rec, enable);
1079 if (failed) { 1064 if (failed) {
1080 rec->flags |= FTRACE_FL_FAILED; 1065 rec->flags |= FTRACE_FL_FAILED;
@@ -1261,12 +1246,34 @@ static int ftrace_update_code(struct module *mod)
1261 ftrace_new_addrs = p->newlist; 1246 ftrace_new_addrs = p->newlist;
1262 p->flags = 0L; 1247 p->flags = 0L;
1263 1248
1264 /* convert record (i.e, patch mcount-call with NOP) */ 1249 /*
1265 if (ftrace_code_disable(mod, p)) { 1250 * Do the initial record convertion from mcount jump
1266 p->flags |= FTRACE_FL_CONVERTED; 1251 * to the NOP instructions.
1267 ftrace_update_cnt++; 1252 */
1268 } else 1253 if (!ftrace_code_disable(mod, p)) {
1269 ftrace_free_rec(p); 1254 ftrace_free_rec(p);
1255 continue;
1256 }
1257
1258 p->flags |= FTRACE_FL_CONVERTED;
1259 ftrace_update_cnt++;
1260
1261 /*
1262 * If the tracing is enabled, go ahead and enable the record.
1263 *
1264 * The reason not to enable the record immediatelly is the
1265 * inherent check of ftrace_make_nop/ftrace_make_call for
1266 * correct previous instructions. Making first the NOP
1267 * conversion puts the module to the correct state, thus
1268 * passing the ftrace_make_call check.
1269 */
1270 if (ftrace_start_up) {
1271 int failed = __ftrace_replace_code(p, 1);
1272 if (failed) {
1273 ftrace_bug(failed, p->ip);
1274 ftrace_free_rec(p);
1275 }
1276 }
1270 } 1277 }
1271 1278
1272 stop = ftrace_now(raw_smp_processor_id()); 1279 stop = ftrace_now(raw_smp_processor_id());
@@ -1656,64 +1663,10 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
1656 return ret; 1663 return ret;
1657} 1664}
1658 1665
1659enum {
1660 MATCH_FULL,
1661 MATCH_FRONT_ONLY,
1662 MATCH_MIDDLE_ONLY,
1663 MATCH_END_ONLY,
1664};
1665
1666/*
1667 * (static function - no need for kernel doc)
1668 *
1669 * Pass in a buffer containing a glob and this function will
1670 * set search to point to the search part of the buffer and
1671 * return the type of search it is (see enum above).
1672 * This does modify buff.
1673 *
1674 * Returns enum type.
1675 * search returns the pointer to use for comparison.
1676 * not returns 1 if buff started with a '!'
1677 * 0 otherwise.
1678 */
1679static int
1680ftrace_setup_glob(char *buff, int len, char **search, int *not)
1681{
1682 int type = MATCH_FULL;
1683 int i;
1684
1685 if (buff[0] == '!') {
1686 *not = 1;
1687 buff++;
1688 len--;
1689 } else
1690 *not = 0;
1691
1692 *search = buff;
1693
1694 for (i = 0; i < len; i++) {
1695 if (buff[i] == '*') {
1696 if (!i) {
1697 *search = buff + 1;
1698 type = MATCH_END_ONLY;
1699 } else {
1700 if (type == MATCH_END_ONLY)
1701 type = MATCH_MIDDLE_ONLY;
1702 else
1703 type = MATCH_FRONT_ONLY;
1704 buff[i] = 0;
1705 break;
1706 }
1707 }
1708 }
1709
1710 return type;
1711}
1712
1713static int ftrace_match(char *str, char *regex, int len, int type) 1666static int ftrace_match(char *str, char *regex, int len, int type)
1714{ 1667{
1715 int matched = 0; 1668 int matched = 0;
1716 char *ptr; 1669 int slen;
1717 1670
1718 switch (type) { 1671 switch (type) {
1719 case MATCH_FULL: 1672 case MATCH_FULL:
@@ -1729,8 +1682,8 @@ static int ftrace_match(char *str, char *regex, int len, int type)
1729 matched = 1; 1682 matched = 1;
1730 break; 1683 break;
1731 case MATCH_END_ONLY: 1684 case MATCH_END_ONLY:
1732 ptr = strstr(str, regex); 1685 slen = strlen(str);
1733 if (ptr && (ptr[len] == 0)) 1686 if (slen >= len && memcmp(str + slen - len, regex, len) == 0)
1734 matched = 1; 1687 matched = 1;
1735 break; 1688 break;
1736 } 1689 }
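
The hunk above replaces the strstr()-based end-match with an explicit suffix
comparison, so a "*foo" pattern now matches even when "foo" also occurs
earlier in the symbol name. A stand-alone illustration of the new logic:

#include <assert.h>
#include <string.h>

/* Mirror of the new MATCH_END_ONLY check: compare the last `len` bytes
 * of str against the pattern instead of trusting the first strstr() hit. */
static int match_end_only(const char *str, const char *regex, int len)
{
	int slen = strlen(str);

	return slen >= len && memcmp(str + slen - len, regex, len) == 0;
}

int main(void)
{
	/* Ends with "_lock"; the earlier "_lock" occurrence used to make the
	 * old strstr()-based check report a false negative here. */
	assert(match_end_only("get_lock_stats_lock", "_lock", 5));
	/* Does not end with the pattern -> no match. */
	assert(!match_end_only("lock_timer", "_lock", 5));
	return 0;
}
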
@@ -1747,7 +1700,7 @@ ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
1747 return ftrace_match(str, regex, len, type); 1700 return ftrace_match(str, regex, len, type);
1748} 1701}
1749 1702
1750static void ftrace_match_records(char *buff, int len, int enable) 1703static int ftrace_match_records(char *buff, int len, int enable)
1751{ 1704{
1752 unsigned int search_len; 1705 unsigned int search_len;
1753 struct ftrace_page *pg; 1706 struct ftrace_page *pg;
@@ -1756,9 +1709,10 @@ static void ftrace_match_records(char *buff, int len, int enable)
1756 char *search; 1709 char *search;
1757 int type; 1710 int type;
1758 int not; 1711 int not;
1712 int found = 0;
1759 1713
1760 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1714 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1761 type = ftrace_setup_glob(buff, len, &search, &not); 1715 type = filter_parse_regex(buff, len, &search, &not);
1762 1716
1763 search_len = strlen(search); 1717 search_len = strlen(search);
1764 1718
@@ -1773,6 +1727,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
1773 rec->flags &= ~flag; 1727 rec->flags &= ~flag;
1774 else 1728 else
1775 rec->flags |= flag; 1729 rec->flags |= flag;
1730 found = 1;
1776 } 1731 }
1777 /* 1732 /*
1778 * Only enable filtering if we have a function that 1733 * Only enable filtering if we have a function that
@@ -1782,6 +1737,8 @@ static void ftrace_match_records(char *buff, int len, int enable)
1782 ftrace_filtered = 1; 1737 ftrace_filtered = 1;
1783 } while_for_each_ftrace_rec(); 1738 } while_for_each_ftrace_rec();
1784 mutex_unlock(&ftrace_lock); 1739 mutex_unlock(&ftrace_lock);
1740
1741 return found;
1785} 1742}
1786 1743
1787static int 1744static int
@@ -1803,7 +1760,7 @@ ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
1803 return 1; 1760 return 1;
1804} 1761}
1805 1762
1806static void ftrace_match_module_records(char *buff, char *mod, int enable) 1763static int ftrace_match_module_records(char *buff, char *mod, int enable)
1807{ 1764{
1808 unsigned search_len = 0; 1765 unsigned search_len = 0;
1809 struct ftrace_page *pg; 1766 struct ftrace_page *pg;
@@ -1812,6 +1769,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1812 char *search = buff; 1769 char *search = buff;
1813 unsigned long flag; 1770 unsigned long flag;
1814 int not = 0; 1771 int not = 0;
1772 int found = 0;
1815 1773
1816 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1774 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1817 1775
@@ -1826,7 +1784,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1826 } 1784 }
1827 1785
1828 if (strlen(buff)) { 1786 if (strlen(buff)) {
1829 type = ftrace_setup_glob(buff, strlen(buff), &search, &not); 1787 type = filter_parse_regex(buff, strlen(buff), &search, &not);
1830 search_len = strlen(search); 1788 search_len = strlen(search);
1831 } 1789 }
1832 1790
@@ -1842,12 +1800,15 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1842 rec->flags &= ~flag; 1800 rec->flags &= ~flag;
1843 else 1801 else
1844 rec->flags |= flag; 1802 rec->flags |= flag;
1803 found = 1;
1845 } 1804 }
1846 if (enable && (rec->flags & FTRACE_FL_FILTER)) 1805 if (enable && (rec->flags & FTRACE_FL_FILTER))
1847 ftrace_filtered = 1; 1806 ftrace_filtered = 1;
1848 1807
1849 } while_for_each_ftrace_rec(); 1808 } while_for_each_ftrace_rec();
1850 mutex_unlock(&ftrace_lock); 1809 mutex_unlock(&ftrace_lock);
1810
1811 return found;
1851} 1812}
1852 1813
1853/* 1814/*
@@ -1876,8 +1837,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
1876 if (!strlen(mod)) 1837 if (!strlen(mod))
1877 return -EINVAL; 1838 return -EINVAL;
1878 1839
1879 ftrace_match_module_records(func, mod, enable); 1840 if (ftrace_match_module_records(func, mod, enable))
1880 return 0; 1841 return 0;
1842 return -EINVAL;
1881} 1843}
1882 1844
1883static struct ftrace_func_command ftrace_mod_cmd = { 1845static struct ftrace_func_command ftrace_mod_cmd = {
@@ -1991,7 +1953,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
1991 int count = 0; 1953 int count = 0;
1992 char *search; 1954 char *search;
1993 1955
1994 type = ftrace_setup_glob(glob, strlen(glob), &search, &not); 1956 type = filter_parse_regex(glob, strlen(glob), &search, &not);
1995 len = strlen(search); 1957 len = strlen(search);
1996 1958
1997 /* we do not support '!' for function probes */ 1959 /* we do not support '!' for function probes */
@@ -2068,7 +2030,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
2068 else if (glob) { 2030 else if (glob) {
2069 int not; 2031 int not;
2070 2032
2071 type = ftrace_setup_glob(glob, strlen(glob), &search, &not); 2033 type = filter_parse_regex(glob, strlen(glob), &search, &not);
2072 len = strlen(search); 2034 len = strlen(search);
2073 2035
2074 /* we do not support '!' for function probes */ 2036 /* we do not support '!' for function probes */
@@ -2174,8 +2136,9 @@ static int ftrace_process_regex(char *buff, int len, int enable)
2174 func = strsep(&next, ":"); 2136 func = strsep(&next, ":");
2175 2137
2176 if (!next) { 2138 if (!next) {
2177 ftrace_match_records(func, len, enable); 2139 if (ftrace_match_records(func, len, enable))
2178 return 0; 2140 return 0;
2141 return ret;
2179 } 2142 }
2180 2143
2181 /* command found */ 2144 /* command found */
@@ -2221,10 +2184,9 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2221 !trace_parser_cont(parser)) { 2184 !trace_parser_cont(parser)) {
2222 ret = ftrace_process_regex(parser->buffer, 2185 ret = ftrace_process_regex(parser->buffer,
2223 parser->idx, enable); 2186 parser->idx, enable);
2187 trace_parser_clear(parser);
2224 if (ret) 2188 if (ret)
2225 goto out_unlock; 2189 goto out_unlock;
2226
2227 trace_parser_clear(parser);
2228 } 2190 }
2229 2191
2230 ret = read; 2192 ret = read;
@@ -2312,6 +2274,32 @@ static int __init set_ftrace_filter(char *str)
2312} 2274}
2313__setup("ftrace_filter=", set_ftrace_filter); 2275__setup("ftrace_filter=", set_ftrace_filter);
2314 2276
2277#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2278static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
2279static int __init set_graph_function(char *str)
2280{
2281 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
2282 return 1;
2283}
2284__setup("ftrace_graph_filter=", set_graph_function);
2285
2286static void __init set_ftrace_early_graph(char *buf)
2287{
2288 int ret;
2289 char *func;
2290
2291 while (buf) {
2292 func = strsep(&buf, ",");
2293 /* we allow only one expression at a time */
2294 ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
2295 func);
2296 if (ret)
2297 printk(KERN_DEBUG "ftrace: function %s not "
2298 "traceable\n", func);
2299 }
2300}
2301#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2302
2315static void __init set_ftrace_early_filter(char *buf, int enable) 2303static void __init set_ftrace_early_filter(char *buf, int enable)
2316{ 2304{
2317 char *func; 2305 char *func;
@@ -2328,6 +2316,10 @@ static void __init set_ftrace_early_filters(void)
2328 set_ftrace_early_filter(ftrace_filter_buf, 1); 2316 set_ftrace_early_filter(ftrace_filter_buf, 1);
2329 if (ftrace_notrace_buf[0]) 2317 if (ftrace_notrace_buf[0])
2330 set_ftrace_early_filter(ftrace_notrace_buf, 0); 2318 set_ftrace_early_filter(ftrace_notrace_buf, 0);
2319#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2320 if (ftrace_graph_buf[0])
2321 set_ftrace_early_graph(ftrace_graph_buf);
2322#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2331} 2323}
2332 2324
2333static int 2325static int
@@ -2410,6 +2402,7 @@ static const struct file_operations ftrace_notrace_fops = {
2410static DEFINE_MUTEX(graph_lock); 2402static DEFINE_MUTEX(graph_lock);
2411 2403
2412int ftrace_graph_count; 2404int ftrace_graph_count;
2405int ftrace_graph_filter_enabled;
2413unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; 2406unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
2414 2407
2415static void * 2408static void *
@@ -2432,7 +2425,7 @@ static void *g_start(struct seq_file *m, loff_t *pos)
2432 mutex_lock(&graph_lock); 2425 mutex_lock(&graph_lock);
2433 2426
2434 /* Nothing, tell g_show to print all functions are enabled */ 2427 /* Nothing, tell g_show to print all functions are enabled */
2435 if (!ftrace_graph_count && !*pos) 2428 if (!ftrace_graph_filter_enabled && !*pos)
2436 return (void *)1; 2429 return (void *)1;
2437 2430
2438 return __g_next(m, pos); 2431 return __g_next(m, pos);
@@ -2478,6 +2471,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
2478 mutex_lock(&graph_lock); 2471 mutex_lock(&graph_lock);
2479 if ((file->f_mode & FMODE_WRITE) && 2472 if ((file->f_mode & FMODE_WRITE) &&
2480 (file->f_flags & O_TRUNC)) { 2473 (file->f_flags & O_TRUNC)) {
2474 ftrace_graph_filter_enabled = 0;
2481 ftrace_graph_count = 0; 2475 ftrace_graph_count = 0;
2482 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); 2476 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
2483 } 2477 }
@@ -2503,7 +2497,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2503 struct dyn_ftrace *rec; 2497 struct dyn_ftrace *rec;
2504 struct ftrace_page *pg; 2498 struct ftrace_page *pg;
2505 int search_len; 2499 int search_len;
2506 int found = 0; 2500 int fail = 1;
2507 int type, not; 2501 int type, not;
2508 char *search; 2502 char *search;
2509 bool exists; 2503 bool exists;
@@ -2513,39 +2507,52 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2513 return -ENODEV; 2507 return -ENODEV;
2514 2508
2515 /* decode regex */ 2509 /* decode regex */
2516 type = ftrace_setup_glob(buffer, strlen(buffer), &search, &not); 2510 type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
2517 if (not) 2511 if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
2518 return -EINVAL; 2512 return -EBUSY;
2519 2513
2520 search_len = strlen(search); 2514 search_len = strlen(search);
2521 2515
2522 mutex_lock(&ftrace_lock); 2516 mutex_lock(&ftrace_lock);
2523 do_for_each_ftrace_rec(pg, rec) { 2517 do_for_each_ftrace_rec(pg, rec) {
2524 2518
2525 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2526 break;
2527
2528 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) 2519 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
2529 continue; 2520 continue;
2530 2521
2531 if (ftrace_match_record(rec, search, search_len, type)) { 2522 if (ftrace_match_record(rec, search, search_len, type)) {
2532 /* ensure it is not already in the array */ 2523 /* if it is in the array */
2533 exists = false; 2524 exists = false;
2534 for (i = 0; i < *idx; i++) 2525 for (i = 0; i < *idx; i++) {
2535 if (array[i] == rec->ip) { 2526 if (array[i] == rec->ip) {
2536 exists = true; 2527 exists = true;
2537 break; 2528 break;
2538 } 2529 }
2539 if (!exists) { 2530 }
2540 array[(*idx)++] = rec->ip; 2531
2541 found = 1; 2532 if (!not) {
2533 fail = 0;
2534 if (!exists) {
2535 array[(*idx)++] = rec->ip;
2536 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2537 goto out;
2538 }
2539 } else {
2540 if (exists) {
2541 array[i] = array[--(*idx)];
2542 array[*idx] = 0;
2543 fail = 0;
2544 }
2542 } 2545 }
2543 } 2546 }
2544 } while_for_each_ftrace_rec(); 2547 } while_for_each_ftrace_rec();
2545 2548out:
2546 mutex_unlock(&ftrace_lock); 2549 mutex_unlock(&ftrace_lock);
2547 2550
2548 return found ? 0 : -EINVAL; 2551 if (fail)
2552 return -EINVAL;
2553
2554 ftrace_graph_filter_enabled = 1;
2555 return 0;
2549} 2556}
2550 2557
2551static ssize_t 2558static ssize_t
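
With the rework above, entries in set_graph_function can be removed again by
writing the pattern with a leading '!'. A hypothetical user-space sequence
(debugfs path and function name are examples only):

/* Illustrative only: add a function to the graph filter, then drop it. */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

#define GRAPH_FILTER "/sys/kernel/debug/tracing/set_graph_function"

static int write_filter(const char *expr)
{
	/* O_APPEND keeps entries already in the filter */
	int fd = open(GRAPH_FILTER, O_WRONLY | O_APPEND);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, expr, strlen(expr));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	write_filter("do_sys_open\n");	/* graph-trace only this function */
	write_filter("!do_sys_open\n");	/* ...and remove it again */
	return 0;
}
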
@@ -2555,16 +2562,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
2555 struct trace_parser parser; 2562 struct trace_parser parser;
2556 ssize_t read, ret; 2563 ssize_t read, ret;
2557 2564
2558 if (!cnt || cnt < 0) 2565 if (!cnt)
2559 return 0; 2566 return 0;
2560 2567
2561 mutex_lock(&graph_lock); 2568 mutex_lock(&graph_lock);
2562 2569
2563 if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
2564 ret = -EBUSY;
2565 goto out_unlock;
2566 }
2567
2568 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { 2570 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
2569 ret = -ENOMEM; 2571 ret = -ENOMEM;
2570 goto out_unlock; 2572 goto out_unlock;
@@ -2624,7 +2626,7 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
2624 return 0; 2626 return 0;
2625} 2627}
2626 2628
2627static int ftrace_convert_nops(struct module *mod, 2629static int ftrace_process_locs(struct module *mod,
2628 unsigned long *start, 2630 unsigned long *start,
2629 unsigned long *end) 2631 unsigned long *end)
2630{ 2632{
@@ -2684,7 +2686,7 @@ static void ftrace_init_module(struct module *mod,
2684{ 2686{
2685 if (ftrace_disabled || start == end) 2687 if (ftrace_disabled || start == end)
2686 return; 2688 return;
2687 ftrace_convert_nops(mod, start, end); 2689 ftrace_process_locs(mod, start, end);
2688} 2690}
2689 2691
2690static int ftrace_module_notify(struct notifier_block *self, 2692static int ftrace_module_notify(struct notifier_block *self,
@@ -2745,7 +2747,7 @@ void __init ftrace_init(void)
2745 2747
2746 last_ftrace_enabled = ftrace_enabled = 1; 2748 last_ftrace_enabled = ftrace_enabled = 1;
2747 2749
2748 ret = ftrace_convert_nops(NULL, 2750 ret = ftrace_process_locs(NULL,
2749 __start_mcount_loc, 2751 __start_mcount_loc,
2750 __stop_mcount_loc); 2752 __stop_mcount_loc);
2751 2753
@@ -2778,23 +2780,6 @@ static inline void ftrace_startup_enable(int command) { }
2778# define ftrace_shutdown_sysctl() do { } while (0) 2780# define ftrace_shutdown_sysctl() do { } while (0)
2779#endif /* CONFIG_DYNAMIC_FTRACE */ 2781#endif /* CONFIG_DYNAMIC_FTRACE */
2780 2782
2781static ssize_t
2782ftrace_pid_read(struct file *file, char __user *ubuf,
2783 size_t cnt, loff_t *ppos)
2784{
2785 char buf[64];
2786 int r;
2787
2788 if (ftrace_pid_trace == ftrace_swapper_pid)
2789 r = sprintf(buf, "swapper tasks\n");
2790 else if (ftrace_pid_trace)
2791 r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace));
2792 else
2793 r = sprintf(buf, "no pid\n");
2794
2795 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2796}
2797
2798static void clear_ftrace_swapper(void) 2783static void clear_ftrace_swapper(void)
2799{ 2784{
2800 struct task_struct *p; 2785 struct task_struct *p;
@@ -2845,14 +2830,12 @@ static void set_ftrace_pid(struct pid *pid)
2845 rcu_read_unlock(); 2830 rcu_read_unlock();
2846} 2831}
2847 2832
2848static void clear_ftrace_pid_task(struct pid **pid) 2833static void clear_ftrace_pid_task(struct pid *pid)
2849{ 2834{
2850 if (*pid == ftrace_swapper_pid) 2835 if (pid == ftrace_swapper_pid)
2851 clear_ftrace_swapper(); 2836 clear_ftrace_swapper();
2852 else 2837 else
2853 clear_ftrace_pid(*pid); 2838 clear_ftrace_pid(pid);
2854
2855 *pid = NULL;
2856} 2839}
2857 2840
2858static void set_ftrace_pid_task(struct pid *pid) 2841static void set_ftrace_pid_task(struct pid *pid)
@@ -2863,74 +2846,184 @@ static void set_ftrace_pid_task(struct pid *pid)
2863 set_ftrace_pid(pid); 2846 set_ftrace_pid(pid);
2864} 2847}
2865 2848
2866static ssize_t 2849static int ftrace_pid_add(int p)
2867ftrace_pid_write(struct file *filp, const char __user *ubuf,
2868 size_t cnt, loff_t *ppos)
2869{ 2850{
2870 struct pid *pid; 2851 struct pid *pid;
2871 char buf[64]; 2852 struct ftrace_pid *fpid;
2872 long val; 2853 int ret = -EINVAL;
2873 int ret;
2874 2854
2875 if (cnt >= sizeof(buf)) 2855 mutex_lock(&ftrace_lock);
2876 return -EINVAL;
2877 2856
2878 if (copy_from_user(&buf, ubuf, cnt)) 2857 if (!p)
2879 return -EFAULT; 2858 pid = ftrace_swapper_pid;
2859 else
2860 pid = find_get_pid(p);
2880 2861
2881 buf[cnt] = 0; 2862 if (!pid)
2863 goto out;
2882 2864
2883 ret = strict_strtol(buf, 10, &val); 2865 ret = 0;
2884 if (ret < 0)
2885 return ret;
2886 2866
2887 mutex_lock(&ftrace_lock); 2867 list_for_each_entry(fpid, &ftrace_pids, list)
2888 if (val < 0) { 2868 if (fpid->pid == pid)
2889 /* disable pid tracing */ 2869 goto out_put;
2890 if (!ftrace_pid_trace)
2891 goto out;
2892 2870
2893 clear_ftrace_pid_task(&ftrace_pid_trace); 2871 ret = -ENOMEM;
2894 2872
2895 } else { 2873 fpid = kmalloc(sizeof(*fpid), GFP_KERNEL);
2896 /* swapper task is special */ 2874 if (!fpid)
2897 if (!val) { 2875 goto out_put;
2898 pid = ftrace_swapper_pid;
2899 if (pid == ftrace_pid_trace)
2900 goto out;
2901 } else {
2902 pid = find_get_pid(val);
2903 2876
2904 if (pid == ftrace_pid_trace) { 2877 list_add(&fpid->list, &ftrace_pids);
2905 put_pid(pid); 2878 fpid->pid = pid;
2906 goto out;
2907 }
2908 }
2909 2879
2910 if (ftrace_pid_trace) 2880 set_ftrace_pid_task(pid);
2911 clear_ftrace_pid_task(&ftrace_pid_trace);
2912 2881
2913 if (!pid) 2882 ftrace_update_pid_func();
2914 goto out; 2883 ftrace_startup_enable(0);
2884
2885 mutex_unlock(&ftrace_lock);
2886 return 0;
2887
2888out_put:
2889 if (pid != ftrace_swapper_pid)
2890 put_pid(pid);
2891
2892out:
2893 mutex_unlock(&ftrace_lock);
2894 return ret;
2895}
2896
2897static void ftrace_pid_reset(void)
2898{
2899 struct ftrace_pid *fpid, *safe;
2900
2901 mutex_lock(&ftrace_lock);
2902 list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) {
2903 struct pid *pid = fpid->pid;
2915 2904
2916 ftrace_pid_trace = pid; 2905 clear_ftrace_pid_task(pid);
2917 2906
2918 set_ftrace_pid_task(ftrace_pid_trace); 2907 list_del(&fpid->list);
2908 kfree(fpid);
2919 } 2909 }
2920 2910
2921 /* update the function call */
2922 ftrace_update_pid_func(); 2911 ftrace_update_pid_func();
2923 ftrace_startup_enable(0); 2912 ftrace_startup_enable(0);
2924 2913
2925 out:
2926 mutex_unlock(&ftrace_lock); 2914 mutex_unlock(&ftrace_lock);
2915}
2927 2916
2928 return cnt; 2917static void *fpid_start(struct seq_file *m, loff_t *pos)
2918{
2919 mutex_lock(&ftrace_lock);
2920
2921 if (list_empty(&ftrace_pids) && (!*pos))
2922 return (void *) 1;
2923
2924 return seq_list_start(&ftrace_pids, *pos);
2925}
2926
2927static void *fpid_next(struct seq_file *m, void *v, loff_t *pos)
2928{
2929 if (v == (void *)1)
2930 return NULL;
2931
2932 return seq_list_next(v, &ftrace_pids, pos);
2933}
2934
2935static void fpid_stop(struct seq_file *m, void *p)
2936{
2937 mutex_unlock(&ftrace_lock);
2938}
2939
2940static int fpid_show(struct seq_file *m, void *v)
2941{
2942 const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list);
2943
2944 if (v == (void *)1) {
2945 seq_printf(m, "no pid\n");
2946 return 0;
2947 }
2948
2949 if (fpid->pid == ftrace_swapper_pid)
2950 seq_printf(m, "swapper tasks\n");
2951 else
2952 seq_printf(m, "%u\n", pid_vnr(fpid->pid));
2953
2954 return 0;
2955}
2956
2957static const struct seq_operations ftrace_pid_sops = {
2958 .start = fpid_start,
2959 .next = fpid_next,
2960 .stop = fpid_stop,
2961 .show = fpid_show,
2962};
2963
2964static int
2965ftrace_pid_open(struct inode *inode, struct file *file)
2966{
2967 int ret = 0;
2968
2969 if ((file->f_mode & FMODE_WRITE) &&
2970 (file->f_flags & O_TRUNC))
2971 ftrace_pid_reset();
2972
2973 if (file->f_mode & FMODE_READ)
2974 ret = seq_open(file, &ftrace_pid_sops);
2975
2976 return ret;
2977}
2978
2979static ssize_t
2980ftrace_pid_write(struct file *filp, const char __user *ubuf,
2981 size_t cnt, loff_t *ppos)
2982{
2983 char buf[64], *tmp;
2984 long val;
2985 int ret;
2986
2987 if (cnt >= sizeof(buf))
2988 return -EINVAL;
2989
2990 if (copy_from_user(&buf, ubuf, cnt))
2991 return -EFAULT;
2992
2993 buf[cnt] = 0;
2994
2995 /*
2996 * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid"
2997 * to clean the filter quietly.
2998 */
2999 tmp = strstrip(buf);
3000 if (strlen(tmp) == 0)
3001 return 1;
3002
3003 ret = strict_strtol(tmp, 10, &val);
3004 if (ret < 0)
3005 return ret;
3006
3007 ret = ftrace_pid_add(val);
3008
3009 return ret ? ret : cnt;
3010}
3011
3012static int
3013ftrace_pid_release(struct inode *inode, struct file *file)
3014{
3015 if (file->f_mode & FMODE_READ)
3016 seq_release(inode, file);
3017
3018 return 0;
2929} 3019}
2930 3020
2931static const struct file_operations ftrace_pid_fops = { 3021static const struct file_operations ftrace_pid_fops = {
2932 .read = ftrace_pid_read, 3022 .open = ftrace_pid_open,
2933 .write = ftrace_pid_write, 3023 .write = ftrace_pid_write,
3024 .read = seq_read,
3025 .llseek = seq_lseek,
3026 .release = ftrace_pid_release,
2934}; 3027};
2935 3028
2936static __init int ftrace_init_debugfs(void) 3029static __init int ftrace_init_debugfs(void)
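
Matching the new multi-pid behaviour and the "echo > set_ftrace_pid" comment
in ftrace_pid_write() above, an illustrative user-space sequence (debugfs
path and pid values are examples only):

/* Illustrative only: restrict function tracing to two pids, then clear
 * the filter again by truncating and writing an empty line. */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

#define PID_FILTER "/sys/kernel/debug/tracing/set_ftrace_pid"

static int write_pid_filter(const char *val, int append)
{
	int fd = open(PID_FILTER, O_WRONLY | (append ? O_APPEND : O_TRUNC));
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, val, strlen(val));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	write_pid_filter("1234\n", 0);	/* first pid: replaces the filter */
	write_pid_filter("5678\n", 1);	/* second pid: appended to the list */

	write_pid_filter("\n", 0);	/* O_TRUNC + empty line clears it */
	return 0;
}
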
@@ -3293,4 +3386,3 @@ void ftrace_graph_stop(void)
3293 ftrace_stop(); 3386 ftrace_stop();
3294} 3387}
3295#endif 3388#endif
3296
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index e06c6e3d56a3..9f4f565b01e6 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -14,7 +14,5 @@
14#define CREATE_TRACE_POINTS 14#define CREATE_TRACE_POINTS
15#include <trace/events/power.h> 15#include <trace/events/power.h>
16 16
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
18EXPORT_TRACEPOINT_SYMBOL_GPL(power_end);
19EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); 17EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
20 18
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5dd017fea6f5..0287f9f52f5a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -20,6 +20,7 @@
20#include <linux/cpu.h> 20#include <linux/cpu.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22 22
23#include <asm/local.h>
23#include "trace.h" 24#include "trace.h"
24 25
25/* 26/*
@@ -397,18 +398,21 @@ int ring_buffer_print_page_header(struct trace_seq *s)
397 int ret; 398 int ret;
398 399
399 ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" 400 ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
400 "offset:0;\tsize:%u;\n", 401 "offset:0;\tsize:%u;\tsigned:%u;\n",
401 (unsigned int)sizeof(field.time_stamp)); 402 (unsigned int)sizeof(field.time_stamp),
403 (unsigned int)is_signed_type(u64));
402 404
403 ret = trace_seq_printf(s, "\tfield: local_t commit;\t" 405 ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
404 "offset:%u;\tsize:%u;\n", 406 "offset:%u;\tsize:%u;\tsigned:%u;\n",
405 (unsigned int)offsetof(typeof(field), commit), 407 (unsigned int)offsetof(typeof(field), commit),
406 (unsigned int)sizeof(field.commit)); 408 (unsigned int)sizeof(field.commit),
409 (unsigned int)is_signed_type(long));
407 410
408 ret = trace_seq_printf(s, "\tfield: char data;\t" 411 ret = trace_seq_printf(s, "\tfield: char data;\t"
409 "offset:%u;\tsize:%u;\n", 412 "offset:%u;\tsize:%u;\tsigned:%u;\n",
410 (unsigned int)offsetof(typeof(field), data), 413 (unsigned int)offsetof(typeof(field), data),
411 (unsigned int)BUF_PAGE_SIZE); 414 (unsigned int)BUF_PAGE_SIZE,
415 (unsigned int)is_signed_type(char));
412 416
413 return ret; 417 return ret;
414} 418}
@@ -420,7 +424,7 @@ struct ring_buffer_per_cpu {
420 int cpu; 424 int cpu;
421 struct ring_buffer *buffer; 425 struct ring_buffer *buffer;
422 spinlock_t reader_lock; /* serialize readers */ 426 spinlock_t reader_lock; /* serialize readers */
423 raw_spinlock_t lock; 427 arch_spinlock_t lock;
424 struct lock_class_key lock_key; 428 struct lock_class_key lock_key;
425 struct list_head *pages; 429 struct list_head *pages;
426 struct buffer_page *head_page; /* read from head */ 430 struct buffer_page *head_page; /* read from head */
@@ -461,6 +465,8 @@ struct ring_buffer_iter {
461 struct ring_buffer_per_cpu *cpu_buffer; 465 struct ring_buffer_per_cpu *cpu_buffer;
462 unsigned long head; 466 unsigned long head;
463 struct buffer_page *head_page; 467 struct buffer_page *head_page;
468 struct buffer_page *cache_reader_page;
469 unsigned long cache_read;
464 u64 read_stamp; 470 u64 read_stamp;
465}; 471};
466 472
@@ -995,7 +1001,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
995 cpu_buffer->buffer = buffer; 1001 cpu_buffer->buffer = buffer;
996 spin_lock_init(&cpu_buffer->reader_lock); 1002 spin_lock_init(&cpu_buffer->reader_lock);
997 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); 1003 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
998 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1004 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
999 1005
1000 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1006 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1001 GFP_KERNEL, cpu_to_node(cpu)); 1007 GFP_KERNEL, cpu_to_node(cpu));
@@ -1190,9 +1196,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1190 struct list_head *p; 1196 struct list_head *p;
1191 unsigned i; 1197 unsigned i;
1192 1198
1193 atomic_inc(&cpu_buffer->record_disabled);
1194 synchronize_sched();
1195
1196 spin_lock_irq(&cpu_buffer->reader_lock); 1199 spin_lock_irq(&cpu_buffer->reader_lock);
1197 rb_head_page_deactivate(cpu_buffer); 1200 rb_head_page_deactivate(cpu_buffer);
1198 1201
@@ -1208,12 +1211,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1208 return; 1211 return;
1209 1212
1210 rb_reset_cpu(cpu_buffer); 1213 rb_reset_cpu(cpu_buffer);
1211 spin_unlock_irq(&cpu_buffer->reader_lock);
1212
1213 rb_check_pages(cpu_buffer); 1214 rb_check_pages(cpu_buffer);
1214 1215
1215 atomic_dec(&cpu_buffer->record_disabled); 1216 spin_unlock_irq(&cpu_buffer->reader_lock);
1216
1217} 1217}
1218 1218
1219static void 1219static void
@@ -1224,9 +1224,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1224 struct list_head *p; 1224 struct list_head *p;
1225 unsigned i; 1225 unsigned i;
1226 1226
1227 atomic_inc(&cpu_buffer->record_disabled);
1228 synchronize_sched();
1229
1230 spin_lock_irq(&cpu_buffer->reader_lock); 1227 spin_lock_irq(&cpu_buffer->reader_lock);
1231 rb_head_page_deactivate(cpu_buffer); 1228 rb_head_page_deactivate(cpu_buffer);
1232 1229
@@ -1239,11 +1236,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1239 list_add_tail(&bpage->list, cpu_buffer->pages); 1236 list_add_tail(&bpage->list, cpu_buffer->pages);
1240 } 1237 }
1241 rb_reset_cpu(cpu_buffer); 1238 rb_reset_cpu(cpu_buffer);
1242 spin_unlock_irq(&cpu_buffer->reader_lock);
1243
1244 rb_check_pages(cpu_buffer); 1239 rb_check_pages(cpu_buffer);
1245 1240
1246 atomic_dec(&cpu_buffer->record_disabled); 1241 spin_unlock_irq(&cpu_buffer->reader_lock);
1247} 1242}
1248 1243
1249/** 1244/**
@@ -1251,11 +1246,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1251 * @buffer: the buffer to resize. 1246 * @buffer: the buffer to resize.
1252 * @size: the new size. 1247 * @size: the new size.
1253 * 1248 *
1254 * The tracer is responsible for making sure that the buffer is
1255 * not being used while changing the size.
1256 * Note: We may be able to change the above requirement by using
1257 * RCU synchronizations.
1258 *
1259 * Minimum size is 2 * BUF_PAGE_SIZE. 1249 * Minimum size is 2 * BUF_PAGE_SIZE.
1260 * 1250 *
1261 * Returns -1 on failure. 1251 * Returns -1 on failure.
@@ -1287,6 +1277,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1287 if (size == buffer_size) 1277 if (size == buffer_size)
1288 return size; 1278 return size;
1289 1279
1280 atomic_inc(&buffer->record_disabled);
1281
1282 /* Make sure all writers are done with this buffer. */
1283 synchronize_sched();
1284
1290 mutex_lock(&buffer->mutex); 1285 mutex_lock(&buffer->mutex);
1291 get_online_cpus(); 1286 get_online_cpus();
1292 1287
@@ -1349,6 +1344,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1349 put_online_cpus(); 1344 put_online_cpus();
1350 mutex_unlock(&buffer->mutex); 1345 mutex_unlock(&buffer->mutex);
1351 1346
1347 atomic_dec(&buffer->record_disabled);
1348
1352 return size; 1349 return size;
1353 1350
1354 free_pages: 1351 free_pages:
@@ -1358,6 +1355,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1358 } 1355 }
1359 put_online_cpus(); 1356 put_online_cpus();
1360 mutex_unlock(&buffer->mutex); 1357 mutex_unlock(&buffer->mutex);
1358 atomic_dec(&buffer->record_disabled);
1361 return -ENOMEM; 1359 return -ENOMEM;
1362 1360
1363 /* 1361 /*
@@ -1367,6 +1365,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1367 out_fail: 1365 out_fail:
1368 put_online_cpus(); 1366 put_online_cpus();
1369 mutex_unlock(&buffer->mutex); 1367 mutex_unlock(&buffer->mutex);
1368 atomic_dec(&buffer->record_disabled);
1370 return -1; 1369 return -1;
1371} 1370}
1372EXPORT_SYMBOL_GPL(ring_buffer_resize); 1371EXPORT_SYMBOL_GPL(ring_buffer_resize);
@@ -1787,9 +1786,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1787static struct ring_buffer_event * 1786static struct ring_buffer_event *
1788rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, 1787rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1789 unsigned long length, unsigned long tail, 1788 unsigned long length, unsigned long tail,
1790 struct buffer_page *commit_page,
1791 struct buffer_page *tail_page, u64 *ts) 1789 struct buffer_page *tail_page, u64 *ts)
1792{ 1790{
1791 struct buffer_page *commit_page = cpu_buffer->commit_page;
1793 struct ring_buffer *buffer = cpu_buffer->buffer; 1792 struct ring_buffer *buffer = cpu_buffer->buffer;
1794 struct buffer_page *next_page; 1793 struct buffer_page *next_page;
1795 int ret; 1794 int ret;
@@ -1892,13 +1891,10 @@ static struct ring_buffer_event *
1892__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 1891__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1893 unsigned type, unsigned long length, u64 *ts) 1892 unsigned type, unsigned long length, u64 *ts)
1894{ 1893{
1895 struct buffer_page *tail_page, *commit_page; 1894 struct buffer_page *tail_page;
1896 struct ring_buffer_event *event; 1895 struct ring_buffer_event *event;
1897 unsigned long tail, write; 1896 unsigned long tail, write;
1898 1897
1899 commit_page = cpu_buffer->commit_page;
1900 /* we just need to protect against interrupts */
1901 barrier();
1902 tail_page = cpu_buffer->tail_page; 1898 tail_page = cpu_buffer->tail_page;
1903 write = local_add_return(length, &tail_page->write); 1899 write = local_add_return(length, &tail_page->write);
1904 1900
@@ -1909,7 +1905,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1909 /* See if we shot pass the end of this buffer page */ 1905 /* See if we shot pass the end of this buffer page */
1910 if (write > BUF_PAGE_SIZE) 1906 if (write > BUF_PAGE_SIZE)
1911 return rb_move_tail(cpu_buffer, length, tail, 1907 return rb_move_tail(cpu_buffer, length, tail,
1912 commit_page, tail_page, ts); 1908 tail_page, ts);
1913 1909
1914 /* We reserved something on the buffer */ 1910 /* We reserved something on the buffer */
1915 1911
@@ -2723,6 +2719,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2723 iter->read_stamp = cpu_buffer->read_stamp; 2719 iter->read_stamp = cpu_buffer->read_stamp;
2724 else 2720 else
2725 iter->read_stamp = iter->head_page->page->time_stamp; 2721 iter->read_stamp = iter->head_page->page->time_stamp;
2722 iter->cache_reader_page = cpu_buffer->reader_page;
2723 iter->cache_read = cpu_buffer->read;
2726} 2724}
2727 2725
2728/** 2726/**
@@ -2834,7 +2832,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2834 int ret; 2832 int ret;
2835 2833
2836 local_irq_save(flags); 2834 local_irq_save(flags);
2837 __raw_spin_lock(&cpu_buffer->lock); 2835 arch_spin_lock(&cpu_buffer->lock);
2838 2836
2839 again: 2837 again:
2840 /* 2838 /*
@@ -2876,7 +2874,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2876 * Splice the empty reader page into the list around the head. 2874 * Splice the empty reader page into the list around the head.
2877 */ 2875 */
2878 reader = rb_set_head_page(cpu_buffer); 2876 reader = rb_set_head_page(cpu_buffer);
2879 cpu_buffer->reader_page->list.next = reader->list.next; 2877 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
2880 cpu_buffer->reader_page->list.prev = reader->list.prev; 2878 cpu_buffer->reader_page->list.prev = reader->list.prev;
2881 2879
2882 /* 2880 /*
@@ -2913,7 +2911,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2913 * 2911 *
2914 * Now make the new head point back to the reader page. 2912 * Now make the new head point back to the reader page.
2915 */ 2913 */
2916 reader->list.next->prev = &cpu_buffer->reader_page->list; 2914 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
2917 rb_inc_page(cpu_buffer, &cpu_buffer->head_page); 2915 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
2918 2916
2919 /* Finally update the reader page to the new head */ 2917 /* Finally update the reader page to the new head */
@@ -2923,7 +2921,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2923 goto again; 2921 goto again;
2924 2922
2925 out: 2923 out:
2926 __raw_spin_unlock(&cpu_buffer->lock); 2924 arch_spin_unlock(&cpu_buffer->lock);
2927 local_irq_restore(flags); 2925 local_irq_restore(flags);
2928 2926
2929 return reader; 2927 return reader;
@@ -3067,13 +3065,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3067 struct ring_buffer_event *event; 3065 struct ring_buffer_event *event;
3068 int nr_loops = 0; 3066 int nr_loops = 0;
3069 3067
3070 if (ring_buffer_iter_empty(iter))
3071 return NULL;
3072
3073 cpu_buffer = iter->cpu_buffer; 3068 cpu_buffer = iter->cpu_buffer;
3074 buffer = cpu_buffer->buffer; 3069 buffer = cpu_buffer->buffer;
3075 3070
3071 /*
3072	 * Check if someone performed a consuming read on
3073 * the buffer. A consuming read invalidates the iterator
3074 * and we need to reset the iterator in this case.
3075 */
3076 if (unlikely(iter->cache_read != cpu_buffer->read ||
3077 iter->cache_reader_page != cpu_buffer->reader_page))
3078 rb_iter_reset(iter);
3079
3076 again: 3080 again:
3081 if (ring_buffer_iter_empty(iter))
3082 return NULL;
3083
3077 /* 3084 /*
3078 * We repeat when a timestamp is encountered. 3085 * We repeat when a timestamp is encountered.
3079 * We can get multiple timestamps by nested interrupts or also 3086 * We can get multiple timestamps by nested interrupts or also
@@ -3088,6 +3095,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3088 if (rb_per_cpu_empty(cpu_buffer)) 3095 if (rb_per_cpu_empty(cpu_buffer))
3089 return NULL; 3096 return NULL;
3090 3097
3098 if (iter->head >= local_read(&iter->head_page->page->commit)) {
3099 rb_inc_iter(iter);
3100 goto again;
3101 }
3102
3091 event = rb_iter_head_event(iter); 3103 event = rb_iter_head_event(iter);
3092 3104
3093 switch (event->type_len) { 3105 switch (event->type_len) {
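
The iterator changes above cache the reader page and read count at rb_iter_reset() time and restart the iterator when a consuming read has invalidated that snapshot. As a rough userspace sketch of that generation-check pattern (all names here are hypothetical, not the ring buffer API):

/*
 * Illustrative sketch of the invalidation check added above: the
 * iterator snapshots a "consume" counter and resets itself when a
 * consuming read has happened since the snapshot was taken.
 */
#include <stdio.h>

struct buf {
        unsigned long read;     /* bumped by every consuming read */
        unsigned long head;     /* current consume position */
};

struct iter {
        struct buf *b;
        unsigned long cache_read;       /* snapshot of b->read */
        unsigned long pos;
};

static void iter_reset(struct iter *it)
{
        it->pos = it->b->head;
        it->cache_read = it->b->read;
}

static unsigned long iter_peek(struct iter *it)
{
        /* A consuming read invalidates the iterator: start over. */
        if (it->cache_read != it->b->read)
                iter_reset(it);
        return it->pos;
}

int main(void)
{
        struct buf b = { .read = 0, .head = 0 };
        struct iter it = { .b = &b };

        iter_reset(&it);
        b.read++;               /* someone consumed an event */
        b.head = 5;
        printf("iterator resumes at %lu\n", iter_peek(&it));   /* prints 5 */
        return 0;
}
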
@@ -3286,9 +3298,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
3286 synchronize_sched(); 3298 synchronize_sched();
3287 3299
3288 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3300 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3289 __raw_spin_lock(&cpu_buffer->lock); 3301 arch_spin_lock(&cpu_buffer->lock);
3290 rb_iter_reset(iter); 3302 rb_iter_reset(iter);
3291 __raw_spin_unlock(&cpu_buffer->lock); 3303 arch_spin_unlock(&cpu_buffer->lock);
3292 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3304 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3293 3305
3294 return iter; 3306 return iter;
@@ -3408,11 +3420,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
3408 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) 3420 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3409 goto out; 3421 goto out;
3410 3422
3411 __raw_spin_lock(&cpu_buffer->lock); 3423 arch_spin_lock(&cpu_buffer->lock);
3412 3424
3413 rb_reset_cpu(cpu_buffer); 3425 rb_reset_cpu(cpu_buffer);
3414 3426
3415 __raw_spin_unlock(&cpu_buffer->lock); 3427 arch_spin_unlock(&cpu_buffer->lock);
3416 3428
3417 out: 3429 out:
3418 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3430 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 573d3cc762c3..df74c7982255 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -8,6 +8,7 @@
8#include <linux/kthread.h> 8#include <linux/kthread.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <asm/local.h>
11 12
12struct rb_page { 13struct rb_page {
13 u64 ts; 14 u64 ts;
@@ -35,6 +36,28 @@ static int disable_reader;
35module_param(disable_reader, uint, 0644); 36module_param(disable_reader, uint, 0644);
36MODULE_PARM_DESC(disable_reader, "only run producer"); 37MODULE_PARM_DESC(disable_reader, "only run producer");
37 38
39static int write_iteration = 50;
40module_param(write_iteration, uint, 0644);
41MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings");
42
43static int producer_nice = 19;
44static int consumer_nice = 19;
45
46static int producer_fifo = -1;
47static int consumer_fifo = -1;
48
49module_param(producer_nice, uint, 0644);
50MODULE_PARM_DESC(producer_nice, "nice prio for producer");
51
52module_param(consumer_nice, uint, 0644);
53MODULE_PARM_DESC(consumer_nice, "nice prio for consumer");
54
55module_param(producer_fifo, uint, 0644);
56MODULE_PARM_DESC(producer_fifo, "fifo prio for producer");
57
58module_param(consumer_fifo, uint, 0644);
59MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer");
60
38static int read_events; 61static int read_events;
39 62
40static int kill_test; 63static int kill_test;
@@ -208,15 +231,18 @@ static void ring_buffer_producer(void)
208 do { 231 do {
209 struct ring_buffer_event *event; 232 struct ring_buffer_event *event;
210 int *entry; 233 int *entry;
211 234 int i;
212 event = ring_buffer_lock_reserve(buffer, 10); 235
213 if (!event) { 236 for (i = 0; i < write_iteration; i++) {
214 missed++; 237 event = ring_buffer_lock_reserve(buffer, 10);
215 } else { 238 if (!event) {
216 hit++; 239 missed++;
217 entry = ring_buffer_event_data(event); 240 } else {
218 *entry = smp_processor_id(); 241 hit++;
219 ring_buffer_unlock_commit(buffer, event); 242 entry = ring_buffer_event_data(event);
243 *entry = smp_processor_id();
244 ring_buffer_unlock_commit(buffer, event);
245 }
220 } 246 }
221 do_gettimeofday(&end_tv); 247 do_gettimeofday(&end_tv);
222 248
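
The benchmark loop above now batches write_iteration reserve/commit cycles between timestamp readings, so the clock call no longer dominates what is being measured. A hedged userspace sketch of the same batching idea, with do_one_write() standing in for the reserve/commit pair:

/*
 * Userspace sketch of batching work between clock reads: the clock is
 * read once per batch of writes instead of once per write.
 */
#include <stdio.h>
#include <time.h>

#define WRITE_ITERATION 50      /* mirrors the module parameter's default */

static int do_one_write(void) { return 1; }     /* stand-in for reserve/commit */

int main(void)
{
        struct timespec start, end;
        long hit = 0;
        int i;

        clock_gettime(CLOCK_MONOTONIC, &start);
        do {
                for (i = 0; i < WRITE_ITERATION; i++)
                        hit += do_one_write();
                clock_gettime(CLOCK_MONOTONIC, &end);
        } while (end.tv_sec - start.tv_sec < 1);

        printf("writes in ~1s: %ld\n", hit);
        return 0;
}
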
@@ -263,6 +289,27 @@ static void ring_buffer_producer(void)
263 289
264 if (kill_test) 290 if (kill_test)
265 trace_printk("ERROR!\n"); 291 trace_printk("ERROR!\n");
292
293 if (!disable_reader) {
294 if (consumer_fifo < 0)
295 trace_printk("Running Consumer at nice: %d\n",
296 consumer_nice);
297 else
298 trace_printk("Running Consumer at SCHED_FIFO %d\n",
299 consumer_fifo);
300 }
301 if (producer_fifo < 0)
302 trace_printk("Running Producer at nice: %d\n",
303 producer_nice);
304 else
305 trace_printk("Running Producer at SCHED_FIFO %d\n",
306 producer_fifo);
307
308 /* Let the user know that the test is running at low priority */
309 if (producer_fifo < 0 && consumer_fifo < 0 &&
310 producer_nice == 19 && consumer_nice == 19)
311 trace_printk("WARNING!!! This test is running at lowest priority.\n");
312
266 trace_printk("Time: %lld (usecs)\n", time); 313 trace_printk("Time: %lld (usecs)\n", time);
267 trace_printk("Overruns: %lld\n", overruns); 314 trace_printk("Overruns: %lld\n", overruns);
268 if (disable_reader) 315 if (disable_reader)
@@ -392,6 +439,27 @@ static int __init ring_buffer_benchmark_init(void)
392 if (IS_ERR(producer)) 439 if (IS_ERR(producer))
393 goto out_kill; 440 goto out_kill;
394 441
442 /*
443 * Run them as low-prio background tasks by default:
444 */
445 if (!disable_reader) {
446 if (consumer_fifo >= 0) {
447 struct sched_param param = {
448 .sched_priority = consumer_fifo
449 };
450 sched_setscheduler(consumer, SCHED_FIFO, &param);
451 } else
452 set_user_nice(consumer, consumer_nice);
453 }
454
455 if (producer_fifo >= 0) {
456 struct sched_param param = {
457			.sched_priority = producer_fifo
458 };
459 sched_setscheduler(producer, SCHED_FIFO, &param);
460 } else
461 set_user_nice(producer, producer_nice);
462
395 return 0; 463 return 0;
396 464
397 out_kill: 465 out_kill:
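
The init path above gives each benchmark kthread either a SCHED_FIFO priority (when its *_fifo parameter is set) or a nice value. A small userspace sketch of the same either/or policy applied to the calling process; SCHED_FIFO normally requires root, the nice path does not:

/*
 * Sketch of the "FIFO priority if requested, otherwise a nice value"
 * policy choice above, applied to the current process.
 */
#include <sched.h>
#include <stdio.h>
#include <sys/resource.h>

static void set_test_priority(int fifo_prio, int nice_val)
{
        if (fifo_prio >= 0) {
                struct sched_param param = { .sched_priority = fifo_prio };

                if (sched_setscheduler(0, SCHED_FIFO, &param))
                        perror("sched_setscheduler");
        } else if (setpriority(PRIO_PROCESS, 0, nice_val)) {
                perror("setpriority");
        }
}

int main(void)
{
        set_test_priority(-1, 19);      /* default: lowest priority */
        printf("running at nice %d\n", getpriority(PRIO_PROCESS, 0));
        return 0;
}
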
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b20d3ec75de9..ed01fdba4a55 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -12,7 +12,7 @@
12 * Copyright (C) 2004 William Lee Irwin III 12 * Copyright (C) 2004 William Lee Irwin III
13 */ 13 */
14#include <linux/ring_buffer.h> 14#include <linux/ring_buffer.h>
15#include <linux/utsrelease.h> 15#include <generated/utsrelease.h>
16#include <linux/stacktrace.h> 16#include <linux/stacktrace.h>
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
@@ -32,6 +32,7 @@
32#include <linux/splice.h> 32#include <linux/splice.h>
33#include <linux/kdebug.h> 33#include <linux/kdebug.h>
34#include <linux/string.h> 34#include <linux/string.h>
35#include <linux/rwsem.h>
35#include <linux/ctype.h> 36#include <linux/ctype.h>
36#include <linux/init.h> 37#include <linux/init.h>
37#include <linux/poll.h> 38#include <linux/poll.h>
@@ -86,25 +87,22 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
86 */ 87 */
87static int tracing_disabled = 1; 88static int tracing_disabled = 1;
88 89
89DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 90DEFINE_PER_CPU(int, ftrace_cpu_disabled);
90 91
91static inline void ftrace_disable_cpu(void) 92static inline void ftrace_disable_cpu(void)
92{ 93{
93 preempt_disable(); 94 preempt_disable();
94 local_inc(&__get_cpu_var(ftrace_cpu_disabled)); 95 __this_cpu_inc(ftrace_cpu_disabled);
95} 96}
96 97
97static inline void ftrace_enable_cpu(void) 98static inline void ftrace_enable_cpu(void)
98{ 99{
99 local_dec(&__get_cpu_var(ftrace_cpu_disabled)); 100 __this_cpu_dec(ftrace_cpu_disabled);
100 preempt_enable(); 101 preempt_enable();
101} 102}
102 103
103static cpumask_var_t __read_mostly tracing_buffer_mask; 104static cpumask_var_t __read_mostly tracing_buffer_mask;
104 105
105/* Define which cpu buffers are currently read in trace_pipe */
106static cpumask_var_t tracing_reader_cpumask;
107
108#define for_each_tracing_cpu(cpu) \ 106#define for_each_tracing_cpu(cpu) \
109 for_each_cpu(cpu, tracing_buffer_mask) 107 for_each_cpu(cpu, tracing_buffer_mask)
110 108
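
The hunk above turns the per-cpu disable flag from a local_t into a plain int bumped with __this_cpu_inc()/__this_cpu_dec() under preempt_disable(). A userspace analog of that per-cpu counter, using thread-local storage as a stand-in (illustrative only, build with a C11 compiler):

/*
 * Thread-local analog of the per-cpu disable counter: increment on
 * entry, decrement on exit, and drop events while it is nonzero.
 */
#include <stdio.h>

static _Thread_local int trace_disabled;

static void trace_disable(void) { trace_disabled++; }
static void trace_enable(void)  { trace_disabled--; }

static void record_event(const char *msg)
{
        if (trace_disabled)             /* reader active: drop the event */
                return;
        printf("event: %s\n", msg);
}

int main(void)
{
        record_event("recorded");
        trace_disable();
        record_event("dropped");
        trace_enable();
        return 0;
}
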
@@ -129,7 +127,7 @@ static int tracing_set_tracer(const char *buf);
129static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; 127static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
130static char *default_bootup_tracer; 128static char *default_bootup_tracer;
131 129
132static int __init set_ftrace(char *str) 130static int __init set_cmdline_ftrace(char *str)
133{ 131{
134 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); 132 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
135 default_bootup_tracer = bootup_tracer_buf; 133 default_bootup_tracer = bootup_tracer_buf;
@@ -137,7 +135,7 @@ static int __init set_ftrace(char *str)
137 ring_buffer_expanded = 1; 135 ring_buffer_expanded = 1;
138 return 1; 136 return 1;
139} 137}
140__setup("ftrace=", set_ftrace); 138__setup("ftrace=", set_cmdline_ftrace);
141 139
142static int __init set_ftrace_dump_on_oops(char *str) 140static int __init set_ftrace_dump_on_oops(char *str)
143{ 141{
@@ -203,7 +201,7 @@ cycle_t ftrace_now(int cpu)
203 */ 201 */
204static struct trace_array max_tr; 202static struct trace_array max_tr;
205 203
206static DEFINE_PER_CPU(struct trace_array_cpu, max_data); 204static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
207 205
208/* tracer_enabled is used to toggle activation of a tracer */ 206/* tracer_enabled is used to toggle activation of a tracer */
209static int tracer_enabled = 1; 207static int tracer_enabled = 1;
@@ -243,12 +241,91 @@ static struct tracer *current_trace __read_mostly;
243 241
244/* 242/*
245 * trace_types_lock is used to protect the trace_types list. 243 * trace_types_lock is used to protect the trace_types list.
246 * This lock is also used to keep user access serialized.
247 * Accesses from userspace will grab this lock while userspace
248 * activities happen inside the kernel.
249 */ 244 */
250static DEFINE_MUTEX(trace_types_lock); 245static DEFINE_MUTEX(trace_types_lock);
251 246
247/*
248 * Serialize access to the ring buffer.
249 *
250 * The ring buffer serializes readers, but that is only low-level protection.
251 * The validity of events returned by ring_buffer_peek() and friends
252 * is not protected by the ring buffer.
253 *
254 * The content of events may become garbage if we allow another process to
255 * consume these events concurrently:
256 * A) the page holding the consumed events may become a normal page
257 * (not a reader page) in the ring buffer, and this page will be rewritten
258 * by the event producer.
259 * B) the page holding the consumed events may become a page used for
260 * splice_read, and this page will be returned to the system.
261 *
262 * These primitives allow multiple processes to access different cpu ring
263 * buffers concurrently.
264 *
265 * These primitives don't distinguish read-only and read-consume access.
266 * Multiple read-only accesses are also serialized.
267 */
268
269#ifdef CONFIG_SMP
270static DECLARE_RWSEM(all_cpu_access_lock);
271static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
272
273static inline void trace_access_lock(int cpu)
274{
275 if (cpu == TRACE_PIPE_ALL_CPU) {
276 /* gain it for accessing the whole ring buffer. */
277 down_write(&all_cpu_access_lock);
278 } else {
279 /* gain it for accessing a cpu ring buffer. */
280
281 /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
282 down_read(&all_cpu_access_lock);
283
284 /* Secondly block other access to this @cpu ring buffer. */
285 mutex_lock(&per_cpu(cpu_access_lock, cpu));
286 }
287}
288
289static inline void trace_access_unlock(int cpu)
290{
291 if (cpu == TRACE_PIPE_ALL_CPU) {
292 up_write(&all_cpu_access_lock);
293 } else {
294 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
295 up_read(&all_cpu_access_lock);
296 }
297}
298
299static inline void trace_access_lock_init(void)
300{
301 int cpu;
302
303 for_each_possible_cpu(cpu)
304 mutex_init(&per_cpu(cpu_access_lock, cpu));
305}
306
307#else
308
309static DEFINE_MUTEX(access_lock);
310
311static inline void trace_access_lock(int cpu)
312{
313 (void)cpu;
314 mutex_lock(&access_lock);
315}
316
317static inline void trace_access_unlock(int cpu)
318{
319 (void)cpu;
320 mutex_unlock(&access_lock);
321}
322
323static inline void trace_access_lock_init(void)
324{
325}
326
327#endif
328
252/* trace_wait is a waitqueue for tasks blocked on trace_poll */ 329/* trace_wait is a waitqueue for tasks blocked on trace_poll */
253static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 330static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
254 331
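
In the SMP variant above, a whole-buffer reader write-locks a global rwsem, while a per-cpu reader read-locks the rwsem and then takes that cpu's mutex, so readers of different cpu buffers never serialize against each other. A userspace sketch of the same two-level scheme with pthread primitives (names are illustrative):

/*
 * Two-level access scheme: ALL_CPUS access excludes everyone via a
 * write lock; per-cpu access takes the read lock plus that cpu's mutex.
 * Build with -pthread.
 */
#include <pthread.h>

#define NR_CPUS   4
#define ALL_CPUS (-1)

static pthread_rwlock_t all_access = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t cpu_access[NR_CPUS] = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

static void access_lock(int cpu)
{
        if (cpu == ALL_CPUS) {
                pthread_rwlock_wrlock(&all_access);     /* exclude everyone */
        } else {
                pthread_rwlock_rdlock(&all_access);     /* block ALL_CPUS users */
                pthread_mutex_lock(&cpu_access[cpu]);   /* block same-cpu users */
        }
}

static void access_unlock(int cpu)
{
        if (cpu == ALL_CPUS) {
                pthread_rwlock_unlock(&all_access);
        } else {
                pthread_mutex_unlock(&cpu_access[cpu]);
                pthread_rwlock_unlock(&all_access);
        }
}

int main(void)
{
        access_lock(2);
        /* ... read cpu 2's buffer ... */
        access_unlock(2);
        return 0;
}
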
@@ -313,7 +390,6 @@ static const char *trace_options[] = {
313 "bin", 390 "bin",
314 "block", 391 "block",
315 "stacktrace", 392 "stacktrace",
316 "sched-tree",
317 "trace_printk", 393 "trace_printk",
318 "ftrace_preempt", 394 "ftrace_preempt",
319 "branch", 395 "branch",
@@ -493,15 +569,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
493 * protected by per_cpu spinlocks. But the action of the swap 569 * protected by per_cpu spinlocks. But the action of the swap
494 * needs its own lock. 570 * needs its own lock.
495 * 571 *
496 * This is defined as a raw_spinlock_t in order to help 572 * This is defined as a arch_spinlock_t in order to help
497 * with performance when lockdep debugging is enabled. 573 * with performance when lockdep debugging is enabled.
498 * 574 *
499 * It is also used in other places outside the update_max_tr 575 * It is also used in other places outside the update_max_tr
500 * so it needs to be defined outside of the 576 * so it needs to be defined outside of the
501 * CONFIG_TRACER_MAX_TRACE. 577 * CONFIG_TRACER_MAX_TRACE.
502 */ 578 */
503static raw_spinlock_t ftrace_max_lock = 579static arch_spinlock_t ftrace_max_lock =
504 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 580 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
505 581
506#ifdef CONFIG_TRACER_MAX_TRACE 582#ifdef CONFIG_TRACER_MAX_TRACE
507unsigned long __read_mostly tracing_max_latency; 583unsigned long __read_mostly tracing_max_latency;
@@ -555,13 +631,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
555 return; 631 return;
556 632
557 WARN_ON_ONCE(!irqs_disabled()); 633 WARN_ON_ONCE(!irqs_disabled());
558 __raw_spin_lock(&ftrace_max_lock); 634 arch_spin_lock(&ftrace_max_lock);
559 635
560 tr->buffer = max_tr.buffer; 636 tr->buffer = max_tr.buffer;
561 max_tr.buffer = buf; 637 max_tr.buffer = buf;
562 638
563 __update_max_tr(tr, tsk, cpu); 639 __update_max_tr(tr, tsk, cpu);
564 __raw_spin_unlock(&ftrace_max_lock); 640 arch_spin_unlock(&ftrace_max_lock);
565} 641}
566 642
567/** 643/**
@@ -581,7 +657,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
581 return; 657 return;
582 658
583 WARN_ON_ONCE(!irqs_disabled()); 659 WARN_ON_ONCE(!irqs_disabled());
584 __raw_spin_lock(&ftrace_max_lock); 660 arch_spin_lock(&ftrace_max_lock);
585 661
586 ftrace_disable_cpu(); 662 ftrace_disable_cpu();
587 663
@@ -603,7 +679,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
603 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 679 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
604 680
605 __update_max_tr(tr, tsk, cpu); 681 __update_max_tr(tr, tsk, cpu);
606 __raw_spin_unlock(&ftrace_max_lock); 682 arch_spin_unlock(&ftrace_max_lock);
607} 683}
608#endif /* CONFIG_TRACER_MAX_TRACE */ 684#endif /* CONFIG_TRACER_MAX_TRACE */
609 685
@@ -802,7 +878,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
802static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 878static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
803static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; 879static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
804static int cmdline_idx; 880static int cmdline_idx;
805static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; 881static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
806 882
807/* temporary disable recording */ 883/* temporary disable recording */
808static atomic_t trace_record_cmdline_disabled __read_mostly; 884static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -915,7 +991,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
915 * nor do we want to disable interrupts, 991 * nor do we want to disable interrupts,
916 * so if we miss here, then better luck next time. 992 * so if we miss here, then better luck next time.
917 */ 993 */
918 if (!__raw_spin_trylock(&trace_cmdline_lock)) 994 if (!arch_spin_trylock(&trace_cmdline_lock))
919 return; 995 return;
920 996
921 idx = map_pid_to_cmdline[tsk->pid]; 997 idx = map_pid_to_cmdline[tsk->pid];
@@ -940,7 +1016,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
940 1016
941 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); 1017 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
942 1018
943 __raw_spin_unlock(&trace_cmdline_lock); 1019 arch_spin_unlock(&trace_cmdline_lock);
944} 1020}
945 1021
946void trace_find_cmdline(int pid, char comm[]) 1022void trace_find_cmdline(int pid, char comm[])
@@ -952,20 +1028,25 @@ void trace_find_cmdline(int pid, char comm[])
952 return; 1028 return;
953 } 1029 }
954 1030
1031 if (WARN_ON_ONCE(pid < 0)) {
1032 strcpy(comm, "<XXX>");
1033 return;
1034 }
1035
955 if (pid > PID_MAX_DEFAULT) { 1036 if (pid > PID_MAX_DEFAULT) {
956 strcpy(comm, "<...>"); 1037 strcpy(comm, "<...>");
957 return; 1038 return;
958 } 1039 }
959 1040
960 preempt_disable(); 1041 preempt_disable();
961 __raw_spin_lock(&trace_cmdline_lock); 1042 arch_spin_lock(&trace_cmdline_lock);
962 map = map_pid_to_cmdline[pid]; 1043 map = map_pid_to_cmdline[pid];
963 if (map != NO_CMDLINE_MAP) 1044 if (map != NO_CMDLINE_MAP)
964 strcpy(comm, saved_cmdlines[map]); 1045 strcpy(comm, saved_cmdlines[map]);
965 else 1046 else
966 strcpy(comm, "<...>"); 1047 strcpy(comm, "<...>");
967 1048
968 __raw_spin_unlock(&trace_cmdline_lock); 1049 arch_spin_unlock(&trace_cmdline_lock);
969 preempt_enable(); 1050 preempt_enable();
970} 1051}
971 1052
@@ -1085,7 +1166,7 @@ trace_function(struct trace_array *tr,
1085 struct ftrace_entry *entry; 1166 struct ftrace_entry *entry;
1086 1167
1087 /* If we are reading the ring buffer, don't trace */ 1168 /* If we are reading the ring buffer, don't trace */
1088 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 1169 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1089 return; 1170 return;
1090 1171
1091 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), 1172 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@ -1151,6 +1232,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1151 __ftrace_trace_stack(tr->buffer, flags, skip, pc); 1232 __ftrace_trace_stack(tr->buffer, flags, skip, pc);
1152} 1233}
1153 1234
1235/**
1236 * trace_dump_stack - record a stack back trace in the trace buffer
1237 */
1238void trace_dump_stack(void)
1239{
1240 unsigned long flags;
1241
1242 if (tracing_disabled || tracing_selftest_running)
1243 return;
1244
1245 local_save_flags(flags);
1246
1247	/* skipping 3 frames seems to get us to the caller of this function */
1248 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
1249}
1250
1154void 1251void
1155ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) 1252ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1156{ 1253{
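
trace_dump_stack(), added above, records a backtrace of its caller into the global trace buffer. Assuming the declaration is exported through <linux/kernel.h> as in this series, a call site might look like the following sketch (the surrounding driver function is invented):

/*
 * Hypothetical call site: drop a backtrace into the ftrace ring buffer
 * when an unexpected path is taken.
 */
#include <linux/kernel.h>

static void my_driver_check_path(int unexpected)
{
        if (unexpected) {
                printk(KERN_WARNING "my_driver: unexpected path taken\n");
                trace_dump_stack();     /* backtrace lands in the trace buffer */
        }
}
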
@@ -1251,8 +1348,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1251 */ 1348 */
1252int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 1349int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1253{ 1350{
1254 static raw_spinlock_t trace_buf_lock = 1351 static arch_spinlock_t trace_buf_lock =
1255 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1352 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1256 static u32 trace_buf[TRACE_BUF_SIZE]; 1353 static u32 trace_buf[TRACE_BUF_SIZE];
1257 1354
1258 struct ftrace_event_call *call = &event_bprint; 1355 struct ftrace_event_call *call = &event_bprint;
@@ -1283,7 +1380,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1283 1380
1284 /* Lockdep uses trace_printk for lock tracing */ 1381 /* Lockdep uses trace_printk for lock tracing */
1285 local_irq_save(flags); 1382 local_irq_save(flags);
1286 __raw_spin_lock(&trace_buf_lock); 1383 arch_spin_lock(&trace_buf_lock);
1287 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1384 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1288 1385
1289 if (len > TRACE_BUF_SIZE || len < 0) 1386 if (len > TRACE_BUF_SIZE || len < 0)
@@ -1300,11 +1397,13 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1300 entry->fmt = fmt; 1397 entry->fmt = fmt;
1301 1398
1302 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1399 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1303 if (!filter_check_discard(call, entry, buffer, event)) 1400 if (!filter_check_discard(call, entry, buffer, event)) {
1304 ring_buffer_unlock_commit(buffer, event); 1401 ring_buffer_unlock_commit(buffer, event);
1402 ftrace_trace_stack(buffer, flags, 6, pc);
1403 }
1305 1404
1306out_unlock: 1405out_unlock:
1307 __raw_spin_unlock(&trace_buf_lock); 1406 arch_spin_unlock(&trace_buf_lock);
1308 local_irq_restore(flags); 1407 local_irq_restore(flags);
1309 1408
1310out: 1409out:
@@ -1334,7 +1433,7 @@ int trace_array_printk(struct trace_array *tr,
1334int trace_array_vprintk(struct trace_array *tr, 1433int trace_array_vprintk(struct trace_array *tr,
1335 unsigned long ip, const char *fmt, va_list args) 1434 unsigned long ip, const char *fmt, va_list args)
1336{ 1435{
1337 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1436 static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1338 static char trace_buf[TRACE_BUF_SIZE]; 1437 static char trace_buf[TRACE_BUF_SIZE];
1339 1438
1340 struct ftrace_event_call *call = &event_print; 1439 struct ftrace_event_call *call = &event_print;
@@ -1360,12 +1459,9 @@ int trace_array_vprintk(struct trace_array *tr,
1360 1459
1361 pause_graph_tracing(); 1460 pause_graph_tracing();
1362 raw_local_irq_save(irq_flags); 1461 raw_local_irq_save(irq_flags);
1363 __raw_spin_lock(&trace_buf_lock); 1462 arch_spin_lock(&trace_buf_lock);
1364 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1463 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1365 1464
1366 len = min(len, TRACE_BUF_SIZE-1);
1367 trace_buf[len] = 0;
1368
1369 size = sizeof(*entry) + len + 1; 1465 size = sizeof(*entry) + len + 1;
1370 buffer = tr->buffer; 1466 buffer = tr->buffer;
1371 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 1467 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
@@ -1373,15 +1469,17 @@ int trace_array_vprintk(struct trace_array *tr,
1373 if (!event) 1469 if (!event)
1374 goto out_unlock; 1470 goto out_unlock;
1375 entry = ring_buffer_event_data(event); 1471 entry = ring_buffer_event_data(event);
1376 entry->ip = ip; 1472 entry->ip = ip;
1377 1473
1378 memcpy(&entry->buf, trace_buf, len); 1474 memcpy(&entry->buf, trace_buf, len);
1379 entry->buf[len] = 0; 1475 entry->buf[len] = '\0';
1380 if (!filter_check_discard(call, entry, buffer, event)) 1476 if (!filter_check_discard(call, entry, buffer, event)) {
1381 ring_buffer_unlock_commit(buffer, event); 1477 ring_buffer_unlock_commit(buffer, event);
1478 ftrace_trace_stack(buffer, irq_flags, 6, pc);
1479 }
1382 1480
1383 out_unlock: 1481 out_unlock:
1384 __raw_spin_unlock(&trace_buf_lock); 1482 arch_spin_unlock(&trace_buf_lock);
1385 raw_local_irq_restore(irq_flags); 1483 raw_local_irq_restore(irq_flags);
1386 unpause_graph_tracing(); 1484 unpause_graph_tracing();
1387 out: 1485 out:
@@ -1515,6 +1613,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1515 int i = (int)*pos; 1613 int i = (int)*pos;
1516 void *ent; 1614 void *ent;
1517 1615
1616 WARN_ON_ONCE(iter->leftover);
1617
1518 (*pos)++; 1618 (*pos)++;
1519 1619
1520 /* can't go backwards */ 1620 /* can't go backwards */
@@ -1566,12 +1666,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1566} 1666}
1567 1667
1568/* 1668/*
1569 * No necessary locking here. The worst thing which can
1570 * happen is loosing events consumed at the same time
1571 * by a trace_pipe reader.
1572 * Other than that, we don't risk to crash the ring buffer
1573 * because it serializes the readers.
1574 *
1575 * The current tracer is copied to avoid a global locking 1669 * The current tracer is copied to avoid a global locking
1576 * all around. 1670 * all around.
1577 */ 1671 */
@@ -1613,17 +1707,29 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1613 ; 1707 ;
1614 1708
1615 } else { 1709 } else {
1616 l = *pos - 1; 1710 /*
1617 p = s_next(m, p, &l); 1711 * If we overflowed the seq_file before, then we want
1712 * to just reuse the trace_seq buffer again.
1713 */
1714 if (iter->leftover)
1715 p = iter;
1716 else {
1717 l = *pos - 1;
1718 p = s_next(m, p, &l);
1719 }
1618 } 1720 }
1619 1721
1620 trace_event_read_lock(); 1722 trace_event_read_lock();
1723 trace_access_lock(cpu_file);
1621 return p; 1724 return p;
1622} 1725}
1623 1726
1624static void s_stop(struct seq_file *m, void *p) 1727static void s_stop(struct seq_file *m, void *p)
1625{ 1728{
1729 struct trace_iterator *iter = m->private;
1730
1626 atomic_dec(&trace_record_cmdline_disabled); 1731 atomic_dec(&trace_record_cmdline_disabled);
1732 trace_access_unlock(iter->cpu_file);
1627 trace_event_read_unlock(); 1733 trace_event_read_unlock();
1628} 1734}
1629 1735
@@ -1922,6 +2028,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1922static int s_show(struct seq_file *m, void *v) 2028static int s_show(struct seq_file *m, void *v)
1923{ 2029{
1924 struct trace_iterator *iter = v; 2030 struct trace_iterator *iter = v;
2031 int ret;
1925 2032
1926 if (iter->ent == NULL) { 2033 if (iter->ent == NULL) {
1927 if (iter->tr) { 2034 if (iter->tr) {
@@ -1941,9 +2048,27 @@ static int s_show(struct seq_file *m, void *v)
1941 if (!(trace_flags & TRACE_ITER_VERBOSE)) 2048 if (!(trace_flags & TRACE_ITER_VERBOSE))
1942 print_func_help_header(m); 2049 print_func_help_header(m);
1943 } 2050 }
2051 } else if (iter->leftover) {
2052 /*
2053 * If we filled the seq_file buffer earlier, we
2054 * want to just show it now.
2055 */
2056 ret = trace_print_seq(m, &iter->seq);
2057
2058 /* ret should this time be zero, but you never know */
2059 iter->leftover = ret;
2060
1944 } else { 2061 } else {
1945 print_trace_line(iter); 2062 print_trace_line(iter);
1946 trace_print_seq(m, &iter->seq); 2063 ret = trace_print_seq(m, &iter->seq);
2064 /*
2065 * If we overflow the seq_file buffer, then it will
2066 * ask us for this data again at start up.
2067 * Use that instead.
2068 * ret is 0 if seq_file write succeeded.
2069 * -1 otherwise.
2070 */
2071 iter->leftover = ret;
1947 } 2072 }
1948 2073
1949 return 0; 2074 return 0;
@@ -2253,7 +2378,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2253 mutex_lock(&tracing_cpumask_update_lock); 2378 mutex_lock(&tracing_cpumask_update_lock);
2254 2379
2255 local_irq_disable(); 2380 local_irq_disable();
2256 __raw_spin_lock(&ftrace_max_lock); 2381 arch_spin_lock(&ftrace_max_lock);
2257 for_each_tracing_cpu(cpu) { 2382 for_each_tracing_cpu(cpu) {
2258 /* 2383 /*
2259 * Increase/decrease the disabled counter if we are 2384 * Increase/decrease the disabled counter if we are
@@ -2268,7 +2393,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2268 atomic_dec(&global_trace.data[cpu]->disabled); 2393 atomic_dec(&global_trace.data[cpu]->disabled);
2269 } 2394 }
2270 } 2395 }
2271 __raw_spin_unlock(&ftrace_max_lock); 2396 arch_spin_unlock(&ftrace_max_lock);
2272 local_irq_enable(); 2397 local_irq_enable();
2273 2398
2274 cpumask_copy(tracing_cpumask, tracing_cpumask_new); 2399 cpumask_copy(tracing_cpumask, tracing_cpumask_new);
@@ -2290,67 +2415,49 @@ static const struct file_operations tracing_cpumask_fops = {
2290 .write = tracing_cpumask_write, 2415 .write = tracing_cpumask_write,
2291}; 2416};
2292 2417
2293static ssize_t 2418static int tracing_trace_options_show(struct seq_file *m, void *v)
2294tracing_trace_options_read(struct file *filp, char __user *ubuf,
2295 size_t cnt, loff_t *ppos)
2296{ 2419{
2297 struct tracer_opt *trace_opts; 2420 struct tracer_opt *trace_opts;
2298 u32 tracer_flags; 2421 u32 tracer_flags;
2299 int len = 0;
2300 char *buf;
2301 int r = 0;
2302 int i; 2422 int i;
2303 2423
2304
2305 /* calculate max size */
2306 for (i = 0; trace_options[i]; i++) {
2307 len += strlen(trace_options[i]);
2308 len += 3; /* "no" and newline */
2309 }
2310
2311 mutex_lock(&trace_types_lock); 2424 mutex_lock(&trace_types_lock);
2312 tracer_flags = current_trace->flags->val; 2425 tracer_flags = current_trace->flags->val;
2313 trace_opts = current_trace->flags->opts; 2426 trace_opts = current_trace->flags->opts;
2314 2427
2315 /*
2316 * Increase the size with names of options specific
2317 * of the current tracer.
2318 */
2319 for (i = 0; trace_opts[i].name; i++) {
2320 len += strlen(trace_opts[i].name);
2321 len += 3; /* "no" and newline */
2322 }
2323
2324 /* +1 for \0 */
2325 buf = kmalloc(len + 1, GFP_KERNEL);
2326 if (!buf) {
2327 mutex_unlock(&trace_types_lock);
2328 return -ENOMEM;
2329 }
2330
2331 for (i = 0; trace_options[i]; i++) { 2428 for (i = 0; trace_options[i]; i++) {
2332 if (trace_flags & (1 << i)) 2429 if (trace_flags & (1 << i))
2333 r += sprintf(buf + r, "%s\n", trace_options[i]); 2430 seq_printf(m, "%s\n", trace_options[i]);
2334 else 2431 else
2335 r += sprintf(buf + r, "no%s\n", trace_options[i]); 2432 seq_printf(m, "no%s\n", trace_options[i]);
2336 } 2433 }
2337 2434
2338 for (i = 0; trace_opts[i].name; i++) { 2435 for (i = 0; trace_opts[i].name; i++) {
2339 if (tracer_flags & trace_opts[i].bit) 2436 if (tracer_flags & trace_opts[i].bit)
2340 r += sprintf(buf + r, "%s\n", 2437 seq_printf(m, "%s\n", trace_opts[i].name);
2341 trace_opts[i].name);
2342 else 2438 else
2343 r += sprintf(buf + r, "no%s\n", 2439 seq_printf(m, "no%s\n", trace_opts[i].name);
2344 trace_opts[i].name);
2345 } 2440 }
2346 mutex_unlock(&trace_types_lock); 2441 mutex_unlock(&trace_types_lock);
2347 2442
2348 WARN_ON(r >= len + 1); 2443 return 0;
2444}
2445
2446static int __set_tracer_option(struct tracer *trace,
2447 struct tracer_flags *tracer_flags,
2448 struct tracer_opt *opts, int neg)
2449{
2450 int ret;
2349 2451
2350 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2452 ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
2453 if (ret)
2454 return ret;
2351 2455
2352 kfree(buf); 2456 if (neg)
2353 return r; 2457 tracer_flags->val &= ~opts->bit;
2458 else
2459 tracer_flags->val |= opts->bit;
2460 return 0;
2354} 2461}
2355 2462
2356/* Try to assign a tracer specific option */ 2463/* Try to assign a tracer specific option */
@@ -2358,33 +2465,17 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2358{ 2465{
2359 struct tracer_flags *tracer_flags = trace->flags; 2466 struct tracer_flags *tracer_flags = trace->flags;
2360 struct tracer_opt *opts = NULL; 2467 struct tracer_opt *opts = NULL;
2361 int ret = 0, i = 0; 2468 int i;
2362 int len;
2363 2469
2364 for (i = 0; tracer_flags->opts[i].name; i++) { 2470 for (i = 0; tracer_flags->opts[i].name; i++) {
2365 opts = &tracer_flags->opts[i]; 2471 opts = &tracer_flags->opts[i];
2366 len = strlen(opts->name);
2367 2472
2368 if (strncmp(cmp, opts->name, len) == 0) { 2473 if (strcmp(cmp, opts->name) == 0)
2369 ret = trace->set_flag(tracer_flags->val, 2474 return __set_tracer_option(trace, trace->flags,
2370 opts->bit, !neg); 2475 opts, neg);
2371 break;
2372 }
2373 } 2476 }
2374 /* Not found */
2375 if (!tracer_flags->opts[i].name)
2376 return -EINVAL;
2377 2477
2378 /* Refused to handle */ 2478 return -EINVAL;
2379 if (ret)
2380 return ret;
2381
2382 if (neg)
2383 tracer_flags->val &= ~opts->bit;
2384 else
2385 tracer_flags->val |= opts->bit;
2386
2387 return 0;
2388} 2479}
2389 2480
2390static void set_tracer_flags(unsigned int mask, int enabled) 2481static void set_tracer_flags(unsigned int mask, int enabled)
@@ -2404,7 +2495,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2404 size_t cnt, loff_t *ppos) 2495 size_t cnt, loff_t *ppos)
2405{ 2496{
2406 char buf[64]; 2497 char buf[64];
2407 char *cmp = buf; 2498 char *cmp;
2408 int neg = 0; 2499 int neg = 0;
2409 int ret; 2500 int ret;
2410 int i; 2501 int i;
@@ -2416,16 +2507,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2416 return -EFAULT; 2507 return -EFAULT;
2417 2508
2418 buf[cnt] = 0; 2509 buf[cnt] = 0;
2510 cmp = strstrip(buf);
2419 2511
2420 if (strncmp(buf, "no", 2) == 0) { 2512 if (strncmp(cmp, "no", 2) == 0) {
2421 neg = 1; 2513 neg = 1;
2422 cmp += 2; 2514 cmp += 2;
2423 } 2515 }
2424 2516
2425 for (i = 0; trace_options[i]; i++) { 2517 for (i = 0; trace_options[i]; i++) {
2426 int len = strlen(trace_options[i]); 2518 if (strcmp(cmp, trace_options[i]) == 0) {
2427
2428 if (strncmp(cmp, trace_options[i], len) == 0) {
2429 set_tracer_flags(1 << i, !neg); 2519 set_tracer_flags(1 << i, !neg);
2430 break; 2520 break;
2431 } 2521 }
@@ -2445,9 +2535,18 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2445 return cnt; 2535 return cnt;
2446} 2536}
2447 2537
2538static int tracing_trace_options_open(struct inode *inode, struct file *file)
2539{
2540 if (tracing_disabled)
2541 return -ENODEV;
2542 return single_open(file, tracing_trace_options_show, NULL);
2543}
2544
2448static const struct file_operations tracing_iter_fops = { 2545static const struct file_operations tracing_iter_fops = {
2449 .open = tracing_open_generic, 2546 .open = tracing_trace_options_open,
2450 .read = tracing_trace_options_read, 2547 .read = seq_read,
2548 .llseek = seq_lseek,
2549 .release = single_release,
2451 .write = tracing_trace_options_write, 2550 .write = tracing_trace_options_write,
2452}; 2551};
2453 2552
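
The conversion above replaces the hand-sized kmalloc/sprintf read path with seq_file via single_open(), letting the seq_file core own buffer sizing and partial reads. The same "let the formatting interface own the buffer" idea in a small userspace sketch, with open_memstream() standing in for seq_file and an invented option table:

/*
 * Instead of guessing a buffer size and sprintf'ing into it, hand the
 * formatting to an interface that grows the buffer itself.
 */
#include <stdio.h>
#include <stdlib.h>

static const char *trace_options[] = { "print-parent", "sym-offset", NULL };
static unsigned int trace_flags = 1;    /* bit 0 set */

int main(void)
{
        char *buf = NULL;
        size_t len = 0;
        FILE *m = open_memstream(&buf, &len);
        int i;

        if (!m)
                return 1;
        for (i = 0; trace_options[i]; i++)
                fprintf(m, "%s%s\n",
                        (trace_flags & (1u << i)) ? "" : "no",
                        trace_options[i]);
        fclose(m);              /* buf/len are valid after close */
        fputs(buf, stdout);
        free(buf);
        return 0;
}
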
@@ -2821,22 +2920,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2821 2920
2822 mutex_lock(&trace_types_lock); 2921 mutex_lock(&trace_types_lock);
2823 2922
2824 /* We only allow one reader per cpu */
2825 if (cpu_file == TRACE_PIPE_ALL_CPU) {
2826 if (!cpumask_empty(tracing_reader_cpumask)) {
2827 ret = -EBUSY;
2828 goto out;
2829 }
2830 cpumask_setall(tracing_reader_cpumask);
2831 } else {
2832 if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2833 cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2834 else {
2835 ret = -EBUSY;
2836 goto out;
2837 }
2838 }
2839
2840 /* create a buffer to store the information to pass to userspace */ 2923 /* create a buffer to store the information to pass to userspace */
2841 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 2924 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2842 if (!iter) { 2925 if (!iter) {
@@ -2892,10 +2975,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
2892 2975
2893 mutex_lock(&trace_types_lock); 2976 mutex_lock(&trace_types_lock);
2894 2977
2895 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) 2978 if (iter->trace->pipe_close)
2896 cpumask_clear(tracing_reader_cpumask); 2979 iter->trace->pipe_close(iter);
2897 else
2898 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2899 2980
2900 mutex_unlock(&trace_types_lock); 2981 mutex_unlock(&trace_types_lock);
2901 2982
@@ -3055,6 +3136,7 @@ waitagain:
3055 iter->pos = -1; 3136 iter->pos = -1;
3056 3137
3057 trace_event_read_lock(); 3138 trace_event_read_lock();
3139 trace_access_lock(iter->cpu_file);
3058 while (find_next_entry_inc(iter) != NULL) { 3140 while (find_next_entry_inc(iter) != NULL) {
3059 enum print_line_t ret; 3141 enum print_line_t ret;
3060 int len = iter->seq.len; 3142 int len = iter->seq.len;
@@ -3071,6 +3153,7 @@ waitagain:
3071 if (iter->seq.len >= cnt) 3153 if (iter->seq.len >= cnt)
3072 break; 3154 break;
3073 } 3155 }
3156 trace_access_unlock(iter->cpu_file);
3074 trace_event_read_unlock(); 3157 trace_event_read_unlock();
3075 3158
3076 /* Now copy what we have to the user */ 3159 /* Now copy what we have to the user */
@@ -3103,7 +3186,7 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3103 __free_page(spd->pages[idx]); 3186 __free_page(spd->pages[idx]);
3104} 3187}
3105 3188
3106static struct pipe_buf_operations tracing_pipe_buf_ops = { 3189static const struct pipe_buf_operations tracing_pipe_buf_ops = {
3107 .can_merge = 0, 3190 .can_merge = 0,
3108 .map = generic_pipe_buf_map, 3191 .map = generic_pipe_buf_map,
3109 .unmap = generic_pipe_buf_unmap, 3192 .unmap = generic_pipe_buf_unmap,
@@ -3196,6 +3279,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3196 } 3279 }
3197 3280
3198 trace_event_read_lock(); 3281 trace_event_read_lock();
3282 trace_access_lock(iter->cpu_file);
3199 3283
3200 /* Fill as many pages as possible. */ 3284 /* Fill as many pages as possible. */
3201 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { 3285 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
@@ -3219,6 +3303,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3219 trace_seq_init(&iter->seq); 3303 trace_seq_init(&iter->seq);
3220 } 3304 }
3221 3305
3306 trace_access_unlock(iter->cpu_file);
3222 trace_event_read_unlock(); 3307 trace_event_read_unlock();
3223 mutex_unlock(&iter->mutex); 3308 mutex_unlock(&iter->mutex);
3224 3309
@@ -3334,7 +3419,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3334 size_t cnt, loff_t *fpos) 3419 size_t cnt, loff_t *fpos)
3335{ 3420{
3336 char *buf; 3421 char *buf;
3337 char *end;
3338 3422
3339 if (tracing_disabled) 3423 if (tracing_disabled)
3340 return -EINVAL; 3424 return -EINVAL;
@@ -3342,7 +3426,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3342 if (cnt > TRACE_BUF_SIZE) 3426 if (cnt > TRACE_BUF_SIZE)
3343 cnt = TRACE_BUF_SIZE; 3427 cnt = TRACE_BUF_SIZE;
3344 3428
3345 buf = kmalloc(cnt + 1, GFP_KERNEL); 3429 buf = kmalloc(cnt + 2, GFP_KERNEL);
3346 if (buf == NULL) 3430 if (buf == NULL)
3347 return -ENOMEM; 3431 return -ENOMEM;
3348 3432
@@ -3350,35 +3434,31 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3350 kfree(buf); 3434 kfree(buf);
3351 return -EFAULT; 3435 return -EFAULT;
3352 } 3436 }
3437 if (buf[cnt-1] != '\n') {
3438 buf[cnt] = '\n';
3439 buf[cnt+1] = '\0';
3440 } else
3441 buf[cnt] = '\0';
3353 3442
3354 /* Cut from the first nil or newline. */ 3443 cnt = mark_printk("%s", buf);
3355 buf[cnt] = '\0';
3356 end = strchr(buf, '\n');
3357 if (end)
3358 *end = '\0';
3359
3360 cnt = mark_printk("%s\n", buf);
3361 kfree(buf); 3444 kfree(buf);
3362 *fpos += cnt; 3445 *fpos += cnt;
3363 3446
3364 return cnt; 3447 return cnt;
3365} 3448}
3366 3449
3367static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf, 3450static int tracing_clock_show(struct seq_file *m, void *v)
3368 size_t cnt, loff_t *ppos)
3369{ 3451{
3370 char buf[64];
3371 int bufiter = 0;
3372 int i; 3452 int i;
3373 3453
3374 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 3454 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3375 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, 3455 seq_printf(m,
3376 "%s%s%s%s", i ? " " : "", 3456 "%s%s%s%s", i ? " " : "",
3377 i == trace_clock_id ? "[" : "", trace_clocks[i].name, 3457 i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3378 i == trace_clock_id ? "]" : ""); 3458 i == trace_clock_id ? "]" : "");
3379 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n"); 3459 seq_putc(m, '\n');
3380 3460
3381 return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter); 3461 return 0;
3382} 3462}
3383 3463
3384static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 3464static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
@@ -3420,6 +3500,13 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3420 return cnt; 3500 return cnt;
3421} 3501}
3422 3502
3503static int tracing_clock_open(struct inode *inode, struct file *file)
3504{
3505 if (tracing_disabled)
3506 return -ENODEV;
3507 return single_open(file, tracing_clock_show, NULL);
3508}
3509
3423static const struct file_operations tracing_max_lat_fops = { 3510static const struct file_operations tracing_max_lat_fops = {
3424 .open = tracing_open_generic, 3511 .open = tracing_open_generic,
3425 .read = tracing_max_lat_read, 3512 .read = tracing_max_lat_read,
@@ -3458,8 +3545,10 @@ static const struct file_operations tracing_mark_fops = {
3458}; 3545};
3459 3546
3460static const struct file_operations trace_clock_fops = { 3547static const struct file_operations trace_clock_fops = {
3461 .open = tracing_open_generic, 3548 .open = tracing_clock_open,
3462 .read = tracing_clock_read, 3549 .read = seq_read,
3550 .llseek = seq_lseek,
3551 .release = single_release,
3463 .write = tracing_clock_write, 3552 .write = tracing_clock_write,
3464}; 3553};
3465 3554
@@ -3516,10 +3605,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3516 3605
3517 info->read = 0; 3606 info->read = 0;
3518 3607
3608 trace_access_lock(info->cpu);
3519 ret = ring_buffer_read_page(info->tr->buffer, 3609 ret = ring_buffer_read_page(info->tr->buffer,
3520 &info->spare, 3610 &info->spare,
3521 count, 3611 count,
3522 info->cpu, 0); 3612 info->cpu, 0);
3613 trace_access_unlock(info->cpu);
3523 if (ret < 0) 3614 if (ret < 0)
3524 return 0; 3615 return 0;
3525 3616
@@ -3589,7 +3680,7 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3589} 3680}
3590 3681
3591/* Pipe buffer operations for a buffer. */ 3682/* Pipe buffer operations for a buffer. */
3592static struct pipe_buf_operations buffer_pipe_buf_ops = { 3683static const struct pipe_buf_operations buffer_pipe_buf_ops = {
3593 .can_merge = 0, 3684 .can_merge = 0,
3594 .map = generic_pipe_buf_map, 3685 .map = generic_pipe_buf_map,
3595 .unmap = generic_pipe_buf_unmap, 3686 .unmap = generic_pipe_buf_unmap,
@@ -3647,6 +3738,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3647 len &= PAGE_MASK; 3738 len &= PAGE_MASK;
3648 } 3739 }
3649 3740
3741 trace_access_lock(info->cpu);
3650 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3742 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3651 3743
3652 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { 3744 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
@@ -3694,6 +3786,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3694 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3786 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3695 } 3787 }
3696 3788
3789 trace_access_unlock(info->cpu);
3697 spd.nr_pages = i; 3790 spd.nr_pages = i;
3698 3791
3699 /* did we read anything? */ 3792 /* did we read anything? */
@@ -3730,7 +3823,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3730 3823
3731 s = kmalloc(sizeof(*s), GFP_KERNEL); 3824 s = kmalloc(sizeof(*s), GFP_KERNEL);
3732 if (!s) 3825 if (!s)
3733 return ENOMEM; 3826 return -ENOMEM;
3734 3827
3735 trace_seq_init(s); 3828 trace_seq_init(s);
3736 3829
@@ -3920,39 +4013,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3920 if (ret < 0) 4013 if (ret < 0)
3921 return ret; 4014 return ret;
3922 4015
3923 ret = 0; 4016 if (val != 0 && val != 1)
3924 switch (val) { 4017 return -EINVAL;
3925 case 0:
3926 /* do nothing if already cleared */
3927 if (!(topt->flags->val & topt->opt->bit))
3928 break;
3929
3930 mutex_lock(&trace_types_lock);
3931 if (current_trace->set_flag)
3932 ret = current_trace->set_flag(topt->flags->val,
3933 topt->opt->bit, 0);
3934 mutex_unlock(&trace_types_lock);
3935 if (ret)
3936 return ret;
3937 topt->flags->val &= ~topt->opt->bit;
3938 break;
3939 case 1:
3940 /* do nothing if already set */
3941 if (topt->flags->val & topt->opt->bit)
3942 break;
3943 4018
4019 if (!!(topt->flags->val & topt->opt->bit) != val) {
3944 mutex_lock(&trace_types_lock); 4020 mutex_lock(&trace_types_lock);
3945 if (current_trace->set_flag) 4021 ret = __set_tracer_option(current_trace, topt->flags,
3946 ret = current_trace->set_flag(topt->flags->val, 4022 topt->opt, !val);
3947 topt->opt->bit, 1);
3948 mutex_unlock(&trace_types_lock); 4023 mutex_unlock(&trace_types_lock);
3949 if (ret) 4024 if (ret)
3950 return ret; 4025 return ret;
3951 topt->flags->val |= topt->opt->bit;
3952 break;
3953
3954 default:
3955 return -EINVAL;
3956 } 4026 }
3957 4027
3958 *ppos += cnt; 4028 *ppos += cnt;
@@ -4153,6 +4223,8 @@ static __init int tracer_init_debugfs(void)
4153 struct dentry *d_tracer; 4223 struct dentry *d_tracer;
4154 int cpu; 4224 int cpu;
4155 4225
4226 trace_access_lock_init();
4227
4156 d_tracer = tracing_init_dentry(); 4228 d_tracer = tracing_init_dentry();
4157 4229
4158 trace_create_file("tracing_enabled", 0644, d_tracer, 4230 trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4279,8 +4351,8 @@ trace_printk_seq(struct trace_seq *s)
4279 4351
4280static void __ftrace_dump(bool disable_tracing) 4352static void __ftrace_dump(bool disable_tracing)
4281{ 4353{
4282 static raw_spinlock_t ftrace_dump_lock = 4354 static arch_spinlock_t ftrace_dump_lock =
4283 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 4355 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
4284 /* use static because iter can be a bit big for the stack */ 4356 /* use static because iter can be a bit big for the stack */
4285 static struct trace_iterator iter; 4357 static struct trace_iterator iter;
4286 unsigned int old_userobj; 4358 unsigned int old_userobj;
@@ -4290,7 +4362,7 @@ static void __ftrace_dump(bool disable_tracing)
4290 4362
4291 /* only one dump */ 4363 /* only one dump */
4292 local_irq_save(flags); 4364 local_irq_save(flags);
4293 __raw_spin_lock(&ftrace_dump_lock); 4365 arch_spin_lock(&ftrace_dump_lock);
4294 if (dump_ran) 4366 if (dump_ran)
4295 goto out; 4367 goto out;
4296 4368
@@ -4365,7 +4437,7 @@ static void __ftrace_dump(bool disable_tracing)
4365 } 4437 }
4366 4438
4367 out: 4439 out:
4368 __raw_spin_unlock(&ftrace_dump_lock); 4440 arch_spin_unlock(&ftrace_dump_lock);
4369 local_irq_restore(flags); 4441 local_irq_restore(flags);
4370} 4442}
4371 4443
@@ -4387,9 +4459,6 @@ __init static int tracer_alloc_buffers(void)
4387 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) 4459 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
4388 goto out_free_buffer_mask; 4460 goto out_free_buffer_mask;
4389 4461
4390 if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
4391 goto out_free_tracing_cpumask;
4392
4393 /* To save memory, keep the ring buffer size to its minimum */ 4462 /* To save memory, keep the ring buffer size to its minimum */
4394 if (ring_buffer_expanded) 4463 if (ring_buffer_expanded)
4395 ring_buf_size = trace_buf_size; 4464 ring_buf_size = trace_buf_size;
@@ -4426,7 +4495,7 @@ __init static int tracer_alloc_buffers(void)
4426 /* Allocate the first page for all buffers */ 4495 /* Allocate the first page for all buffers */
4427 for_each_tracing_cpu(i) { 4496 for_each_tracing_cpu(i) {
4428 global_trace.data[i] = &per_cpu(global_trace_cpu, i); 4497 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4429 max_tr.data[i] = &per_cpu(max_data, i); 4498 max_tr.data[i] = &per_cpu(max_tr_data, i);
4430 } 4499 }
4431 4500
4432 trace_init_cmdlines(); 4501 trace_init_cmdlines();
@@ -4447,8 +4516,6 @@ __init static int tracer_alloc_buffers(void)
4447 return 0; 4516 return 0;
4448 4517
4449out_free_cpumask: 4518out_free_cpumask:
4450 free_cpumask_var(tracing_reader_cpumask);
4451out_free_tracing_cpumask:
4452 free_cpumask_var(tracing_cpumask); 4519 free_cpumask_var(tracing_cpumask);
4453out_free_buffer_mask: 4520out_free_buffer_mask:
4454 free_cpumask_var(tracing_buffer_mask); 4521 free_cpumask_var(tracing_buffer_mask);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 405cb850b75d..fd05bcaf91b0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h> 12#include <trace/boot.h>
13#include <linux/kmemtrace.h> 13#include <linux/kmemtrace.h>
14#include <linux/hw_breakpoint.h>
14 15
15#include <linux/trace_seq.h> 16#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 17#include <linux/ftrace_event.h>
@@ -37,6 +38,7 @@ enum trace_type {
37 TRACE_KMEM_ALLOC, 38 TRACE_KMEM_ALLOC,
38 TRACE_KMEM_FREE, 39 TRACE_KMEM_FREE,
39 TRACE_BLK, 40 TRACE_BLK,
41 TRACE_KSYM,
40 42
41 __TRACE_LAST_TYPE, 43 __TRACE_LAST_TYPE,
42}; 44};
@@ -98,9 +100,32 @@ struct syscall_trace_enter {
98struct syscall_trace_exit { 100struct syscall_trace_exit {
99 struct trace_entry ent; 101 struct trace_entry ent;
100 int nr; 102 int nr;
101 unsigned long ret; 103 long ret;
102}; 104};
103 105
106struct kprobe_trace_entry {
107 struct trace_entry ent;
108 unsigned long ip;
109 int nargs;
110 unsigned long args[];
111};
112
113#define SIZEOF_KPROBE_TRACE_ENTRY(n) \
114 (offsetof(struct kprobe_trace_entry, args) + \
115 (sizeof(unsigned long) * (n)))
116
117struct kretprobe_trace_entry {
118 struct trace_entry ent;
119 unsigned long func;
120 unsigned long ret_ip;
121 int nargs;
122 unsigned long args[];
123};
124
125#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
126 (offsetof(struct kretprobe_trace_entry, args) + \
127 (sizeof(unsigned long) * (n)))
128
104/* 129/*
105 * trace_flag_type is an enumeration that holds different 130 * trace_flag_type is an enumeration that holds different
106 * states when a trace occurs. These are: 131 * states when a trace occurs. These are:
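
The SIZEOF_KPROBE_TRACE_ENTRY()/SIZEOF_KRETPROBE_TRACE_ENTRY() macros above size a record that ends in a flexible array member: offsetof(..., args) plus n argument slots. A standalone sketch of that sizing pattern with illustrative names:

/*
 * Sizing a struct that ends in a flexible array member: the record
 * size is the offset of the array plus n elements.
 */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct probe_entry {
        unsigned long ip;
        int nargs;
        unsigned long args[];           /* flexible array member */
};

#define SIZEOF_PROBE_ENTRY(n) \
        (offsetof(struct probe_entry, args) + sizeof(unsigned long) * (n))

int main(void)
{
        int n = 3;
        struct probe_entry *e = malloc(SIZEOF_PROBE_ENTRY(n));

        if (!e)
                return 1;
        e->ip = 0xdeadbeef;
        e->nargs = n;
        printf("record size for %d args: %zu bytes\n", n, SIZEOF_PROBE_ENTRY(n));
        free(e);
        return 0;
}
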
@@ -209,6 +234,7 @@ extern void __ftrace_bad_type(void);
209 TRACE_KMEM_ALLOC); \ 234 TRACE_KMEM_ALLOC); \
210 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 235 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
211 TRACE_KMEM_FREE); \ 236 TRACE_KMEM_FREE); \
237 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
212 __ftrace_bad_type(); \ 238 __ftrace_bad_type(); \
213 } while (0) 239 } while (0)
214 240
@@ -246,6 +272,7 @@ struct tracer_flags {
246 * @pipe_open: called when the trace_pipe file is opened 272 * @pipe_open: called when the trace_pipe file is opened
247 * @wait_pipe: override how the user waits for traces on trace_pipe 273 * @wait_pipe: override how the user waits for traces on trace_pipe
248 * @close: called when the trace file is released 274 * @close: called when the trace file is released
275 * @pipe_close: called when the trace_pipe file is released
249 * @read: override the default read callback on trace_pipe 276 * @read: override the default read callback on trace_pipe
250 * @splice_read: override the default splice_read callback on trace_pipe 277 * @splice_read: override the default splice_read callback on trace_pipe
251 * @selftest: selftest to run on boot (see trace_selftest.c) 278 * @selftest: selftest to run on boot (see trace_selftest.c)
@@ -264,6 +291,7 @@ struct tracer {
264 void (*pipe_open)(struct trace_iterator *iter); 291 void (*pipe_open)(struct trace_iterator *iter);
265 void (*wait_pipe)(struct trace_iterator *iter); 292 void (*wait_pipe)(struct trace_iterator *iter);
266 void (*close)(struct trace_iterator *iter); 293 void (*close)(struct trace_iterator *iter);
294 void (*pipe_close)(struct trace_iterator *iter);
267 ssize_t (*read)(struct trace_iterator *iter, 295 ssize_t (*read)(struct trace_iterator *iter,
268 struct file *filp, char __user *ubuf, 296 struct file *filp, char __user *ubuf,
269 size_t cnt, loff_t *ppos); 297 size_t cnt, loff_t *ppos);
@@ -364,6 +392,8 @@ int register_tracer(struct tracer *type);
364void unregister_tracer(struct tracer *type); 392void unregister_tracer(struct tracer *type);
365int is_tracing_stopped(void); 393int is_tracing_stopped(void);
366 394
395extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
396
367extern unsigned long nsecs_to_usecs(unsigned long nsecs); 397extern unsigned long nsecs_to_usecs(unsigned long nsecs);
368 398
369#ifdef CONFIG_TRACER_MAX_TRACE 399#ifdef CONFIG_TRACER_MAX_TRACE
@@ -413,7 +443,7 @@ extern int DYN_FTRACE_TEST_NAME(void);
413 443
414extern int ring_buffer_expanded; 444extern int ring_buffer_expanded;
415extern bool tracing_selftest_disabled; 445extern bool tracing_selftest_disabled;
416DECLARE_PER_CPU(local_t, ftrace_cpu_disabled); 446DECLARE_PER_CPU(int, ftrace_cpu_disabled);
417 447
418#ifdef CONFIG_FTRACE_STARTUP_TEST 448#ifdef CONFIG_FTRACE_STARTUP_TEST
419extern int trace_selftest_startup_function(struct tracer *trace, 449extern int trace_selftest_startup_function(struct tracer *trace,
@@ -438,6 +468,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
438 struct trace_array *tr); 468 struct trace_array *tr);
439extern int trace_selftest_startup_hw_branches(struct tracer *trace, 469extern int trace_selftest_startup_hw_branches(struct tracer *trace,
440 struct trace_array *tr); 470 struct trace_array *tr);
471extern int trace_selftest_startup_ksym(struct tracer *trace,
472 struct trace_array *tr);
441#endif /* CONFIG_FTRACE_STARTUP_TEST */ 473#endif /* CONFIG_FTRACE_STARTUP_TEST */
442 474
443extern void *head_page(struct trace_array_cpu *data); 475extern void *head_page(struct trace_array_cpu *data);
@@ -465,6 +497,7 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
465#ifdef CONFIG_DYNAMIC_FTRACE 497#ifdef CONFIG_DYNAMIC_FTRACE
466/* TODO: make this variable */ 498/* TODO: make this variable */
467#define FTRACE_GRAPH_MAX_FUNCS 32 499#define FTRACE_GRAPH_MAX_FUNCS 32
500extern int ftrace_graph_filter_enabled;
468extern int ftrace_graph_count; 501extern int ftrace_graph_count;
469extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; 502extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
470 503
@@ -472,7 +505,7 @@ static inline int ftrace_graph_addr(unsigned long addr)
472{ 505{
473 int i; 506 int i;
474 507
475 if (!ftrace_graph_count || test_tsk_trace_graph(current)) 508 if (!ftrace_graph_filter_enabled)
476 return 1; 509 return 1;
477 510
478 for (i = 0; i < ftrace_graph_count; i++) { 511 for (i = 0; i < ftrace_graph_count; i++) {
@@ -483,10 +516,6 @@ static inline int ftrace_graph_addr(unsigned long addr)
483 return 0; 516 return 0;
484} 517}
485#else 518#else
486static inline int ftrace_trace_addr(unsigned long addr)
487{
488 return 1;
489}
490static inline int ftrace_graph_addr(unsigned long addr) 519static inline int ftrace_graph_addr(unsigned long addr)
491{ 520{
492 return 1; 521 return 1;
@@ -500,12 +529,12 @@ print_graph_function(struct trace_iterator *iter)
500} 529}
501#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 530#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
502 531
503extern struct pid *ftrace_pid_trace; 532extern struct list_head ftrace_pids;
504 533
505#ifdef CONFIG_FUNCTION_TRACER 534#ifdef CONFIG_FUNCTION_TRACER
506static inline int ftrace_trace_task(struct task_struct *task) 535static inline int ftrace_trace_task(struct task_struct *task)
507{ 536{
508 if (!ftrace_pid_trace) 537 if (list_empty(&ftrace_pids))
509 return 1; 538 return 1;
510 539
511 return test_tsk_trace_trace(task); 540 return test_tsk_trace_trace(task);
@@ -569,18 +598,17 @@ enum trace_iterator_flags {
569 TRACE_ITER_BIN = 0x40, 598 TRACE_ITER_BIN = 0x40,
570 TRACE_ITER_BLOCK = 0x80, 599 TRACE_ITER_BLOCK = 0x80,
571 TRACE_ITER_STACKTRACE = 0x100, 600 TRACE_ITER_STACKTRACE = 0x100,
572 TRACE_ITER_SCHED_TREE = 0x200, 601 TRACE_ITER_PRINTK = 0x200,
573 TRACE_ITER_PRINTK = 0x400, 602 TRACE_ITER_PREEMPTONLY = 0x400,
574 TRACE_ITER_PREEMPTONLY = 0x800, 603 TRACE_ITER_BRANCH = 0x800,
575 TRACE_ITER_BRANCH = 0x1000, 604 TRACE_ITER_ANNOTATE = 0x1000,
576 TRACE_ITER_ANNOTATE = 0x2000, 605 TRACE_ITER_USERSTACKTRACE = 0x2000,
577 TRACE_ITER_USERSTACKTRACE = 0x4000, 606 TRACE_ITER_SYM_USEROBJ = 0x4000,
578 TRACE_ITER_SYM_USEROBJ = 0x8000, 607 TRACE_ITER_PRINTK_MSGONLY = 0x8000,
579 TRACE_ITER_PRINTK_MSGONLY = 0x10000, 608 TRACE_ITER_CONTEXT_INFO = 0x10000, /* Print pid/cpu/time */
580 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ 609 TRACE_ITER_LATENCY_FMT = 0x20000,
581 TRACE_ITER_LATENCY_FMT = 0x40000, 610 TRACE_ITER_SLEEP_TIME = 0x40000,
582 TRACE_ITER_SLEEP_TIME = 0x80000, 611 TRACE_ITER_GRAPH_TIME = 0x80000,
583 TRACE_ITER_GRAPH_TIME = 0x100000,
584}; 612};
585 613
586/* 614/*
@@ -687,7 +715,6 @@ struct event_filter {
687 int n_preds; 715 int n_preds;
688 struct filter_pred **preds; 716 struct filter_pred **preds;
689 char *filter_string; 717 char *filter_string;
690 bool no_reset;
691}; 718};
692 719
693struct event_subsystem { 720struct event_subsystem {
@@ -699,22 +726,40 @@ struct event_subsystem {
699}; 726};
700 727
701struct filter_pred; 728struct filter_pred;
729struct regex;
702 730
703typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, 731typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
704 int val1, int val2); 732 int val1, int val2);
705 733
734typedef int (*regex_match_func)(char *str, struct regex *r, int len);
735
736enum regex_type {
737 MATCH_FULL = 0,
738 MATCH_FRONT_ONLY,
739 MATCH_MIDDLE_ONLY,
740 MATCH_END_ONLY,
741};
742
743struct regex {
744 char pattern[MAX_FILTER_STR_VAL];
745 int len;
746 int field_len;
747 regex_match_func match;
748};
749
706struct filter_pred { 750struct filter_pred {
707 filter_pred_fn_t fn; 751 filter_pred_fn_t fn;
708 u64 val; 752 u64 val;
709 char str_val[MAX_FILTER_STR_VAL]; 753 struct regex regex;
710 int str_len; 754 char *field_name;
711 char *field_name; 755 int offset;
712 int offset; 756 int not;
713 int not; 757 int op;
714 int op; 758 int pop_n;
715 int pop_n;
716}; 759};
717 760
761extern enum regex_type
762filter_parse_regex(char *buff, int len, char **search, int *not);
718extern void print_event_filter(struct ftrace_event_call *call, 763extern void print_event_filter(struct ftrace_event_call *call,
719 struct trace_seq *s); 764 struct trace_seq *s);
720extern int apply_event_filter(struct ftrace_event_call *call, 765extern int apply_event_filter(struct ftrace_event_call *call,
@@ -730,7 +775,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
730 struct ring_buffer *buffer, 775 struct ring_buffer *buffer,
731 struct ring_buffer_event *event) 776 struct ring_buffer_event *event)
732{ 777{
733 if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { 778 if (unlikely(call->filter_active) &&
779 !filter_match_preds(call->filter, rec)) {
734 ring_buffer_discard_commit(buffer, event); 780 ring_buffer_discard_commit(buffer, event);
735 return 1; 781 return 1;
736 } 782 }
@@ -746,7 +792,8 @@ extern const char *__stop___trace_bprintk_fmt[];
746 792
747#undef FTRACE_ENTRY 793#undef FTRACE_ENTRY
748#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ 794#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
749 extern struct ftrace_event_call event_##call; 795 extern struct ftrace_event_call \
796 __attribute__((__aligned__(4))) event_##call;
750#undef FTRACE_ENTRY_DUP 797#undef FTRACE_ENTRY_DUP
751#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ 798#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
752 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 799 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
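
The new kprobe/kretprobe record types above end in a flexible array member, so a record's size depends on how many arguments the probe fetches; SIZEOF_KPROBE_TRACE_ENTRY() computes it with offsetof() rather than sizeof(). A small userspace sketch of the same arithmetic -- struct ent below only approximates struct trace_entry (type, flags, preempt_count, pid, lock_depth), and exact sizes are config dependent:

	#include <stddef.h>
	#include <stdio.h>

	struct ent {			/* stand-in for struct trace_entry */
		unsigned short	type;
		unsigned char	flags;
		unsigned char	preempt_count;
		int		pid;
		int		lock_depth;
	};

	struct kprobe_rec {		/* same shape as kprobe_trace_entry */
		struct ent	ent;
		unsigned long	ip;
		int		nargs;
		unsigned long	args[];	/* flexible array member */
	};

	#define SIZEOF_KPROBE_REC(n) \
		(offsetof(struct kprobe_rec, args) + sizeof(unsigned long) * (n))

	int main(void)
	{
		/* the record grows by one word per fetched argument */
		printf("%zu %zu\n", SIZEOF_KPROBE_REC(0), SIZEOF_KPROBE_REC(3));
		return 0;
	}
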
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 4a194f08f88c..b9bc4d470177 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -307,8 +307,23 @@ static int annotated_branch_stat_cmp(void *p1, void *p2)
307 return -1; 307 return -1;
308 if (percent_a > percent_b) 308 if (percent_a > percent_b)
309 return 1; 309 return 1;
310 else 310
311 return 0; 311 if (a->incorrect < b->incorrect)
312 return -1;
313 if (a->incorrect > b->incorrect)
314 return 1;
315
316 /*
317 * Since the above shows worse (incorrect) cases
318 * first, we continue that by showing best (correct)
319 * cases last.
320 */
321 if (a->correct > b->correct)
322 return -1;
323 if (a->correct < b->correct)
324 return 1;
325
326 return 0;
312} 327}
313 328
314static struct tracer_stat annotated_branch_stats = { 329static struct tracer_stat annotated_branch_stats = {
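
The comparator above previously collapsed every equal-percentage pair to 0; the added tie-breakers keep the stat listing stable and ordered by absolute counts, as the new comment describes. A userspace sketch of the same cascading-compare shape (the percentage key is omitted and struct bstat is a stand-in, not the kernel's branch stat structure):

	#include <stdio.h>
	#include <stdlib.h>

	struct bstat { long correct, incorrect; };

	static int bstat_cmp(const void *pa, const void *pb)
	{
		const struct bstat *a = pa, *b = pb;

		/* primary key first, then fall through to tie-breakers */
		if (a->incorrect != b->incorrect)
			return a->incorrect < b->incorrect ? -1 : 1;
		if (a->correct != b->correct)
			return a->correct > b->correct ? -1 : 1;
		return 0;
	}

	int main(void)
	{
		struct bstat s[] = { { 10, 3 }, { 50, 3 }, { 2, 0 } };
		int i;

		qsort(s, 3, sizeof(s[0]), bstat_cmp);
		for (i = 0; i < 3; i++)
			printf("%ld/%ld\n", s[i].correct, s[i].incorrect);
		return 0;
	}
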
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 20c5f92e28a8..84a3a7ba072a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -20,6 +20,8 @@
20#include <linux/ktime.h> 20#include <linux/ktime.h>
21#include <linux/trace_clock.h> 21#include <linux/trace_clock.h>
22 22
23#include "trace.h"
24
23/* 25/*
24 * trace_clock_local(): the simplest and least coherent tracing clock. 26 * trace_clock_local(): the simplest and least coherent tracing clock.
25 * 27 *
@@ -28,17 +30,17 @@
28 */ 30 */
29u64 notrace trace_clock_local(void) 31u64 notrace trace_clock_local(void)
30{ 32{
31 unsigned long flags;
32 u64 clock; 33 u64 clock;
34 int resched;
33 35
34 /* 36 /*
35 * sched_clock() is an architecture implemented, fast, scalable, 37 * sched_clock() is an architecture implemented, fast, scalable,
36 * lockless clock. It is not guaranteed to be coherent across 38 * lockless clock. It is not guaranteed to be coherent across
37 * CPUs, nor across CPU idle events. 39 * CPUs, nor across CPU idle events.
38 */ 40 */
39 raw_local_irq_save(flags); 41 resched = ftrace_preempt_disable();
40 clock = sched_clock(); 42 clock = sched_clock();
41 raw_local_irq_restore(flags); 43 ftrace_preempt_enable(resched);
42 44
43 return clock; 45 return clock;
44} 46}
@@ -69,10 +71,10 @@ u64 notrace trace_clock(void)
69/* keep prev_time and lock in the same cacheline. */ 71/* keep prev_time and lock in the same cacheline. */
70static struct { 72static struct {
71 u64 prev_time; 73 u64 prev_time;
72 raw_spinlock_t lock; 74 arch_spinlock_t lock;
73} trace_clock_struct ____cacheline_aligned_in_smp = 75} trace_clock_struct ____cacheline_aligned_in_smp =
74 { 76 {
75 .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED, 77 .lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
76 }; 78 };
77 79
78u64 notrace trace_clock_global(void) 80u64 notrace trace_clock_global(void)
@@ -92,7 +94,7 @@ u64 notrace trace_clock_global(void)
92 if (unlikely(in_nmi())) 94 if (unlikely(in_nmi()))
93 goto out; 95 goto out;
94 96
95 __raw_spin_lock(&trace_clock_struct.lock); 97 arch_spin_lock(&trace_clock_struct.lock);
96 98
97 /* 99 /*
98 * TODO: if this happens often then maybe we should reset 100 * TODO: if this happens often then maybe we should reset
@@ -104,7 +106,7 @@ u64 notrace trace_clock_global(void)
104 106
105 trace_clock_struct.prev_time = now; 107 trace_clock_struct.prev_time = now;
106 108
107 __raw_spin_unlock(&trace_clock_struct.lock); 109 arch_spin_unlock(&trace_clock_struct.lock);
108 110
109 out: 111 out:
110 raw_local_irq_restore(flags); 112 raw_local_irq_restore(flags);
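
Two separate changes above: trace_clock_local() now only guards sched_clock() with the tracer-safe ftrace_preempt_disable()/ftrace_preempt_enable() pair instead of masking interrupts, and the global clock's lock moves to the renamed arch_spinlock_t/arch_spin_*() API. For orientation, the critical section that this arch spinlock protects clamps the clock so it never runs backwards across CPUs; roughly the following, a sketch of the surrounding function rather than text from this hunk:

	arch_spin_lock(&trace_clock_struct.lock);

	/* never hand out a value older than the last one returned */
	if ((s64)(now - trace_clock_struct.prev_time) < 0)
		now = trace_clock_struct.prev_time;

	trace_clock_struct.prev_time = now;
	arch_spin_unlock(&trace_clock_struct.lock);
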
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ead3d724599d..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
364 F_printk("type:%u call_site:%lx ptr:%p", 364 F_printk("type:%u call_site:%lx ptr:%p",
365 __entry->type_id, __entry->call_site, __entry->ptr) 365 __entry->type_id, __entry->call_site, __entry->ptr)
366); 366);
367
368FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
369
370 TRACE_KSYM,
371
372 F_STRUCT(
373 __field( unsigned long, ip )
374 __field( unsigned char, type )
375 __array( char , cmd, TASK_COMM_LEN )
376 __field( unsigned long, addr )
377 ),
378
379 F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
380 (void *)__entry->ip, (unsigned int)__entry->type,
381 (void *)__entry->addr, __entry->cmd)
382);
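
For reference, the F_STRUCT() block above describes a record laid out roughly as follows; the struct itself is emitted by the FTRACE_ENTRY() machinery and is the ksym_trace_entry referenced by the new IF_ASSIGN(..., TRACE_KSYM) case in trace.h:

	struct ksym_trace_entry {
		struct trace_entry	ent;
		unsigned long		ip;
		unsigned char		type;
		char			cmd[TASK_COMM_LEN];
		unsigned long		addr;
	};
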
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 8d5c171cc998..f0d693005075 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -6,19 +6,14 @@
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/kprobes.h>
9#include "trace.h" 10#include "trace.h"
10 11
11/*
12 * We can't use a size but a type in alloc_percpu()
13 * So let's create a dummy type that matches the desired size
14 */
15typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
16 12
17char *trace_profile_buf; 13static char *perf_trace_buf;
18EXPORT_SYMBOL_GPL(trace_profile_buf); 14static char *perf_trace_buf_nmi;
19 15
20char *trace_profile_buf_nmi; 16typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
21EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
22 17
23/* Count the events in use (per event id, not per instance) */ 18/* Count the events in use (per event id, not per instance) */
24static int total_profile_count; 19static int total_profile_count;
@@ -28,24 +23,24 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
28 char *buf; 23 char *buf;
29 int ret = -ENOMEM; 24 int ret = -ENOMEM;
30 25
31 if (atomic_inc_return(&event->profile_count)) 26 if (event->profile_count++ > 0)
32 return 0; 27 return 0;
33 28
34 if (!total_profile_count) { 29 if (!total_profile_count) {
35 buf = (char *)alloc_percpu(profile_buf_t); 30 buf = (char *)alloc_percpu(perf_trace_t);
36 if (!buf) 31 if (!buf)
37 goto fail_buf; 32 goto fail_buf;
38 33
39 rcu_assign_pointer(trace_profile_buf, buf); 34 rcu_assign_pointer(perf_trace_buf, buf);
40 35
41 buf = (char *)alloc_percpu(profile_buf_t); 36 buf = (char *)alloc_percpu(perf_trace_t);
42 if (!buf) 37 if (!buf)
43 goto fail_buf_nmi; 38 goto fail_buf_nmi;
44 39
45 rcu_assign_pointer(trace_profile_buf_nmi, buf); 40 rcu_assign_pointer(perf_trace_buf_nmi, buf);
46 } 41 }
47 42
48 ret = event->profile_enable(); 43 ret = event->profile_enable(event);
49 if (!ret) { 44 if (!ret) {
50 total_profile_count++; 45 total_profile_count++;
51 return 0; 46 return 0;
@@ -53,13 +48,13 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
53 48
54fail_buf_nmi: 49fail_buf_nmi:
55 if (!total_profile_count) { 50 if (!total_profile_count) {
56 free_percpu(trace_profile_buf_nmi); 51 free_percpu(perf_trace_buf_nmi);
57 free_percpu(trace_profile_buf); 52 free_percpu(perf_trace_buf);
58 trace_profile_buf_nmi = NULL; 53 perf_trace_buf_nmi = NULL;
59 trace_profile_buf = NULL; 54 perf_trace_buf = NULL;
60 } 55 }
61fail_buf: 56fail_buf:
62 atomic_dec(&event->profile_count); 57 event->profile_count--;
63 58
64 return ret; 59 return ret;
65} 60}
@@ -86,17 +81,17 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
86{ 81{
87 char *buf, *nmi_buf; 82 char *buf, *nmi_buf;
88 83
89 if (!atomic_add_negative(-1, &event->profile_count)) 84 if (--event->profile_count > 0)
90 return; 85 return;
91 86
92 event->profile_disable(); 87 event->profile_disable(event);
93 88
94 if (!--total_profile_count) { 89 if (!--total_profile_count) {
95 buf = trace_profile_buf; 90 buf = perf_trace_buf;
96 rcu_assign_pointer(trace_profile_buf, NULL); 91 rcu_assign_pointer(perf_trace_buf, NULL);
97 92
98 nmi_buf = trace_profile_buf_nmi; 93 nmi_buf = perf_trace_buf_nmi;
99 rcu_assign_pointer(trace_profile_buf_nmi, NULL); 94 rcu_assign_pointer(perf_trace_buf_nmi, NULL);
100 95
101 /* 96 /*
102 * Ensure every events in profiling have finished before 97 * Ensure every events in profiling have finished before
@@ -123,3 +118,47 @@ void ftrace_profile_disable(int event_id)
123 } 118 }
124 mutex_unlock(&event_mutex); 119 mutex_unlock(&event_mutex);
125} 120}
121
122__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
123 int *rctxp, unsigned long *irq_flags)
124{
125 struct trace_entry *entry;
126 char *trace_buf, *raw_data;
127 int pc, cpu;
128
129 pc = preempt_count();
130
131 /* Protect the per cpu buffer, begin the rcu read side */
132 local_irq_save(*irq_flags);
133
134 *rctxp = perf_swevent_get_recursion_context();
135 if (*rctxp < 0)
136 goto err_recursion;
137
138 cpu = smp_processor_id();
139
140 if (in_nmi())
141 trace_buf = rcu_dereference(perf_trace_buf_nmi);
142 else
143 trace_buf = rcu_dereference(perf_trace_buf);
144
145 if (!trace_buf)
146 goto err;
147
148 raw_data = per_cpu_ptr(trace_buf, cpu);
149
150 /* zero the dead bytes from align to not leak stack to user */
151 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
152
153 entry = (struct trace_entry *)raw_data;
154 tracing_generic_entry_update(entry, *irq_flags, pc);
155 entry->type = type;
156
157 return raw_data;
158err:
159 perf_swevent_put_recursion_context(*rctxp);
160err_recursion:
161 local_irq_restore(*irq_flags);
162 return NULL;
163}
164EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare);
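
ftrace_perf_buf_prepare() above hands the caller a per-cpu scratch record with interrupts off and a perf recursion context held; the caller is expected to fill in the event fields, push the record to perf, and then release exactly what prepare() took, mirroring the function's own error path. A hypothetical caller sketch -- my_entry, event_id and the "submit to perf" step are illustrative, not symbols added by this patch; the size is padded to u64 so the final-word zeroing in prepare() stays in bounds:

	struct my_entry *entry;
	unsigned long irq_flags;
	int rctx, size;

	size = ALIGN(sizeof(*entry), sizeof(u64));
	entry = ftrace_perf_buf_prepare(size, event_id, &rctx, &irq_flags);
	if (!entry)
		return;

	/* ... fill in the event-specific fields of *entry ... */
	/* ... submit entry/size to perf here ... */

	perf_swevent_put_recursion_context(rctx);
	local_irq_restore(irq_flags);
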
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index d128f65778e6..3f972ad98d04 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -60,10 +60,8 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
60 return 0; 60 return 0;
61 61
62err: 62err:
63 if (field) { 63 if (field)
64 kfree(field->name); 64 kfree(field->name);
65 kfree(field->type);
66 }
67 kfree(field); 65 kfree(field);
68 66
69 return -ENOMEM; 67 return -ENOMEM;
@@ -78,7 +76,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
78 if (ret) \ 76 if (ret) \
79 return ret; 77 return ret;
80 78
81int trace_define_common_fields(struct ftrace_event_call *call) 79static int trace_define_common_fields(struct ftrace_event_call *call)
82{ 80{
83 int ret; 81 int ret;
84 struct trace_entry ent; 82 struct trace_entry ent;
@@ -91,11 +89,8 @@ int trace_define_common_fields(struct ftrace_event_call *call)
91 89
92 return ret; 90 return ret;
93} 91}
94EXPORT_SYMBOL_GPL(trace_define_common_fields);
95
96#ifdef CONFIG_MODULES
97 92
98static void trace_destroy_fields(struct ftrace_event_call *call) 93void trace_destroy_fields(struct ftrace_event_call *call)
99{ 94{
100 struct ftrace_event_field *field, *next; 95 struct ftrace_event_field *field, *next;
101 96
@@ -107,27 +102,49 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
107 } 102 }
108} 103}
109 104
110#endif /* CONFIG_MODULES */ 105int trace_event_raw_init(struct ftrace_event_call *call)
106{
107 int id;
111 108
112static void ftrace_event_enable_disable(struct ftrace_event_call *call, 109 id = register_ftrace_event(call->event);
110 if (!id)
111 return -ENODEV;
112 call->id = id;
113 INIT_LIST_HEAD(&call->fields);
114
115 return 0;
116}
117EXPORT_SYMBOL_GPL(trace_event_raw_init);
118
119static int ftrace_event_enable_disable(struct ftrace_event_call *call,
113 int enable) 120 int enable)
114{ 121{
122 int ret = 0;
123
115 switch (enable) { 124 switch (enable) {
116 case 0: 125 case 0:
117 if (call->enabled) { 126 if (call->enabled) {
118 call->enabled = 0; 127 call->enabled = 0;
119 tracing_stop_cmdline_record(); 128 tracing_stop_cmdline_record();
120 call->unregfunc(call->data); 129 call->unregfunc(call);
121 } 130 }
122 break; 131 break;
123 case 1: 132 case 1:
124 if (!call->enabled) { 133 if (!call->enabled) {
125 call->enabled = 1;
126 tracing_start_cmdline_record(); 134 tracing_start_cmdline_record();
127 call->regfunc(call->data); 135 ret = call->regfunc(call);
136 if (ret) {
137 tracing_stop_cmdline_record();
138 pr_info("event trace: Could not enable event "
139 "%s\n", call->name);
140 break;
141 }
142 call->enabled = 1;
128 } 143 }
129 break; 144 break;
130 } 145 }
146
147 return ret;
131} 148}
132 149
133static void ftrace_clear_events(void) 150static void ftrace_clear_events(void)
@@ -406,7 +423,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
406 case 0: 423 case 0:
407 case 1: 424 case 1:
408 mutex_lock(&event_mutex); 425 mutex_lock(&event_mutex);
409 ftrace_event_enable_disable(call, val); 426 ret = ftrace_event_enable_disable(call, val);
410 mutex_unlock(&event_mutex); 427 mutex_unlock(&event_mutex);
411 break; 428 break;
412 429
@@ -416,7 +433,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
416 433
417 *ppos += cnt; 434 *ppos += cnt;
418 435
419 return cnt; 436 return ret ? ret : cnt;
420} 437}
421 438
422static ssize_t 439static ssize_t
@@ -501,41 +518,16 @@ out:
501 return ret; 518 return ret;
502} 519}
503 520
504extern char *__bad_type_size(void);
505
506#undef FIELD
507#define FIELD(type, name) \
508 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
509 #type, "common_" #name, offsetof(typeof(field), name), \
510 sizeof(field.name)
511
512static int trace_write_header(struct trace_seq *s)
513{
514 struct trace_entry field;
515
516 /* struct trace_entry */
517 return trace_seq_printf(s,
518 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
519 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
520 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
521 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
522 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
523 "\n",
524 FIELD(unsigned short, type),
525 FIELD(unsigned char, flags),
526 FIELD(unsigned char, preempt_count),
527 FIELD(int, pid),
528 FIELD(int, lock_depth));
529}
530
531static ssize_t 521static ssize_t
532event_format_read(struct file *filp, char __user *ubuf, size_t cnt, 522event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
533 loff_t *ppos) 523 loff_t *ppos)
534{ 524{
535 struct ftrace_event_call *call = filp->private_data; 525 struct ftrace_event_call *call = filp->private_data;
526 struct ftrace_event_field *field;
536 struct trace_seq *s; 527 struct trace_seq *s;
528 int common_field_count = 5;
537 char *buf; 529 char *buf;
538 int r; 530 int r = 0;
539 531
540 if (*ppos) 532 if (*ppos)
541 return 0; 533 return 0;
@@ -546,14 +538,48 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
546 538
547 trace_seq_init(s); 539 trace_seq_init(s);
548 540
549 /* If any of the first writes fail, so will the show_format. */
550
551 trace_seq_printf(s, "name: %s\n", call->name); 541 trace_seq_printf(s, "name: %s\n", call->name);
552 trace_seq_printf(s, "ID: %d\n", call->id); 542 trace_seq_printf(s, "ID: %d\n", call->id);
553 trace_seq_printf(s, "format:\n"); 543 trace_seq_printf(s, "format:\n");
554 trace_write_header(s);
555 544
556 r = call->show_format(call, s); 545 list_for_each_entry_reverse(field, &call->fields, link) {
546 /*
547 * Smartly shows the array type(except dynamic array).
548 * Normal:
549 * field:TYPE VAR
550 * If TYPE := TYPE[LEN], it is shown:
551 * field:TYPE VAR[LEN]
552 */
553 const char *array_descriptor = strchr(field->type, '[');
554
555 if (!strncmp(field->type, "__data_loc", 10))
556 array_descriptor = NULL;
557
558 if (!array_descriptor) {
559 r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
560 "\tsize:%u;\tsigned:%d;\n",
561 field->type, field->name, field->offset,
562 field->size, !!field->is_signed);
563 } else {
564 r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
565 "\tsize:%u;\tsigned:%d;\n",
566 (int)(array_descriptor - field->type),
567 field->type, field->name,
568 array_descriptor, field->offset,
569 field->size, !!field->is_signed);
570 }
571
572 if (--common_field_count == 0)
573 r = trace_seq_printf(s, "\n");
574
575 if (!r)
576 break;
577 }
578
579 if (r)
580 r = trace_seq_printf(s, "\nprint fmt: %s\n",
581 call->print_fmt);
582
557 if (!r) { 583 if (!r) {
558 /* 584 /*
559 * ug! The format output is bigger than a PAGE!! 585 * ug! The format output is bigger than a PAGE!!
@@ -878,9 +904,9 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
878 "'%s/filter' entry\n", name); 904 "'%s/filter' entry\n", name);
879 } 905 }
880 906
881 entry = trace_create_file("enable", 0644, system->entry, 907 trace_create_file("enable", 0644, system->entry,
882 (void *)system->name, 908 (void *)system->name,
883 &ftrace_system_enable_fops); 909 &ftrace_system_enable_fops);
884 910
885 return system->entry; 911 return system->entry;
886} 912}
@@ -892,7 +918,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
892 const struct file_operations *filter, 918 const struct file_operations *filter,
893 const struct file_operations *format) 919 const struct file_operations *format)
894{ 920{
895 struct dentry *entry;
896 int ret; 921 int ret;
897 922
898 /* 923 /*
@@ -910,55 +935,72 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
910 } 935 }
911 936
912 if (call->regfunc) 937 if (call->regfunc)
913 entry = trace_create_file("enable", 0644, call->dir, call, 938 trace_create_file("enable", 0644, call->dir, call,
914 enable); 939 enable);
915 940
916 if (call->id && call->profile_enable) 941 if (call->id && call->profile_enable)
917 entry = trace_create_file("id", 0444, call->dir, call, 942 trace_create_file("id", 0444, call->dir, call,
918 id); 943 id);
919 944
920 if (call->define_fields) { 945 if (call->define_fields) {
921 ret = call->define_fields(call); 946 ret = trace_define_common_fields(call);
947 if (!ret)
948 ret = call->define_fields(call);
922 if (ret < 0) { 949 if (ret < 0) {
923 pr_warning("Could not initialize trace point" 950 pr_warning("Could not initialize trace point"
924 " events/%s\n", call->name); 951 " events/%s\n", call->name);
925 return ret; 952 return ret;
926 } 953 }
927 entry = trace_create_file("filter", 0644, call->dir, call, 954 trace_create_file("filter", 0644, call->dir, call,
928 filter); 955 filter);
929 } 956 }
930 957
931 /* A trace may not want to export its format */ 958 trace_create_file("format", 0444, call->dir, call,
932 if (!call->show_format) 959 format);
933 return 0;
934
935 entry = trace_create_file("format", 0444, call->dir, call,
936 format);
937 960
938 return 0; 961 return 0;
939} 962}
940 963
941#define for_each_event(event, start, end) \ 964static int __trace_add_event_call(struct ftrace_event_call *call)
942 for (event = start; \ 965{
943 (unsigned long)event < (unsigned long)end; \ 966 struct dentry *d_events;
944 event++) 967 int ret;
945 968
946#ifdef CONFIG_MODULES 969 if (!call->name)
970 return -EINVAL;
947 971
948static LIST_HEAD(ftrace_module_file_list); 972 if (call->raw_init) {
973 ret = call->raw_init(call);
974 if (ret < 0) {
975 if (ret != -ENOSYS)
976 pr_warning("Could not initialize trace "
977 "events/%s\n", call->name);
978 return ret;
979 }
980 }
949 981
950/* 982 d_events = event_trace_events_dir();
951 * Modules must own their file_operations to keep up with 983 if (!d_events)
952 * reference counting. 984 return -ENOENT;
953 */ 985
954struct ftrace_module_file_ops { 986 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
955 struct list_head list; 987 &ftrace_enable_fops, &ftrace_event_filter_fops,
956 struct module *mod; 988 &ftrace_event_format_fops);
957 struct file_operations id; 989 if (!ret)
958 struct file_operations enable; 990 list_add(&call->list, &ftrace_events);
959 struct file_operations format; 991
960 struct file_operations filter; 992 return ret;
961}; 993}
994
995/* Add an additional event_call dynamically */
996int trace_add_event_call(struct ftrace_event_call *call)
997{
998 int ret;
999 mutex_lock(&event_mutex);
1000 ret = __trace_add_event_call(call);
1001 mutex_unlock(&event_mutex);
1002 return ret;
1003}
962 1004
963static void remove_subsystem_dir(const char *name) 1005static void remove_subsystem_dir(const char *name)
964{ 1006{
@@ -986,6 +1028,53 @@ static void remove_subsystem_dir(const char *name)
986 } 1028 }
987} 1029}
988 1030
1031/*
1032 * Must be called under locking both of event_mutex and trace_event_mutex.
1033 */
1034static void __trace_remove_event_call(struct ftrace_event_call *call)
1035{
1036 ftrace_event_enable_disable(call, 0);
1037 if (call->event)
1038 __unregister_ftrace_event(call->event);
1039 debugfs_remove_recursive(call->dir);
1040 list_del(&call->list);
1041 trace_destroy_fields(call);
1042 destroy_preds(call);
1043 remove_subsystem_dir(call->system);
1044}
1045
1046/* Remove an event_call */
1047void trace_remove_event_call(struct ftrace_event_call *call)
1048{
1049 mutex_lock(&event_mutex);
1050 down_write(&trace_event_mutex);
1051 __trace_remove_event_call(call);
1052 up_write(&trace_event_mutex);
1053 mutex_unlock(&event_mutex);
1054}
1055
1056#define for_each_event(event, start, end) \
1057 for (event = start; \
1058 (unsigned long)event < (unsigned long)end; \
1059 event++)
1060
1061#ifdef CONFIG_MODULES
1062
1063static LIST_HEAD(ftrace_module_file_list);
1064
1065/*
1066 * Modules must own their file_operations to keep up with
1067 * reference counting.
1068 */
1069struct ftrace_module_file_ops {
1070 struct list_head list;
1071 struct module *mod;
1072 struct file_operations id;
1073 struct file_operations enable;
1074 struct file_operations format;
1075 struct file_operations filter;
1076};
1077
989static struct ftrace_module_file_ops * 1078static struct ftrace_module_file_ops *
990trace_create_file_ops(struct module *mod) 1079trace_create_file_ops(struct module *mod)
991{ 1080{
@@ -1043,7 +1132,7 @@ static void trace_module_add_events(struct module *mod)
1043 if (!call->name) 1132 if (!call->name)
1044 continue; 1133 continue;
1045 if (call->raw_init) { 1134 if (call->raw_init) {
1046 ret = call->raw_init(); 1135 ret = call->raw_init(call);
1047 if (ret < 0) { 1136 if (ret < 0) {
1048 if (ret != -ENOSYS) 1137 if (ret != -ENOSYS)
1049 pr_warning("Could not initialize trace " 1138 pr_warning("Could not initialize trace "
@@ -1061,10 +1150,11 @@ static void trace_module_add_events(struct module *mod)
1061 return; 1150 return;
1062 } 1151 }
1063 call->mod = mod; 1152 call->mod = mod;
1064 list_add(&call->list, &ftrace_events); 1153 ret = event_create_dir(call, d_events,
1065 event_create_dir(call, d_events, 1154 &file_ops->id, &file_ops->enable,
1066 &file_ops->id, &file_ops->enable, 1155 &file_ops->filter, &file_ops->format);
1067 &file_ops->filter, &file_ops->format); 1156 if (!ret)
1157 list_add(&call->list, &ftrace_events);
1068 } 1158 }
1069} 1159}
1070 1160
@@ -1078,14 +1168,7 @@ static void trace_module_remove_events(struct module *mod)
1078 list_for_each_entry_safe(call, p, &ftrace_events, list) { 1168 list_for_each_entry_safe(call, p, &ftrace_events, list) {
1079 if (call->mod == mod) { 1169 if (call->mod == mod) {
1080 found = true; 1170 found = true;
1081 ftrace_event_enable_disable(call, 0); 1171 __trace_remove_event_call(call);
1082 if (call->event)
1083 __unregister_ftrace_event(call->event);
1084 debugfs_remove_recursive(call->dir);
1085 list_del(&call->list);
1086 trace_destroy_fields(call);
1087 destroy_preds(call);
1088 remove_subsystem_dir(call->system);
1089 } 1172 }
1090 } 1173 }
1091 1174
@@ -1203,7 +1286,7 @@ static __init int event_trace_init(void)
1203 if (!call->name) 1286 if (!call->name)
1204 continue; 1287 continue;
1205 if (call->raw_init) { 1288 if (call->raw_init) {
1206 ret = call->raw_init(); 1289 ret = call->raw_init(call);
1207 if (ret < 0) { 1290 if (ret < 0) {
1208 if (ret != -ENOSYS) 1291 if (ret != -ENOSYS)
1209 pr_warning("Could not initialize trace " 1292 pr_warning("Could not initialize trace "
@@ -1211,10 +1294,12 @@ static __init int event_trace_init(void)
1211 continue; 1294 continue;
1212 } 1295 }
1213 } 1296 }
1214 list_add(&call->list, &ftrace_events); 1297 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1215 event_create_dir(call, d_events, &ftrace_event_id_fops, 1298 &ftrace_enable_fops,
1216 &ftrace_enable_fops, &ftrace_event_filter_fops, 1299 &ftrace_event_filter_fops,
1217 &ftrace_event_format_fops); 1300 &ftrace_event_format_fops);
1301 if (!ret)
1302 list_add(&call->list, &ftrace_events);
1218 } 1303 }
1219 1304
1220 while (true) { 1305 while (true) {
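
Together, trace_event_raw_init(), trace_add_event_call() and trace_remove_event_call() above form the interface for registering trace events after boot (the kprobe-based events in this series are the intended user). A hypothetical minimal caller, showing only the members that __trace_add_event_call() itself inspects; a real event also fills in regfunc/unregfunc, define_fields and the rest:

	static struct ftrace_event_call my_call = {
		.name		= "my_dynamic_event",
		.system		= "my_subsys",
		.raw_init	= trace_event_raw_init,	/* added above */
	};

	static int __init my_module_init(void)
	{
		return trace_add_event_call(&my_call);
	}

	static void __exit my_module_exit(void)
	{
		trace_remove_event_call(&my_call);
	}
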
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 98a6cc5c64ed..4615f62a04f1 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -18,11 +18,10 @@
18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> 18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
19 */ 19 */
20 20
21#include <linux/debugfs.h>
22#include <linux/uaccess.h>
23#include <linux/module.h> 21#include <linux/module.h>
24#include <linux/ctype.h> 22#include <linux/ctype.h>
25#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/perf_event.h>
26 25
27#include "trace.h" 26#include "trace.h"
28#include "trace_output.h" 27#include "trace_output.h"
@@ -31,6 +30,7 @@ enum filter_op_ids
31{ 30{
32 OP_OR, 31 OP_OR,
33 OP_AND, 32 OP_AND,
33 OP_GLOB,
34 OP_NE, 34 OP_NE,
35 OP_EQ, 35 OP_EQ,
36 OP_LT, 36 OP_LT,
@@ -48,16 +48,17 @@ struct filter_op {
48}; 48};
49 49
50static struct filter_op filter_ops[] = { 50static struct filter_op filter_ops[] = {
51 { OP_OR, "||", 1 }, 51 { OP_OR, "||", 1 },
52 { OP_AND, "&&", 2 }, 52 { OP_AND, "&&", 2 },
53 { OP_NE, "!=", 4 }, 53 { OP_GLOB, "~", 4 },
54 { OP_EQ, "==", 4 }, 54 { OP_NE, "!=", 4 },
55 { OP_LT, "<", 5 }, 55 { OP_EQ, "==", 4 },
56 { OP_LE, "<=", 5 }, 56 { OP_LT, "<", 5 },
57 { OP_GT, ">", 5 }, 57 { OP_LE, "<=", 5 },
58 { OP_GE, ">=", 5 }, 58 { OP_GT, ">", 5 },
59 { OP_NONE, "OP_NONE", 0 }, 59 { OP_GE, ">=", 5 },
60 { OP_OPEN_PAREN, "(", 0 }, 60 { OP_NONE, "OP_NONE", 0 },
61 { OP_OPEN_PAREN, "(", 0 },
61}; 62};
62 63
63enum { 64enum {
@@ -197,9 +198,9 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
197 char *addr = (char *)(event + pred->offset); 198 char *addr = (char *)(event + pred->offset);
198 int cmp, match; 199 int cmp, match;
199 200
200 cmp = strncmp(addr, pred->str_val, pred->str_len); 201 cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len);
201 202
202 match = (!cmp) ^ pred->not; 203 match = cmp ^ pred->not;
203 204
204 return match; 205 return match;
205} 206}
@@ -210,10 +211,11 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
210{ 211{
211 char **addr = (char **)(event + pred->offset); 212 char **addr = (char **)(event + pred->offset);
212 int cmp, match; 213 int cmp, match;
214 int len = strlen(*addr) + 1; /* including tailing '\0' */
213 215
214 cmp = strncmp(*addr, pred->str_val, pred->str_len); 216 cmp = pred->regex.match(*addr, &pred->regex, len);
215 217
216 match = (!cmp) ^ pred->not; 218 match = cmp ^ pred->not;
217 219
218 return match; 220 return match;
219} 221}
@@ -237,9 +239,9 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event,
237 char *addr = (char *)(event + str_loc); 239 char *addr = (char *)(event + str_loc);
238 int cmp, match; 240 int cmp, match;
239 241
240 cmp = strncmp(addr, pred->str_val, str_len); 242 cmp = pred->regex.match(addr, &pred->regex, str_len);
241 243
242 match = (!cmp) ^ pred->not; 244 match = cmp ^ pred->not;
243 245
244 return match; 246 return match;
245} 247}
@@ -250,10 +252,133 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
250 return 0; 252 return 0;
251} 253}
252 254
255/*
256 * regex_match_foo - Basic regex callbacks
257 *
258 * @str: the string to be searched
259 * @r: the regex structure containing the pattern string
260 * @len: the length of the string to be searched (including '\0')
261 *
262 * Note:
263 * - @str might not be NULL-terminated if it's of type DYN_STRING
264 * or STATIC_STRING
265 */
266
267static int regex_match_full(char *str, struct regex *r, int len)
268{
269 if (strncmp(str, r->pattern, len) == 0)
270 return 1;
271 return 0;
272}
273
274static int regex_match_front(char *str, struct regex *r, int len)
275{
276 if (strncmp(str, r->pattern, r->len) == 0)
277 return 1;
278 return 0;
279}
280
281static int regex_match_middle(char *str, struct regex *r, int len)
282{
283 if (strnstr(str, r->pattern, len))
284 return 1;
285 return 0;
286}
287
288static int regex_match_end(char *str, struct regex *r, int len)
289{
290 int strlen = len - 1;
291
292 if (strlen >= r->len &&
293 memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
294 return 1;
295 return 0;
296}
297
298/**
299 * filter_parse_regex - parse a basic regex
300 * @buff: the raw regex
301 * @len: length of the regex
302 * @search: will point to the beginning of the string to compare
303 * @not: tell whether the match will have to be inverted
304 *
305 * This passes in a buffer containing a regex and this function will
306 * set search to point to the search part of the buffer and
307 * return the type of search it is (see enum above).
308 * This does modify buff.
309 *
310 * Returns enum type.
311 * search returns the pointer to use for comparison.
312 * not returns 1 if buff started with a '!'
313 * 0 otherwise.
314 */
315enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
316{
317 int type = MATCH_FULL;
318 int i;
319
320 if (buff[0] == '!') {
321 *not = 1;
322 buff++;
323 len--;
324 } else
325 *not = 0;
326
327 *search = buff;
328
329 for (i = 0; i < len; i++) {
330 if (buff[i] == '*') {
331 if (!i) {
332 *search = buff + 1;
333 type = MATCH_END_ONLY;
334 } else {
335 if (type == MATCH_END_ONLY)
336 type = MATCH_MIDDLE_ONLY;
337 else
338 type = MATCH_FRONT_ONLY;
339 buff[i] = 0;
340 break;
341 }
342 }
343 }
344
345 return type;
346}
347
348static void filter_build_regex(struct filter_pred *pred)
349{
350 struct regex *r = &pred->regex;
351 char *search;
352 enum regex_type type = MATCH_FULL;
353 int not = 0;
354
355 if (pred->op == OP_GLOB) {
356 type = filter_parse_regex(r->pattern, r->len, &search, &not);
357 r->len = strlen(search);
358 memmove(r->pattern, search, r->len+1);
359 }
360
361 switch (type) {
362 case MATCH_FULL:
363 r->match = regex_match_full;
364 break;
365 case MATCH_FRONT_ONLY:
366 r->match = regex_match_front;
367 break;
368 case MATCH_MIDDLE_ONLY:
369 r->match = regex_match_middle;
370 break;
371 case MATCH_END_ONLY:
372 r->match = regex_match_end;
373 break;
374 }
375
376 pred->not ^= not;
377}
378
253/* return 1 if event matches, 0 otherwise (discard) */ 379/* return 1 if event matches, 0 otherwise (discard) */
254int filter_match_preds(struct ftrace_event_call *call, void *rec) 380int filter_match_preds(struct event_filter *filter, void *rec)
255{ 381{
256 struct event_filter *filter = call->filter;
257 int match, top = 0, val1 = 0, val2 = 0; 382 int match, top = 0, val1 = 0, val2 = 0;
258 int stack[MAX_FILTER_PRED]; 383 int stack[MAX_FILTER_PRED];
259 struct filter_pred *pred; 384 struct filter_pred *pred;
@@ -396,7 +521,7 @@ static void filter_clear_pred(struct filter_pred *pred)
396{ 521{
397 kfree(pred->field_name); 522 kfree(pred->field_name);
398 pred->field_name = NULL; 523 pred->field_name = NULL;
399 pred->str_len = 0; 524 pred->regex.len = 0;
400} 525}
401 526
402static int filter_set_pred(struct filter_pred *dest, 527static int filter_set_pred(struct filter_pred *dest,
@@ -426,9 +551,8 @@ static void filter_disable_preds(struct ftrace_event_call *call)
426 filter->preds[i]->fn = filter_pred_none; 551 filter->preds[i]->fn = filter_pred_none;
427} 552}
428 553
429void destroy_preds(struct ftrace_event_call *call) 554static void __free_preds(struct event_filter *filter)
430{ 555{
431 struct event_filter *filter = call->filter;
432 int i; 556 int i;
433 557
434 if (!filter) 558 if (!filter)
@@ -441,21 +565,24 @@ void destroy_preds(struct ftrace_event_call *call)
441 kfree(filter->preds); 565 kfree(filter->preds);
442 kfree(filter->filter_string); 566 kfree(filter->filter_string);
443 kfree(filter); 567 kfree(filter);
568}
569
570void destroy_preds(struct ftrace_event_call *call)
571{
572 __free_preds(call->filter);
444 call->filter = NULL; 573 call->filter = NULL;
574 call->filter_active = 0;
445} 575}
446 576
447static int init_preds(struct ftrace_event_call *call) 577static struct event_filter *__alloc_preds(void)
448{ 578{
449 struct event_filter *filter; 579 struct event_filter *filter;
450 struct filter_pred *pred; 580 struct filter_pred *pred;
451 int i; 581 int i;
452 582
453 if (call->filter) 583 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
454 return 0; 584 if (!filter)
455 585 return ERR_PTR(-ENOMEM);
456 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
457 if (!call->filter)
458 return -ENOMEM;
459 586
460 filter->n_preds = 0; 587 filter->n_preds = 0;
461 588
@@ -471,12 +598,24 @@ static int init_preds(struct ftrace_event_call *call)
471 filter->preds[i] = pred; 598 filter->preds[i] = pred;
472 } 599 }
473 600
474 return 0; 601 return filter;
475 602
476oom: 603oom:
477 destroy_preds(call); 604 __free_preds(filter);
605 return ERR_PTR(-ENOMEM);
606}
607
608static int init_preds(struct ftrace_event_call *call)
609{
610 if (call->filter)
611 return 0;
478 612
479 return -ENOMEM; 613 call->filter_active = 0;
614 call->filter = __alloc_preds();
615 if (IS_ERR(call->filter))
616 return PTR_ERR(call->filter);
617
618 return 0;
480} 619}
481 620
482static int init_subsystem_preds(struct event_subsystem *system) 621static int init_subsystem_preds(struct event_subsystem *system)
@@ -499,14 +638,7 @@ static int init_subsystem_preds(struct event_subsystem *system)
499 return 0; 638 return 0;
500} 639}
501 640
502enum { 641static void filter_free_subsystem_preds(struct event_subsystem *system)
503 FILTER_DISABLE_ALL,
504 FILTER_INIT_NO_RESET,
505 FILTER_SKIP_NO_RESET,
506};
507
508static void filter_free_subsystem_preds(struct event_subsystem *system,
509 int flag)
510{ 642{
511 struct ftrace_event_call *call; 643 struct ftrace_event_call *call;
512 644
@@ -517,14 +649,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
517 if (strcmp(call->system, system->name) != 0) 649 if (strcmp(call->system, system->name) != 0)
518 continue; 650 continue;
519 651
520 if (flag == FILTER_INIT_NO_RESET) {
521 call->filter->no_reset = false;
522 continue;
523 }
524
525 if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
526 continue;
527
528 filter_disable_preds(call); 652 filter_disable_preds(call);
529 remove_filter_string(call->filter); 653 remove_filter_string(call->filter);
530 } 654 }
@@ -532,10 +656,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
532 656
533static int filter_add_pred_fn(struct filter_parse_state *ps, 657static int filter_add_pred_fn(struct filter_parse_state *ps,
534 struct ftrace_event_call *call, 658 struct ftrace_event_call *call,
659 struct event_filter *filter,
535 struct filter_pred *pred, 660 struct filter_pred *pred,
536 filter_pred_fn_t fn) 661 filter_pred_fn_t fn)
537{ 662{
538 struct event_filter *filter = call->filter;
539 int idx, err; 663 int idx, err;
540 664
541 if (filter->n_preds == MAX_FILTER_PRED) { 665 if (filter->n_preds == MAX_FILTER_PRED) {
@@ -550,7 +674,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
550 return err; 674 return err;
551 675
552 filter->n_preds++; 676 filter->n_preds++;
553 call->filter_active = 1;
554 677
555 return 0; 678 return 0;
556} 679}
@@ -575,7 +698,10 @@ static bool is_string_field(struct ftrace_event_field *field)
575 698
576static int is_legal_op(struct ftrace_event_field *field, int op) 699static int is_legal_op(struct ftrace_event_field *field, int op)
577{ 700{
578 if (is_string_field(field) && (op != OP_EQ && op != OP_NE)) 701 if (is_string_field(field) &&
702 (op != OP_EQ && op != OP_NE && op != OP_GLOB))
703 return 0;
704 if (!is_string_field(field) && op == OP_GLOB)
579 return 0; 705 return 0;
580 706
581 return 1; 707 return 1;
@@ -626,6 +752,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
626 752
627static int filter_add_pred(struct filter_parse_state *ps, 753static int filter_add_pred(struct filter_parse_state *ps,
628 struct ftrace_event_call *call, 754 struct ftrace_event_call *call,
755 struct event_filter *filter,
629 struct filter_pred *pred, 756 struct filter_pred *pred,
630 bool dry_run) 757 bool dry_run)
631{ 758{
@@ -660,21 +787,20 @@ static int filter_add_pred(struct filter_parse_state *ps,
660 } 787 }
661 788
662 if (is_string_field(field)) { 789 if (is_string_field(field)) {
663 pred->str_len = field->size; 790 filter_build_regex(pred);
664 791
665 if (field->filter_type == FILTER_STATIC_STRING) 792 if (field->filter_type == FILTER_STATIC_STRING) {
666 fn = filter_pred_string; 793 fn = filter_pred_string;
667 else if (field->filter_type == FILTER_DYN_STRING) 794 pred->regex.field_len = field->size;
795 } else if (field->filter_type == FILTER_DYN_STRING)
668 fn = filter_pred_strloc; 796 fn = filter_pred_strloc;
669 else { 797 else
670 fn = filter_pred_pchar; 798 fn = filter_pred_pchar;
671 pred->str_len = strlen(pred->str_val);
672 }
673 } else { 799 } else {
674 if (field->is_signed) 800 if (field->is_signed)
675 ret = strict_strtoll(pred->str_val, 0, &val); 801 ret = strict_strtoll(pred->regex.pattern, 0, &val);
676 else 802 else
677 ret = strict_strtoull(pred->str_val, 0, &val); 803 ret = strict_strtoull(pred->regex.pattern, 0, &val);
678 if (ret) { 804 if (ret) {
679 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); 805 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
680 return -EINVAL; 806 return -EINVAL;
@@ -694,45 +820,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
694 820
695add_pred_fn: 821add_pred_fn:
696 if (!dry_run) 822 if (!dry_run)
697 return filter_add_pred_fn(ps, call, pred, fn); 823 return filter_add_pred_fn(ps, call, filter, pred, fn);
698 return 0;
699}
700
701static int filter_add_subsystem_pred(struct filter_parse_state *ps,
702 struct event_subsystem *system,
703 struct filter_pred *pred,
704 char *filter_string,
705 bool dry_run)
706{
707 struct ftrace_event_call *call;
708 int err = 0;
709 bool fail = true;
710
711 list_for_each_entry(call, &ftrace_events, list) {
712
713 if (!call->define_fields)
714 continue;
715
716 if (strcmp(call->system, system->name))
717 continue;
718
719 if (call->filter->no_reset)
720 continue;
721
722 err = filter_add_pred(ps, call, pred, dry_run);
723 if (err)
724 call->filter->no_reset = true;
725 else
726 fail = false;
727
728 if (!dry_run)
729 replace_filter_string(call->filter, filter_string);
730 }
731
732 if (fail) {
733 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
734 return err;
735 }
736 return 0; 824 return 0;
737} 825}
738 826
@@ -1045,8 +1133,8 @@ static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
1045 return NULL; 1133 return NULL;
1046 } 1134 }
1047 1135
1048 strcpy(pred->str_val, operand2); 1136 strcpy(pred->regex.pattern, operand2);
1049 pred->str_len = strlen(operand2); 1137 pred->regex.len = strlen(pred->regex.pattern);
1050 1138
1051 pred->op = op; 1139 pred->op = op;
1052 1140
@@ -1090,8 +1178,8 @@ static int check_preds(struct filter_parse_state *ps)
1090 return 0; 1178 return 0;
1091} 1179}
1092 1180
1093static int replace_preds(struct event_subsystem *system, 1181static int replace_preds(struct ftrace_event_call *call,
1094 struct ftrace_event_call *call, 1182 struct event_filter *filter,
1095 struct filter_parse_state *ps, 1183 struct filter_parse_state *ps,
1096 char *filter_string, 1184 char *filter_string,
1097 bool dry_run) 1185 bool dry_run)
@@ -1138,11 +1226,7 @@ static int replace_preds(struct event_subsystem *system,
1138add_pred: 1226add_pred:
1139 if (!pred) 1227 if (!pred)
1140 return -ENOMEM; 1228 return -ENOMEM;
1141 if (call) 1229 err = filter_add_pred(ps, call, filter, pred, dry_run);
1142 err = filter_add_pred(ps, call, pred, false);
1143 else
1144 err = filter_add_subsystem_pred(ps, system, pred,
1145 filter_string, dry_run);
1146 filter_free_pred(pred); 1230 filter_free_pred(pred);
1147 if (err) 1231 if (err)
1148 return err; 1232 return err;
@@ -1153,10 +1237,50 @@ add_pred:
1153 return 0; 1237 return 0;
1154} 1238}
1155 1239
1156int apply_event_filter(struct ftrace_event_call *call, char *filter_string) 1240static int replace_system_preds(struct event_subsystem *system,
1241 struct filter_parse_state *ps,
1242 char *filter_string)
1157{ 1243{
1244 struct ftrace_event_call *call;
1245 bool fail = true;
1158 int err; 1246 int err;
1159 1247
1248 list_for_each_entry(call, &ftrace_events, list) {
1249 struct event_filter *filter = call->filter;
1250
1251 if (!call->define_fields)
1252 continue;
1253
1254 if (strcmp(call->system, system->name) != 0)
1255 continue;
1256
1257 /* try to see if the filter can be applied */
1258 err = replace_preds(call, filter, ps, filter_string, true);
1259 if (err)
1260 continue;
1261
1262 /* really apply the filter */
1263 filter_disable_preds(call);
1264 err = replace_preds(call, filter, ps, filter_string, false);
1265 if (err)
1266 filter_disable_preds(call);
1267 else {
1268 call->filter_active = 1;
1269 replace_filter_string(filter, filter_string);
1270 }
1271 fail = false;
1272 }
1273
1274 if (fail) {
1275 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
1276 return -EINVAL;
1277 }
1278 return 0;
1279}
1280
1281int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1282{
1283 int err;
1160 struct filter_parse_state *ps; 1284 struct filter_parse_state *ps;
1161 1285
1162 mutex_lock(&event_mutex); 1286 mutex_lock(&event_mutex);
@@ -1168,8 +1292,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1168 if (!strcmp(strstrip(filter_string), "0")) { 1292 if (!strcmp(strstrip(filter_string), "0")) {
1169 filter_disable_preds(call); 1293 filter_disable_preds(call);
1170 remove_filter_string(call->filter); 1294 remove_filter_string(call->filter);
1171 mutex_unlock(&event_mutex); 1295 goto out_unlock;
1172 return 0;
1173 } 1296 }
1174 1297
1175 err = -ENOMEM; 1298 err = -ENOMEM;
@@ -1187,10 +1310,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1187 goto out; 1310 goto out;
1188 } 1311 }
1189 1312
1190 err = replace_preds(NULL, call, ps, filter_string, false); 1313 err = replace_preds(call, call->filter, ps, filter_string, false);
1191 if (err) 1314 if (err)
1192 append_filter_err(ps, call->filter); 1315 append_filter_err(ps, call->filter);
1193 1316 else
1317 call->filter_active = 1;
1194out: 1318out:
1195 filter_opstack_clear(ps); 1319 filter_opstack_clear(ps);
1196 postfix_clear(ps); 1320 postfix_clear(ps);
@@ -1205,7 +1329,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1205 char *filter_string) 1329 char *filter_string)
1206{ 1330{
1207 int err; 1331 int err;
1208
1209 struct filter_parse_state *ps; 1332 struct filter_parse_state *ps;
1210 1333
1211 mutex_lock(&event_mutex); 1334 mutex_lock(&event_mutex);
@@ -1215,10 +1338,9 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1215 goto out_unlock; 1338 goto out_unlock;
1216 1339
1217 if (!strcmp(strstrip(filter_string), "0")) { 1340 if (!strcmp(strstrip(filter_string), "0")) {
1218 filter_free_subsystem_preds(system, FILTER_DISABLE_ALL); 1341 filter_free_subsystem_preds(system);
1219 remove_filter_string(system->filter); 1342 remove_filter_string(system->filter);
1220 mutex_unlock(&event_mutex); 1343 goto out_unlock;
1221 return 0;
1222 } 1344 }
1223 1345
1224 err = -ENOMEM; 1346 err = -ENOMEM;
@@ -1235,31 +1357,87 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1235 goto out; 1357 goto out;
1236 } 1358 }
1237 1359
1238 filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET); 1360 err = replace_system_preds(system, ps, filter_string);
1239 1361 if (err)
1240 /* try to see the filter can be applied to which events */
1241 err = replace_preds(system, NULL, ps, filter_string, true);
1242 if (err) {
1243 append_filter_err(ps, system->filter); 1362 append_filter_err(ps, system->filter);
1244 goto out; 1363
1364out:
1365 filter_opstack_clear(ps);
1366 postfix_clear(ps);
1367 kfree(ps);
1368out_unlock:
1369 mutex_unlock(&event_mutex);
1370
1371 return err;
1372}
1373
1374#ifdef CONFIG_PERF_EVENTS
1375
1376void ftrace_profile_free_filter(struct perf_event *event)
1377{
1378 struct event_filter *filter = event->filter;
1379
1380 event->filter = NULL;
1381 __free_preds(filter);
1382}
1383
1384int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1385 char *filter_str)
1386{
1387 int err;
1388 struct event_filter *filter;
1389 struct filter_parse_state *ps;
1390 struct ftrace_event_call *call = NULL;
1391
1392 mutex_lock(&event_mutex);
1393
1394 list_for_each_entry(call, &ftrace_events, list) {
1395 if (call->id == event_id)
1396 break;
1245 } 1397 }
1246 1398
1247 filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET); 1399 err = -EINVAL;
1400 if (!call)
1401 goto out_unlock;
1402
1403 err = -EEXIST;
1404 if (event->filter)
1405 goto out_unlock;
1248 1406
1249 /* really apply the filter to the events */ 1407 filter = __alloc_preds();
1250 err = replace_preds(system, NULL, ps, filter_string, false); 1408 if (IS_ERR(filter)) {
1251 if (err) { 1409 err = PTR_ERR(filter);
1252 append_filter_err(ps, system->filter); 1410 goto out_unlock;
1253 filter_free_subsystem_preds(system, 2);
1254 } 1411 }
1255 1412
1256out: 1413 err = -ENOMEM;
1414 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1415 if (!ps)
1416 goto free_preds;
1417
1418 parse_init(ps, filter_ops, filter_str);
1419 err = filter_parse(ps);
1420 if (err)
1421 goto free_ps;
1422
1423 err = replace_preds(call, filter, ps, filter_str, false);
1424 if (!err)
1425 event->filter = filter;
1426
1427free_ps:
1257 filter_opstack_clear(ps); 1428 filter_opstack_clear(ps);
1258 postfix_clear(ps); 1429 postfix_clear(ps);
1259 kfree(ps); 1430 kfree(ps);
1431
1432free_preds:
1433 if (err)
1434 __free_preds(filter);
1435
1260out_unlock: 1436out_unlock:
1261 mutex_unlock(&event_mutex); 1437 mutex_unlock(&event_mutex);
1262 1438
1263 return err; 1439 return err;
1264} 1440}
1265 1441
1442#endif /* CONFIG_PERF_EVENTS */
1443
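
The string predicates above now match through filter_parse_regex()/filter_build_regex() instead of a plain strncmp(), which is what makes the new OP_GLOB ("~") operator and '*' wildcards usable in filter expressions. How the parser classifies a pattern, following directly from the function added above:

	char buf[] = "*comm*";
	char *search;
	int not;
	enum regex_type type;

	type = filter_parse_regex(buf, strlen(buf), &search, &not);
	/* type == MATCH_MIDDLE_ONLY, search == "comm", not == 0 */

	/* Likewise:  "comm"   -> MATCH_FULL
	 *            "comm*"  -> MATCH_FRONT_ONLY  (trailing '*' stripped)
	 *            "*comm"  -> MATCH_END_ONLY
	 *            "!comm*" -> MATCH_FRONT_ONLY with *not == 1
	 */
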
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 9753fcc61bc5..e091f64ba6ce 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -48,11 +48,11 @@
48struct ____ftrace_##name { \ 48struct ____ftrace_##name { \
49 tstruct \ 49 tstruct \
50}; \ 50}; \
51static void __used ____ftrace_check_##name(void) \ 51static void __always_unused ____ftrace_check_##name(void) \
52{ \ 52{ \
53 struct ____ftrace_##name *__entry = NULL; \ 53 struct ____ftrace_##name *__entry = NULL; \
54 \ 54 \
55 /* force cmpile-time check on F_printk() */ \ 55 /* force compile-time check on F_printk() */ \
56 printk(print); \ 56 printk(print); \
57} 57}
58 58
@@ -62,76 +62,6 @@ static void __used ____ftrace_check_##name(void) \
62 62
63#include "trace_entries.h" 63#include "trace_entries.h"
64 64
65
66#undef __field
67#define __field(type, item) \
68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
69 "offset:%zu;\tsize:%zu;\n", \
70 offsetof(typeof(field), item), \
71 sizeof(field.item)); \
72 if (!ret) \
73 return 0;
74
75#undef __field_desc
76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\n", \
79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item)); \
81 if (!ret) \
82 return 0;
83
84#undef __array
85#define __array(type, item, len) \
86 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
87 "offset:%zu;\tsize:%zu;\n", \
88 offsetof(typeof(field), item), \
89 sizeof(field.item)); \
90 if (!ret) \
91 return 0;
92
93#undef __array_desc
94#define __array_desc(type, container, item, len) \
95 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
96 "offset:%zu;\tsize:%zu;\n", \
97 offsetof(typeof(field), container.item), \
98 sizeof(field.container.item)); \
99 if (!ret) \
100 return 0;
101
102#undef __dynamic_array
103#define __dynamic_array(type, item) \
104 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
105 "offset:%zu;\tsize:0;\n", \
106 offsetof(typeof(field), item)); \
107 if (!ret) \
108 return 0;
109
110#undef F_printk
111#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
112
113#undef __entry
114#define __entry REC
115
116#undef FTRACE_ENTRY
117#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
118static int \
119ftrace_format_##name(struct ftrace_event_call *unused, \
120 struct trace_seq *s) \
121{ \
122 struct struct_name field __attribute__((unused)); \
123 int ret = 0; \
124 \
125 tstruct; \
126 \
127 trace_seq_printf(s, "\nprint fmt: " print); \
128 \
129 return ret; \
130}
131
132#include "trace_entries.h"
133
134
135#undef __field 65#undef __field
136#define __field(type, item) \ 66#define __field(type, item) \
137 ret = trace_define_field(event_call, #type, #item, \ 67 ret = trace_define_field(event_call, #type, #item, \
@@ -156,7 +86,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
156 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 86 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
157 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 87 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
158 offsetof(typeof(field), item), \ 88 offsetof(typeof(field), item), \
159 sizeof(field.item), 0, FILTER_OTHER); \ 89 sizeof(field.item), \
90 is_signed_type(type), FILTER_OTHER); \
160 if (ret) \ 91 if (ret) \
161 return ret; 92 return ret;
162 93
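is_signed_type() is not defined in this file; it is presumably the usual compile-time signedness check defined elsewhere in the tree, along the lines of:

	/* assumption: evaluates to 1 for signed integer types, 0 for unsigned ones */
	#define is_signed_type(type)	(((type)(-1)) < 0)

so the filter core now receives correct signedness information for each field instead of a hard-coded 0.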
@@ -166,13 +97,18 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
166 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 97 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
167 offsetof(typeof(field), \ 98 offsetof(typeof(field), \
168 container.item), \ 99 container.item), \
169 sizeof(field.container.item), 0, \ 100 sizeof(field.container.item), \
170 FILTER_OTHER); \ 101 is_signed_type(type), FILTER_OTHER); \
171 if (ret) \ 102 if (ret) \
172 return ret; 103 return ret;
173 104
174#undef __dynamic_array 105#undef __dynamic_array
175#define __dynamic_array(type, item) 106#define __dynamic_array(type, item) \
107 ret = trace_define_field(event_call, #type, #item, \
108 offsetof(typeof(field), item), \
109 0, is_signed_type(type), FILTER_OTHER);\
110 if (ret) \
111 return ret;
176 112
177#undef FTRACE_ENTRY 113#undef FTRACE_ENTRY
178#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ 114#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
@@ -182,10 +118,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
182 struct struct_name field; \ 118 struct struct_name field; \
183 int ret; \ 119 int ret; \
184 \ 120 \
185 ret = trace_define_common_fields(event_call); \
186 if (ret) \
187 return ret; \
188 \
189 tstruct; \ 121 tstruct; \
190 \ 122 \
191 return ret; \ 123 return ret; \
@@ -193,6 +125,14 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
193 125
194#include "trace_entries.h" 126#include "trace_entries.h"
195 127
128static int ftrace_raw_init_event(struct ftrace_event_call *call)
129{
130 INIT_LIST_HEAD(&call->fields);
131 return 0;
132}
133
134#undef __entry
135#define __entry REC
196 136
197#undef __field 137#undef __field
198#define __field(type, item) 138#define __field(type, item)
@@ -209,9 +149,11 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
209#undef __dynamic_array 149#undef __dynamic_array
210#define __dynamic_array(type, item) 150#define __dynamic_array(type, item)
211 151
152#undef F_printk
153#define F_printk(fmt, args...) #fmt ", " __stringify(args)
154
212#undef FTRACE_ENTRY 155#undef FTRACE_ENTRY
213#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 156#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
214static int ftrace_raw_init_event_##call(void); \
215 \ 157 \
216struct ftrace_event_call __used \ 158struct ftrace_event_call __used \
217__attribute__((__aligned__(4))) \ 159__attribute__((__aligned__(4))) \
@@ -219,14 +161,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
219 .name = #call, \ 161 .name = #call, \
220 .id = type, \ 162 .id = type, \
221 .system = __stringify(TRACE_SYSTEM), \ 163 .system = __stringify(TRACE_SYSTEM), \
222 .raw_init = ftrace_raw_init_event_##call, \ 164 .raw_init = ftrace_raw_init_event, \
223 .show_format = ftrace_format_##call, \ 165 .print_fmt = print, \
224 .define_fields = ftrace_define_fields_##call, \ 166 .define_fields = ftrace_define_fields_##call, \
225}; \ 167}; \
226static int ftrace_raw_init_event_##call(void) \
227{ \
228 INIT_LIST_HEAD(&event_##call.fields); \
229 return 0; \
230} \
231 168
232#include "trace_entries.h" 169#include "trace_entries.h"
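Putting the reworked FTRACE_ENTRY() together: each entry in trace_entries.h now expands to a static event descriptor that shares the common ftrace_raw_init_event() and carries its format as a plain print_fmt string. A sketch of the expansion for a hypothetical entry named "example" (the id and format are invented for illustration):

	struct ftrace_event_call __used
	__attribute__((__aligned__(4)))
	__attribute__((section("_ftrace_events"))) event_example = {
		.name		= "example",
		.id		= TRACE_EXAMPLE,
		.system		= __stringify(TRACE_SYSTEM),
		.raw_init	= ftrace_raw_init_event,
		/* built by the new F_printk(): "<format>", REC->field, ... */
		.print_fmt	= "\"%lx\", REC->ip",
		.define_fields	= ftrace_define_fields_example,
	};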
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 45e6c01b2e4d..3fc2a575664f 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,9 +14,21 @@
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h" 15#include "trace_output.h"
16 16
17struct fgraph_data { 17struct fgraph_cpu_data {
18 pid_t last_pid; 18 pid_t last_pid;
19 int depth; 19 int depth;
20 int ignore;
21 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
22};
23
24struct fgraph_data {
25 struct fgraph_cpu_data *cpu_data;
26
27 /* Place to preserve last processed entry. */
28 struct ftrace_graph_ent_entry ent;
29 struct ftrace_graph_ret_entry ret;
30 int failed;
31 int cpu;
20}; 32};
21 33
22#define TRACE_GRAPH_INDENT 2 34#define TRACE_GRAPH_INDENT 2
@@ -176,7 +188,7 @@ static int __trace_graph_entry(struct trace_array *tr,
176 struct ring_buffer *buffer = tr->buffer; 188 struct ring_buffer *buffer = tr->buffer;
177 struct ftrace_graph_ent_entry *entry; 189 struct ftrace_graph_ent_entry *entry;
178 190
179 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 191 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
180 return 0; 192 return 0;
181 193
182 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, 194 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -201,13 +213,11 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
201 int cpu; 213 int cpu;
202 int pc; 214 int pc;
203 215
204 if (unlikely(!tr))
205 return 0;
206
207 if (!ftrace_trace_task(current)) 216 if (!ftrace_trace_task(current))
208 return 0; 217 return 0;
209 218
210 if (!ftrace_graph_addr(trace->func)) 219 /* trace it when it is-nested-in or is a function enabled. */
220 if (!(trace->depth || ftrace_graph_addr(trace->func)))
211 return 0; 221 return 0;
212 222
213 local_irq_save(flags); 223 local_irq_save(flags);
@@ -220,9 +230,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
220 } else { 230 } else {
221 ret = 0; 231 ret = 0;
222 } 232 }
223 /* Only do the atomic if it is not already set */
224 if (!test_tsk_trace_graph(current))
225 set_tsk_trace_graph(current);
226 233
227 atomic_dec(&data->disabled); 234 atomic_dec(&data->disabled);
228 local_irq_restore(flags); 235 local_irq_restore(flags);
@@ -240,7 +247,7 @@ static void __trace_graph_return(struct trace_array *tr,
240 struct ring_buffer *buffer = tr->buffer; 247 struct ring_buffer *buffer = tr->buffer;
241 struct ftrace_graph_ret_entry *entry; 248 struct ftrace_graph_ret_entry *entry;
242 249
243 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 250 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
244 return; 251 return;
245 252
246 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, 253 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@ -270,17 +277,24 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
270 pc = preempt_count(); 277 pc = preempt_count();
271 __trace_graph_return(tr, trace, flags, pc); 278 __trace_graph_return(tr, trace, flags, pc);
272 } 279 }
273 if (!trace->depth)
274 clear_tsk_trace_graph(current);
275 atomic_dec(&data->disabled); 280 atomic_dec(&data->disabled);
276 local_irq_restore(flags); 281 local_irq_restore(flags);
277} 282}
278 283
284void set_graph_array(struct trace_array *tr)
285{
286 graph_array = tr;
287
288 /* Make graph_array visible before we start tracing */
289
290 smp_mb();
291}
292
279static int graph_trace_init(struct trace_array *tr) 293static int graph_trace_init(struct trace_array *tr)
280{ 294{
281 int ret; 295 int ret;
282 296
283 graph_array = tr; 297 set_graph_array(tr);
284 ret = register_ftrace_graph(&trace_graph_return, 298 ret = register_ftrace_graph(&trace_graph_return,
285 &trace_graph_entry); 299 &trace_graph_entry);
286 if (ret) 300 if (ret)
@@ -290,11 +304,6 @@ static int graph_trace_init(struct trace_array *tr)
290 return 0; 304 return 0;
291} 305}
292 306
293void set_graph_array(struct trace_array *tr)
294{
295 graph_array = tr;
296}
297
298static void graph_trace_reset(struct trace_array *tr) 307static void graph_trace_reset(struct trace_array *tr)
299{ 308{
300 tracing_stop_cmdline_record(); 309 tracing_stop_cmdline_record();
@@ -384,7 +393,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
384 if (!data) 393 if (!data)
385 return TRACE_TYPE_HANDLED; 394 return TRACE_TYPE_HANDLED;
386 395
387 last_pid = &(per_cpu_ptr(data, cpu)->last_pid); 396 last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
388 397
389 if (*last_pid == pid) 398 if (*last_pid == pid)
390 return TRACE_TYPE_HANDLED; 399 return TRACE_TYPE_HANDLED;
@@ -435,26 +444,49 @@ static struct ftrace_graph_ret_entry *
435get_return_for_leaf(struct trace_iterator *iter, 444get_return_for_leaf(struct trace_iterator *iter,
436 struct ftrace_graph_ent_entry *curr) 445 struct ftrace_graph_ent_entry *curr)
437{ 446{
438 struct ring_buffer_iter *ring_iter; 447 struct fgraph_data *data = iter->private;
448 struct ring_buffer_iter *ring_iter = NULL;
439 struct ring_buffer_event *event; 449 struct ring_buffer_event *event;
440 struct ftrace_graph_ret_entry *next; 450 struct ftrace_graph_ret_entry *next;
441 451
442 ring_iter = iter->buffer_iter[iter->cpu]; 452 /*
453 * If the previous output failed to write to the seq buffer,
454 * then we just reuse the data from before.
455 */
456 if (data && data->failed) {
457 curr = &data->ent;
458 next = &data->ret;
459 } else {
443 460
444 /* First peek to compare current entry and the next one */ 461 ring_iter = iter->buffer_iter[iter->cpu];
445 if (ring_iter) 462
446 event = ring_buffer_iter_peek(ring_iter, NULL); 463 /* First peek to compare current entry and the next one */
447 else { 464 if (ring_iter)
448 /* We need to consume the current entry to see the next one */ 465 event = ring_buffer_iter_peek(ring_iter, NULL);
449 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); 466 else {
450 event = ring_buffer_peek(iter->tr->buffer, iter->cpu, 467 /*
451 NULL); 468 * We need to consume the current entry to see
452 } 469 * the next one.
470 */
471 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
472 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
473 NULL);
474 }
453 475
454 if (!event) 476 if (!event)
455 return NULL; 477 return NULL;
456 478
457 next = ring_buffer_event_data(event); 479 next = ring_buffer_event_data(event);
480
481 if (data) {
482 /*
483 * Save current and next entries for later reference
484 * if the output fails.
485 */
486 data->ent = *curr;
487 data->ret = *next;
488 }
489 }
458 490
459 if (next->ent.type != TRACE_GRAPH_RET) 491 if (next->ent.type != TRACE_GRAPH_RET)
460 return NULL; 492 return NULL;
@@ -639,15 +671,21 @@ print_graph_entry_leaf(struct trace_iterator *iter,
639 duration = graph_ret->rettime - graph_ret->calltime; 671 duration = graph_ret->rettime - graph_ret->calltime;
640 672
641 if (data) { 673 if (data) {
674 struct fgraph_cpu_data *cpu_data;
642 int cpu = iter->cpu; 675 int cpu = iter->cpu;
643 int *depth = &(per_cpu_ptr(data, cpu)->depth); 676
677 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
644 678
645 /* 679 /*
646 * Comments display at + 1 to depth. Since 680 * Comments display at + 1 to depth. Since
647 * this is a leaf function, keep the comments 681 * this is a leaf function, keep the comments
648 * equal to this depth. 682 * equal to this depth.
649 */ 683 */
650 *depth = call->depth - 1; 684 cpu_data->depth = call->depth - 1;
685
686 /* No need to keep this function around for this depth */
687 if (call->depth < FTRACE_RETFUNC_DEPTH)
688 cpu_data->enter_funcs[call->depth] = 0;
651 } 689 }
652 690
653 /* Overhead */ 691 /* Overhead */
@@ -687,10 +725,15 @@ print_graph_entry_nested(struct trace_iterator *iter,
687 int i; 725 int i;
688 726
689 if (data) { 727 if (data) {
728 struct fgraph_cpu_data *cpu_data;
690 int cpu = iter->cpu; 729 int cpu = iter->cpu;
691 int *depth = &(per_cpu_ptr(data, cpu)->depth);
692 730
693 *depth = call->depth; 731 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
732 cpu_data->depth = call->depth;
733
734 /* Save this function pointer to see if the exit matches */
735 if (call->depth < FTRACE_RETFUNC_DEPTH)
736 cpu_data->enter_funcs[call->depth] = call->func;
694 } 737 }
695 738
696 /* No overhead */ 739 /* No overhead */
@@ -782,19 +825,34 @@ static enum print_line_t
782print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 825print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
783 struct trace_iterator *iter) 826 struct trace_iterator *iter)
784{ 827{
785 int cpu = iter->cpu; 828 struct fgraph_data *data = iter->private;
786 struct ftrace_graph_ent *call = &field->graph_ent; 829 struct ftrace_graph_ent *call = &field->graph_ent;
787 struct ftrace_graph_ret_entry *leaf_ret; 830 struct ftrace_graph_ret_entry *leaf_ret;
831 static enum print_line_t ret;
832 int cpu = iter->cpu;
788 833
789 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) 834 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
790 return TRACE_TYPE_PARTIAL_LINE; 835 return TRACE_TYPE_PARTIAL_LINE;
791 836
792 leaf_ret = get_return_for_leaf(iter, field); 837 leaf_ret = get_return_for_leaf(iter, field);
793 if (leaf_ret) 838 if (leaf_ret)
794 return print_graph_entry_leaf(iter, field, leaf_ret, s); 839 ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
795 else 840 else
796 return print_graph_entry_nested(iter, field, s, cpu); 841 ret = print_graph_entry_nested(iter, field, s, cpu);
842
843 if (data) {
844 /*
845 * If we failed to write our output, then we need to make
846 * note of it. Because we already consumed our entry.
847 */
848 if (s->full) {
849 data->failed = 1;
850 data->cpu = cpu;
851 } else
852 data->failed = 0;
853 }
797 854
855 return ret;
798} 856}
799 857
800static enum print_line_t 858static enum print_line_t
@@ -805,19 +863,28 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
805 struct fgraph_data *data = iter->private; 863 struct fgraph_data *data = iter->private;
806 pid_t pid = ent->pid; 864 pid_t pid = ent->pid;
807 int cpu = iter->cpu; 865 int cpu = iter->cpu;
866 int func_match = 1;
808 int ret; 867 int ret;
809 int i; 868 int i;
810 869
811 if (data) { 870 if (data) {
871 struct fgraph_cpu_data *cpu_data;
812 int cpu = iter->cpu; 872 int cpu = iter->cpu;
813 int *depth = &(per_cpu_ptr(data, cpu)->depth); 873
874 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
814 875
815 /* 876 /*
816 * Comments display at + 1 to depth. This is the 877 * Comments display at + 1 to depth. This is the
817 * return from a function, we now want the comments 878 * return from a function, we now want the comments
818 * to display at the same level of the bracket. 879 * to display at the same level of the bracket.
819 */ 880 */
820 *depth = trace->depth - 1; 881 cpu_data->depth = trace->depth - 1;
882
883 if (trace->depth < FTRACE_RETFUNC_DEPTH) {
884 if (cpu_data->enter_funcs[trace->depth] != trace->func)
885 func_match = 0;
886 cpu_data->enter_funcs[trace->depth] = 0;
887 }
821 } 888 }
822 889
823 if (print_graph_prologue(iter, s, 0, 0)) 890 if (print_graph_prologue(iter, s, 0, 0))
@@ -842,9 +909,21 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
842 return TRACE_TYPE_PARTIAL_LINE; 909 return TRACE_TYPE_PARTIAL_LINE;
843 } 910 }
844 911
845 ret = trace_seq_printf(s, "}\n"); 912 /*
846 if (!ret) 913 * If the return function does not have a matching entry,
847 return TRACE_TYPE_PARTIAL_LINE; 914 * then the entry was lost. Instead of just printing
915 * the '}' and letting the user guess what function this
916 * belongs to, write out the function name.
917 */
918 if (func_match) {
919 ret = trace_seq_printf(s, "}\n");
920 if (!ret)
921 return TRACE_TYPE_PARTIAL_LINE;
922 } else {
923 ret = trace_seq_printf(s, "} (%ps)\n", (void *)trace->func);
924 if (!ret)
925 return TRACE_TYPE_PARTIAL_LINE;
926 }
848 927
849 /* Overrun */ 928 /* Overrun */
850 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { 929 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
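The visible effect of the func_match bookkeeping above: when the matching entry record was lost, the closing brace now names the function it belongs to. Illustrative function_graph output (CPU, duration and function name are invented):

	 3)   1.052 us    |  } (kmem_cache_alloc)

instead of an anonymous "}".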
@@ -873,7 +952,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
873 int i; 952 int i;
874 953
875 if (data) 954 if (data)
876 depth = per_cpu_ptr(data, iter->cpu)->depth; 955 depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
877 956
878 if (print_graph_prologue(iter, s, 0, 0)) 957 if (print_graph_prologue(iter, s, 0, 0))
879 return TRACE_TYPE_PARTIAL_LINE; 958 return TRACE_TYPE_PARTIAL_LINE;
@@ -941,8 +1020,33 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
941enum print_line_t 1020enum print_line_t
942print_graph_function(struct trace_iterator *iter) 1021print_graph_function(struct trace_iterator *iter)
943{ 1022{
1023 struct ftrace_graph_ent_entry *field;
1024 struct fgraph_data *data = iter->private;
944 struct trace_entry *entry = iter->ent; 1025 struct trace_entry *entry = iter->ent;
945 struct trace_seq *s = &iter->seq; 1026 struct trace_seq *s = &iter->seq;
1027 int cpu = iter->cpu;
1028 int ret;
1029
1030 if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
1031 per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
1032 return TRACE_TYPE_HANDLED;
1033 }
1034
1035 /*
1036 * If the last output failed, there's a possibility we need
1037 * to print out the missing entry which would never go out.
1038 */
1039 if (data && data->failed) {
1040 field = &data->ent;
1041 iter->cpu = data->cpu;
1042 ret = print_graph_entry(field, s, iter);
1043 if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
1044 per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
1045 ret = TRACE_TYPE_NO_CONSUME;
1046 }
1047 iter->cpu = cpu;
1048 return ret;
1049 }
946 1050
947 switch (entry->type) { 1051 switch (entry->type) {
948 case TRACE_GRAPH_ENT: { 1052 case TRACE_GRAPH_ENT: {
@@ -952,7 +1056,7 @@ print_graph_function(struct trace_iterator *iter)
952 * sizeof(struct ftrace_graph_ent_entry) is very small, 1056 * sizeof(struct ftrace_graph_ent_entry) is very small,
953 * it can be safely saved at the stack. 1057 * it can be safely saved at the stack.
954 */ 1058 */
955 struct ftrace_graph_ent_entry *field, saved; 1059 struct ftrace_graph_ent_entry saved;
956 trace_assign_type(field, entry); 1060 trace_assign_type(field, entry);
957 saved = *field; 1061 saved = *field;
958 return print_graph_entry(&saved, s, iter); 1062 return print_graph_entry(&saved, s, iter);
@@ -1030,31 +1134,54 @@ static void print_graph_headers(struct seq_file *s)
1030static void graph_trace_open(struct trace_iterator *iter) 1134static void graph_trace_open(struct trace_iterator *iter)
1031{ 1135{
1032 /* pid and depth on the last trace processed */ 1136 /* pid and depth on the last trace processed */
1033 struct fgraph_data *data = alloc_percpu(struct fgraph_data); 1137 struct fgraph_data *data;
1034 int cpu; 1138 int cpu;
1035 1139
1140 iter->private = NULL;
1141
1142 data = kzalloc(sizeof(*data), GFP_KERNEL);
1036 if (!data) 1143 if (!data)
1037 pr_warning("function graph tracer: not enough memory\n"); 1144 goto out_err;
1038 else 1145
1039 for_each_possible_cpu(cpu) { 1146 data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
1040 pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid); 1147 if (!data->cpu_data)
1041 int *depth = &(per_cpu_ptr(data, cpu)->depth); 1148 goto out_err_free;
1042 *pid = -1; 1149
1043 *depth = 0; 1150 for_each_possible_cpu(cpu) {
1044 } 1151 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1152 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1153 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1154 *pid = -1;
1155 *depth = 0;
1156 *ignore = 0;
1157 }
1045 1158
1046 iter->private = data; 1159 iter->private = data;
1160
1161 return;
1162
1163 out_err_free:
1164 kfree(data);
1165 out_err:
1166 pr_warning("function graph tracer: not enough memory\n");
1047} 1167}
1048 1168
1049static void graph_trace_close(struct trace_iterator *iter) 1169static void graph_trace_close(struct trace_iterator *iter)
1050{ 1170{
1051 free_percpu(iter->private); 1171 struct fgraph_data *data = iter->private;
1172
1173 if (data) {
1174 free_percpu(data->cpu_data);
1175 kfree(data);
1176 }
1052} 1177}
1053 1178
1054static struct tracer graph_trace __read_mostly = { 1179static struct tracer graph_trace __read_mostly = {
1055 .name = "function_graph", 1180 .name = "function_graph",
1056 .open = graph_trace_open, 1181 .open = graph_trace_open,
1182 .pipe_open = graph_trace_open,
1057 .close = graph_trace_close, 1183 .close = graph_trace_close,
1184 .pipe_close = graph_trace_close,
1058 .wait_pipe = poll_wait_pipe, 1185 .wait_pipe = poll_wait_pipe,
1059 .init = graph_trace_init, 1186 .init = graph_trace_init,
1060 .reset = graph_trace_reset, 1187 .reset = graph_trace_reset,
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 69543a905cd5..7b97000745f5 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -20,10 +20,10 @@
20 20
21#define BTS_BUFFER_SIZE (1 << 13) 21#define BTS_BUFFER_SIZE (1 << 13)
22 22
23static DEFINE_PER_CPU(struct bts_tracer *, tracer); 23static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer);
24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); 24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer);
25 25
26#define this_tracer per_cpu(tracer, smp_processor_id()) 26#define this_tracer per_cpu(hwb_tracer, smp_processor_id())
27 27
28static int trace_hw_branches_enabled __read_mostly; 28static int trace_hw_branches_enabled __read_mostly;
29static int trace_hw_branches_suspended __read_mostly; 29static int trace_hw_branches_suspended __read_mostly;
@@ -32,12 +32,13 @@ static struct trace_array *hw_branch_trace __read_mostly;
32 32
33static void bts_trace_init_cpu(int cpu) 33static void bts_trace_init_cpu(int cpu)
34{ 34{
35 per_cpu(tracer, cpu) = 35 per_cpu(hwb_tracer, cpu) =
36 ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE, 36 ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu),
37 NULL, (size_t)-1, BTS_KERNEL); 37 BTS_BUFFER_SIZE, NULL, (size_t)-1,
38 BTS_KERNEL);
38 39
39 if (IS_ERR(per_cpu(tracer, cpu))) 40 if (IS_ERR(per_cpu(hwb_tracer, cpu)))
40 per_cpu(tracer, cpu) = NULL; 41 per_cpu(hwb_tracer, cpu) = NULL;
41} 42}
42 43
43static int bts_trace_init(struct trace_array *tr) 44static int bts_trace_init(struct trace_array *tr)
@@ -51,7 +52,7 @@ static int bts_trace_init(struct trace_array *tr)
51 for_each_online_cpu(cpu) { 52 for_each_online_cpu(cpu) {
52 bts_trace_init_cpu(cpu); 53 bts_trace_init_cpu(cpu);
53 54
54 if (likely(per_cpu(tracer, cpu))) 55 if (likely(per_cpu(hwb_tracer, cpu)))
55 trace_hw_branches_enabled = 1; 56 trace_hw_branches_enabled = 1;
56 } 57 }
57 trace_hw_branches_suspended = 0; 58 trace_hw_branches_suspended = 0;
@@ -67,9 +68,9 @@ static void bts_trace_reset(struct trace_array *tr)
67 68
68 get_online_cpus(); 69 get_online_cpus();
69 for_each_online_cpu(cpu) { 70 for_each_online_cpu(cpu) {
70 if (likely(per_cpu(tracer, cpu))) { 71 if (likely(per_cpu(hwb_tracer, cpu))) {
71 ds_release_bts(per_cpu(tracer, cpu)); 72 ds_release_bts(per_cpu(hwb_tracer, cpu));
72 per_cpu(tracer, cpu) = NULL; 73 per_cpu(hwb_tracer, cpu) = NULL;
73 } 74 }
74 } 75 }
75 trace_hw_branches_enabled = 0; 76 trace_hw_branches_enabled = 0;
@@ -83,8 +84,8 @@ static void bts_trace_start(struct trace_array *tr)
83 84
84 get_online_cpus(); 85 get_online_cpus();
85 for_each_online_cpu(cpu) 86 for_each_online_cpu(cpu)
86 if (likely(per_cpu(tracer, cpu))) 87 if (likely(per_cpu(hwb_tracer, cpu)))
87 ds_resume_bts(per_cpu(tracer, cpu)); 88 ds_resume_bts(per_cpu(hwb_tracer, cpu));
88 trace_hw_branches_suspended = 0; 89 trace_hw_branches_suspended = 0;
89 put_online_cpus(); 90 put_online_cpus();
90} 91}
@@ -95,8 +96,8 @@ static void bts_trace_stop(struct trace_array *tr)
95 96
96 get_online_cpus(); 97 get_online_cpus();
97 for_each_online_cpu(cpu) 98 for_each_online_cpu(cpu)
98 if (likely(per_cpu(tracer, cpu))) 99 if (likely(per_cpu(hwb_tracer, cpu)))
99 ds_suspend_bts(per_cpu(tracer, cpu)); 100 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
100 trace_hw_branches_suspended = 1; 101 trace_hw_branches_suspended = 1;
101 put_online_cpus(); 102 put_online_cpus();
102} 103}
@@ -114,16 +115,16 @@ static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
114 bts_trace_init_cpu(cpu); 115 bts_trace_init_cpu(cpu);
115 116
116 if (trace_hw_branches_suspended && 117 if (trace_hw_branches_suspended &&
117 likely(per_cpu(tracer, cpu))) 118 likely(per_cpu(hwb_tracer, cpu)))
118 ds_suspend_bts(per_cpu(tracer, cpu)); 119 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
119 } 120 }
120 break; 121 break;
121 122
122 case CPU_DOWN_PREPARE: 123 case CPU_DOWN_PREPARE:
123 /* The notification is sent with interrupts enabled. */ 124 /* The notification is sent with interrupts enabled. */
124 if (likely(per_cpu(tracer, cpu))) { 125 if (likely(per_cpu(hwb_tracer, cpu))) {
125 ds_release_bts(per_cpu(tracer, cpu)); 126 ds_release_bts(per_cpu(hwb_tracer, cpu));
126 per_cpu(tracer, cpu) = NULL; 127 per_cpu(hwb_tracer, cpu) = NULL;
127 } 128 }
128 } 129 }
129 130
@@ -258,8 +259,8 @@ static void trace_bts_prepare(struct trace_iterator *iter)
258 259
259 get_online_cpus(); 260 get_online_cpus();
260 for_each_online_cpu(cpu) 261 for_each_online_cpu(cpu)
261 if (likely(per_cpu(tracer, cpu))) 262 if (likely(per_cpu(hwb_tracer, cpu)))
262 ds_suspend_bts(per_cpu(tracer, cpu)); 263 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
263 /* 264 /*
264 * We need to collect the trace on the respective cpu since ftrace 265 * We need to collect the trace on the respective cpu since ftrace
265 * implicitly adds the record for the current cpu. 266 * implicitly adds the record for the current cpu.
@@ -268,8 +269,8 @@ static void trace_bts_prepare(struct trace_iterator *iter)
268 on_each_cpu(trace_bts_cpu, iter->tr, 1); 269 on_each_cpu(trace_bts_cpu, iter->tr, 1);
269 270
270 for_each_online_cpu(cpu) 271 for_each_online_cpu(cpu)
271 if (likely(per_cpu(tracer, cpu))) 272 if (likely(per_cpu(hwb_tracer, cpu)))
272 ds_resume_bts(per_cpu(tracer, cpu)); 273 ds_resume_bts(per_cpu(hwb_tracer, cpu));
273 put_online_cpus(); 274 put_online_cpus();
274} 275}
275 276
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 3aa7eaa2114c..2974bc7538c7 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -151,6 +151,8 @@ check_critical_timing(struct trace_array *tr,
151 goto out_unlock; 151 goto out_unlock;
152 152
153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
154 /* Skip 5 functions to get to the irq/preempt enable function */
155 __trace_stack(tr, flags, 5, pc);
154 156
155 if (data->critical_sequence != max_sequence) 157 if (data->critical_sequence != max_sequence)
156 goto out_unlock; 158 goto out_unlock;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
new file mode 100644
index 000000000000..505c92273b1a
--- /dev/null
+++ b/kernel/trace/trace_kprobe.c
@@ -0,0 +1,1487 @@
1/*
2 * Kprobes-based tracing events
3 *
4 * Created by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/module.h>
21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32
33#include "trace.h"
34#include "trace_output.h"
35
36#define MAX_TRACE_ARGS 128
37#define MAX_ARGSTR_LEN 63
38#define MAX_EVENT_NAME_LEN 64
39#define KPROBE_EVENT_SYSTEM "kprobes"
40
41/* Reserved field names */
42#define FIELD_STRING_IP "__probe_ip"
43#define FIELD_STRING_NARGS "__probe_nargs"
44#define FIELD_STRING_RETIP "__probe_ret_ip"
45#define FIELD_STRING_FUNC "__probe_func"
46
47const char *reserved_field_names[] = {
48 "common_type",
49 "common_flags",
50 "common_preempt_count",
51 "common_pid",
52 "common_tgid",
53 "common_lock_depth",
54 FIELD_STRING_IP,
55 FIELD_STRING_NARGS,
56 FIELD_STRING_RETIP,
57 FIELD_STRING_FUNC,
58};
59
60struct fetch_func {
61 unsigned long (*func)(struct pt_regs *, void *);
62 void *data;
63};
64
65static __kprobes unsigned long call_fetch(struct fetch_func *f,
66 struct pt_regs *regs)
67{
68 return f->func(regs, f->data);
69}
70
71/* fetch handlers */
72static __kprobes unsigned long fetch_register(struct pt_regs *regs,
73 void *offset)
74{
75 return regs_get_register(regs, (unsigned int)((unsigned long)offset));
76}
77
78static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
79 void *num)
80{
81 return regs_get_kernel_stack_nth(regs,
82 (unsigned int)((unsigned long)num));
83}
84
85static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
86{
87 unsigned long retval;
88
89 if (probe_kernel_address(addr, retval))
90 return 0;
91 return retval;
92}
93
94static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
95 void *dummy)
96{
97 return regs_return_value(regs);
98}
99
100static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
101 void *dummy)
102{
103 return kernel_stack_pointer(regs);
104}
105
106/* Memory fetching by symbol */
107struct symbol_cache {
108 char *symbol;
109 long offset;
110 unsigned long addr;
111};
112
113static unsigned long update_symbol_cache(struct symbol_cache *sc)
114{
115 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
116 if (sc->addr)
117 sc->addr += sc->offset;
118 return sc->addr;
119}
120
121static void free_symbol_cache(struct symbol_cache *sc)
122{
123 kfree(sc->symbol);
124 kfree(sc);
125}
126
127static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
128{
129 struct symbol_cache *sc;
130
131 if (!sym || strlen(sym) == 0)
132 return NULL;
133 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
134 if (!sc)
135 return NULL;
136
137 sc->symbol = kstrdup(sym, GFP_KERNEL);
138 if (!sc->symbol) {
139 kfree(sc);
140 return NULL;
141 }
142 sc->offset = offset;
143
144 update_symbol_cache(sc);
145 return sc;
146}
147
148static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
149{
150 struct symbol_cache *sc = data;
151
152 if (sc->addr)
153 return fetch_memory(regs, (void *)sc->addr);
154 else
155 return 0;
156}
157
158/* Special indirect memory access interface */
159struct indirect_fetch_data {
160 struct fetch_func orig;
161 long offset;
162};
163
164static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
165{
166 struct indirect_fetch_data *ind = data;
167 unsigned long addr;
168
169 addr = call_fetch(&ind->orig, regs);
170 if (addr) {
171 addr += ind->offset;
172 return fetch_memory(regs, (void *)addr);
173 } else
174 return 0;
175}
176
177static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
178{
179 if (data->orig.func == fetch_indirect)
180 free_indirect_fetch_data(data->orig.data);
181 else if (data->orig.func == fetch_symbol)
182 free_symbol_cache(data->orig.data);
183 kfree(data);
184}
185
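As an illustration of how these pieces compose (the register name, its pt_regs offset and the constant offset are hypothetical x86 examples), an argument that should read memory at "register value + offset" chains fetch_register behind fetch_indirect:

	struct fetch_func *ff = &tp->args[i].fetch;	/* as in parse_probe_arg() below */
	struct indirect_fetch_data *id;

	id = kzalloc(sizeof(*id), GFP_KERNEL);		/* NULL check omitted for brevity */
	id->offset    = 4;
	id->orig.func = fetch_register;
	id->orig.data = (void *)(unsigned long)regs_query_register_offset("ax");

	ff->func = fetch_indirect;
	ff->data = id;
	/* at probe hit: call_fetch(ff, regs) == fetch_memory(regs, <%ax value> + 4) */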
186/**
187 * Kprobe event core functions
188 */
189
190struct probe_arg {
191 struct fetch_func fetch;
192 const char *name;
193};
194
195/* Flags for trace_probe */
196#define TP_FLAG_TRACE 1
197#define TP_FLAG_PROFILE 2
198
199struct trace_probe {
200 struct list_head list;
201 struct kretprobe rp; /* Use rp.kp for kprobe use */
202 unsigned long nhit;
203 unsigned int flags; /* For TP_FLAG_* */
204 const char *symbol; /* symbol name */
205 struct ftrace_event_call call;
206 struct trace_event event;
207 unsigned int nr_args;
208 struct probe_arg args[];
209};
210
211#define SIZEOF_TRACE_PROBE(n) \
212 (offsetof(struct trace_probe, args) + \
213 (sizeof(struct probe_arg) * (n)))
214
215static __kprobes int probe_is_return(struct trace_probe *tp)
216{
217 return tp->rp.handler != NULL;
218}
219
220static __kprobes const char *probe_symbol(struct trace_probe *tp)
221{
222 return tp->symbol ? tp->symbol : "unknown";
223}
224
225static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
226{
227 int ret = -EINVAL;
228
229 if (ff->func == fetch_register) {
230 const char *name;
231 name = regs_query_register_name((unsigned int)((long)ff->data));
232 ret = snprintf(buf, n, "%%%s", name);
233 } else if (ff->func == fetch_stack)
234 ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data);
235 else if (ff->func == fetch_memory)
236 ret = snprintf(buf, n, "@0x%p", ff->data);
237 else if (ff->func == fetch_symbol) {
238 struct symbol_cache *sc = ff->data;
239 if (sc->offset)
240 ret = snprintf(buf, n, "@%s%+ld", sc->symbol,
241 sc->offset);
242 else
243 ret = snprintf(buf, n, "@%s", sc->symbol);
244 } else if (ff->func == fetch_retvalue)
245 ret = snprintf(buf, n, "$retval");
246 else if (ff->func == fetch_stack_address)
247 ret = snprintf(buf, n, "$stack");
248 else if (ff->func == fetch_indirect) {
249 struct indirect_fetch_data *id = ff->data;
250 size_t l = 0;
251 ret = snprintf(buf, n, "%+ld(", id->offset);
252 if (ret >= n)
253 goto end;
254 l += ret;
255 ret = probe_arg_string(buf + l, n - l, &id->orig);
256 if (ret < 0)
257 goto end;
258 l += ret;
259 ret = snprintf(buf + l, n - l, ")");
260 ret += l;
261 }
262end:
263 if (ret >= n)
264 return -ENOSPC;
265 return ret;
266}
267
268static int register_probe_event(struct trace_probe *tp);
269static void unregister_probe_event(struct trace_probe *tp);
270
271static DEFINE_MUTEX(probe_lock);
272static LIST_HEAD(probe_list);
273
274static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
275static int kretprobe_dispatcher(struct kretprobe_instance *ri,
276 struct pt_regs *regs);
277
278/* Check the name is good for event/group */
279static int check_event_name(const char *name)
280{
281 if (!isalpha(*name) && *name != '_')
282 return 0;
283 while (*++name != '\0') {
284 if (!isalpha(*name) && !isdigit(*name) && *name != '_')
285 return 0;
286 }
287 return 1;
288}
289
290/*
291 * Allocate new trace_probe and initialize it (including kprobes).
292 */
293static struct trace_probe *alloc_trace_probe(const char *group,
294 const char *event,
295 void *addr,
296 const char *symbol,
297 unsigned long offs,
298 int nargs, int is_return)
299{
300 struct trace_probe *tp;
301 int ret = -ENOMEM;
302
303 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
304 if (!tp)
305 return ERR_PTR(ret);
306
307 if (symbol) {
308 tp->symbol = kstrdup(symbol, GFP_KERNEL);
309 if (!tp->symbol)
310 goto error;
311 tp->rp.kp.symbol_name = tp->symbol;
312 tp->rp.kp.offset = offs;
313 } else
314 tp->rp.kp.addr = addr;
315
316 if (is_return)
317 tp->rp.handler = kretprobe_dispatcher;
318 else
319 tp->rp.kp.pre_handler = kprobe_dispatcher;
320
321 if (!event || !check_event_name(event)) {
322 ret = -EINVAL;
323 goto error;
324 }
325
326 tp->call.name = kstrdup(event, GFP_KERNEL);
327 if (!tp->call.name)
328 goto error;
329
330 if (!group || !check_event_name(group)) {
331 ret = -EINVAL;
332 goto error;
333 }
334
335 tp->call.system = kstrdup(group, GFP_KERNEL);
336 if (!tp->call.system)
337 goto error;
338
339 INIT_LIST_HEAD(&tp->list);
340 return tp;
341error:
342 kfree(tp->call.name);
343 kfree(tp->symbol);
344 kfree(tp);
345 return ERR_PTR(ret);
346}
347
348static void free_probe_arg(struct probe_arg *arg)
349{
350 if (arg->fetch.func == fetch_symbol)
351 free_symbol_cache(arg->fetch.data);
352 else if (arg->fetch.func == fetch_indirect)
353 free_indirect_fetch_data(arg->fetch.data);
354 kfree(arg->name);
355}
356
357static void free_trace_probe(struct trace_probe *tp)
358{
359 int i;
360
361 for (i = 0; i < tp->nr_args; i++)
362 free_probe_arg(&tp->args[i]);
363
364 kfree(tp->call.system);
365 kfree(tp->call.name);
366 kfree(tp->symbol);
367 kfree(tp);
368}
369
370static struct trace_probe *find_probe_event(const char *event,
371 const char *group)
372{
373 struct trace_probe *tp;
374
375 list_for_each_entry(tp, &probe_list, list)
376 if (strcmp(tp->call.name, event) == 0 &&
377 strcmp(tp->call.system, group) == 0)
378 return tp;
379 return NULL;
380}
381
382/* Unregister a trace_probe and probe_event: call with locking probe_lock */
383static void unregister_trace_probe(struct trace_probe *tp)
384{
385 if (probe_is_return(tp))
386 unregister_kretprobe(&tp->rp);
387 else
388 unregister_kprobe(&tp->rp.kp);
389 list_del(&tp->list);
390 unregister_probe_event(tp);
391}
392
393/* Register a trace_probe and probe_event */
394static int register_trace_probe(struct trace_probe *tp)
395{
396 struct trace_probe *old_tp;
397 int ret;
398
399 mutex_lock(&probe_lock);
400
401 /* register as an event */
402 old_tp = find_probe_event(tp->call.name, tp->call.system);
403 if (old_tp) {
404 /* delete old event */
405 unregister_trace_probe(old_tp);
406 free_trace_probe(old_tp);
407 }
408 ret = register_probe_event(tp);
409 if (ret) {
410 pr_warning("Failed to register probe event(%d)\n", ret);

411 goto end;
412 }
413
414 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
415 if (probe_is_return(tp))
416 ret = register_kretprobe(&tp->rp);
417 else
418 ret = register_kprobe(&tp->rp.kp);
419
420 if (ret) {
421 pr_warning("Could not insert probe(%d)\n", ret);
422 if (ret == -EILSEQ) {
423 pr_warning("Probing address(0x%p) is not an "
424 "instruction boundary.\n",
425 tp->rp.kp.addr);
426 ret = -EINVAL;
427 }
428 unregister_probe_event(tp);
429 } else
430 list_add_tail(&tp->list, &probe_list);
431end:
432 mutex_unlock(&probe_lock);
433 return ret;
434}
435
436/* Split symbol and offset. */
437static int split_symbol_offset(char *symbol, unsigned long *offset)
438{
439 char *tmp;
440 int ret;
441
442 if (!offset)
443 return -EINVAL;
444
445 tmp = strchr(symbol, '+');
446 if (tmp) {
447 /* skip sign because strict_strtol doesn't accept '+' */
448 ret = strict_strtoul(tmp + 1, 0, offset);
449 if (ret)
450 return ret;
451 *tmp = '\0';
452 } else
453 *offset = 0;
454 return 0;
455}
456
457#define PARAM_MAX_ARGS 16
458#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
459
460static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
461{
462 int ret = 0;
463 unsigned long param;
464
465 if (strcmp(arg, "retval") == 0) {
466 if (is_return) {
467 ff->func = fetch_retvalue;
468 ff->data = NULL;
469 } else
470 ret = -EINVAL;
471 } else if (strncmp(arg, "stack", 5) == 0) {
472 if (arg[5] == '\0') {
473 ff->func = fetch_stack_address;
474 ff->data = NULL;
475 } else if (isdigit(arg[5])) {
476 ret = strict_strtoul(arg + 5, 10, &param);
477 if (ret || param > PARAM_MAX_STACK)
478 ret = -EINVAL;
479 else {
480 ff->func = fetch_stack;
481 ff->data = (void *)param;
482 }
483 } else
484 ret = -EINVAL;
485 } else
486 ret = -EINVAL;
487 return ret;
488}
489
490/* Recursive argument parser */
491static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
492{
493 int ret = 0;
494 unsigned long param;
495 long offset;
496 char *tmp;
497
498 switch (arg[0]) {
499 case '$':
500 ret = parse_probe_vars(arg + 1, ff, is_return);
501 break;
502 case '%': /* named register */
503 ret = regs_query_register_offset(arg + 1);
504 if (ret >= 0) {
505 ff->func = fetch_register;
506 ff->data = (void *)(unsigned long)ret;
507 ret = 0;
508 }
509 break;
510 case '@': /* memory or symbol */
511 if (isdigit(arg[1])) {
512 ret = strict_strtoul(arg + 1, 0, &param);
513 if (ret)
514 break;
515 ff->func = fetch_memory;
516 ff->data = (void *)param;
517 } else {
518 ret = split_symbol_offset(arg + 1, &offset);
519 if (ret)
520 break;
521 ff->data = alloc_symbol_cache(arg + 1, offset);
522 if (ff->data)
523 ff->func = fetch_symbol;
524 else
525 ret = -EINVAL;
526 }
527 break;
528 case '+': /* indirect memory */
529 case '-':
530 tmp = strchr(arg, '(');
531 if (!tmp) {
532 ret = -EINVAL;
533 break;
534 }
535 *tmp = '\0';
536 ret = strict_strtol(arg + 1, 0, &offset);
537 if (ret)
538 break;
539 if (arg[0] == '-')
540 offset = -offset;
541 arg = tmp + 1;
542 tmp = strrchr(arg, ')');
543 if (tmp) {
544 struct indirect_fetch_data *id;
545 *tmp = '\0';
546 id = kzalloc(sizeof(struct indirect_fetch_data),
547 GFP_KERNEL);
548 if (!id)
549 return -ENOMEM;
550 id->offset = offset;
551 ret = __parse_probe_arg(arg, &id->orig, is_return);
552 if (ret)
553 kfree(id);
554 else {
555 ff->func = fetch_indirect;
556 ff->data = (void *)id;
557 }
558 } else
559 ret = -EINVAL;
560 break;
561 default:
562 /* TODO: support custom handler */
563 ret = -EINVAL;
564 }
565 return ret;
566}
567
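To summarize the parser above, each argument specification selects one of the fetch handlers defined earlier (register names are arch-dependent; "%ax" is just an x86 example):

	/*  $retval       -> fetch_retvalue        (kretprobes only)             */
	/*  $stack        -> fetch_stack_address                                 */
	/*  $stackN       -> fetch_stack           (data = N)                    */
	/*  %ax           -> fetch_register        (data = pt_regs offset)       */
	/*  @0xdeadbeef   -> fetch_memory          (data = address)              */
	/*  @SYM[+|-offs] -> fetch_symbol          (data = struct symbol_cache)  */
	/*  +|-offs(ARG)  -> fetch_indirect        (data = indirect_fetch_data)  */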
568/* String length checking wrapper */
569static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
570{
571 if (strlen(arg) > MAX_ARGSTR_LEN) {
572 pr_info("Argument is too long.: %s\n", arg);
573 return -ENOSPC;
574 }
575 return __parse_probe_arg(arg, ff, is_return);
576}
577
578/* Return 1 if name is reserved or already used by another argument */
579static int conflict_field_name(const char *name,
580 struct probe_arg *args, int narg)
581{
582 int i;
583 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
584 if (strcmp(reserved_field_names[i], name) == 0)
585 return 1;
586 for (i = 0; i < narg; i++)
587 if (strcmp(args[i].name, name) == 0)
588 return 1;
589 return 0;
590}
591
592static int create_trace_probe(int argc, char **argv)
593{
594 /*
595 * Argument syntax:
596 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
597 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
598 * Fetch args:
599 * $retval : fetch return value
600 * $stack : fetch stack address
601 * $stackN : fetch Nth of stack (N:0-)
602 * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
603 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
604 * %REG : fetch register REG
605 * Indirect memory fetch:
606 * +|-offs(ARG) : fetch memory at ARG +|- offs address.
607 * Alias name of args:
608 * NAME=FETCHARG : set NAME as alias of FETCHARG.
609 */
610 struct trace_probe *tp;
611 int i, ret = 0;
612 int is_return = 0, is_delete = 0;
613 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
614 unsigned long offset = 0;
615 void *addr = NULL;
616 char buf[MAX_EVENT_NAME_LEN];
617
618 /* argc must be >= 1 */
619 if (argv[0][0] == 'p')
620 is_return = 0;
621 else if (argv[0][0] == 'r')
622 is_return = 1;
623 else if (argv[0][0] == '-')
624 is_delete = 1;
625 else {
626 pr_info("Probe definition must be started with 'p', 'r' or"
627 " '-'.\n");
628 return -EINVAL;
629 }
630
631 if (argv[0][1] == ':') {
632 event = &argv[0][2];
633 if (strchr(event, '/')) {
634 group = event;
635 event = strchr(group, '/') + 1;
636 event[-1] = '\0';
637 if (strlen(group) == 0) {
638 pr_info("Group name is not specified\n");
639 return -EINVAL;
640 }
641 }
642 if (strlen(event) == 0) {
643 pr_info("Event name is not specified\n");
644 return -EINVAL;
645 }
646 }
647 if (!group)
648 group = KPROBE_EVENT_SYSTEM;
649
650 if (is_delete) {
651 if (!event) {
652 pr_info("Delete command needs an event name.\n");
653 return -EINVAL;
654 }
655 tp = find_probe_event(event, group);
656 if (!tp) {
657 pr_info("Event %s/%s doesn't exist.\n", group, event);
658 return -ENOENT;
659 }
660 /* delete an event */
661 unregister_trace_probe(tp);
662 free_trace_probe(tp);
663 return 0;
664 }
665
666 if (argc < 2) {
667 pr_info("Probe point is not specified.\n");
668 return -EINVAL;
669 }
670 if (isdigit(argv[1][0])) {
671 if (is_return) {
672 pr_info("Return probe point must be a symbol.\n");
673 return -EINVAL;
674 }
675 /* an address specified */
676 ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
677 if (ret) {
678 pr_info("Failed to parse address.\n");
679 return ret;
680 }
681 } else {
682 /* a symbol specified */
683 symbol = argv[1];
684 /* TODO: support .init module functions */
685 ret = split_symbol_offset(symbol, &offset);
686 if (ret) {
687 pr_info("Failed to parse symbol.\n");
688 return ret;
689 }
690 if (offset && is_return) {
691 pr_info("Return probe must be used without offset.\n");
692 return -EINVAL;
693 }
694 }
695 argc -= 2; argv += 2;
696
697 /* setup a probe */
698 if (!event) {
699 /* Make a new event name */
700 if (symbol)
701 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
702 is_return ? 'r' : 'p', symbol, offset);
703 else
704 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
705 is_return ? 'r' : 'p', addr);
706 event = buf;
707 }
708 tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
709 is_return);
710 if (IS_ERR(tp)) {
711 pr_info("Failed to allocate trace_probe.(%d)\n",
712 (int)PTR_ERR(tp));
713 return PTR_ERR(tp);
714 }
715
716 /* parse arguments */
717 ret = 0;
718 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
719 /* Parse argument name */
720 arg = strchr(argv[i], '=');
721 if (arg)
722 *arg++ = '\0';
723 else
724 arg = argv[i];
725
726 if (conflict_field_name(argv[i], tp->args, i)) {
727 pr_info("Argument%d name '%s' conflicts with "
728 "another field.\n", i, argv[i]);
729 ret = -EINVAL;
730 goto error;
731 }
732
733 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
734 if (!tp->args[i].name) {
735 pr_info("Failed to allocate argument%d name '%s'.\n",
736 i, argv[i]);
737 ret = -ENOMEM;
738 goto error;
739 }
740
741 /* Parse fetch argument */
742 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
743 if (ret) {
744 pr_info("Parse error at argument%d. (%d)\n", i, ret);
745 kfree(tp->args[i].name);
746 goto error;
747 }
748
749 tp->nr_args++;
750 }
751
752 ret = register_trace_probe(tp);
753 if (ret)
754 goto error;
755 return 0;
756
757error:
758 free_trace_probe(tp);
759 return ret;
760}
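For reference, create_trace_probe() receives its argv from writes to the kprobe_events file defined further down, so a probe definition is a single text line such as (x86 register assignments shown purely as an example):

	p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)

which command_trace_probe() splits into argv[] = { "p:myprobe", "do_sys_open", "dfd=%ax", ... } before calling the function above.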
761
762static void cleanup_all_probes(void)
763{
764 struct trace_probe *tp;
765
766 mutex_lock(&probe_lock);
767 /* TODO: Use batch unregistration */
768 while (!list_empty(&probe_list)) {
769 tp = list_entry(probe_list.next, struct trace_probe, list);
770 unregister_trace_probe(tp);
771 free_trace_probe(tp);
772 }
773 mutex_unlock(&probe_lock);
774}
775
776
777/* Probes listing interfaces */
778static void *probes_seq_start(struct seq_file *m, loff_t *pos)
779{
780 mutex_lock(&probe_lock);
781 return seq_list_start(&probe_list, *pos);
782}
783
784static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
785{
786 return seq_list_next(v, &probe_list, pos);
787}
788
789static void probes_seq_stop(struct seq_file *m, void *v)
790{
791 mutex_unlock(&probe_lock);
792}
793
794static int probes_seq_show(struct seq_file *m, void *v)
795{
796 struct trace_probe *tp = v;
797 int i, ret;
798 char buf[MAX_ARGSTR_LEN + 1];
799
800 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
801 seq_printf(m, ":%s/%s", tp->call.system, tp->call.name);
802
803 if (!tp->symbol)
804 seq_printf(m, " 0x%p", tp->rp.kp.addr);
805 else if (tp->rp.kp.offset)
806 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
807 else
808 seq_printf(m, " %s", probe_symbol(tp));
809
810 for (i = 0; i < tp->nr_args; i++) {
811 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
812 if (ret < 0) {
813 pr_warning("Argument%d decoding error(%d).\n", i, ret);
814 return ret;
815 }
816 seq_printf(m, " %s=%s", tp->args[i].name, buf);
817 }
818 seq_printf(m, "\n");
819 return 0;
820}
821
822static const struct seq_operations probes_seq_op = {
823 .start = probes_seq_start,
824 .next = probes_seq_next,
825 .stop = probes_seq_stop,
826 .show = probes_seq_show
827};
828
829static int probes_open(struct inode *inode, struct file *file)
830{
831 if ((file->f_mode & FMODE_WRITE) &&
832 (file->f_flags & O_TRUNC))
833 cleanup_all_probes();
834
835 return seq_open(file, &probes_seq_op);
836}
837
838static int command_trace_probe(const char *buf)
839{
840 char **argv;
841 int argc = 0, ret = 0;
842
843 argv = argv_split(GFP_KERNEL, buf, &argc);
844 if (!argv)
845 return -ENOMEM;
846
847 if (argc)
848 ret = create_trace_probe(argc, argv);
849
850 argv_free(argv);
851 return ret;
852}
853
854#define WRITE_BUFSIZE 128
855
856static ssize_t probes_write(struct file *file, const char __user *buffer,
857 size_t count, loff_t *ppos)
858{
859 char *kbuf, *tmp;
860 int ret;
861 size_t done;
862 size_t size;
863
864 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
865 if (!kbuf)
866 return -ENOMEM;
867
868 ret = done = 0;
869 while (done < count) {
870 size = count - done;
871 if (size >= WRITE_BUFSIZE)
872 size = WRITE_BUFSIZE - 1;
873 if (copy_from_user(kbuf, buffer + done, size)) {
874 ret = -EFAULT;
875 goto out;
876 }
877 kbuf[size] = '\0';
878 tmp = strchr(kbuf, '\n');
879 if (tmp) {
880 *tmp = '\0';
881 size = tmp - kbuf + 1;
882 } else if (done + size < count) {
883 pr_warning("Line length is too long: "
884 "Should be less than %d.", WRITE_BUFSIZE);
885 ret = -EINVAL;
886 goto out;
887 }
888 done += size;
889 /* Remove comments */
890 tmp = strchr(kbuf, '#');
891 if (tmp)
892 *tmp = '\0';
893
894 ret = command_trace_probe(kbuf);
895 if (ret)
896 goto out;
897 }
898 ret = done;
899out:
900 kfree(kbuf);
901 return ret;
902}
903
904static const struct file_operations kprobe_events_ops = {
905 .owner = THIS_MODULE,
906 .open = probes_open,
907 .read = seq_read,
908 .llseek = seq_lseek,
909 .release = seq_release,
910 .write = probes_write,
911};
912
913/* Probes profiling interfaces */
914static int probes_profile_seq_show(struct seq_file *m, void *v)
915{
916 struct trace_probe *tp = v;
917
918 seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
919 tp->rp.kp.nmissed);
920
921 return 0;
922}
923
924static const struct seq_operations profile_seq_op = {
925 .start = probes_seq_start,
926 .next = probes_seq_next,
927 .stop = probes_seq_stop,
928 .show = probes_profile_seq_show
929};
930
931static int profile_open(struct inode *inode, struct file *file)
932{
933 return seq_open(file, &profile_seq_op);
934}
935
936static const struct file_operations kprobe_profile_ops = {
937 .owner = THIS_MODULE,
938 .open = profile_open,
939 .read = seq_read,
940 .llseek = seq_lseek,
941 .release = seq_release,
942};
943
944/* Kprobe handler */
945static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
946{
947 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
948 struct kprobe_trace_entry *entry;
949 struct ring_buffer_event *event;
950 struct ring_buffer *buffer;
951 int size, i, pc;
952 unsigned long irq_flags;
953 struct ftrace_event_call *call = &tp->call;
954
955 tp->nhit++;
956
957 local_save_flags(irq_flags);
958 pc = preempt_count();
959
960 size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
961
962 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
963 irq_flags, pc);
964 if (!event)
965 return;
966
967 entry = ring_buffer_event_data(event);
968 entry->nargs = tp->nr_args;
969 entry->ip = (unsigned long)kp->addr;
970 for (i = 0; i < tp->nr_args; i++)
971 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
972
973 if (!filter_current_check_discard(buffer, call, entry, event))
974 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
975}
976
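struct kprobe_trace_entry and SIZEOF_KPROBE_TRACE_ENTRY() are not defined in this part of the patch; judging from the fields used here, they are declared elsewhere in the series (presumably kernel/trace/trace.h) roughly as:

	struct kprobe_trace_entry {
		struct trace_entry	ent;
		unsigned long		ip;	/* FIELD_STRING_IP               */
		int			nargs;	/* FIELD_STRING_NARGS            */
		unsigned long		args[];	/* one slot per fetched argument */
	};

	#define SIZEOF_KPROBE_TRACE_ENTRY(n)			\
		(offsetof(struct kprobe_trace_entry, args) +	\
		 (sizeof(unsigned long) * (n)))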
977/* Kretprobe handler */
978static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
979 struct pt_regs *regs)
980{
981 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
982 struct kretprobe_trace_entry *entry;
983 struct ring_buffer_event *event;
984 struct ring_buffer *buffer;
985 int size, i, pc;
986 unsigned long irq_flags;
987 struct ftrace_event_call *call = &tp->call;
988
989 local_save_flags(irq_flags);
990 pc = preempt_count();
991
992 size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
993
994 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
995 irq_flags, pc);
996 if (!event)
997 return;
998
999 entry = ring_buffer_event_data(event);
1000 entry->nargs = tp->nr_args;
1001 entry->func = (unsigned long)tp->rp.kp.addr;
1002 entry->ret_ip = (unsigned long)ri->ret_addr;
1003 for (i = 0; i < tp->nr_args; i++)
1004 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1005
1006 if (!filter_current_check_discard(buffer, call, entry, event))
1007 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1008}
1009
1010/* Event entry printers */
1011enum print_line_t
1012print_kprobe_event(struct trace_iterator *iter, int flags)
1013{
1014 struct kprobe_trace_entry *field;
1015 struct trace_seq *s = &iter->seq;
1016 struct trace_event *event;
1017 struct trace_probe *tp;
1018 int i;
1019
1020 field = (struct kprobe_trace_entry *)iter->ent;
1021 event = ftrace_find_event(field->ent.type);
1022 tp = container_of(event, struct trace_probe, event);
1023
1024 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1025 goto partial;
1026
1027 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
1028 goto partial;
1029
1030 if (!trace_seq_puts(s, ")"))
1031 goto partial;
1032
1033 for (i = 0; i < field->nargs; i++)
1034 if (!trace_seq_printf(s, " %s=%lx",
1035 tp->args[i].name, field->args[i]))
1036 goto partial;
1037
1038 if (!trace_seq_puts(s, "\n"))
1039 goto partial;
1040
1041 return TRACE_TYPE_HANDLED;
1042partial:
1043 return TRACE_TYPE_PARTIAL_LINE;
1044}
1045
1046enum print_line_t
1047print_kretprobe_event(struct trace_iterator *iter, int flags)
1048{
1049 struct kretprobe_trace_entry *field;
1050 struct trace_seq *s = &iter->seq;
1051 struct trace_event *event;
1052 struct trace_probe *tp;
1053 int i;
1054
1055 field = (struct kretprobe_trace_entry *)iter->ent;
1056 event = ftrace_find_event(field->ent.type);
1057 tp = container_of(event, struct trace_probe, event);
1058
1059 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1060 goto partial;
1061
1062 if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1063 goto partial;
1064
1065 if (!trace_seq_puts(s, " <- "))
1066 goto partial;
1067
1068 if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1069 goto partial;
1070
1071 if (!trace_seq_puts(s, ")"))
1072 goto partial;
1073
1074 for (i = 0; i < field->nargs; i++)
1075 if (!trace_seq_printf(s, " %s=%lx",
1076 tp->args[i].name, field->args[i]))
1077 goto partial;
1078
1079 if (!trace_seq_puts(s, "\n"))
1080 goto partial;
1081
1082 return TRACE_TYPE_HANDLED;
1083partial:
1084 return TRACE_TYPE_PARTIAL_LINE;
1085}
1086
1087static int probe_event_enable(struct ftrace_event_call *call)
1088{
1089 struct trace_probe *tp = (struct trace_probe *)call->data;
1090
1091 tp->flags |= TP_FLAG_TRACE;
1092 if (probe_is_return(tp))
1093 return enable_kretprobe(&tp->rp);
1094 else
1095 return enable_kprobe(&tp->rp.kp);
1096}
1097
1098static void probe_event_disable(struct ftrace_event_call *call)
1099{
1100 struct trace_probe *tp = (struct trace_probe *)call->data;
1101
1102 tp->flags &= ~TP_FLAG_TRACE;
1103 if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1104 if (probe_is_return(tp))
1105 disable_kretprobe(&tp->rp);
1106 else
1107 disable_kprobe(&tp->rp.kp);
1108 }
1109}
1110
1111static int probe_event_raw_init(struct ftrace_event_call *event_call)
1112{
1113 INIT_LIST_HEAD(&event_call->fields);
1114
1115 return 0;
1116}
1117
1118#undef DEFINE_FIELD
1119#define DEFINE_FIELD(type, item, name, is_signed) \
1120 do { \
1121 ret = trace_define_field(event_call, #type, name, \
1122 offsetof(typeof(field), item), \
1123 sizeof(field.item), is_signed, \
1124 FILTER_OTHER); \
1125 if (ret) \
1126 return ret; \
1127 } while (0)
1128
1129static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1130{
1131 int ret, i;
1132 struct kprobe_trace_entry field;
1133 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1134
1135 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1136 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1137 /* Set argument names as fields */
1138 for (i = 0; i < tp->nr_args; i++)
1139 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1140 return 0;
1141}
1142
1143static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1144{
1145 int ret, i;
1146 struct kretprobe_trace_entry field;
1147 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1148
1149 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1150 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1151 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1152 /* Set argument names as fields */
1153 for (i = 0; i < tp->nr_args; i++)
1154 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1155 return 0;
1156}
1157
1158static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1159{
1160 int i;
1161 int pos = 0;
1162
1163 const char *fmt, *arg;
1164
1165 if (!probe_is_return(tp)) {
1166 fmt = "(%lx)";
1167 arg = "REC->" FIELD_STRING_IP;
1168 } else {
1169 fmt = "(%lx <- %lx)";
1170 arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
1171 }
1172
1173 /* When len=0, we just calculate the needed length */
1174#define LEN_OR_ZERO (len ? len - pos : 0)
1175
1176 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
1177
1178 for (i = 0; i < tp->nr_args; i++) {
1179 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx",
1180 tp->args[i].name);
1181 }
1182
1183 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
1184
1185 for (i = 0; i < tp->nr_args; i++) {
1186 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
1187 tp->args[i].name);
1188 }
1189
1190#undef LEN_OR_ZERO
1191
1192 /* return the length of print_fmt */
1193 return pos;
1194}
1195
1196static int set_print_fmt(struct trace_probe *tp)
1197{
1198 int len;
1199 char *print_fmt;
1200
1201 /* First: called with 0 length to calculate the needed length */
1202 len = __set_print_fmt(tp, NULL, 0);
1203 print_fmt = kmalloc(len + 1, GFP_KERNEL);
1204 if (!print_fmt)
1205 return -ENOMEM;
1206
1207 /* Second: actually write the @print_fmt */
1208 __set_print_fmt(tp, print_fmt, len + 1);
1209 tp->call.print_fmt = print_fmt;
1210
1211 return 0;
1212}
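
The two-pass pattern above (call __set_print_fmt() with len == 0 so the snprintf() calls only measure, allocate, then call it again to fill the buffer) is worth seeing in isolation. Below is a minimal user-space sketch of the same idiom; demo_probe, demo_build_fmt and the argument names are illustrative stand-ins rather than kernel identifiers, and "ip" stands in for whatever FIELD_STRING_IP expands to.

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for struct trace_probe: only the argument names. */
struct demo_probe {
	int nr_args;
	const char *args[8];
};

/*
 * Same shape as __set_print_fmt(): when len == 0 every snprintf() call only
 * measures, so the return value is the number of bytes the final string
 * needs (excluding the trailing NUL).
 */
static int demo_build_fmt(const struct demo_probe *tp, char *buf, int len)
{
	int pos = 0;
	int i;

#define BUF_OR_NULL (len ? buf + pos : NULL)
#define LEN_OR_ZERO (len ? len - pos : 0)
	pos += snprintf(BUF_OR_NULL, LEN_OR_ZERO, "\"(%%lx)");
	for (i = 0; i < tp->nr_args; i++)
		pos += snprintf(BUF_OR_NULL, LEN_OR_ZERO, " %s=%%lx",
				tp->args[i]);
	/* "ip" stands in for the real FIELD_STRING_IP field name. */
	pos += snprintf(BUF_OR_NULL, LEN_OR_ZERO, "\", REC->ip");
	for (i = 0; i < tp->nr_args; i++)
		pos += snprintf(BUF_OR_NULL, LEN_OR_ZERO, ", REC->%s",
				tp->args[i]);
#undef BUF_OR_NULL
#undef LEN_OR_ZERO

	return pos;
}

int main(void)
{
	struct demo_probe tp = { .nr_args = 2, .args = { "dfd", "filename" } };
	int len = demo_build_fmt(&tp, NULL, 0);		/* first pass: measure */
	char *fmt = malloc(len + 1);

	if (!fmt)
		return 1;
	demo_build_fmt(&tp, fmt, len + 1);		/* second pass: write */
	/* Prints: "(%lx) dfd=%lx filename=%lx", REC->ip, REC->dfd, REC->filename */
	printf("%s\n", fmt);
	free(fmt);
	return 0;
}

The kernel version does the same thing with kmalloc() and stores the result in tp->call.print_fmt.
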
1213
1214#ifdef CONFIG_PERF_EVENTS
1215
1216/* Kprobe profile handler */
1217static __kprobes void kprobe_profile_func(struct kprobe *kp,
1218 struct pt_regs *regs)
1219{
1220 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1221 struct ftrace_event_call *call = &tp->call;
1222 struct kprobe_trace_entry *entry;
1223 int size, __size, i;
1224 unsigned long irq_flags;
1225 int rctx;
1226
1227 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1228 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1229 size -= sizeof(u32);
1230 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1231 "profile buffer not large enough"))
1232 return;
1233
1234 entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
1235 if (!entry)
1236 return;
1237
1238 entry->nargs = tp->nr_args;
1239 entry->ip = (unsigned long)kp->addr;
1240 for (i = 0; i < tp->nr_args; i++)
1241 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1242
1243 ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
1244}
1245
1246/* Kretprobe profile handler */
1247static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
1248 struct pt_regs *regs)
1249{
1250 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1251 struct ftrace_event_call *call = &tp->call;
1252 struct kretprobe_trace_entry *entry;
1253 int size, __size, i;
1254 unsigned long irq_flags;
1255 int rctx;
1256
1257 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1258 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1259 size -= sizeof(u32);
1260 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1261 "profile buffer not large enough"))
1262 return;
1263
1264 entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
1265 if (!entry)
1266 return;
1267
1268 entry->nargs = tp->nr_args;
1269 entry->func = (unsigned long)tp->rp.kp.addr;
1270 entry->ret_ip = (unsigned long)ri->ret_addr;
1271 for (i = 0; i < tp->nr_args; i++)
1272 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1273
1274 ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags);
1275}
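
Both profile handlers compute size so that the record, together with the u32 length word accounted for by the perf buffer code, stays u64-aligned; that appears to be the point of the ALIGN(__size + sizeof(u32), sizeof(u64)) - sizeof(u32) dance. A small user-space sketch of the arithmetic follows; ALIGN_UP and the sample sizes are illustrative, not taken from the source.

#include <stdio.h>

/* User-space copy of the kernel's round-up idiom; 'a' must be a power of 2. */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	/* Hypothetical raw entry sizes, e.g. kprobe entries with 1..4 args. */
	unsigned long raw[] = { 20, 24, 28, 36 };
	unsigned long hdr = sizeof(unsigned int);	/* the u32 length word */
	unsigned long align = sizeof(unsigned long long);
	unsigned int i;

	for (i = 0; i < sizeof(raw) / sizeof(raw[0]); i++) {
		unsigned long size = ALIGN_UP(raw[i] + hdr, align) - hdr;

		/* size plus the 4-byte length word is a multiple of 8. */
		printf("raw %2lu -> padded %2lu (total %2lu)\n",
		       raw[i], size, size + hdr);
	}
	return 0;
}
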
1276
1277static int probe_profile_enable(struct ftrace_event_call *call)
1278{
1279 struct trace_probe *tp = (struct trace_probe *)call->data;
1280
1281 tp->flags |= TP_FLAG_PROFILE;
1282
1283 if (probe_is_return(tp))
1284 return enable_kretprobe(&tp->rp);
1285 else
1286 return enable_kprobe(&tp->rp.kp);
1287}
1288
1289static void probe_profile_disable(struct ftrace_event_call *call)
1290{
1291 struct trace_probe *tp = (struct trace_probe *)call->data;
1292
1293 tp->flags &= ~TP_FLAG_PROFILE;
1294
1295 if (!(tp->flags & TP_FLAG_TRACE)) {
1296 if (probe_is_return(tp))
1297 disable_kretprobe(&tp->rp);
1298 else
1299 disable_kprobe(&tp->rp.kp);
1300 }
1301}
1302#endif /* CONFIG_PERF_EVENTS */
1303
1304
1305static __kprobes
1306int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1307{
1308 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1309
1310 if (tp->flags & TP_FLAG_TRACE)
1311 kprobe_trace_func(kp, regs);
1312#ifdef CONFIG_PERF_EVENTS
1313 if (tp->flags & TP_FLAG_PROFILE)
1314 kprobe_profile_func(kp, regs);
1315#endif
1316	return 0;	/* We don't tweak the kernel, so just return 0 */
1317}
1318
1319static __kprobes
1320int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1321{
1322 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1323
1324 if (tp->flags & TP_FLAG_TRACE)
1325 kretprobe_trace_func(ri, regs);
1326#ifdef CONFIG_PERF_EVENTS
1327 if (tp->flags & TP_FLAG_PROFILE)
1328 kretprobe_profile_func(ri, regs);
1329#endif
1330	return 0;	/* We don't tweak the kernel, so just return 0 */
1331}
1332
1333static int register_probe_event(struct trace_probe *tp)
1334{
1335 struct ftrace_event_call *call = &tp->call;
1336 int ret;
1337
1338 /* Initialize ftrace_event_call */
1339 if (probe_is_return(tp)) {
1340 tp->event.trace = print_kretprobe_event;
1341 call->raw_init = probe_event_raw_init;
1342 call->define_fields = kretprobe_event_define_fields;
1343 } else {
1344 tp->event.trace = print_kprobe_event;
1345 call->raw_init = probe_event_raw_init;
1346 call->define_fields = kprobe_event_define_fields;
1347 }
1348 if (set_print_fmt(tp) < 0)
1349 return -ENOMEM;
1350 call->event = &tp->event;
1351 call->id = register_ftrace_event(&tp->event);
1352 if (!call->id) {
1353 kfree(call->print_fmt);
1354 return -ENODEV;
1355 }
1356 call->enabled = 0;
1357 call->regfunc = probe_event_enable;
1358 call->unregfunc = probe_event_disable;
1359
1360#ifdef CONFIG_PERF_EVENTS
1361 call->profile_enable = probe_profile_enable;
1362 call->profile_disable = probe_profile_disable;
1363#endif
1364 call->data = tp;
1365 ret = trace_add_event_call(call);
1366 if (ret) {
1367 pr_info("Failed to register kprobe event: %s\n", call->name);
1368 kfree(call->print_fmt);
1369 unregister_ftrace_event(&tp->event);
1370 }
1371 return ret;
1372}
1373
1374static void unregister_probe_event(struct trace_probe *tp)
1375{
1376 /* tp->event is unregistered in trace_remove_event_call() */
1377 trace_remove_event_call(&tp->call);
1378 kfree(tp->call.print_fmt);
1379}
1380
1381/* Make a debugfs interface for controlling probe points */
1382static __init int init_kprobe_trace(void)
1383{
1384 struct dentry *d_tracer;
1385 struct dentry *entry;
1386
1387 d_tracer = tracing_init_dentry();
1388 if (!d_tracer)
1389 return 0;
1390
1391 entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1392 NULL, &kprobe_events_ops);
1393
1394 /* Event list interface */
1395 if (!entry)
1396 pr_warning("Could not create debugfs "
1397 "'kprobe_events' entry\n");
1398
1399 /* Profile interface */
1400 entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1401 NULL, &kprobe_profile_ops);
1402
1403 if (!entry)
1404 pr_warning("Could not create debugfs "
1405 "'kprobe_profile' entry\n");
1406 return 0;
1407}
1408fs_initcall(init_kprobe_trace);
1409
1410
1411#ifdef CONFIG_FTRACE_STARTUP_TEST
1412
1413static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1414 int a4, int a5, int a6)
1415{
1416 return a1 + a2 + a3 + a4 + a5 + a6;
1417}
1418
1419static __init int kprobe_trace_self_tests_init(void)
1420{
1421 int ret, warn = 0;
1422 int (*target)(int, int, int, int, int, int);
1423 struct trace_probe *tp;
1424
1425 target = kprobe_trace_selftest_target;
1426
1427 pr_info("Testing kprobe tracing: ");
1428
1429 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1430 "$stack $stack0 +0($stack)");
1431 if (WARN_ON_ONCE(ret)) {
1432 pr_warning("error on probing function entry.\n");
1433 warn++;
1434 } else {
1435 /* Enable trace point */
1436 tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
1437 if (WARN_ON_ONCE(tp == NULL)) {
1438 pr_warning("error on getting new probe.\n");
1439 warn++;
1440 } else
1441 probe_event_enable(&tp->call);
1442 }
1443
1444 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1445 "$retval");
1446 if (WARN_ON_ONCE(ret)) {
1447 pr_warning("error on probing function return.\n");
1448 warn++;
1449 } else {
1450 /* Enable trace point */
1451 tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
1452 if (WARN_ON_ONCE(tp == NULL)) {
1453 pr_warning("error on getting new probe.\n");
1454 warn++;
1455 } else
1456 probe_event_enable(&tp->call);
1457 }
1458
1459 if (warn)
1460 goto end;
1461
1462 ret = target(1, 2, 3, 4, 5, 6);
1463
1464 ret = command_trace_probe("-:testprobe");
1465 if (WARN_ON_ONCE(ret)) {
1466 pr_warning("error on deleting a probe.\n");
1467 warn++;
1468 }
1469
1470 ret = command_trace_probe("-:testprobe2");
1471 if (WARN_ON_ONCE(ret)) {
1472 pr_warning("error on deleting a probe.\n");
1473 warn++;
1474 }
1475
1476end:
1477 cleanup_all_probes();
1478 if (warn)
1479		pr_cont("NG: Some tests failed. Please check them.\n");
1480 else
1481 pr_cont("OK\n");
1482 return 0;
1483}
1484
1485late_initcall(kprobe_trace_self_tests_init);
1486
1487#endif
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 000000000000..94103cdcf9d8
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,519 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/fs.h>
27
28#include "trace_output.h"
29#include "trace.h"
30
31#include <linux/hw_breakpoint.h>
32#include <asm/hw_breakpoint.h>
33
34#include <asm/atomic.h>
35
36/*
 37 * For now, let us restrict the number of symbols traced simultaneously to the
 38 * number of available hardware breakpoint registers.
39 */
40#define KSYM_TRACER_MAX HBP_NUM
41
42#define KSYM_TRACER_OP_LEN 3 /* rw- */
43
44struct trace_ksym {
45 struct perf_event **ksym_hbp;
46 struct perf_event_attr attr;
47#ifdef CONFIG_PROFILE_KSYM_TRACER
48 atomic64_t counter;
49#endif
50 struct hlist_node ksym_hlist;
51};
52
53static struct trace_array *ksym_trace_array;
54
55static unsigned int ksym_filter_entry_count;
56static unsigned int ksym_tracing_enabled;
57
58static HLIST_HEAD(ksym_filter_head);
59
60static DEFINE_MUTEX(ksym_tracer_mutex);
61
62#ifdef CONFIG_PROFILE_KSYM_TRACER
63
64#define MAX_UL_INT 0xffffffff
65
66void ksym_collect_stats(unsigned long hbp_hit_addr)
67{
68 struct hlist_node *node;
69 struct trace_ksym *entry;
70
71 rcu_read_lock();
72 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
73 if (entry->attr.bp_addr == hbp_hit_addr) {
74 atomic64_inc(&entry->counter);
75 break;
76 }
77 }
78 rcu_read_unlock();
79}
80#endif /* CONFIG_PROFILE_KSYM_TRACER */
81
82void ksym_hbp_handler(struct perf_event *hbp, int nmi,
83 struct perf_sample_data *data,
84 struct pt_regs *regs)
85{
86 struct ring_buffer_event *event;
87 struct ksym_trace_entry *entry;
88 struct ring_buffer *buffer;
89 int pc;
90
91 if (!ksym_tracing_enabled)
92 return;
93
94 buffer = ksym_trace_array->buffer;
95
96 pc = preempt_count();
97
98 event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
99 sizeof(*entry), 0, pc);
100 if (!event)
101 return;
102
103 entry = ring_buffer_event_data(event);
104 entry->ip = instruction_pointer(regs);
105 entry->type = hw_breakpoint_type(hbp);
106 entry->addr = hw_breakpoint_addr(hbp);
107 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
108
109#ifdef CONFIG_PROFILE_KSYM_TRACER
110 ksym_collect_stats(hw_breakpoint_addr(hbp));
111#endif /* CONFIG_PROFILE_KSYM_TRACER */
112
113 trace_buffer_unlock_commit(buffer, event, 0, pc);
114}
115
116/* Valid access types are represented as
117 *
118 * rw- : Set Read/Write Access Breakpoint
119 * -w- : Set Write Access Breakpoint
120 * --- : Clear Breakpoints
 121 * --x : Set Execution Breakpoints (not available yet)
122 *
123 */
124static int ksym_trace_get_access_type(char *str)
125{
126 int access = 0;
127
128 if (str[0] == 'r')
129 access |= HW_BREAKPOINT_R;
130
131 if (str[1] == 'w')
132 access |= HW_BREAKPOINT_W;
133
134 if (str[2] == 'x')
135 access |= HW_BREAKPOINT_X;
136
137 switch (access) {
138 case HW_BREAKPOINT_R:
139 case HW_BREAKPOINT_W:
140 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
141 return access;
142 default:
143 return -EINVAL;
144 }
145}
146
147/*
148 * There can be several possible malformed requests and we attempt to capture
 149 * all of them. We enumerate some of the rules:
150 * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
151 * i.e. multiple ':' symbols disallowed. Possible uses are of the form
152 * <module>:<ksym_name>:<op>.
153 * 2. No delimiter symbol ':' in the input string
154 * 3. Spurious operator symbols or symbols not in their respective positions
155 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
156 * 5. Kernel symbol not a part of /proc/kallsyms
157 * 6. Duplicate requests
158 */
159static int parse_ksym_trace_str(char *input_string, char **ksymname,
160 unsigned long *addr)
161{
162 int ret;
163
164 *ksymname = strsep(&input_string, ":");
165 *addr = kallsyms_lookup_name(*ksymname);
166
167 /* Check for malformed request: (2), (1) and (5) */
168 if ((!input_string) ||
169 (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
170 (*addr == 0))
 171		return -EINVAL;
172
173 ret = ksym_trace_get_access_type(input_string);
174
175 return ret;
176}
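
parse_ksym_trace_str() leans on strsep() to split "<symbol>:<op>" in place. A tiny stand-alone sketch of that behaviour (the symbol name pid_max is just an illustration):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* strsep() writes a NUL over the first ':' and advances the cursor
	 * past it, which is exactly how the symbol is separated from the
	 * 3-character access string. */
	char buf[] = "pid_max:rw-";
	char *rest = buf;
	char *sym = strsep(&rest, ":");

	printf("symbol=\"%s\" op=\"%s\"\n", sym, rest);	/* pid_max / rw- */

	/* With no ':' at all the cursor becomes NULL, which is the
	 * "no delimiter" malformed case the function rejects. */
	char buf2[] = "pid_max";
	rest = buf2;
	sym = strsep(&rest, ":");
	printf("symbol=\"%s\" rest=%s\n", sym, rest ? rest : "(null)");
	return 0;
}
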
177
178int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
179{
180 struct trace_ksym *entry;
181 int ret = -ENOMEM;
182
183 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
 184		printk(KERN_ERR "ksym_tracer: Maximum limit (%d) reached. No"
 185			" new tracing requests can be accepted now.\n",
 186			KSYM_TRACER_MAX);
187 return -ENOSPC;
188 }
189
190 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
191 if (!entry)
192 return -ENOMEM;
193
194 hw_breakpoint_init(&entry->attr);
195
196 entry->attr.bp_type = op;
197 entry->attr.bp_addr = addr;
198 entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
199
200 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
201 ksym_hbp_handler);
202
203 if (IS_ERR(entry->ksym_hbp)) {
204 ret = PTR_ERR(entry->ksym_hbp);
 205		printk(KERN_INFO "ksym_tracer request failed. Try again"
 206			" later!\n");
207 goto err;
208 }
209
210 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
211 ksym_filter_entry_count++;
212
213 return 0;
214
215err:
216 kfree(entry);
217
218 return ret;
219}
220
221static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
222 size_t count, loff_t *ppos)
223{
224 struct trace_ksym *entry;
225 struct hlist_node *node;
226 struct trace_seq *s;
227 ssize_t cnt = 0;
228 int ret;
229
230 s = kmalloc(sizeof(*s), GFP_KERNEL);
231 if (!s)
232 return -ENOMEM;
233 trace_seq_init(s);
234
235 mutex_lock(&ksym_tracer_mutex);
236
237 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
238 ret = trace_seq_printf(s, "%pS:",
239 (void *)(unsigned long)entry->attr.bp_addr);
240 if (entry->attr.bp_type == HW_BREAKPOINT_R)
241 ret = trace_seq_puts(s, "r--\n");
242 else if (entry->attr.bp_type == HW_BREAKPOINT_W)
243 ret = trace_seq_puts(s, "-w-\n");
244 else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
245 ret = trace_seq_puts(s, "rw-\n");
246 WARN_ON_ONCE(!ret);
247 }
248
249 cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
250
251 mutex_unlock(&ksym_tracer_mutex);
252
253 kfree(s);
254
255 return cnt;
256}
257
258static void __ksym_trace_reset(void)
259{
260 struct trace_ksym *entry;
261 struct hlist_node *node, *node1;
262
263 mutex_lock(&ksym_tracer_mutex);
264 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
265 ksym_hlist) {
266 unregister_wide_hw_breakpoint(entry->ksym_hbp);
267 ksym_filter_entry_count--;
268 hlist_del_rcu(&(entry->ksym_hlist));
269 synchronize_rcu();
270 kfree(entry);
271 }
272 mutex_unlock(&ksym_tracer_mutex);
273}
274
275static ssize_t ksym_trace_filter_write(struct file *file,
276 const char __user *buffer,
277 size_t count, loff_t *ppos)
278{
279 struct trace_ksym *entry;
280 struct hlist_node *node;
281 char *buf, *input_string, *ksymname = NULL;
282 unsigned long ksym_addr = 0;
283 int ret, op, changed = 0;
284
285 buf = kzalloc(count + 1, GFP_KERNEL);
286 if (!buf)
287 return -ENOMEM;
288
289 ret = -EFAULT;
290 if (copy_from_user(buf, buffer, count))
291 goto out;
292
293 buf[count] = '\0';
294 input_string = strstrip(buf);
295
296 /*
297 * Clear all breakpoints if:
298 * 1: echo > ksym_trace_filter
299 * 2: echo 0 > ksym_trace_filter
300 * 3: echo "*:---" > ksym_trace_filter
301 */
302 if (!input_string[0] || !strcmp(input_string, "0") ||
303 !strcmp(input_string, "*:---")) {
304 __ksym_trace_reset();
305 ret = 0;
306 goto out;
307 }
308
309 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
310 if (ret < 0)
311 goto out;
312
313 mutex_lock(&ksym_tracer_mutex);
314
315 ret = -EINVAL;
316 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
317 if (entry->attr.bp_addr == ksym_addr) {
318 /* Check for malformed request: (6) */
319 if (entry->attr.bp_type != op)
320 changed = 1;
321 else
322 goto out_unlock;
323 break;
324 }
325 }
326 if (changed) {
327 unregister_wide_hw_breakpoint(entry->ksym_hbp);
328 entry->attr.bp_type = op;
329 ret = 0;
330 if (op > 0) {
331 entry->ksym_hbp =
332 register_wide_hw_breakpoint(&entry->attr,
333 ksym_hbp_handler);
334 if (IS_ERR(entry->ksym_hbp))
335 ret = PTR_ERR(entry->ksym_hbp);
336 else
337 goto out_unlock;
338 }
339 /* Error or "symbol:---" case: drop it */
340 ksym_filter_entry_count--;
341 hlist_del_rcu(&(entry->ksym_hlist));
342 synchronize_rcu();
343 kfree(entry);
344 goto out_unlock;
345 } else {
346 /* Check for malformed request: (4) */
347 if (op)
348 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
349 }
350out_unlock:
351 mutex_unlock(&ksym_tracer_mutex);
352out:
353 kfree(buf);
354 return !ret ? count : ret;
355}
356
357static const struct file_operations ksym_tracing_fops = {
358 .open = tracing_open_generic,
359 .read = ksym_trace_filter_read,
360 .write = ksym_trace_filter_write,
361};
362
363static void ksym_trace_reset(struct trace_array *tr)
364{
365 ksym_tracing_enabled = 0;
366 __ksym_trace_reset();
367}
368
369static int ksym_trace_init(struct trace_array *tr)
370{
371 int cpu, ret = 0;
372
373 for_each_online_cpu(cpu)
374 tracing_reset(tr, cpu);
375 ksym_tracing_enabled = 1;
376 ksym_trace_array = tr;
377
378 return ret;
379}
380
381static void ksym_trace_print_header(struct seq_file *m)
382{
383 seq_puts(m,
384 "# TASK-PID CPU# Symbol "
385 "Type Function\n");
386 seq_puts(m,
387 "# | | | "
388 " | |\n");
389}
390
391static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
392{
393 struct trace_entry *entry = iter->ent;
394 struct trace_seq *s = &iter->seq;
395 struct ksym_trace_entry *field;
396 char str[KSYM_SYMBOL_LEN];
397 int ret;
398
399 if (entry->type != TRACE_KSYM)
400 return TRACE_TYPE_UNHANDLED;
401
402 trace_assign_type(field, entry);
403
404 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
405 entry->pid, iter->cpu, (char *)field->addr);
406 if (!ret)
407 return TRACE_TYPE_PARTIAL_LINE;
408
409 switch (field->type) {
410 case HW_BREAKPOINT_R:
411 ret = trace_seq_printf(s, " R ");
412 break;
413 case HW_BREAKPOINT_W:
414 ret = trace_seq_printf(s, " W ");
415 break;
416 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
417 ret = trace_seq_printf(s, " RW ");
418 break;
419 default:
420 return TRACE_TYPE_PARTIAL_LINE;
421 }
422
423 if (!ret)
424 return TRACE_TYPE_PARTIAL_LINE;
425
426 sprint_symbol(str, field->ip);
427 ret = trace_seq_printf(s, "%s\n", str);
428 if (!ret)
429 return TRACE_TYPE_PARTIAL_LINE;
430
431 return TRACE_TYPE_HANDLED;
432}
433
434struct tracer ksym_tracer __read_mostly =
435{
436 .name = "ksym_tracer",
437 .init = ksym_trace_init,
438 .reset = ksym_trace_reset,
439#ifdef CONFIG_FTRACE_SELFTEST
440 .selftest = trace_selftest_startup_ksym,
441#endif
442 .print_header = ksym_trace_print_header,
443 .print_line = ksym_trace_output
444};
445
446#ifdef CONFIG_PROFILE_KSYM_TRACER
447static int ksym_profile_show(struct seq_file *m, void *v)
448{
449 struct hlist_node *node;
450 struct trace_ksym *entry;
451 int access_type = 0;
452 char fn_name[KSYM_NAME_LEN];
453
454 seq_puts(m, " Access Type ");
455 seq_puts(m, " Symbol Counter\n");
456 seq_puts(m, " ----------- ");
457 seq_puts(m, " ------ -------\n");
458
459 rcu_read_lock();
460 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
461
462 access_type = entry->attr.bp_type;
463
464 switch (access_type) {
465 case HW_BREAKPOINT_R:
466 seq_puts(m, " R ");
467 break;
468 case HW_BREAKPOINT_W:
469 seq_puts(m, " W ");
470 break;
471 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
472 seq_puts(m, " RW ");
473 break;
474 default:
475 seq_puts(m, " NA ");
476 }
477
478 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
479 seq_printf(m, " %-36s", fn_name);
480 else
481 seq_printf(m, " %-36s", "<NA>");
482 seq_printf(m, " %15llu\n",
483 (unsigned long long)atomic64_read(&entry->counter));
484 }
485 rcu_read_unlock();
486
487 return 0;
488}
489
490static int ksym_profile_open(struct inode *node, struct file *file)
491{
492 return single_open(file, ksym_profile_show, NULL);
493}
494
495static const struct file_operations ksym_profile_fops = {
496 .open = ksym_profile_open,
497 .read = seq_read,
498 .llseek = seq_lseek,
499 .release = single_release,
500};
501#endif /* CONFIG_PROFILE_KSYM_TRACER */
502
503__init static int init_ksym_trace(void)
504{
505 struct dentry *d_tracer;
506
507 d_tracer = tracing_init_dentry();
508
509 trace_create_file("ksym_trace_filter", 0644, d_tracer,
510 NULL, &ksym_tracing_fops);
511
512#ifdef CONFIG_PROFILE_KSYM_TRACER
513 trace_create_file("ksym_profile", 0444, d_tracer,
514 NULL, &ksym_profile_fops);
515#endif
516
517 return register_tracer(&ksym_tracer);
518}
519device_initcall(init_ksym_trace);
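
Putting the pieces of the new file together, the ksym_trace_filter interface accepts lines of the form <ksym_name>:<op>, where the 3-character op is validated by ksym_trace_get_access_type(), and an empty line, "0" or "*:---" clears every breakpoint. A short user-space sketch of the same accept/reject table (the BP_* values and pid_max are illustrative only):

#include <stdio.h>

/* Illustrative stand-ins for HW_BREAKPOINT_R/W/X; the values are made up. */
#define BP_R 0x1
#define BP_W 0x2
#define BP_X 0x4

/* Same decision table as ksym_trace_get_access_type(): r--, -w- and rw-
 * are accepted, everything else (including --x for now) is rejected. */
static int access_type(const char *op)
{
	int access = 0;

	if (op[0] == 'r')
		access |= BP_R;
	if (op[1] == 'w')
		access |= BP_W;
	if (op[2] == 'x')
		access |= BP_X;

	switch (access) {
	case BP_R:
	case BP_W:
	case BP_R | BP_W:
		return access;
	default:
		return -1;	/* -EINVAL in the kernel */
	}
}

int main(void)
{
	/* pid_max is only an example; any /proc/kallsyms symbol would do. */
	const char *ops[] = { "rw-", "-w-", "r--", "---", "--x" };
	unsigned int i;

	for (i = 0; i < sizeof(ops) / sizeof(ops[0]); i++)
		printf("pid_max:%s -> %d\n", ops[i], access_type(ops[i]));
	return 0;
}
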
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b6c12c6a1bcd..8e46b3323cdc 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -23,13 +23,21 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
23 23
24static int next_event_type = __TRACE_LAST_TYPE + 1; 24static int next_event_type = __TRACE_LAST_TYPE + 1;
25 25
26void trace_print_seq(struct seq_file *m, struct trace_seq *s) 26int trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29 int ret;
30
31 ret = seq_write(m, s->buffer, len);
29 32
30 seq_write(m, s->buffer, len); 33 /*
34 * Only reset this buffer if we successfully wrote to the
35 * seq_file buffer.
36 */
37 if (!ret)
38 trace_seq_init(s);
31 39
32 trace_seq_init(s); 40 return ret;
33} 41}
34 42
35enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) 43enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
@@ -85,7 +93,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
85 va_list ap; 93 va_list ap;
86 int ret; 94 int ret;
87 95
88 if (!len) 96 if (s->full || !len)
89 return 0; 97 return 0;
90 98
91 va_start(ap, fmt); 99 va_start(ap, fmt);
@@ -93,8 +101,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
93 va_end(ap); 101 va_end(ap);
94 102
95 /* If we can't write it all, don't bother writing anything */ 103 /* If we can't write it all, don't bother writing anything */
96 if (ret >= len) 104 if (ret >= len) {
105 s->full = 1;
97 return 0; 106 return 0;
107 }
98 108
99 s->len += ret; 109 s->len += ret;
100 110
@@ -119,14 +129,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
119 int len = (PAGE_SIZE - 1) - s->len; 129 int len = (PAGE_SIZE - 1) - s->len;
120 int ret; 130 int ret;
121 131
122 if (!len) 132 if (s->full || !len)
123 return 0; 133 return 0;
124 134
125 ret = vsnprintf(s->buffer + s->len, len, fmt, args); 135 ret = vsnprintf(s->buffer + s->len, len, fmt, args);
126 136
127 /* If we can't write it all, don't bother writing anything */ 137 /* If we can't write it all, don't bother writing anything */
128 if (ret >= len) 138 if (ret >= len) {
139 s->full = 1;
129 return 0; 140 return 0;
141 }
130 142
131 s->len += ret; 143 s->len += ret;
132 144
@@ -139,14 +151,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
139 int len = (PAGE_SIZE - 1) - s->len; 151 int len = (PAGE_SIZE - 1) - s->len;
140 int ret; 152 int ret;
141 153
142 if (!len) 154 if (s->full || !len)
143 return 0; 155 return 0;
144 156
145 ret = bstr_printf(s->buffer + s->len, len, fmt, binary); 157 ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
146 158
147 /* If we can't write it all, don't bother writing anything */ 159 /* If we can't write it all, don't bother writing anything */
148 if (ret >= len) 160 if (ret >= len) {
161 s->full = 1;
149 return 0; 162 return 0;
163 }
150 164
151 s->len += ret; 165 s->len += ret;
152 166
@@ -167,8 +181,13 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
167{ 181{
168 int len = strlen(str); 182 int len = strlen(str);
169 183
170 if (len > ((PAGE_SIZE - 1) - s->len)) 184 if (s->full)
185 return 0;
186
187 if (len > ((PAGE_SIZE - 1) - s->len)) {
188 s->full = 1;
171 return 0; 189 return 0;
190 }
172 191
173 memcpy(s->buffer + s->len, str, len); 192 memcpy(s->buffer + s->len, str, len);
174 s->len += len; 193 s->len += len;
@@ -178,9 +197,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
178 197
179int trace_seq_putc(struct trace_seq *s, unsigned char c) 198int trace_seq_putc(struct trace_seq *s, unsigned char c)
180{ 199{
181 if (s->len >= (PAGE_SIZE - 1)) 200 if (s->full)
182 return 0; 201 return 0;
183 202
203 if (s->len >= (PAGE_SIZE - 1)) {
204 s->full = 1;
205 return 0;
206 }
207
184 s->buffer[s->len++] = c; 208 s->buffer[s->len++] = c;
185 209
186 return 1; 210 return 1;
@@ -188,9 +212,14 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
188 212
189int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) 213int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
190{ 214{
191 if (len > ((PAGE_SIZE - 1) - s->len)) 215 if (s->full)
192 return 0; 216 return 0;
193 217
218 if (len > ((PAGE_SIZE - 1) - s->len)) {
219 s->full = 1;
220 return 0;
221 }
222
194 memcpy(s->buffer + s->len, mem, len); 223 memcpy(s->buffer + s->len, mem, len);
195 s->len += len; 224 s->len += len;
196 225
@@ -203,6 +232,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
203 const unsigned char *data = mem; 232 const unsigned char *data = mem;
204 int i, j; 233 int i, j;
205 234
235 if (s->full)
236 return 0;
237
206#ifdef __BIG_ENDIAN 238#ifdef __BIG_ENDIAN
207 for (i = 0, j = 0; i < len; i++) { 239 for (i = 0, j = 0; i < len; i++) {
208#else 240#else
@@ -220,8 +252,13 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
220{ 252{
221 void *ret; 253 void *ret;
222 254
223 if (len > ((PAGE_SIZE - 1) - s->len)) 255 if (s->full)
256 return 0;
257
258 if (len > ((PAGE_SIZE - 1) - s->len)) {
259 s->full = 1;
224 return NULL; 260 return NULL;
261 }
225 262
226 ret = s->buffer + s->len; 263 ret = s->buffer + s->len;
227 s->len += len; 264 s->len += len;
@@ -233,8 +270,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
233{ 270{
234 unsigned char *p; 271 unsigned char *p;
235 272
236 if (s->len >= (PAGE_SIZE - 1)) 273 if (s->full)
274 return 0;
275
276 if (s->len >= (PAGE_SIZE - 1)) {
277 s->full = 1;
237 return 0; 278 return 0;
279 }
280
238 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); 281 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
239 if (!IS_ERR(p)) { 282 if (!IS_ERR(p)) {
240 p = mangle_path(s->buffer + s->len, p, "\n"); 283 p = mangle_path(s->buffer + s->len, p, "\n");
@@ -247,6 +290,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
247 return 1; 290 return 1;
248 } 291 }
249 292
293 s->full = 1;
250 return 0; 294 return 0;
251} 295}
252 296
@@ -373,6 +417,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
373 unsigned long vmstart = 0; 417 unsigned long vmstart = 0;
374 int ret = 1; 418 int ret = 1;
375 419
420 if (s->full)
421 return 0;
422
376 if (mm) { 423 if (mm) {
377 const struct vm_area_struct *vma; 424 const struct vm_area_struct *vma;
378 425
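
The trace_output.c hunks above all add the same guard: a sticky "full" flag on the trace_seq, so that once one write overflows the page-sized buffer every later write bails out instead of appending a truncated tail. A minimal user-space sketch of that pattern (demo_seq and the 16-byte size are illustrative; the kernel buffer is PAGE_SIZE):

#include <stdio.h>
#include <string.h>

#define SEQ_SIZE 16	/* tiny on purpose; the kernel uses PAGE_SIZE */

struct demo_seq {
	char buf[SEQ_SIZE];
	unsigned int len;
	int full;
};

/* Same shape as trace_seq_puts() after this patch: on overflow set ->full
 * and refuse all later writes, so nothing partial ever lands after it. */
static int demo_seq_puts(struct demo_seq *s, const char *str)
{
	size_t len = strlen(str);

	if (s->full)
		return 0;

	if (len > (SEQ_SIZE - 1) - s->len) {
		s->full = 1;
		return 0;
	}

	memcpy(s->buf + s->len, str, len);
	s->len += len;
	return 1;
}

int main(void)
{
	struct demo_seq s = { .len = 0, .full = 0 };

	printf("%d\n", demo_seq_puts(&s, "0123456789"));	/* 1: fits       */
	printf("%d\n", demo_seq_puts(&s, "abcdefgh"));		/* 0: overflows  */
	printf("%d\n", demo_seq_puts(&s, "x"));			/* 0: still full */
	printf("%.*s\n", (int)s.len, s.buf);			/* first write only */
	return 0;
}
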
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 26185d727676..0271742abb8d 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -28,8 +28,8 @@ static int wakeup_current_cpu;
28static unsigned wakeup_prio = -1; 28static unsigned wakeup_prio = -1;
29static int wakeup_rt; 29static int wakeup_rt;
30 30
31static raw_spinlock_t wakeup_lock = 31static arch_spinlock_t wakeup_lock =
32 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 33
34static void __wakeup_reset(struct trace_array *tr); 34static void __wakeup_reset(struct trace_array *tr);
35 35
@@ -143,7 +143,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
143 goto out; 143 goto out;
144 144
145 local_irq_save(flags); 145 local_irq_save(flags);
146 __raw_spin_lock(&wakeup_lock); 146 arch_spin_lock(&wakeup_lock);
147 147
148 /* We could race with grabbing wakeup_lock */ 148 /* We could race with grabbing wakeup_lock */
149 if (unlikely(!tracer_enabled || next != wakeup_task)) 149 if (unlikely(!tracer_enabled || next != wakeup_task))
@@ -169,7 +169,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
169 169
170out_unlock: 170out_unlock:
171 __wakeup_reset(wakeup_trace); 171 __wakeup_reset(wakeup_trace);
172 __raw_spin_unlock(&wakeup_lock); 172 arch_spin_unlock(&wakeup_lock);
173 local_irq_restore(flags); 173 local_irq_restore(flags);
174out: 174out:
175 atomic_dec(&wakeup_trace->data[cpu]->disabled); 175 atomic_dec(&wakeup_trace->data[cpu]->disabled);
@@ -193,9 +193,9 @@ static void wakeup_reset(struct trace_array *tr)
193 tracing_reset_online_cpus(tr); 193 tracing_reset_online_cpus(tr);
194 194
195 local_irq_save(flags); 195 local_irq_save(flags);
196 __raw_spin_lock(&wakeup_lock); 196 arch_spin_lock(&wakeup_lock);
197 __wakeup_reset(tr); 197 __wakeup_reset(tr);
198 __raw_spin_unlock(&wakeup_lock); 198 arch_spin_unlock(&wakeup_lock);
199 local_irq_restore(flags); 199 local_irq_restore(flags);
200} 200}
201 201
@@ -225,7 +225,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
225 goto out; 225 goto out;
226 226
227 /* interrupts should be off from try_to_wake_up */ 227 /* interrupts should be off from try_to_wake_up */
228 __raw_spin_lock(&wakeup_lock); 228 arch_spin_lock(&wakeup_lock);
229 229
230 /* check for races. */ 230 /* check for races. */
231 if (!tracer_enabled || p->prio >= wakeup_prio) 231 if (!tracer_enabled || p->prio >= wakeup_prio)
@@ -255,7 +255,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
256 256
257out_locked: 257out_locked:
258 __raw_spin_unlock(&wakeup_lock); 258 arch_spin_unlock(&wakeup_lock);
259out: 259out:
260 atomic_dec(&wakeup_trace->data[cpu]->disabled); 260 atomic_dec(&wakeup_trace->data[cpu]->disabled);
261} 261}
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index d2cdbabb4ead..280fea470d67 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
17 case TRACE_GRAPH_ENT: 17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 18 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES: 19 case TRACE_HW_BRANCHES:
20 case TRACE_KSYM:
20 return 1; 21 return 1;
21 } 22 }
22 return 0; 23 return 0;
@@ -66,7 +67,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
66 67
67 /* Don't allow flipping of max traces now */ 68 /* Don't allow flipping of max traces now */
68 local_irq_save(flags); 69 local_irq_save(flags);
69 __raw_spin_lock(&ftrace_max_lock); 70 arch_spin_lock(&ftrace_max_lock);
70 71
71 cnt = ring_buffer_entries(tr->buffer); 72 cnt = ring_buffer_entries(tr->buffer);
72 73
@@ -84,7 +85,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
84 break; 85 break;
85 } 86 }
86 tracing_on(); 87 tracing_on();
87 __raw_spin_unlock(&ftrace_max_lock); 88 arch_spin_unlock(&ftrace_max_lock);
88 local_irq_restore(flags); 89 local_irq_restore(flags);
89 90
90 if (count) 91 if (count)
@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
808 return ret; 809 return ret;
809} 810}
810#endif /* CONFIG_HW_BRANCH_TRACER */ 811#endif /* CONFIG_HW_BRANCH_TRACER */
812
813#ifdef CONFIG_KSYM_TRACER
814static int ksym_selftest_dummy;
815
816int
817trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
818{
819 unsigned long count;
820 int ret;
821
822 /* start the tracing */
823 ret = tracer_init(trace, tr);
824 if (ret) {
825 warn_failed_init_tracer(trace, ret);
826 return ret;
827 }
828
829 ksym_selftest_dummy = 0;
830 /* Register the read-write tracing request */
831
832 ret = process_new_ksym_entry("ksym_selftest_dummy",
833 HW_BREAKPOINT_R | HW_BREAKPOINT_W,
834 (unsigned long)(&ksym_selftest_dummy));
835
836 if (ret < 0) {
837 printk(KERN_CONT "ksym_trace read-write startup test failed\n");
838 goto ret_path;
839 }
840 /* Perform a read and a write operation over the dummy variable to
841 * trigger the tracer
842 */
843 if (ksym_selftest_dummy == 0)
844 ksym_selftest_dummy++;
845
846 /* stop the tracing. */
847 tracing_stop();
848 /* check the trace buffer */
849 ret = trace_test_buffer(tr, &count);
850 trace->reset(tr);
851 tracing_start();
852
853 /* read & write operations - one each is performed on the dummy variable
854 * triggering two entries in the trace buffer
855 */
856 if (!ret && count != 2) {
857 printk(KERN_CONT "Ksym tracer startup test failed");
858 ret = -1;
859 }
860
861ret_path:
862 return ret;
863}
864#endif /* CONFIG_KSYM_TRACER */
865
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 8504ac71e4e8..f4bc9b27de5f 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -27,8 +27,8 @@ static struct stack_trace max_stack_trace = {
27}; 27};
28 28
29static unsigned long max_stack_size; 29static unsigned long max_stack_size;
30static raw_spinlock_t max_stack_lock = 30static arch_spinlock_t max_stack_lock =
31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 31 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
32 32
33static int stack_trace_disabled __read_mostly; 33static int stack_trace_disabled __read_mostly;
34static DEFINE_PER_CPU(int, trace_active); 34static DEFINE_PER_CPU(int, trace_active);
@@ -54,7 +54,7 @@ static inline void check_stack(void)
54 return; 54 return;
55 55
56 local_irq_save(flags); 56 local_irq_save(flags);
57 __raw_spin_lock(&max_stack_lock); 57 arch_spin_lock(&max_stack_lock);
58 58
59 /* a race could have already updated it */ 59 /* a race could have already updated it */
60 if (this_size <= max_stack_size) 60 if (this_size <= max_stack_size)
@@ -103,7 +103,7 @@ static inline void check_stack(void)
103 } 103 }
104 104
105 out: 105 out:
106 __raw_spin_unlock(&max_stack_lock); 106 arch_spin_unlock(&max_stack_lock);
107 local_irq_restore(flags); 107 local_irq_restore(flags);
108} 108}
109 109
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
157 unsigned long val, flags; 157 unsigned long val, flags;
158 char buf[64]; 158 char buf[64];
159 int ret; 159 int ret;
160 int cpu;
160 161
161 if (count >= sizeof(buf)) 162 if (count >= sizeof(buf))
162 return -EINVAL; 163 return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 172 return ret;
172 173
173 local_irq_save(flags); 174 local_irq_save(flags);
174 __raw_spin_lock(&max_stack_lock); 175
176 /*
177 * In case we trace inside arch_spin_lock() or after (NMI),
178 * we will cause circular lock, so we also need to increase
179 * the percpu trace_active here.
180 */
181 cpu = smp_processor_id();
182 per_cpu(trace_active, cpu)++;
183
184 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 185 *ptr = val;
176 __raw_spin_unlock(&max_stack_lock); 186 arch_spin_unlock(&max_stack_lock);
187
188 per_cpu(trace_active, cpu)--;
177 local_irq_restore(flags); 189 local_irq_restore(flags);
178 190
179 return count; 191 return count;
@@ -206,8 +218,14 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
206 218
207static void *t_start(struct seq_file *m, loff_t *pos) 219static void *t_start(struct seq_file *m, loff_t *pos)
208{ 220{
221 int cpu;
222
209 local_irq_disable(); 223 local_irq_disable();
210 __raw_spin_lock(&max_stack_lock); 224
225 cpu = smp_processor_id();
226 per_cpu(trace_active, cpu)++;
227
228 arch_spin_lock(&max_stack_lock);
211 229
212 if (*pos == 0) 230 if (*pos == 0)
213 return SEQ_START_TOKEN; 231 return SEQ_START_TOKEN;
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
217 235
218static void t_stop(struct seq_file *m, void *p) 236static void t_stop(struct seq_file *m, void *p)
219{ 237{
220 __raw_spin_unlock(&max_stack_lock); 238 int cpu;
239
240 arch_spin_unlock(&max_stack_lock);
241
242 cpu = smp_processor_id();
243 per_cpu(trace_active, cpu)--;
244
221 local_irq_enable(); 245 local_irq_enable();
222} 246}
223 247
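
The trace_stack.c changes bracket every arch_spin_lock(&max_stack_lock) with an increment of the per-cpu trace_active counter, so that if the stack tracer itself fires while the lock is held it sees the guard and bails out rather than deadlocking on the same lock. Below is a user-space sketch of the recursion-guard idea; it is single-threaded, so a plain counter stands in for the per-cpu variable, and the names are illustrative.

#include <stdio.h>

/* Stand-in for the per-cpu trace_active counter. */
static int trace_active;

/* Stand-in for check_stack(): refuses to run while the guard is raised. */
static void stack_probe(const char *where)
{
	if (trace_active) {
		printf("probe skipped inside %s (guard raised)\n", where);
		return;
	}
	printf("probe ran in %s\n", where);
}

static void write_max_stack_size(long val)
{
	trace_active++;		/* raise the guard before taking the lock   */
	/* arch_spin_lock(&max_stack_lock) would be taken here */
	stack_probe("writer");	/* tracing fired under the lock: suppressed */
	/* arch_spin_unlock(&max_stack_lock) released here */
	trace_active--;		/* drop the guard only after the unlock     */
	(void)val;
}

int main(void)
{
	stack_probe("main");		/* guard down: runs normally */
	write_max_stack_size(8192);	/* inner probe is suppressed */
	return 0;
}
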
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 527e17eae575..cba47d7935cc 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -14,6 +14,43 @@ static int sys_refcount_exit;
14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
16 16
17extern unsigned long __start_syscalls_metadata[];
18extern unsigned long __stop_syscalls_metadata[];
19
20static struct syscall_metadata **syscalls_metadata;
21
22static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
23{
24 struct syscall_metadata *start;
25 struct syscall_metadata *stop;
26 char str[KSYM_SYMBOL_LEN];
27
28
29 start = (struct syscall_metadata *)__start_syscalls_metadata;
30 stop = (struct syscall_metadata *)__stop_syscalls_metadata;
31 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
32
33 for ( ; start < stop; start++) {
34 /*
35 * Only compare after the "sys" prefix. Archs that use
36 * syscall wrappers may have syscalls symbols aliases prefixed
37 * with "SyS" instead of "sys", leading to an unwanted
38 * mismatch.
39 */
40 if (start->name && !strcmp(start->name + 3, str + 3))
41 return start;
42 }
43 return NULL;
44}
45
46static struct syscall_metadata *syscall_nr_to_meta(int nr)
47{
48 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
49 return NULL;
50
51 return syscalls_metadata[nr];
52}
53
17enum print_line_t 54enum print_line_t
18print_syscall_enter(struct trace_iterator *iter, int flags) 55print_syscall_enter(struct trace_iterator *iter, int flags)
19{ 56{
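
find_syscall_meta() above matches metadata names against kallsyms by skipping the first three characters, so "sys_..." in the metadata still matches the "SyS_..." aliases emitted on architectures that wrap their syscalls. A two-line illustration of that comparison (sys_read/SyS_read are just example spellings):

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Illustrative names: what the metadata records vs. what kallsyms
	 * may return on arches that use syscall wrappers. */
	const char *meta_name = "sys_read";
	const char *kallsyms_name = "SyS_read";

	/* A full compare fails because of the sys/SyS prefix... */
	printf("strcmp full   : %d\n", strcmp(meta_name, kallsyms_name) == 0);

	/* ...but skipping the 3-byte prefix, as find_syscall_meta() does,
	 * matches the two spellings of the same syscall. */
	printf("strcmp +3 skip: %d\n",
	       strcmp(meta_name + 3, kallsyms_name + 3) == 0);
	return 0;
}
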
@@ -30,7 +67,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
30 if (!entry) 67 if (!entry)
31 goto end; 68 goto end;
32 69
33 if (entry->enter_id != ent->type) { 70 if (entry->enter_event->id != ent->type) {
34 WARN_ON_ONCE(1); 71 WARN_ON_ONCE(1);
35 goto end; 72 goto end;
36 } 73 }
@@ -85,7 +122,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
85 return TRACE_TYPE_HANDLED; 122 return TRACE_TYPE_HANDLED;
86 } 123 }
87 124
88 if (entry->exit_id != ent->type) { 125 if (entry->exit_event->id != ent->type) {
89 WARN_ON_ONCE(1); 126 WARN_ON_ONCE(1);
90 return TRACE_TYPE_UNHANDLED; 127 return TRACE_TYPE_UNHANDLED;
91 } 128 }
@@ -103,92 +140,79 @@ extern char *__bad_type_size(void);
103#define SYSCALL_FIELD(type, name) \ 140#define SYSCALL_FIELD(type, name) \
104 sizeof(type) != sizeof(trace.name) ? \ 141 sizeof(type) != sizeof(trace.name) ? \
105 __bad_type_size() : \ 142 __bad_type_size() : \
106 #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) 143 #type, #name, offsetof(typeof(trace), name), \
144 sizeof(trace.name), is_signed_type(type)
107 145
108int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 146static
147int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
109{ 148{
110 int i; 149 int i;
111 int nr; 150 int pos = 0;
112 int ret;
113 struct syscall_metadata *entry;
114 struct syscall_trace_enter trace;
115 int offset = offsetof(struct syscall_trace_enter, args);
116 151
117 nr = syscall_name_to_nr(call->data); 152 /* When len=0, we just calculate the needed length */
118 entry = syscall_nr_to_meta(nr); 153#define LEN_OR_ZERO (len ? len - pos : 0)
119
120 if (!entry)
121 return 0;
122
123 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
124 SYSCALL_FIELD(int, nr));
125 if (!ret)
126 return 0;
127 154
155 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
128 for (i = 0; i < entry->nb_args; i++) { 156 for (i = 0; i < entry->nb_args; i++) {
129 ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], 157 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
130 entry->args[i]); 158 entry->args[i], sizeof(unsigned long),
131 if (!ret) 159 i == entry->nb_args - 1 ? "" : ", ");
132 return 0;
133 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
134 sizeof(unsigned long));
135 if (!ret)
136 return 0;
137 offset += sizeof(unsigned long);
138 } 160 }
161 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
139 162
140 trace_seq_puts(s, "\nprint fmt: \"");
141 for (i = 0; i < entry->nb_args; i++) { 163 for (i = 0; i < entry->nb_args; i++) {
142 ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i], 164 pos += snprintf(buf + pos, LEN_OR_ZERO,
143 sizeof(unsigned long), 165 ", ((unsigned long)(REC->%s))", entry->args[i]);
144 i == entry->nb_args - 1 ? "" : ", ");
145 if (!ret)
146 return 0;
147 } 166 }
148 trace_seq_putc(s, '"');
149 167
150 for (i = 0; i < entry->nb_args; i++) { 168#undef LEN_OR_ZERO
151 ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
152 entry->args[i]);
153 if (!ret)
154 return 0;
155 }
156 169
157 return trace_seq_putc(s, '\n'); 170 /* return the length of print_fmt */
171 return pos;
158} 172}
159 173
160int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) 174static int set_syscall_print_fmt(struct ftrace_event_call *call)
161{ 175{
162 int ret; 176 char *print_fmt;
163 struct syscall_trace_exit trace; 177 int len;
178 struct syscall_metadata *entry = call->data;
164 179
165 ret = trace_seq_printf(s, 180 if (entry->enter_event != call) {
166 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 181 call->print_fmt = "\"0x%lx\", REC->ret";
167 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
168 SYSCALL_FIELD(int, nr),
169 SYSCALL_FIELD(long, ret));
170 if (!ret)
171 return 0; 182 return 0;
183 }
184
185 /* First: called with 0 length to calculate the needed length */
186 len = __set_enter_print_fmt(entry, NULL, 0);
187
188 print_fmt = kmalloc(len + 1, GFP_KERNEL);
189 if (!print_fmt)
190 return -ENOMEM;
191
192 /* Second: actually write the @print_fmt */
193 __set_enter_print_fmt(entry, print_fmt, len + 1);
194 call->print_fmt = print_fmt;
195
196 return 0;
197}
198
199static void free_syscall_print_fmt(struct ftrace_event_call *call)
200{
201 struct syscall_metadata *entry = call->data;
172 202
173 return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); 203 if (entry->enter_event == call)
204 kfree(call->print_fmt);
174} 205}
175 206
176int syscall_enter_define_fields(struct ftrace_event_call *call) 207int syscall_enter_define_fields(struct ftrace_event_call *call)
177{ 208{
178 struct syscall_trace_enter trace; 209 struct syscall_trace_enter trace;
179 struct syscall_metadata *meta; 210 struct syscall_metadata *meta = call->data;
180 int ret; 211 int ret;
181 int nr;
182 int i; 212 int i;
183 int offset = offsetof(typeof(trace), args); 213 int offset = offsetof(typeof(trace), args);
184 214
185 nr = syscall_name_to_nr(call->data); 215 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
186 meta = syscall_nr_to_meta(nr);
187
188 if (!meta)
189 return 0;
190
191 ret = trace_define_common_fields(call);
192 if (ret) 216 if (ret)
193 return ret; 217 return ret;
194 218
@@ -208,11 +232,11 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
208 struct syscall_trace_exit trace; 232 struct syscall_trace_exit trace;
209 int ret; 233 int ret;
210 234
211 ret = trace_define_common_fields(call); 235 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
212 if (ret) 236 if (ret)
213 return ret; 237 return ret;
214 238
215 ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0, 239 ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
216 FILTER_OTHER); 240 FILTER_OTHER);
217 241
218 return ret; 242 return ret;
@@ -239,8 +263,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
239 263
240 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 264 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
241 265
242 event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, 266 event = trace_current_buffer_lock_reserve(&buffer,
243 size, 0, 0); 267 sys_data->enter_event->id, size, 0, 0);
244 if (!event) 268 if (!event)
245 return; 269 return;
246 270
@@ -271,8 +295,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
271 if (!sys_data) 295 if (!sys_data)
272 return; 296 return;
273 297
274 event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, 298 event = trace_current_buffer_lock_reserve(&buffer,
275 sizeof(*entry), 0, 0); 299 sys_data->exit_event->id, sizeof(*entry), 0, 0);
276 if (!event) 300 if (!event)
277 return; 301 return;
278 302
@@ -285,23 +309,18 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
285 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 309 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
286} 310}
287 311
288int reg_event_syscall_enter(void *ptr) 312int reg_event_syscall_enter(struct ftrace_event_call *call)
289{ 313{
290 int ret = 0; 314 int ret = 0;
291 int num; 315 int num;
292 char *name;
293 316
294 name = (char *)ptr; 317 num = ((struct syscall_metadata *)call->data)->syscall_nr;
295 num = syscall_name_to_nr(name);
296 if (num < 0 || num >= NR_syscalls) 318 if (num < 0 || num >= NR_syscalls)
297 return -ENOSYS; 319 return -ENOSYS;
298 mutex_lock(&syscall_trace_lock); 320 mutex_lock(&syscall_trace_lock);
299 if (!sys_refcount_enter) 321 if (!sys_refcount_enter)
300 ret = register_trace_sys_enter(ftrace_syscall_enter); 322 ret = register_trace_sys_enter(ftrace_syscall_enter);
301 if (ret) { 323 if (!ret) {
302 pr_info("event trace: Could not activate"
303 "syscall entry trace point");
304 } else {
305 set_bit(num, enabled_enter_syscalls); 324 set_bit(num, enabled_enter_syscalls);
306 sys_refcount_enter++; 325 sys_refcount_enter++;
307 } 326 }
@@ -309,13 +328,11 @@ int reg_event_syscall_enter(void *ptr)
309 return ret; 328 return ret;
310} 329}
311 330
312void unreg_event_syscall_enter(void *ptr) 331void unreg_event_syscall_enter(struct ftrace_event_call *call)
313{ 332{
314 int num; 333 int num;
315 char *name;
316 334
317 name = (char *)ptr; 335 num = ((struct syscall_metadata *)call->data)->syscall_nr;
318 num = syscall_name_to_nr(name);
319 if (num < 0 || num >= NR_syscalls) 336 if (num < 0 || num >= NR_syscalls)
320 return; 337 return;
321 mutex_lock(&syscall_trace_lock); 338 mutex_lock(&syscall_trace_lock);
@@ -326,23 +343,18 @@ void unreg_event_syscall_enter(void *ptr)
326 mutex_unlock(&syscall_trace_lock); 343 mutex_unlock(&syscall_trace_lock);
327} 344}
328 345
329int reg_event_syscall_exit(void *ptr) 346int reg_event_syscall_exit(struct ftrace_event_call *call)
330{ 347{
331 int ret = 0; 348 int ret = 0;
332 int num; 349 int num;
333 char *name;
334 350
335 name = (char *)ptr; 351 num = ((struct syscall_metadata *)call->data)->syscall_nr;
336 num = syscall_name_to_nr(name);
337 if (num < 0 || num >= NR_syscalls) 352 if (num < 0 || num >= NR_syscalls)
338 return -ENOSYS; 353 return -ENOSYS;
339 mutex_lock(&syscall_trace_lock); 354 mutex_lock(&syscall_trace_lock);
340 if (!sys_refcount_exit) 355 if (!sys_refcount_exit)
341 ret = register_trace_sys_exit(ftrace_syscall_exit); 356 ret = register_trace_sys_exit(ftrace_syscall_exit);
342 if (ret) { 357 if (!ret) {
343 pr_info("event trace: Could not activate"
344 "syscall exit trace point");
345 } else {
346 set_bit(num, enabled_exit_syscalls); 358 set_bit(num, enabled_exit_syscalls);
347 sys_refcount_exit++; 359 sys_refcount_exit++;
348 } 360 }
@@ -350,13 +362,11 @@ int reg_event_syscall_exit(void *ptr)
350 return ret; 362 return ret;
351} 363}
352 364
353void unreg_event_syscall_exit(void *ptr) 365void unreg_event_syscall_exit(struct ftrace_event_call *call)
354{ 366{
355 int num; 367 int num;
356 char *name;
357 368
358 name = (char *)ptr; 369 num = ((struct syscall_metadata *)call->data)->syscall_nr;
359 num = syscall_name_to_nr(name);
360 if (num < 0 || num >= NR_syscalls) 370 if (num < 0 || num >= NR_syscalls)
361 return; 371 return;
362 mutex_lock(&syscall_trace_lock); 372 mutex_lock(&syscall_trace_lock);
@@ -367,15 +377,56 @@ void unreg_event_syscall_exit(void *ptr)
367 mutex_unlock(&syscall_trace_lock); 377 mutex_unlock(&syscall_trace_lock);
368} 378}
369 379
370struct trace_event event_syscall_enter = { 380int init_syscall_trace(struct ftrace_event_call *call)
371 .trace = print_syscall_enter, 381{
372}; 382 int id;
383
384 if (set_syscall_print_fmt(call) < 0)
385 return -ENOMEM;
373 386
374struct trace_event event_syscall_exit = { 387 id = trace_event_raw_init(call);
375 .trace = print_syscall_exit,
376};
377 388
378#ifdef CONFIG_EVENT_PROFILE 389 if (id < 0) {
390 free_syscall_print_fmt(call);
391 return id;
392 }
393
394 return id;
395}
396
397unsigned long __init arch_syscall_addr(int nr)
398{
399 return (unsigned long)sys_call_table[nr];
400}
401
402int __init init_ftrace_syscalls(void)
403{
404 struct syscall_metadata *meta;
405 unsigned long addr;
406 int i;
407
408 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
409 NR_syscalls, GFP_KERNEL);
410 if (!syscalls_metadata) {
411 WARN_ON(1);
412 return -ENOMEM;
413 }
414
415 for (i = 0; i < NR_syscalls; i++) {
416 addr = arch_syscall_addr(i);
417 meta = find_syscall_meta(addr);
418 if (!meta)
419 continue;
420
421 meta->syscall_nr = i;
422 syscalls_metadata[i] = meta;
423 }
424
425 return 0;
426}
427core_initcall(init_ftrace_syscalls);
428
429#ifdef CONFIG_PERF_EVENTS
379 430
380static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 431static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
381static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); 432static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
@@ -387,10 +438,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
387 struct syscall_metadata *sys_data; 438 struct syscall_metadata *sys_data;
388 struct syscall_trace_enter *rec; 439 struct syscall_trace_enter *rec;
389 unsigned long flags; 440 unsigned long flags;
390 char *raw_data;
391 int syscall_nr; 441 int syscall_nr;
442 int rctx;
392 int size; 443 int size;
393 int cpu;
394 444
395 syscall_nr = syscall_get_nr(current, regs); 445 syscall_nr = syscall_get_nr(current, regs);
396 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 446 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -409,44 +459,23 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
409 "profile buffer not large enough")) 459 "profile buffer not large enough"))
410 return; 460 return;
411 461
412 /* Protect the per cpu buffer, begin the rcu read side */ 462 rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
413 local_irq_save(flags); 463 sys_data->enter_event->id, &rctx, &flags);
414 464 if (!rec)
415 cpu = smp_processor_id(); 465 return;
416
417 if (in_nmi())
418 raw_data = rcu_dereference(trace_profile_buf_nmi);
419 else
420 raw_data = rcu_dereference(trace_profile_buf);
421
422 if (!raw_data)
423 goto end;
424
425 raw_data = per_cpu_ptr(raw_data, cpu);
426
427 /* zero the dead bytes from align to not leak stack to user */
428 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
429 466
430 rec = (struct syscall_trace_enter *) raw_data;
431 tracing_generic_entry_update(&rec->ent, 0, 0);
432 rec->ent.type = sys_data->enter_id;
433 rec->nr = syscall_nr; 467 rec->nr = syscall_nr;
434 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 468 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
435 (unsigned long *)&rec->args); 469 (unsigned long *)&rec->args);
436 perf_tp_event(sys_data->enter_id, 0, 1, rec, size); 470 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
437
438end:
439 local_irq_restore(flags);
440} 471}
441 472
442int reg_prof_syscall_enter(char *name) 473int prof_sysenter_enable(struct ftrace_event_call *call)
443{ 474{
444 int ret = 0; 475 int ret = 0;
445 int num; 476 int num;
446 477
447 num = syscall_name_to_nr(name); 478 num = ((struct syscall_metadata *)call->data)->syscall_nr;
448 if (num < 0 || num >= NR_syscalls)
449 return -ENOSYS;
450 479
451 mutex_lock(&syscall_trace_lock); 480 mutex_lock(&syscall_trace_lock);
452 if (!sys_prof_refcount_enter) 481 if (!sys_prof_refcount_enter)
@@ -462,13 +491,11 @@ int reg_prof_syscall_enter(char *name)
462 return ret; 491 return ret;
463} 492}
464 493
465void unreg_prof_syscall_enter(char *name) 494void prof_sysenter_disable(struct ftrace_event_call *call)
466{ 495{
467 int num; 496 int num;
468 497
469 num = syscall_name_to_nr(name); 498 num = ((struct syscall_metadata *)call->data)->syscall_nr;
470 if (num < 0 || num >= NR_syscalls)
471 return;
472 499
473 mutex_lock(&syscall_trace_lock); 500 mutex_lock(&syscall_trace_lock);
474 sys_prof_refcount_enter--; 501 sys_prof_refcount_enter--;
@@ -484,9 +511,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
484 struct syscall_trace_exit *rec; 511 struct syscall_trace_exit *rec;
485 unsigned long flags; 512 unsigned long flags;
486 int syscall_nr; 513 int syscall_nr;
487 char *raw_data; 514 int rctx;
488 int size; 515 int size;
489 int cpu;
490 516
491 syscall_nr = syscall_get_nr(current, regs); 517 syscall_nr = syscall_get_nr(current, regs);
492 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 518 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -508,51 +534,30 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
508 "exit event has grown above profile buffer size")) 534 "exit event has grown above profile buffer size"))
509 return; 535 return;
510 536
511 /* Protect the per cpu buffer, begin the rcu read side */ 537 rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size,
512 local_irq_save(flags); 538 sys_data->exit_event->id, &rctx, &flags);
513 cpu = smp_processor_id(); 539 if (!rec)
514 540 return;
515 if (in_nmi())
516 raw_data = rcu_dereference(trace_profile_buf_nmi);
517 else
518 raw_data = rcu_dereference(trace_profile_buf);
519
520 if (!raw_data)
521 goto end;
522
523 raw_data = per_cpu_ptr(raw_data, cpu);
524
525 /* zero the dead bytes from align to not leak stack to user */
526 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
527
528 rec = (struct syscall_trace_exit *)raw_data;
529 541
530 tracing_generic_entry_update(&rec->ent, 0, 0);
531 rec->ent.type = sys_data->exit_id;
532 rec->nr = syscall_nr; 542 rec->nr = syscall_nr;
533 rec->ret = syscall_get_return_value(current, regs); 543 rec->ret = syscall_get_return_value(current, regs);
534 544
535 perf_tp_event(sys_data->exit_id, 0, 1, rec, size); 545 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
536
537end:
538 local_irq_restore(flags);
539} 546}
540 547
541int reg_prof_syscall_exit(char *name) 548int prof_sysexit_enable(struct ftrace_event_call *call)
542{ 549{
543 int ret = 0; 550 int ret = 0;
544 int num; 551 int num;
545 552
546 num = syscall_name_to_nr(name); 553 num = ((struct syscall_metadata *)call->data)->syscall_nr;
547 if (num < 0 || num >= NR_syscalls)
548 return -ENOSYS;
549 554
550 mutex_lock(&syscall_trace_lock); 555 mutex_lock(&syscall_trace_lock);
551 if (!sys_prof_refcount_exit) 556 if (!sys_prof_refcount_exit)
552 ret = register_trace_sys_exit(prof_syscall_exit); 557 ret = register_trace_sys_exit(prof_syscall_exit);
553 if (ret) { 558 if (ret) {
554 pr_info("event trace: Could not activate" 559 pr_info("event trace: Could not activate"
555 "syscall entry trace point"); 560 "syscall exit trace point");
556 } else { 561 } else {
557 set_bit(num, enabled_prof_exit_syscalls); 562 set_bit(num, enabled_prof_exit_syscalls);
558 sys_prof_refcount_exit++; 563 sys_prof_refcount_exit++;
@@ -561,13 +566,11 @@ int reg_prof_syscall_exit(char *name)
561 return ret; 566 return ret;
562} 567}
563 568
564void unreg_prof_syscall_exit(char *name) 569void prof_sysexit_disable(struct ftrace_event_call *call)
565{ 570{
566 int num; 571 int num;
567 572
568 num = syscall_name_to_nr(name); 573 num = ((struct syscall_metadata *)call->data)->syscall_nr;
569 if (num < 0 || num >= NR_syscalls)
570 return;
571 574
572 mutex_lock(&syscall_trace_lock); 575 mutex_lock(&syscall_trace_lock);
573 sys_prof_refcount_exit--; 576 sys_prof_refcount_exit--;
@@ -577,6 +580,5 @@ void unreg_prof_syscall_exit(char *name)
577 mutex_unlock(&syscall_trace_lock); 580 mutex_unlock(&syscall_trace_lock);
578} 581}
579 582
580#endif 583#endif /* CONFIG_PERF_EVENTS */
581
582 584
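The enable/disable hunks above drop the syscall_name_to_nr() lookup and the NR_syscalls range check because the ftrace_event_call now carries its syscall_metadata directly. A sketch of the access pattern the four renamed helpers share, assuming call->data was populated when the syscall event was registered (syscall_nr_from_call() is a hypothetical helper, not in the patch):

static inline int syscall_nr_from_call(struct ftrace_event_call *call)
{
	/* assumed to be installed at event registration time */
	struct syscall_metadata *meta = call->data;

	return meta->syscall_nr;
}

Validation of the syscall number thus happens once, when the metadata is set up, rather than on every profile enable/disable call.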
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index f6693969287d..a7974a552ca9 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -93,6 +93,7 @@ static const struct stacktrace_ops backtrace_ops = {
93 .warning_symbol = backtrace_warning_symbol, 93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack, 94 .stack = backtrace_stack,
95 .address = backtrace_address, 95 .address = backtrace_address,
96 .walk_stack = print_context_stack,
96}; 97};
97 98
98static int 99static int
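The one-line trace_sysprof.c change fills in the walk_stack callback that struct stacktrace_ops gained in this kernel generation; print_context_stack is the stock x86 frame walker. A sketch of a complete ops table with the new field, under the assumption that the other callback signatures match the x86 <asm/stacktrace.h> of this series (the my_* callbacks are hypothetical stand-ins for sysprof's own):

#include <asm/stacktrace.h>

static void my_warning(void *data, char *msg) { }
static void my_warning_symbol(void *data, char *msg, unsigned long symbol) { }

static int my_stack(void *data, char *name)
{
	return 0;		/* keep walking nested stacks */
}

static void my_address(void *data, unsigned long addr, int reliable)
{
	/* record one return address per frame */
}

static const struct stacktrace_ops my_backtrace_ops = {
	.warning	= my_warning,
	.warning_symbol	= my_warning_symbol,
	.stack		= my_stack,
	.address	= my_address,
	.walk_stack	= print_context_stack,	/* the field this hunk adds */
};

An ops table left with a NULL walk_stack would presumably crash inside dump_trace() once the walker became mandatory, which is why the field is filled in here rather than left unset.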