Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                  | 125
-rw-r--r--  kernel/trace/Makefile                 |   4
-rw-r--r--  kernel/trace/ftrace.c                 | 135
-rw-r--r--  kernel/trace/power-traces.c           |   2
-rw-r--r--  kernel/trace/ring_buffer.c            |  73
-rw-r--r--  kernel/trace/trace.c                  | 452
-rw-r--r--  kernel/trace/trace.h                  |  33
-rw-r--r--  kernel/trace/trace_branch.c           |  19
-rw-r--r--  kernel/trace/trace_clock.c            |   8
-rw-r--r--  kernel/trace/trace_event_profile.c    |  58
-rw-r--r--  kernel/trace/trace_events.c           | 122
-rw-r--r--  kernel/trace/trace_events_filter.c    |  33
-rw-r--r--  kernel/trace/trace_export.c           |  98
-rw-r--r--  kernel/trace/trace_functions_graph.c  | 241
-rw-r--r--  kernel/trace/trace_hw_branches.c      |  51
-rw-r--r--  kernel/trace/trace_irqsoff.c          |   2
-rw-r--r--  kernel/trace/trace_kprobe.c           | 382
-rw-r--r--  kernel/trace/trace_ksym.c             | 193
-rw-r--r--  kernel/trace/trace_output.c           |  75
-rw-r--r--  kernel/trace/trace_sched_wakeup.c     |  16
-rw-r--r--  kernel/trace/trace_selftest.c         |   4
-rw-r--r--  kernel/trace/trace_stack.c            |  40
-rw-r--r--  kernel/trace/trace_syscalls.c         | 207
-rw-r--r--  kernel/trace/trace_sysprof.c          |   1
24 files changed, 1278 insertions(+), 1096 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d006554888dc..13e13d428cd3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -12,39 +12,37 @@ config NOP_TRACER
12config HAVE_FTRACE_NMI_ENTER 12config HAVE_FTRACE_NMI_ENTER
13 bool 13 bool
14 help 14 help
15 See Documentation/trace/ftrace-implementation.txt 15 See Documentation/trace/ftrace-design.txt
16 16
17config HAVE_FUNCTION_TRACER 17config HAVE_FUNCTION_TRACER
18 bool 18 bool
19 help 19 help
20 See Documentation/trace/ftrace-implementation.txt 20 See Documentation/trace/ftrace-design.txt
21 21
22config HAVE_FUNCTION_GRAPH_TRACER 22config HAVE_FUNCTION_GRAPH_TRACER
23 bool 23 bool
24 help 24 help
25 See Documentation/trace/ftrace-implementation.txt 25 See Documentation/trace/ftrace-design.txt
26 26
27config HAVE_FUNCTION_GRAPH_FP_TEST 27config HAVE_FUNCTION_GRAPH_FP_TEST
28 bool 28 bool
29 help 29 help
30 An arch may pass in a unique value (frame pointer) to both the 30 See Documentation/trace/ftrace-design.txt
31 entering and exiting of a function. On exit, the value is compared
32 and if it does not match, then it will panic the kernel.
33 31
34config HAVE_FUNCTION_TRACE_MCOUNT_TEST 32config HAVE_FUNCTION_TRACE_MCOUNT_TEST
35 bool 33 bool
36 help 34 help
37 See Documentation/trace/ftrace-implementation.txt 35 See Documentation/trace/ftrace-design.txt
38 36
39config HAVE_DYNAMIC_FTRACE 37config HAVE_DYNAMIC_FTRACE
40 bool 38 bool
41 help 39 help
42 See Documentation/trace/ftrace-implementation.txt 40 See Documentation/trace/ftrace-design.txt
43 41
44config HAVE_FTRACE_MCOUNT_RECORD 42config HAVE_FTRACE_MCOUNT_RECORD
45 bool 43 bool
46 help 44 help
47 See Documentation/trace/ftrace-implementation.txt 45 See Documentation/trace/ftrace-design.txt
48 46
49config HAVE_HW_BRANCH_TRACER 47config HAVE_HW_BRANCH_TRACER
50 bool 48 bool
@@ -52,7 +50,7 @@ config HAVE_HW_BRANCH_TRACER
52config HAVE_SYSCALL_TRACEPOINTS 50config HAVE_SYSCALL_TRACEPOINTS
53 bool 51 bool
54 help 52 help
55 See Documentation/trace/ftrace-implementation.txt 53 See Documentation/trace/ftrace-design.txt
56 54
57config TRACER_MAX_TRACE 55config TRACER_MAX_TRACE
58 bool 56 bool
@@ -83,7 +81,7 @@ config RING_BUFFER_ALLOW_SWAP
83# This allows those options to appear when no other tracer is selected. But the 81# This allows those options to appear when no other tracer is selected. But the
84# options do not appear when something else selects it. We need the two options 82# options do not appear when something else selects it. We need the two options
85# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the 83# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
86# hidding of the automatic options. 84# hiding of the automatic options.
87 85
88config TRACING 86config TRACING
89 bool 87 bool
@@ -119,7 +117,7 @@ menuconfig FTRACE
119 bool "Tracers" 117 bool "Tracers"
120 default y if DEBUG_KERNEL 118 default y if DEBUG_KERNEL
121 help 119 help
122 Enable the kernel tracing infrastructure. 120 Enable the kernel tracing infrastructure.
123 121
124if FTRACE 122if FTRACE
125 123
@@ -133,7 +131,7 @@ config FUNCTION_TRACER
133 help 131 help
134 Enable the kernel to trace every kernel function. This is done 132 Enable the kernel to trace every kernel function. This is done
135 by using a compiler feature to insert a small, 5-byte No-Operation 133 by using a compiler feature to insert a small, 5-byte No-Operation
136 instruction to the beginning of every kernel function, which NOP 134 instruction at the beginning of every kernel function, which NOP
137 sequence is then dynamically patched into a tracer call when 135 sequence is then dynamically patched into a tracer call when
138 tracing is enabled by the administrator. If it's runtime disabled 136 tracing is enabled by the administrator. If it's runtime disabled
139 (the bootup default), then the overhead of the instructions is very 137 (the bootup default), then the overhead of the instructions is very
@@ -150,7 +148,7 @@ config FUNCTION_GRAPH_TRACER
150 and its entry. 148 and its entry.
151 Its first purpose is to trace the duration of functions and 149 Its first purpose is to trace the duration of functions and
152 draw a call graph for each thread with some information like 150 draw a call graph for each thread with some information like
153 the return value. This is done by setting the current return 151 the return value. This is done by setting the current return
154 address on the current task structure into a stack of calls. 152 address on the current task structure into a stack of calls.
155 153
156 154
@@ -173,7 +171,7 @@ config IRQSOFF_TRACER
173 171
174 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency 172 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
175 173
176 (Note that kernel size and overhead increases with this option 174 (Note that kernel size and overhead increase with this option
177 enabled. This option and the preempt-off timing option can be 175 enabled. This option and the preempt-off timing option can be
178 used together or separately.) 176 used together or separately.)
179 177
@@ -186,7 +184,7 @@ config PREEMPT_TRACER
186 select TRACER_MAX_TRACE 184 select TRACER_MAX_TRACE
187 select RING_BUFFER_ALLOW_SWAP 185 select RING_BUFFER_ALLOW_SWAP
188 help 186 help
189 This option measures the time spent in preemption off critical 187 This option measures the time spent in preemption-off critical
190 sections, with microsecond accuracy. 188 sections, with microsecond accuracy.
191 189
192 The default measurement method is a maximum search, which is 190 The default measurement method is a maximum search, which is
@@ -195,7 +193,7 @@ config PREEMPT_TRACER
195 193
196 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency 194 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
197 195
198 (Note that kernel size and overhead increases with this option 196 (Note that kernel size and overhead increase with this option
199 enabled. This option and the irqs-off timing option can be 197 enabled. This option and the irqs-off timing option can be
200 used together or separately.) 198 used together or separately.)
201 199
@@ -222,7 +220,7 @@ config ENABLE_DEFAULT_TRACERS
222 depends on !GENERIC_TRACER 220 depends on !GENERIC_TRACER
223 select TRACING 221 select TRACING
224 help 222 help
225 This tracer hooks to various trace points in the kernel 223 This tracer hooks to various trace points in the kernel,
226 allowing the user to pick and choose which trace point they 224 allowing the user to pick and choose which trace point they
227 want to trace. It also includes the sched_switch tracer plugin. 225 want to trace. It also includes the sched_switch tracer plugin.
228 226
@@ -265,19 +263,19 @@ choice
265 The likely/unlikely profiler only looks at the conditions that 263 The likely/unlikely profiler only looks at the conditions that
266 are annotated with a likely or unlikely macro. 264 are annotated with a likely or unlikely macro.
267 265
268 The "all branch" profiler will profile every if statement in the 266 The "all branch" profiler will profile every if-statement in the
269 kernel. This profiler will also enable the likely/unlikely 267 kernel. This profiler will also enable the likely/unlikely
270 profiler as well. 268 profiler.
271 269
272 Either of the above profilers add a bit of overhead to the system. 270 Either of the above profilers adds a bit of overhead to the system.
273 If unsure choose "No branch profiling". 271 If unsure, choose "No branch profiling".
274 272
275config BRANCH_PROFILE_NONE 273config BRANCH_PROFILE_NONE
276 bool "No branch profiling" 274 bool "No branch profiling"
277 help 275 help
278 No branch profiling. Branch profiling adds a bit of overhead. 276 No branch profiling. Branch profiling adds a bit of overhead.
279 Only enable it if you want to analyse the branching behavior. 277 Only enable it if you want to analyse the branching behavior.
280 Otherwise keep it disabled. 278 Otherwise keep it disabled.
281 279
282config PROFILE_ANNOTATED_BRANCHES 280config PROFILE_ANNOTATED_BRANCHES
283 bool "Trace likely/unlikely profiler" 281 bool "Trace likely/unlikely profiler"
@@ -288,7 +286,7 @@ config PROFILE_ANNOTATED_BRANCHES
288 286
289 /sys/kernel/debug/tracing/profile_annotated_branch 287 /sys/kernel/debug/tracing/profile_annotated_branch
290 288
291 Note: this will add a significant overhead, only turn this 289 Note: this will add a significant overhead; only turn this
292 on if you need to profile the system's use of these macros. 290 on if you need to profile the system's use of these macros.
293 291
294config PROFILE_ALL_BRANCHES 292config PROFILE_ALL_BRANCHES
@@ -305,7 +303,7 @@ config PROFILE_ALL_BRANCHES
305 303
306 This configuration, when enabled, will impose a great overhead 304 This configuration, when enabled, will impose a great overhead
307 on the system. This should only be enabled when the system 305 on the system. This should only be enabled when the system
308 is to be analyzed 306 is to be analyzed in much detail.
309endchoice 307endchoice
310 308
311config TRACING_BRANCHES 309config TRACING_BRANCHES
@@ -330,15 +328,6 @@ config BRANCH_TRACER
330 328
331 Say N if unsure. 329 Say N if unsure.
332 330
333config POWER_TRACER
334 bool "Trace power consumption behavior"
335 depends on X86
336 select GENERIC_TRACER
337 help
338 This tracer helps developers to analyze and optimize the kernels
339 power management decisions, specifically the C-state and P-state
340 behavior.
341
342config KSYM_TRACER 331config KSYM_TRACER
343 bool "Trace read and write access on kernel memory locations" 332 bool "Trace read and write access on kernel memory locations"
344 depends on HAVE_HW_BREAKPOINT 333 depends on HAVE_HW_BREAKPOINT
@@ -391,14 +380,14 @@ config HW_BRANCH_TRACER
391 select GENERIC_TRACER 380 select GENERIC_TRACER
392 help 381 help
393 This tracer records all branches on the system in a circular 382 This tracer records all branches on the system in a circular
394 buffer giving access to the last N branches for each cpu. 383 buffer, giving access to the last N branches for each cpu.
395 384
396config KMEMTRACE 385config KMEMTRACE
397 bool "Trace SLAB allocations" 386 bool "Trace SLAB allocations"
398 select GENERIC_TRACER 387 select GENERIC_TRACER
399 help 388 help
400 kmemtrace provides tracing for slab allocator functions, such as 389 kmemtrace provides tracing for slab allocator functions, such as
401 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected 390 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
402 data is then fed to the userspace application in order to analyse 391 data is then fed to the userspace application in order to analyse
403 allocation hotspots, internal fragmentation and so on, making it 392 allocation hotspots, internal fragmentation and so on, making it
404 possible to see how well an allocator performs, as well as debug 393 possible to see how well an allocator performs, as well as debug
@@ -417,15 +406,15 @@ config WORKQUEUE_TRACER
417 bool "Trace workqueues" 406 bool "Trace workqueues"
418 select GENERIC_TRACER 407 select GENERIC_TRACER
419 help 408 help
420 The workqueue tracer provides some statistical informations 409 The workqueue tracer provides some statistical information
421 about each cpu workqueue thread such as the number of the 410 about each cpu workqueue thread such as the number of the
422 works inserted and executed since their creation. It can help 411 works inserted and executed since their creation. It can help
423 to evaluate the amount of work each of them have to perform. 412 to evaluate the amount of work each of them has to perform.
424 For example it can help a developer to decide whether he should 413 For example it can help a developer to decide whether he should
425 choose a per cpu workqueue instead of a singlethreaded one. 414 choose a per-cpu workqueue instead of a singlethreaded one.
426 415
427config BLK_DEV_IO_TRACE 416config BLK_DEV_IO_TRACE
428 bool "Support for tracing block io actions" 417 bool "Support for tracing block IO actions"
429 depends on SYSFS 418 depends on SYSFS
430 depends on BLOCK 419 depends on BLOCK
431 select RELAY 420 select RELAY
@@ -451,20 +440,20 @@ config BLK_DEV_IO_TRACE
451 440
452config KPROBE_EVENT 441config KPROBE_EVENT
453 depends on KPROBES 442 depends on KPROBES
454 depends on X86 443 depends on HAVE_REGS_AND_STACK_ACCESS_API
455 bool "Enable kprobes-based dynamic events" 444 bool "Enable kprobes-based dynamic events"
456 select TRACING 445 select TRACING
457 default y 446 default y
458 help 447 help
459 This allows the user to add tracing events (similar to tracepoints) on the fly 448 This allows the user to add tracing events (similar to tracepoints)
460 via the ftrace interface. See Documentation/trace/kprobetrace.txt 449 on the fly via the ftrace interface. See
461 for more details. 450 Documentation/trace/kprobetrace.txt for more details.
462 451
463 Those events can be inserted wherever kprobes can probe, and record 452 Those events can be inserted wherever kprobes can probe, and record
464 various register and memory values. 453 various register and memory values.
465 454
466 This option is also required by perf-probe subcommand of perf tools. If 455 This option is also required by perf-probe subcommand of perf tools.
467 you want to use perf tools, this option is strongly recommended. 456 If you want to use perf tools, this option is strongly recommended.
468 457
469config DYNAMIC_FTRACE 458config DYNAMIC_FTRACE
470 bool "enable/disable ftrace tracepoints dynamically" 459 bool "enable/disable ftrace tracepoints dynamically"
@@ -472,32 +461,32 @@ config DYNAMIC_FTRACE
472 depends on HAVE_DYNAMIC_FTRACE 461 depends on HAVE_DYNAMIC_FTRACE
473 default y 462 default y
474 help 463 help
475 This option will modify all the calls to ftrace dynamically 464 This option will modify all the calls to ftrace dynamically
476 (will patch them out of the binary image and replaces them 465 (will patch them out of the binary image and replace them
477 with a No-Op instruction) as they are called. A table is 466 with a No-Op instruction) as they are called. A table is
478 created to dynamically enable them again. 467 created to dynamically enable them again.
479 468
480 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise 469 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
481 has native performance as long as no tracing is active. 470 otherwise has native performance as long as no tracing is active.
482 471
483 The changes to the code are done by a kernel thread that 472 The changes to the code are done by a kernel thread that
484 wakes up once a second and checks to see if any ftrace calls 473 wakes up once a second and checks to see if any ftrace calls
485 were made. If so, it runs stop_machine (stops all CPUS) 474 were made. If so, it runs stop_machine (stops all CPUS)
486 and modifies the code to jump over the call to ftrace. 475 and modifies the code to jump over the call to ftrace.
487 476
488config FUNCTION_PROFILER 477config FUNCTION_PROFILER
489 bool "Kernel function profiler" 478 bool "Kernel function profiler"
490 depends on FUNCTION_TRACER 479 depends on FUNCTION_TRACER
491 default n 480 default n
492 help 481 help
493 This option enables the kernel function profiler. A file is created 482 This option enables the kernel function profiler. A file is created
494 in debugfs called function_profile_enabled which defaults to zero. 483 in debugfs called function_profile_enabled which defaults to zero.
495 When a 1 is echoed into this file profiling begins, and when a 484 When a 1 is echoed into this file profiling begins, and when a
496 zero is entered, profiling stops. A file in the trace_stats 485 zero is entered, profiling stops. A "functions" file is created in
497 directory called functions, that show the list of functions that 486 the trace_stats directory; this file shows the list of functions that
498 have been hit and their counters. 487 have been hit and their counters.
499 488
500 If in doubt, say N 489 If in doubt, say N.
501 490
502config FTRACE_MCOUNT_RECORD 491config FTRACE_MCOUNT_RECORD
503 def_bool y 492 def_bool y
@@ -556,8 +545,8 @@ config RING_BUFFER_BENCHMARK
556 tristate "Ring buffer benchmark stress tester" 545 tristate "Ring buffer benchmark stress tester"
557 depends on RING_BUFFER 546 depends on RING_BUFFER
558 help 547 help
559 This option creates a test to stress the ring buffer and bench mark it. 548 This option creates a test to stress the ring buffer and benchmark it.
560 It creates its own ring buffer such that it will not interfer with 549 It creates its own ring buffer such that it will not interfere with
561 any other users of the ring buffer (such as ftrace). It then creates 550 any other users of the ring buffer (such as ftrace). It then creates
562 a producer and consumer that will run for 10 seconds and sleep for 551 a producer and consumer that will run for 10 seconds and sleep for
563 10 seconds. Each interval it will print out the number of events 552 10 seconds. Each interval it will print out the number of events
@@ -566,7 +555,7 @@ config RING_BUFFER_BENCHMARK
566 It does not disable interrupts or raise its priority, so it may be 555 It does not disable interrupts or raise its priority, so it may be
567 affected by processes that are running. 556 affected by processes that are running.
568 557
569 If unsure, say N 558 If unsure, say N.
570 559
571endif # FTRACE 560endif # FTRACE
572 561
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index cd9ecd89ec77..d00c6fe23f54 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -51,7 +51,9 @@ endif
51obj-$(CONFIG_EVENT_TRACING) += trace_events.o 51obj-$(CONFIG_EVENT_TRACING) += trace_events.o
52obj-$(CONFIG_EVENT_TRACING) += trace_export.o 52obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54ifeq ($(CONFIG_PERF_EVENTS),y)
55obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o
56endif
55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 57obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 58obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
57obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o 59obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e51a1bcb7bed..83783579378f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,7 +22,6 @@
22#include <linux/hardirq.h> 22#include <linux/hardirq.h>
23#include <linux/kthread.h> 23#include <linux/kthread.h>
24#include <linux/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/kprobes.h>
26#include <linux/ftrace.h> 25#include <linux/ftrace.h>
27#include <linux/sysctl.h> 26#include <linux/sysctl.h>
28#include <linux/ctype.h> 27#include <linux/ctype.h>
@@ -898,36 +897,6 @@ static struct dyn_ftrace *ftrace_free_records;
898 } \ 897 } \
899 } 898 }
900 899
901#ifdef CONFIG_KPROBES
902
903static int frozen_record_count;
904
905static inline void freeze_record(struct dyn_ftrace *rec)
906{
907 if (!(rec->flags & FTRACE_FL_FROZEN)) {
908 rec->flags |= FTRACE_FL_FROZEN;
909 frozen_record_count++;
910 }
911}
912
913static inline void unfreeze_record(struct dyn_ftrace *rec)
914{
915 if (rec->flags & FTRACE_FL_FROZEN) {
916 rec->flags &= ~FTRACE_FL_FROZEN;
917 frozen_record_count--;
918 }
919}
920
921static inline int record_frozen(struct dyn_ftrace *rec)
922{
923 return rec->flags & FTRACE_FL_FROZEN;
924}
925#else
926# define freeze_record(rec) ({ 0; })
927# define unfreeze_record(rec) ({ 0; })
928# define record_frozen(rec) ({ 0; })
929#endif /* CONFIG_KPROBES */
930
931static void ftrace_free_rec(struct dyn_ftrace *rec) 900static void ftrace_free_rec(struct dyn_ftrace *rec)
932{ 901{
933 rec->freelist = ftrace_free_records; 902 rec->freelist = ftrace_free_records;
@@ -1025,6 +994,21 @@ static void ftrace_bug(int failed, unsigned long ip)
1025} 994}
1026 995
1027 996
997/* Return 1 if the address range is reserved for ftrace */
998int ftrace_text_reserved(void *start, void *end)
999{
1000 struct dyn_ftrace *rec;
1001 struct ftrace_page *pg;
1002
1003 do_for_each_ftrace_rec(pg, rec) {
1004 if (rec->ip <= (unsigned long)end &&
1005 rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
1006 return 1;
1007 } while_for_each_ftrace_rec();
1008 return 0;
1009}
1010
1011
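A hedged kernel-side sketch of how a probe-insertion path might consult the new helper; the caller name, headers and error code are illustrative assumptions, only ftrace_text_reserved() itself comes from the hunk above:

#include <linux/ftrace.h>	/* assumed to declare ftrace_text_reserved() */
#include <linux/errno.h>

static int example_check_probe_range(void *addr, size_t insn_len)
{
	void *end = (char *)addr + insn_len - 1;

	/* refuse to patch bytes that ftrace owns (mcount call sites) */
	if (ftrace_text_reserved(addr, end))
		return -EBUSY;

	return 0;
}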
1028static int 1012static int
1029__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1013__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1030{ 1014{
@@ -1076,14 +1060,6 @@ static void ftrace_replace_code(int enable)
1076 !(rec->flags & FTRACE_FL_CONVERTED)) 1060 !(rec->flags & FTRACE_FL_CONVERTED))
1077 continue; 1061 continue;
1078 1062
1079 /* ignore updates to this record's mcount site */
1080 if (get_kprobe((void *)rec->ip)) {
1081 freeze_record(rec);
1082 continue;
1083 } else {
1084 unfreeze_record(rec);
1085 }
1086
1087 failed = __ftrace_replace_code(rec, enable); 1063 failed = __ftrace_replace_code(rec, enable);
1088 if (failed) { 1064 if (failed) {
1089 rec->flags |= FTRACE_FL_FAILED; 1065 rec->flags |= FTRACE_FL_FAILED;
@@ -1690,7 +1666,7 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
1690static int ftrace_match(char *str, char *regex, int len, int type) 1666static int ftrace_match(char *str, char *regex, int len, int type)
1691{ 1667{
1692 int matched = 0; 1668 int matched = 0;
1693 char *ptr; 1669 int slen;
1694 1670
1695 switch (type) { 1671 switch (type) {
1696 case MATCH_FULL: 1672 case MATCH_FULL:
@@ -1706,8 +1682,8 @@ static int ftrace_match(char *str, char *regex, int len, int type)
1706 matched = 1; 1682 matched = 1;
1707 break; 1683 break;
1708 case MATCH_END_ONLY: 1684 case MATCH_END_ONLY:
1709 ptr = strstr(str, regex); 1685 slen = strlen(str);
1710 if (ptr && (ptr[len] == 0)) 1686 if (slen >= len && memcmp(str + slen - len, regex, len) == 0)
1711 matched = 1; 1687 matched = 1;
1712 break; 1688 break;
1713 } 1689 }
@@ -1724,7 +1700,7 @@ ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
1724 return ftrace_match(str, regex, len, type); 1700 return ftrace_match(str, regex, len, type);
1725} 1701}
1726 1702
1727static void ftrace_match_records(char *buff, int len, int enable) 1703static int ftrace_match_records(char *buff, int len, int enable)
1728{ 1704{
1729 unsigned int search_len; 1705 unsigned int search_len;
1730 struct ftrace_page *pg; 1706 struct ftrace_page *pg;
@@ -1733,6 +1709,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
1733 char *search; 1709 char *search;
1734 int type; 1710 int type;
1735 int not; 1711 int not;
1712 int found = 0;
1736 1713
1737 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1714 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1738 type = filter_parse_regex(buff, len, &search, &not); 1715 type = filter_parse_regex(buff, len, &search, &not);
@@ -1750,6 +1727,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
1750 rec->flags &= ~flag; 1727 rec->flags &= ~flag;
1751 else 1728 else
1752 rec->flags |= flag; 1729 rec->flags |= flag;
1730 found = 1;
1753 } 1731 }
1754 /* 1732 /*
1755 * Only enable filtering if we have a function that 1733 * Only enable filtering if we have a function that
@@ -1759,6 +1737,8 @@ static void ftrace_match_records(char *buff, int len, int enable)
1759 ftrace_filtered = 1; 1737 ftrace_filtered = 1;
1760 } while_for_each_ftrace_rec(); 1738 } while_for_each_ftrace_rec();
1761 mutex_unlock(&ftrace_lock); 1739 mutex_unlock(&ftrace_lock);
1740
1741 return found;
1762} 1742}
1763 1743
1764static int 1744static int
@@ -1780,7 +1760,7 @@ ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
1780 return 1; 1760 return 1;
1781} 1761}
1782 1762
1783static void ftrace_match_module_records(char *buff, char *mod, int enable) 1763static int ftrace_match_module_records(char *buff, char *mod, int enable)
1784{ 1764{
1785 unsigned search_len = 0; 1765 unsigned search_len = 0;
1786 struct ftrace_page *pg; 1766 struct ftrace_page *pg;
@@ -1789,6 +1769,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1789 char *search = buff; 1769 char *search = buff;
1790 unsigned long flag; 1770 unsigned long flag;
1791 int not = 0; 1771 int not = 0;
1772 int found = 0;
1792 1773
1793 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1774 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1794 1775
@@ -1819,12 +1800,15 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1819 rec->flags &= ~flag; 1800 rec->flags &= ~flag;
1820 else 1801 else
1821 rec->flags |= flag; 1802 rec->flags |= flag;
1803 found = 1;
1822 } 1804 }
1823 if (enable && (rec->flags & FTRACE_FL_FILTER)) 1805 if (enable && (rec->flags & FTRACE_FL_FILTER))
1824 ftrace_filtered = 1; 1806 ftrace_filtered = 1;
1825 1807
1826 } while_for_each_ftrace_rec(); 1808 } while_for_each_ftrace_rec();
1827 mutex_unlock(&ftrace_lock); 1809 mutex_unlock(&ftrace_lock);
1810
1811 return found;
1828} 1812}
1829 1813
1830/* 1814/*
@@ -1853,8 +1837,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
1853 if (!strlen(mod)) 1837 if (!strlen(mod))
1854 return -EINVAL; 1838 return -EINVAL;
1855 1839
1856 ftrace_match_module_records(func, mod, enable); 1840 if (ftrace_match_module_records(func, mod, enable))
1857 return 0; 1841 return 0;
1842 return -EINVAL;
1858} 1843}
1859 1844
1860static struct ftrace_func_command ftrace_mod_cmd = { 1845static struct ftrace_func_command ftrace_mod_cmd = {
@@ -2151,8 +2136,9 @@ static int ftrace_process_regex(char *buff, int len, int enable)
2151 func = strsep(&next, ":"); 2136 func = strsep(&next, ":");
2152 2137
2153 if (!next) { 2138 if (!next) {
2154 ftrace_match_records(func, len, enable); 2139 if (ftrace_match_records(func, len, enable))
2155 return 0; 2140 return 0;
2141 return ret;
2156 } 2142 }
2157 2143
2158 /* command found */ 2144 /* command found */
@@ -2198,10 +2184,9 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2198 !trace_parser_cont(parser)) { 2184 !trace_parser_cont(parser)) {
2199 ret = ftrace_process_regex(parser->buffer, 2185 ret = ftrace_process_regex(parser->buffer,
2200 parser->idx, enable); 2186 parser->idx, enable);
2187 trace_parser_clear(parser);
2201 if (ret) 2188 if (ret)
2202 goto out_unlock; 2189 goto out_unlock;
2203
2204 trace_parser_clear(parser);
2205 } 2190 }
2206 2191
2207 ret = read; 2192 ret = read;
@@ -2417,6 +2402,7 @@ static const struct file_operations ftrace_notrace_fops = {
2417static DEFINE_MUTEX(graph_lock); 2402static DEFINE_MUTEX(graph_lock);
2418 2403
2419int ftrace_graph_count; 2404int ftrace_graph_count;
2405int ftrace_graph_filter_enabled;
2420unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; 2406unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
2421 2407
2422static void * 2408static void *
@@ -2439,7 +2425,7 @@ static void *g_start(struct seq_file *m, loff_t *pos)
2439 mutex_lock(&graph_lock); 2425 mutex_lock(&graph_lock);
2440 2426
2441 /* Nothing, tell g_show to print all functions are enabled */ 2427 /* Nothing, tell g_show to print all functions are enabled */
2442 if (!ftrace_graph_count && !*pos) 2428 if (!ftrace_graph_filter_enabled && !*pos)
2443 return (void *)1; 2429 return (void *)1;
2444 2430
2445 return __g_next(m, pos); 2431 return __g_next(m, pos);
@@ -2485,6 +2471,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
2485 mutex_lock(&graph_lock); 2471 mutex_lock(&graph_lock);
2486 if ((file->f_mode & FMODE_WRITE) && 2472 if ((file->f_mode & FMODE_WRITE) &&
2487 (file->f_flags & O_TRUNC)) { 2473 (file->f_flags & O_TRUNC)) {
2474 ftrace_graph_filter_enabled = 0;
2488 ftrace_graph_count = 0; 2475 ftrace_graph_count = 0;
2489 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); 2476 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
2490 } 2477 }
@@ -2510,7 +2497,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2510 struct dyn_ftrace *rec; 2497 struct dyn_ftrace *rec;
2511 struct ftrace_page *pg; 2498 struct ftrace_page *pg;
2512 int search_len; 2499 int search_len;
2513 int found = 0; 2500 int fail = 1;
2514 int type, not; 2501 int type, not;
2515 char *search; 2502 char *search;
2516 bool exists; 2503 bool exists;
@@ -2521,38 +2508,51 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2521 2508
2522 /* decode regex */ 2509 /* decode regex */
2523 type = filter_parse_regex(buffer, strlen(buffer), &search, &not); 2510 type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
2524 if (not) 2511 if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
2525 return -EINVAL; 2512 return -EBUSY;
2526 2513
2527 search_len = strlen(search); 2514 search_len = strlen(search);
2528 2515
2529 mutex_lock(&ftrace_lock); 2516 mutex_lock(&ftrace_lock);
2530 do_for_each_ftrace_rec(pg, rec) { 2517 do_for_each_ftrace_rec(pg, rec) {
2531 2518
2532 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2533 break;
2534
2535 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) 2519 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
2536 continue; 2520 continue;
2537 2521
2538 if (ftrace_match_record(rec, search, search_len, type)) { 2522 if (ftrace_match_record(rec, search, search_len, type)) {
2539 /* ensure it is not already in the array */ 2523 /* if it is in the array */
2540 exists = false; 2524 exists = false;
2541 for (i = 0; i < *idx; i++) 2525 for (i = 0; i < *idx; i++) {
2542 if (array[i] == rec->ip) { 2526 if (array[i] == rec->ip) {
2543 exists = true; 2527 exists = true;
2544 break; 2528 break;
2545 } 2529 }
2546 if (!exists) { 2530 }
2547 array[(*idx)++] = rec->ip; 2531
2548 found = 1; 2532 if (!not) {
2533 fail = 0;
2534 if (!exists) {
2535 array[(*idx)++] = rec->ip;
2536 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2537 goto out;
2538 }
2539 } else {
2540 if (exists) {
2541 array[i] = array[--(*idx)];
2542 array[*idx] = 0;
2543 fail = 0;
2544 }
2549 } 2545 }
2550 } 2546 }
2551 } while_for_each_ftrace_rec(); 2547 } while_for_each_ftrace_rec();
2552 2548out:
2553 mutex_unlock(&ftrace_lock); 2549 mutex_unlock(&ftrace_lock);
2554 2550
2555 return found ? 0 : -EINVAL; 2551 if (fail)
2552 return -EINVAL;
2553
2554 ftrace_graph_filter_enabled = 1;
2555 return 0;
2556} 2556}
2557 2557
2558static ssize_t 2558static ssize_t
@@ -2562,16 +2562,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
2562 struct trace_parser parser; 2562 struct trace_parser parser;
2563 ssize_t read, ret; 2563 ssize_t read, ret;
2564 2564
2565 if (!cnt || cnt < 0) 2565 if (!cnt)
2566 return 0; 2566 return 0;
2567 2567
2568 mutex_lock(&graph_lock); 2568 mutex_lock(&graph_lock);
2569 2569
2570 if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
2571 ret = -EBUSY;
2572 goto out_unlock;
2573 }
2574
2575 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { 2570 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
2576 ret = -ENOMEM; 2571 ret = -ENOMEM;
2577 goto out_unlock; 2572 goto out_unlock;
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index e06c6e3d56a3..9f4f565b01e6 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -14,7 +14,5 @@
14#define CREATE_TRACE_POINTS 14#define CREATE_TRACE_POINTS
15#include <trace/events/power.h> 15#include <trace/events/power.h>
16 16
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
18EXPORT_TRACEPOINT_SYMBOL_GPL(power_end);
19EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); 17EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
20 18
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a1ca4956ab5e..8c1b2d290718 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -423,7 +423,7 @@ struct ring_buffer_per_cpu {
423 int cpu; 423 int cpu;
424 struct ring_buffer *buffer; 424 struct ring_buffer *buffer;
425 spinlock_t reader_lock; /* serialize readers */ 425 spinlock_t reader_lock; /* serialize readers */
426 raw_spinlock_t lock; 426 arch_spinlock_t lock;
427 struct lock_class_key lock_key; 427 struct lock_class_key lock_key;
428 struct list_head *pages; 428 struct list_head *pages;
429 struct buffer_page *head_page; /* read from head */ 429 struct buffer_page *head_page; /* read from head */
@@ -464,6 +464,8 @@ struct ring_buffer_iter {
464 struct ring_buffer_per_cpu *cpu_buffer; 464 struct ring_buffer_per_cpu *cpu_buffer;
465 unsigned long head; 465 unsigned long head;
466 struct buffer_page *head_page; 466 struct buffer_page *head_page;
467 struct buffer_page *cache_reader_page;
468 unsigned long cache_read;
467 u64 read_stamp; 469 u64 read_stamp;
468}; 470};
469 471
@@ -998,7 +1000,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
998 cpu_buffer->buffer = buffer; 1000 cpu_buffer->buffer = buffer;
999 spin_lock_init(&cpu_buffer->reader_lock); 1001 spin_lock_init(&cpu_buffer->reader_lock);
1000 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); 1002 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1001 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1003 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1002 1004
1003 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1005 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1004 GFP_KERNEL, cpu_to_node(cpu)); 1006 GFP_KERNEL, cpu_to_node(cpu));
@@ -1193,9 +1195,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1193 struct list_head *p; 1195 struct list_head *p;
1194 unsigned i; 1196 unsigned i;
1195 1197
1196 atomic_inc(&cpu_buffer->record_disabled);
1197 synchronize_sched();
1198
1199 spin_lock_irq(&cpu_buffer->reader_lock); 1198 spin_lock_irq(&cpu_buffer->reader_lock);
1200 rb_head_page_deactivate(cpu_buffer); 1199 rb_head_page_deactivate(cpu_buffer);
1201 1200
@@ -1211,12 +1210,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1211 return; 1210 return;
1212 1211
1213 rb_reset_cpu(cpu_buffer); 1212 rb_reset_cpu(cpu_buffer);
1214 spin_unlock_irq(&cpu_buffer->reader_lock);
1215
1216 rb_check_pages(cpu_buffer); 1213 rb_check_pages(cpu_buffer);
1217 1214
1218 atomic_dec(&cpu_buffer->record_disabled); 1215 spin_unlock_irq(&cpu_buffer->reader_lock);
1219
1220} 1216}
1221 1217
1222static void 1218static void
@@ -1227,9 +1223,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1227 struct list_head *p; 1223 struct list_head *p;
1228 unsigned i; 1224 unsigned i;
1229 1225
1230 atomic_inc(&cpu_buffer->record_disabled);
1231 synchronize_sched();
1232
1233 spin_lock_irq(&cpu_buffer->reader_lock); 1226 spin_lock_irq(&cpu_buffer->reader_lock);
1234 rb_head_page_deactivate(cpu_buffer); 1227 rb_head_page_deactivate(cpu_buffer);
1235 1228
@@ -1242,11 +1235,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1242 list_add_tail(&bpage->list, cpu_buffer->pages); 1235 list_add_tail(&bpage->list, cpu_buffer->pages);
1243 } 1236 }
1244 rb_reset_cpu(cpu_buffer); 1237 rb_reset_cpu(cpu_buffer);
1245 spin_unlock_irq(&cpu_buffer->reader_lock);
1246
1247 rb_check_pages(cpu_buffer); 1238 rb_check_pages(cpu_buffer);
1248 1239
1249 atomic_dec(&cpu_buffer->record_disabled); 1240 spin_unlock_irq(&cpu_buffer->reader_lock);
1250} 1241}
1251 1242
1252/** 1243/**
@@ -1254,11 +1245,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1254 * @buffer: the buffer to resize. 1245 * @buffer: the buffer to resize.
1255 * @size: the new size. 1246 * @size: the new size.
1256 * 1247 *
1257 * The tracer is responsible for making sure that the buffer is
1258 * not being used while changing the size.
1259 * Note: We may be able to change the above requirement by using
1260 * RCU synchronizations.
1261 *
1262 * Minimum size is 2 * BUF_PAGE_SIZE. 1248 * Minimum size is 2 * BUF_PAGE_SIZE.
1263 * 1249 *
1264 * Returns -1 on failure. 1250 * Returns -1 on failure.
@@ -1290,6 +1276,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1290 if (size == buffer_size) 1276 if (size == buffer_size)
1291 return size; 1277 return size;
1292 1278
1279 atomic_inc(&buffer->record_disabled);
1280
1281 /* Make sure all writers are done with this buffer. */
1282 synchronize_sched();
1283
1293 mutex_lock(&buffer->mutex); 1284 mutex_lock(&buffer->mutex);
1294 get_online_cpus(); 1285 get_online_cpus();
1295 1286
@@ -1352,6 +1343,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1352 put_online_cpus(); 1343 put_online_cpus();
1353 mutex_unlock(&buffer->mutex); 1344 mutex_unlock(&buffer->mutex);
1354 1345
1346 atomic_dec(&buffer->record_disabled);
1347
1355 return size; 1348 return size;
1356 1349
1357 free_pages: 1350 free_pages:
@@ -1361,6 +1354,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1361 } 1354 }
1362 put_online_cpus(); 1355 put_online_cpus();
1363 mutex_unlock(&buffer->mutex); 1356 mutex_unlock(&buffer->mutex);
1357 atomic_dec(&buffer->record_disabled);
1364 return -ENOMEM; 1358 return -ENOMEM;
1365 1359
1366 /* 1360 /*
@@ -1370,6 +1364,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1370 out_fail: 1364 out_fail:
1371 put_online_cpus(); 1365 put_online_cpus();
1372 mutex_unlock(&buffer->mutex); 1366 mutex_unlock(&buffer->mutex);
1367 atomic_dec(&buffer->record_disabled);
1373 return -1; 1368 return -1;
1374} 1369}
1375EXPORT_SYMBOL_GPL(ring_buffer_resize); 1370EXPORT_SYMBOL_GPL(ring_buffer_resize);
@@ -2723,6 +2718,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2723 iter->read_stamp = cpu_buffer->read_stamp; 2718 iter->read_stamp = cpu_buffer->read_stamp;
2724 else 2719 else
2725 iter->read_stamp = iter->head_page->page->time_stamp; 2720 iter->read_stamp = iter->head_page->page->time_stamp;
2721 iter->cache_reader_page = cpu_buffer->reader_page;
2722 iter->cache_read = cpu_buffer->read;
2726} 2723}
2727 2724
2728/** 2725/**
@@ -2834,7 +2831,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2834 int ret; 2831 int ret;
2835 2832
2836 local_irq_save(flags); 2833 local_irq_save(flags);
2837 __raw_spin_lock(&cpu_buffer->lock); 2834 arch_spin_lock(&cpu_buffer->lock);
2838 2835
2839 again: 2836 again:
2840 /* 2837 /*
@@ -2876,7 +2873,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2876 * Splice the empty reader page into the list around the head. 2873 * Splice the empty reader page into the list around the head.
2877 */ 2874 */
2878 reader = rb_set_head_page(cpu_buffer); 2875 reader = rb_set_head_page(cpu_buffer);
2879 cpu_buffer->reader_page->list.next = reader->list.next; 2876 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
2880 cpu_buffer->reader_page->list.prev = reader->list.prev; 2877 cpu_buffer->reader_page->list.prev = reader->list.prev;
2881 2878
2882 /* 2879 /*
@@ -2913,7 +2910,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2913 * 2910 *
2914 * Now make the new head point back to the reader page. 2911 * Now make the new head point back to the reader page.
2915 */ 2912 */
2916 reader->list.next->prev = &cpu_buffer->reader_page->list; 2913 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
2917 rb_inc_page(cpu_buffer, &cpu_buffer->head_page); 2914 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
2918 2915
2919 /* Finally update the reader page to the new head */ 2916 /* Finally update the reader page to the new head */
@@ -2923,7 +2920,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2923 goto again; 2920 goto again;
2924 2921
2925 out: 2922 out:
2926 __raw_spin_unlock(&cpu_buffer->lock); 2923 arch_spin_unlock(&cpu_buffer->lock);
2927 local_irq_restore(flags); 2924 local_irq_restore(flags);
2928 2925
2929 return reader; 2926 return reader;
@@ -3067,13 +3064,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3067 struct ring_buffer_event *event; 3064 struct ring_buffer_event *event;
3068 int nr_loops = 0; 3065 int nr_loops = 0;
3069 3066
3070 if (ring_buffer_iter_empty(iter))
3071 return NULL;
3072
3073 cpu_buffer = iter->cpu_buffer; 3067 cpu_buffer = iter->cpu_buffer;
3074 buffer = cpu_buffer->buffer; 3068 buffer = cpu_buffer->buffer;
3075 3069
3070 /*
3071 * Check if someone performed a consuming read to
3072 * the buffer. A consuming read invalidates the iterator
3073 * and we need to reset the iterator in this case.
3074 */
3075 if (unlikely(iter->cache_read != cpu_buffer->read ||
3076 iter->cache_reader_page != cpu_buffer->reader_page))
3077 rb_iter_reset(iter);
3078
3076 again: 3079 again:
3080 if (ring_buffer_iter_empty(iter))
3081 return NULL;
3082
3077 /* 3083 /*
3078 * We repeat when a timestamp is encountered. 3084 * We repeat when a timestamp is encountered.
3079 * We can get multiple timestamps by nested interrupts or also 3085 * We can get multiple timestamps by nested interrupts or also
@@ -3088,6 +3094,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3088 if (rb_per_cpu_empty(cpu_buffer)) 3094 if (rb_per_cpu_empty(cpu_buffer))
3089 return NULL; 3095 return NULL;
3090 3096
3097 if (iter->head >= local_read(&iter->head_page->page->commit)) {
3098 rb_inc_iter(iter);
3099 goto again;
3100 }
3101
3091 event = rb_iter_head_event(iter); 3102 event = rb_iter_head_event(iter);
3092 3103
3093 switch (event->type_len) { 3104 switch (event->type_len) {
@@ -3286,9 +3297,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
3286 synchronize_sched(); 3297 synchronize_sched();
3287 3298
3288 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3299 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3289 __raw_spin_lock(&cpu_buffer->lock); 3300 arch_spin_lock(&cpu_buffer->lock);
3290 rb_iter_reset(iter); 3301 rb_iter_reset(iter);
3291 __raw_spin_unlock(&cpu_buffer->lock); 3302 arch_spin_unlock(&cpu_buffer->lock);
3292 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3303 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3293 3304
3294 return iter; 3305 return iter;
@@ -3408,11 +3419,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
3408 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) 3419 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3409 goto out; 3420 goto out;
3410 3421
3411 __raw_spin_lock(&cpu_buffer->lock); 3422 arch_spin_lock(&cpu_buffer->lock);
3412 3423
3413 rb_reset_cpu(cpu_buffer); 3424 rb_reset_cpu(cpu_buffer);
3414 3425
3415 __raw_spin_unlock(&cpu_buffer->lock); 3426 arch_spin_unlock(&cpu_buffer->lock);
3416 3427
3417 out: 3428 out:
3418 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3429 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 874f2893cff0..032c57ca6502 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -12,7 +12,7 @@
12 * Copyright (C) 2004 William Lee Irwin III 12 * Copyright (C) 2004 William Lee Irwin III
13 */ 13 */
14#include <linux/ring_buffer.h> 14#include <linux/ring_buffer.h>
15#include <linux/utsrelease.h> 15#include <generated/utsrelease.h>
16#include <linux/stacktrace.h> 16#include <linux/stacktrace.h>
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
@@ -32,6 +32,7 @@
32#include <linux/splice.h> 32#include <linux/splice.h>
33#include <linux/kdebug.h> 33#include <linux/kdebug.h>
34#include <linux/string.h> 34#include <linux/string.h>
35#include <linux/rwsem.h>
35#include <linux/ctype.h> 36#include <linux/ctype.h>
36#include <linux/init.h> 37#include <linux/init.h>
37#include <linux/poll.h> 38#include <linux/poll.h>
@@ -86,25 +87,22 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
86 */ 87 */
87static int tracing_disabled = 1; 88static int tracing_disabled = 1;
88 89
89DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 90DEFINE_PER_CPU(int, ftrace_cpu_disabled);
90 91
91static inline void ftrace_disable_cpu(void) 92static inline void ftrace_disable_cpu(void)
92{ 93{
93 preempt_disable(); 94 preempt_disable();
94 local_inc(&__get_cpu_var(ftrace_cpu_disabled)); 95 __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled));
95} 96}
96 97
97static inline void ftrace_enable_cpu(void) 98static inline void ftrace_enable_cpu(void)
98{ 99{
99 local_dec(&__get_cpu_var(ftrace_cpu_disabled)); 100 __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled));
100 preempt_enable(); 101 preempt_enable();
101} 102}
102 103
103static cpumask_var_t __read_mostly tracing_buffer_mask; 104static cpumask_var_t __read_mostly tracing_buffer_mask;
104 105
105/* Define which cpu buffers are currently read in trace_pipe */
106static cpumask_var_t tracing_reader_cpumask;
107
108#define for_each_tracing_cpu(cpu) \ 106#define for_each_tracing_cpu(cpu) \
109 for_each_cpu(cpu, tracing_buffer_mask) 107 for_each_cpu(cpu, tracing_buffer_mask)
110 108
@@ -203,7 +201,7 @@ cycle_t ftrace_now(int cpu)
203 */ 201 */
204static struct trace_array max_tr; 202static struct trace_array max_tr;
205 203
206static DEFINE_PER_CPU(struct trace_array_cpu, max_data); 204static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
207 205
208/* tracer_enabled is used to toggle activation of a tracer */ 206/* tracer_enabled is used to toggle activation of a tracer */
209static int tracer_enabled = 1; 207static int tracer_enabled = 1;
@@ -243,12 +241,91 @@ static struct tracer *current_trace __read_mostly;
243 241
244/* 242/*
245 * trace_types_lock is used to protect the trace_types list. 243 * trace_types_lock is used to protect the trace_types list.
246 * This lock is also used to keep user access serialized.
247 * Accesses from userspace will grab this lock while userspace
248 * activities happen inside the kernel.
249 */ 244 */
250static DEFINE_MUTEX(trace_types_lock); 245static DEFINE_MUTEX(trace_types_lock);
251 246
247/*
248 * serialize the access of the ring buffer
249 *
250 * ring buffer serializes readers, but it is low level protection.
251 * The validity of the events (which returns by ring_buffer_peek() ..etc)
252 * are not protected by ring buffer.
253 *
254 * The content of events may become garbage if we allow other process consumes
255 * these events concurrently:
256 * A) the page of the consumed events may become a normal page
257 * (not reader page) in ring buffer, and this page will be rewrited
258 * by events producer.
259 * B) The page of the consumed events may become a page for splice_read,
260 * and this page will be returned to system.
261 *
262 * These primitives allow multi process access to different cpu ring buffer
263 * concurrently.
264 *
265 * These primitives don't distinguish read-only and read-consume access.
266 * Multi read-only access are also serialized.
267 */
268
269#ifdef CONFIG_SMP
270static DECLARE_RWSEM(all_cpu_access_lock);
271static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
272
273static inline void trace_access_lock(int cpu)
274{
275 if (cpu == TRACE_PIPE_ALL_CPU) {
276 /* gain it for accessing the whole ring buffer. */
277 down_write(&all_cpu_access_lock);
278 } else {
279 /* gain it for accessing a cpu ring buffer. */
280
281 /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
282 down_read(&all_cpu_access_lock);
283
284 /* Secondly block other access to this @cpu ring buffer. */
285 mutex_lock(&per_cpu(cpu_access_lock, cpu));
286 }
287}
288
289static inline void trace_access_unlock(int cpu)
290{
291 if (cpu == TRACE_PIPE_ALL_CPU) {
292 up_write(&all_cpu_access_lock);
293 } else {
294 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
295 up_read(&all_cpu_access_lock);
296 }
297}
298
299static inline void trace_access_lock_init(void)
300{
301 int cpu;
302
303 for_each_possible_cpu(cpu)
304 mutex_init(&per_cpu(cpu_access_lock, cpu));
305}
306
307#else
308
309static DEFINE_MUTEX(access_lock);
310
311static inline void trace_access_lock(int cpu)
312{
313 (void)cpu;
314 mutex_lock(&access_lock);
315}
316
317static inline void trace_access_unlock(int cpu)
318{
319 (void)cpu;
320 mutex_unlock(&access_lock);
321}
322
323static inline void trace_access_lock_init(void)
324{
325}
326
327#endif
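A hedged sketch of how a trace reader is expected to bracket buffer access with the primitives introduced above; everything except trace_access_lock()/trace_access_unlock() and TRACE_PIPE_ALL_CPU is a placeholder:

static void example_consume_cpu(int cpu)
{
	trace_access_lock(cpu);		/* readers of other cpus may proceed */
	/* ... peek/consume events from this cpu's ring buffer ... */
	trace_access_unlock(cpu);
}

static void example_consume_all(void)
{
	trace_access_lock(TRACE_PIPE_ALL_CPU);	/* excludes every per-cpu reader */
	/* ... walk the buffers of all cpus ... */
	trace_access_unlock(TRACE_PIPE_ALL_CPU);
}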
328
252/* trace_wait is a waitqueue for tasks blocked on trace_poll */ 329/* trace_wait is a waitqueue for tasks blocked on trace_poll */
253static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 330static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
254 331
@@ -313,7 +390,6 @@ static const char *trace_options[] = {
313 "bin", 390 "bin",
314 "block", 391 "block",
315 "stacktrace", 392 "stacktrace",
316 "sched-tree",
317 "trace_printk", 393 "trace_printk",
318 "ftrace_preempt", 394 "ftrace_preempt",
319 "branch", 395 "branch",
@@ -493,15 +569,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
493 * protected by per_cpu spinlocks. But the action of the swap 569 * protected by per_cpu spinlocks. But the action of the swap
494 * needs its own lock. 570 * needs its own lock.
495 * 571 *
496 * This is defined as a raw_spinlock_t in order to help 572 * This is defined as a arch_spinlock_t in order to help
497 * with performance when lockdep debugging is enabled. 573 * with performance when lockdep debugging is enabled.
498 * 574 *
499 * It is also used in other places outside the update_max_tr 575 * It is also used in other places outside the update_max_tr
500 * so it needs to be defined outside of the 576 * so it needs to be defined outside of the
501 * CONFIG_TRACER_MAX_TRACE. 577 * CONFIG_TRACER_MAX_TRACE.
502 */ 578 */
503static raw_spinlock_t ftrace_max_lock = 579static arch_spinlock_t ftrace_max_lock =
504 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 580 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
505 581
506#ifdef CONFIG_TRACER_MAX_TRACE 582#ifdef CONFIG_TRACER_MAX_TRACE
507unsigned long __read_mostly tracing_max_latency; 583unsigned long __read_mostly tracing_max_latency;
@@ -555,13 +631,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
555 return; 631 return;
556 632
557 WARN_ON_ONCE(!irqs_disabled()); 633 WARN_ON_ONCE(!irqs_disabled());
558 __raw_spin_lock(&ftrace_max_lock); 634 arch_spin_lock(&ftrace_max_lock);
559 635
560 tr->buffer = max_tr.buffer; 636 tr->buffer = max_tr.buffer;
561 max_tr.buffer = buf; 637 max_tr.buffer = buf;
562 638
563 __update_max_tr(tr, tsk, cpu); 639 __update_max_tr(tr, tsk, cpu);
564 __raw_spin_unlock(&ftrace_max_lock); 640 arch_spin_unlock(&ftrace_max_lock);
565} 641}
566 642
567/** 643/**
@@ -581,7 +657,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
581 return; 657 return;
582 658
583 WARN_ON_ONCE(!irqs_disabled()); 659 WARN_ON_ONCE(!irqs_disabled());
584 __raw_spin_lock(&ftrace_max_lock); 660 arch_spin_lock(&ftrace_max_lock);
585 661
586 ftrace_disable_cpu(); 662 ftrace_disable_cpu();
587 663
@@ -603,7 +679,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
603 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 679 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
604 680
605 __update_max_tr(tr, tsk, cpu); 681 __update_max_tr(tr, tsk, cpu);
606 __raw_spin_unlock(&ftrace_max_lock); 682 arch_spin_unlock(&ftrace_max_lock);
607} 683}
608#endif /* CONFIG_TRACER_MAX_TRACE */ 684#endif /* CONFIG_TRACER_MAX_TRACE */
609 685
@@ -802,7 +878,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
802static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 878static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
803static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; 879static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
804static int cmdline_idx; 880static int cmdline_idx;
805static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; 881static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
806 882
807/* temporary disable recording */ 883/* temporary disable recording */
808static atomic_t trace_record_cmdline_disabled __read_mostly; 884static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -915,7 +991,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
915 * nor do we want to disable interrupts, 991 * nor do we want to disable interrupts,
916 * so if we miss here, then better luck next time. 992 * so if we miss here, then better luck next time.
917 */ 993 */
918 if (!__raw_spin_trylock(&trace_cmdline_lock)) 994 if (!arch_spin_trylock(&trace_cmdline_lock))
919 return; 995 return;
920 996
921 idx = map_pid_to_cmdline[tsk->pid]; 997 idx = map_pid_to_cmdline[tsk->pid];
@@ -940,7 +1016,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
940 1016
941 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); 1017 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
942 1018
943 __raw_spin_unlock(&trace_cmdline_lock); 1019 arch_spin_unlock(&trace_cmdline_lock);
944} 1020}
945 1021
946void trace_find_cmdline(int pid, char comm[]) 1022void trace_find_cmdline(int pid, char comm[])
@@ -952,20 +1028,25 @@ void trace_find_cmdline(int pid, char comm[])
952 return; 1028 return;
953 } 1029 }
954 1030
1031 if (WARN_ON_ONCE(pid < 0)) {
1032 strcpy(comm, "<XXX>");
1033 return;
1034 }
1035
955 if (pid > PID_MAX_DEFAULT) { 1036 if (pid > PID_MAX_DEFAULT) {
956 strcpy(comm, "<...>"); 1037 strcpy(comm, "<...>");
957 return; 1038 return;
958 } 1039 }
959 1040
960 preempt_disable(); 1041 preempt_disable();
961 __raw_spin_lock(&trace_cmdline_lock); 1042 arch_spin_lock(&trace_cmdline_lock);
962 map = map_pid_to_cmdline[pid]; 1043 map = map_pid_to_cmdline[pid];
963 if (map != NO_CMDLINE_MAP) 1044 if (map != NO_CMDLINE_MAP)
964 strcpy(comm, saved_cmdlines[map]); 1045 strcpy(comm, saved_cmdlines[map]);
965 else 1046 else
966 strcpy(comm, "<...>"); 1047 strcpy(comm, "<...>");
967 1048
968 __raw_spin_unlock(&trace_cmdline_lock); 1049 arch_spin_unlock(&trace_cmdline_lock);
969 preempt_enable(); 1050 preempt_enable();
970} 1051}
971 1052
@@ -1085,7 +1166,7 @@ trace_function(struct trace_array *tr,
1085 struct ftrace_entry *entry; 1166 struct ftrace_entry *entry;
1086 1167
1087 /* If we are reading the ring buffer, don't trace */ 1168 /* If we are reading the ring buffer, don't trace */
1088 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 1169 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
1089 return; 1170 return;
1090 1171
1091 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), 1172 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@ -1151,6 +1232,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1151 __ftrace_trace_stack(tr->buffer, flags, skip, pc); 1232 __ftrace_trace_stack(tr->buffer, flags, skip, pc);
1152} 1233}
1153 1234
1235/**
1236 * trace_dump_stack - record a stack back trace in the trace buffer
1237 */
1238void trace_dump_stack(void)
1239{
1240 unsigned long flags;
1241
1242 if (tracing_disabled || tracing_selftest_running)
1243 return;
1244
1245 local_save_flags(flags);
1246
1247 /* skipping 3 traces, seems to get us at the caller of this function */
1248 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
1249}
1250
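A hedged sketch of using the new helper from a code path under investigation; the surrounding function and the header assumed to carry the prototype are illustrative:

#include <linux/kernel.h>	/* assumed home of the trace_dump_stack() prototype */

static void example_suspicious_path(void)
{
	/* ... the condition being chased down happens here ... */
	trace_dump_stack();	/* caller's backtrace is recorded into the trace buffer */
}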
1154void 1251void
1155ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) 1252ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1156{ 1253{
@@ -1251,8 +1348,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1251 */ 1348 */
1252int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 1349int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1253{ 1350{
1254 static raw_spinlock_t trace_buf_lock = 1351 static arch_spinlock_t trace_buf_lock =
1255 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1352 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1256 static u32 trace_buf[TRACE_BUF_SIZE]; 1353 static u32 trace_buf[TRACE_BUF_SIZE];
1257 1354
1258 struct ftrace_event_call *call = &event_bprint; 1355 struct ftrace_event_call *call = &event_bprint;
@@ -1283,7 +1380,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1283 1380
1284 /* Lockdep uses trace_printk for lock tracing */ 1381 /* Lockdep uses trace_printk for lock tracing */
1285 local_irq_save(flags); 1382 local_irq_save(flags);
1286 __raw_spin_lock(&trace_buf_lock); 1383 arch_spin_lock(&trace_buf_lock);
1287 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1384 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1288 1385
1289 if (len > TRACE_BUF_SIZE || len < 0) 1386 if (len > TRACE_BUF_SIZE || len < 0)
@@ -1300,11 +1397,13 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1300 entry->fmt = fmt; 1397 entry->fmt = fmt;
1301 1398
1302 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1399 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1303 if (!filter_check_discard(call, entry, buffer, event)) 1400 if (!filter_check_discard(call, entry, buffer, event)) {
1304 ring_buffer_unlock_commit(buffer, event); 1401 ring_buffer_unlock_commit(buffer, event);
1402 ftrace_trace_stack(buffer, flags, 6, pc);
1403 }
1305 1404
1306out_unlock: 1405out_unlock:
1307 __raw_spin_unlock(&trace_buf_lock); 1406 arch_spin_unlock(&trace_buf_lock);
1308 local_irq_restore(flags); 1407 local_irq_restore(flags);
1309 1408
1310out: 1409out:
@@ -1334,7 +1433,7 @@ int trace_array_printk(struct trace_array *tr,
1334int trace_array_vprintk(struct trace_array *tr, 1433int trace_array_vprintk(struct trace_array *tr,
1335 unsigned long ip, const char *fmt, va_list args) 1434 unsigned long ip, const char *fmt, va_list args)
1336{ 1435{
1337 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1436 static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1338 static char trace_buf[TRACE_BUF_SIZE]; 1437 static char trace_buf[TRACE_BUF_SIZE];
1339 1438
1340 struct ftrace_event_call *call = &event_print; 1439 struct ftrace_event_call *call = &event_print;
@@ -1360,12 +1459,8 @@ int trace_array_vprintk(struct trace_array *tr,
1360 1459
1361 pause_graph_tracing(); 1460 pause_graph_tracing();
1362 raw_local_irq_save(irq_flags); 1461 raw_local_irq_save(irq_flags);
1363 __raw_spin_lock(&trace_buf_lock); 1462 arch_spin_lock(&trace_buf_lock);
1364 if (args == NULL) { 1463 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1365 strncpy(trace_buf, fmt, TRACE_BUF_SIZE);
1366 len = strlen(trace_buf);
1367 } else
1368 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1369 1464
1370 size = sizeof(*entry) + len + 1; 1465 size = sizeof(*entry) + len + 1;
1371 buffer = tr->buffer; 1466 buffer = tr->buffer;
@@ -1378,11 +1473,13 @@ int trace_array_vprintk(struct trace_array *tr,
1378 1473
1379 memcpy(&entry->buf, trace_buf, len); 1474 memcpy(&entry->buf, trace_buf, len);
1380 entry->buf[len] = '\0'; 1475 entry->buf[len] = '\0';
1381 if (!filter_check_discard(call, entry, buffer, event)) 1476 if (!filter_check_discard(call, entry, buffer, event)) {
1382 ring_buffer_unlock_commit(buffer, event); 1477 ring_buffer_unlock_commit(buffer, event);
1478 ftrace_trace_stack(buffer, irq_flags, 6, pc);
1479 }
1383 1480
1384 out_unlock: 1481 out_unlock:
1385 __raw_spin_unlock(&trace_buf_lock); 1482 arch_spin_unlock(&trace_buf_lock);
1386 raw_local_irq_restore(irq_flags); 1483 raw_local_irq_restore(irq_flags);
1387 unpause_graph_tracing(); 1484 unpause_graph_tracing();
1388 out: 1485 out:
@@ -1516,6 +1613,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1516 int i = (int)*pos; 1613 int i = (int)*pos;
1517 void *ent; 1614 void *ent;
1518 1615
1616 WARN_ON_ONCE(iter->leftover);
1617
1519 (*pos)++; 1618 (*pos)++;
1520 1619
1521 /* can't go backwards */ 1620 /* can't go backwards */
@@ -1567,12 +1666,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1567} 1666}
1568 1667
1569/* 1668/*
1570 * No necessary locking here. The worst thing which can
1571 * happen is loosing events consumed at the same time
1572 * by a trace_pipe reader.
1573 * Other than that, we don't risk to crash the ring buffer
1574 * because it serializes the readers.
1575 *
1576 * The current tracer is copied to avoid a global locking 1669 * The current tracer is copied to avoid a global locking
1577 * all around. 1670 * all around.
1578 */ 1671 */
@@ -1614,17 +1707,29 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1614 ; 1707 ;
1615 1708
1616 } else { 1709 } else {
1617 l = *pos - 1; 1710 /*
1618 p = s_next(m, p, &l); 1711 * If we overflowed the seq_file before, then we want
1712 * to just reuse the trace_seq buffer again.
1713 */
1714 if (iter->leftover)
1715 p = iter;
1716 else {
1717 l = *pos - 1;
1718 p = s_next(m, p, &l);
1719 }
1619 } 1720 }
1620 1721
1621 trace_event_read_lock(); 1722 trace_event_read_lock();
1723 trace_access_lock(cpu_file);
1622 return p; 1724 return p;
1623} 1725}
1624 1726
1625static void s_stop(struct seq_file *m, void *p) 1727static void s_stop(struct seq_file *m, void *p)
1626{ 1728{
1729 struct trace_iterator *iter = m->private;
1730
1627 atomic_dec(&trace_record_cmdline_disabled); 1731 atomic_dec(&trace_record_cmdline_disabled);
1732 trace_access_unlock(iter->cpu_file);
1628 trace_event_read_unlock(); 1733 trace_event_read_unlock();
1629} 1734}
1630 1735
@@ -1923,6 +2028,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1923static int s_show(struct seq_file *m, void *v) 2028static int s_show(struct seq_file *m, void *v)
1924{ 2029{
1925 struct trace_iterator *iter = v; 2030 struct trace_iterator *iter = v;
2031 int ret;
1926 2032
1927 if (iter->ent == NULL) { 2033 if (iter->ent == NULL) {
1928 if (iter->tr) { 2034 if (iter->tr) {
@@ -1942,9 +2048,27 @@ static int s_show(struct seq_file *m, void *v)
1942 if (!(trace_flags & TRACE_ITER_VERBOSE)) 2048 if (!(trace_flags & TRACE_ITER_VERBOSE))
1943 print_func_help_header(m); 2049 print_func_help_header(m);
1944 } 2050 }
2051 } else if (iter->leftover) {
2052 /*
2053 * If we filled the seq_file buffer earlier, we
2054 * want to just show it now.
2055 */
2056 ret = trace_print_seq(m, &iter->seq);
2057
2058 /* ret should this time be zero, but you never know */
2059 iter->leftover = ret;
2060
1945 } else { 2061 } else {
1946 print_trace_line(iter); 2062 print_trace_line(iter);
1947 trace_print_seq(m, &iter->seq); 2063 ret = trace_print_seq(m, &iter->seq);
2064 /*
2065 * If we overflow the seq_file buffer, then it will
2066 * ask us for this data again at start up.
2067 * Use that instead.
2068 * ret is 0 if seq_file write succeeded.
2069 * -1 otherwise.
2070 */
2071 iter->leftover = ret;
1948 } 2072 }
1949 2073
1950 return 0; 2074 return 0;
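The s_start()/s_show() changes above handle the case where one formatted entry is larger than the seq_file buffer: the text stays parked in iter->seq, the failure is remembered in iter->leftover, and the next ->show() call re-emits the saved text instead of advancing the iterator. A simplified sketch of the same idea outside the tracer, assuming this kernel generation's seq_puts(), which reports overflow through its return value; every name below is illustrative:

#include <linux/kernel.h>
#include <linux/seq_file.h>

struct demo_iter {                      /* hung off m->private at open time */
        char    staged[256];            /* one already-formatted record */
        int     leftover;               /* non-zero: staged[] not emitted yet */
        int     next;                   /* next record number to format */
};

static int demo_show(struct seq_file *m, void *v)
{
        struct demo_iter *it = m->private;

        /* only format a new record once the previous one went out */
        if (!it->leftover)
                snprintf(it->staged, sizeof(it->staged),
                         "record %d\n", it->next++);

        /* on overflow, keep the record and retry it on the next ->show() */
        it->leftover = seq_puts(m, it->staged);
        return 0;
}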
@@ -2254,7 +2378,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2254 mutex_lock(&tracing_cpumask_update_lock); 2378 mutex_lock(&tracing_cpumask_update_lock);
2255 2379
2256 local_irq_disable(); 2380 local_irq_disable();
2257 __raw_spin_lock(&ftrace_max_lock); 2381 arch_spin_lock(&ftrace_max_lock);
2258 for_each_tracing_cpu(cpu) { 2382 for_each_tracing_cpu(cpu) {
2259 /* 2383 /*
2260 * Increase/decrease the disabled counter if we are 2384 * Increase/decrease the disabled counter if we are
@@ -2269,7 +2393,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2269 atomic_dec(&global_trace.data[cpu]->disabled); 2393 atomic_dec(&global_trace.data[cpu]->disabled);
2270 } 2394 }
2271 } 2395 }
2272 __raw_spin_unlock(&ftrace_max_lock); 2396 arch_spin_unlock(&ftrace_max_lock);
2273 local_irq_enable(); 2397 local_irq_enable();
2274 2398
2275 cpumask_copy(tracing_cpumask, tracing_cpumask_new); 2399 cpumask_copy(tracing_cpumask, tracing_cpumask_new);
@@ -2291,67 +2415,49 @@ static const struct file_operations tracing_cpumask_fops = {
2291 .write = tracing_cpumask_write, 2415 .write = tracing_cpumask_write,
2292}; 2416};
2293 2417
2294static ssize_t 2418static int tracing_trace_options_show(struct seq_file *m, void *v)
2295tracing_trace_options_read(struct file *filp, char __user *ubuf,
2296 size_t cnt, loff_t *ppos)
2297{ 2419{
2298 struct tracer_opt *trace_opts; 2420 struct tracer_opt *trace_opts;
2299 u32 tracer_flags; 2421 u32 tracer_flags;
2300 int len = 0;
2301 char *buf;
2302 int r = 0;
2303 int i; 2422 int i;
2304 2423
2305
2306 /* calculate max size */
2307 for (i = 0; trace_options[i]; i++) {
2308 len += strlen(trace_options[i]);
2309 len += 3; /* "no" and newline */
2310 }
2311
2312 mutex_lock(&trace_types_lock); 2424 mutex_lock(&trace_types_lock);
2313 tracer_flags = current_trace->flags->val; 2425 tracer_flags = current_trace->flags->val;
2314 trace_opts = current_trace->flags->opts; 2426 trace_opts = current_trace->flags->opts;
2315 2427
2316 /*
2317 * Increase the size with names of options specific
2318 * of the current tracer.
2319 */
2320 for (i = 0; trace_opts[i].name; i++) {
2321 len += strlen(trace_opts[i].name);
2322 len += 3; /* "no" and newline */
2323 }
2324
2325 /* +1 for \0 */
2326 buf = kmalloc(len + 1, GFP_KERNEL);
2327 if (!buf) {
2328 mutex_unlock(&trace_types_lock);
2329 return -ENOMEM;
2330 }
2331
2332 for (i = 0; trace_options[i]; i++) { 2428 for (i = 0; trace_options[i]; i++) {
2333 if (trace_flags & (1 << i)) 2429 if (trace_flags & (1 << i))
2334 r += sprintf(buf + r, "%s\n", trace_options[i]); 2430 seq_printf(m, "%s\n", trace_options[i]);
2335 else 2431 else
2336 r += sprintf(buf + r, "no%s\n", trace_options[i]); 2432 seq_printf(m, "no%s\n", trace_options[i]);
2337 } 2433 }
2338 2434
2339 for (i = 0; trace_opts[i].name; i++) { 2435 for (i = 0; trace_opts[i].name; i++) {
2340 if (tracer_flags & trace_opts[i].bit) 2436 if (tracer_flags & trace_opts[i].bit)
2341 r += sprintf(buf + r, "%s\n", 2437 seq_printf(m, "%s\n", trace_opts[i].name);
2342 trace_opts[i].name);
2343 else 2438 else
2344 r += sprintf(buf + r, "no%s\n", 2439 seq_printf(m, "no%s\n", trace_opts[i].name);
2345 trace_opts[i].name);
2346 } 2440 }
2347 mutex_unlock(&trace_types_lock); 2441 mutex_unlock(&trace_types_lock);
2348 2442
2349 WARN_ON(r >= len + 1); 2443 return 0;
2444}
2350 2445
2351 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2446static int __set_tracer_option(struct tracer *trace,
2447 struct tracer_flags *tracer_flags,
2448 struct tracer_opt *opts, int neg)
2449{
2450 int ret;
2352 2451
2353 kfree(buf); 2452 ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
2354 return r; 2453 if (ret)
2454 return ret;
2455
2456 if (neg)
2457 tracer_flags->val &= ~opts->bit;
2458 else
2459 tracer_flags->val |= opts->bit;
2460 return 0;
2355} 2461}
2356 2462
2357/* Try to assign a tracer specific option */ 2463/* Try to assign a tracer specific option */
@@ -2359,33 +2465,17 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2359{ 2465{
2360 struct tracer_flags *tracer_flags = trace->flags; 2466 struct tracer_flags *tracer_flags = trace->flags;
2361 struct tracer_opt *opts = NULL; 2467 struct tracer_opt *opts = NULL;
2362 int ret = 0, i = 0; 2468 int i;
2363 int len;
2364 2469
2365 for (i = 0; tracer_flags->opts[i].name; i++) { 2470 for (i = 0; tracer_flags->opts[i].name; i++) {
2366 opts = &tracer_flags->opts[i]; 2471 opts = &tracer_flags->opts[i];
2367 len = strlen(opts->name);
2368 2472
2369 if (strncmp(cmp, opts->name, len) == 0) { 2473 if (strcmp(cmp, opts->name) == 0)
2370 ret = trace->set_flag(tracer_flags->val, 2474 return __set_tracer_option(trace, trace->flags,
2371 opts->bit, !neg); 2475 opts, neg);
2372 break;
2373 }
2374 } 2476 }
2375 /* Not found */
2376 if (!tracer_flags->opts[i].name)
2377 return -EINVAL;
2378 2477
2379 /* Refused to handle */ 2478 return -EINVAL;
2380 if (ret)
2381 return ret;
2382
2383 if (neg)
2384 tracer_flags->val &= ~opts->bit;
2385 else
2386 tracer_flags->val |= opts->bit;
2387
2388 return 0;
2389} 2479}
2390 2480
2391static void set_tracer_flags(unsigned int mask, int enabled) 2481static void set_tracer_flags(unsigned int mask, int enabled)
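The two hunks above move the actual flag flip into __set_tracer_option(), shared by the trace_options parser and (further down) the per-tracer options/ files, and tighten the name comparison to an exact strcmp(). For context, a hedged sketch of the option table and ->set_flag() callback a tracer supplies at this point in the tree; the names are illustrative and the sketch assumes it lives under kernel/trace so it can include the local trace.h:

#include "trace.h"

#define DEMO_OPT_VERBOSE        0x1

static struct tracer_opt demo_opts[] = {
        /* TRACER_OPT(name, bit) fills in the .name and .bit members */
        { TRACER_OPT(demo_verbose, DEMO_OPT_VERBOSE) },
        { }     /* terminator */
};

static struct tracer_flags demo_flags = {
        .val    = 0,
        .opts   = demo_opts,
};

static int demo_set_flag(u32 old_flags, u32 bit, int set)
{
        /* returning non-zero makes __set_tracer_option() refuse the
         * change before the bit in ->val is touched */
        return 0;
}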
@@ -2405,7 +2495,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2405 size_t cnt, loff_t *ppos) 2495 size_t cnt, loff_t *ppos)
2406{ 2496{
2407 char buf[64]; 2497 char buf[64];
2408 char *cmp = buf; 2498 char *cmp;
2409 int neg = 0; 2499 int neg = 0;
2410 int ret; 2500 int ret;
2411 int i; 2501 int i;
@@ -2417,16 +2507,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2417 return -EFAULT; 2507 return -EFAULT;
2418 2508
2419 buf[cnt] = 0; 2509 buf[cnt] = 0;
2510 cmp = strstrip(buf);
2420 2511
2421 if (strncmp(buf, "no", 2) == 0) { 2512 if (strncmp(cmp, "no", 2) == 0) {
2422 neg = 1; 2513 neg = 1;
2423 cmp += 2; 2514 cmp += 2;
2424 } 2515 }
2425 2516
2426 for (i = 0; trace_options[i]; i++) { 2517 for (i = 0; trace_options[i]; i++) {
2427 int len = strlen(trace_options[i]); 2518 if (strcmp(cmp, trace_options[i]) == 0) {
2428
2429 if (strncmp(cmp, trace_options[i], len) == 0) {
2430 set_tracer_flags(1 << i, !neg); 2519 set_tracer_flags(1 << i, !neg);
2431 break; 2520 break;
2432 } 2521 }
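With the strstrip()/strcmp() parsing above, whatever is written to trace_options is stripped of surrounding whitespace and must then match an option name exactly, optionally prefixed with "no" to clear it; the old strncmp() only compared the leading characters, so trailing junk after a valid name used to be accepted silently. A small userspace sketch, assuming debugfs is mounted at /sys/kernel/debug:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/kernel/debug/tracing/trace_options", "w");

        if (!f)
                return 1;
        /* the trailing newline from the write is stripped by the kernel */
        fputs("nostacktrace\n", f);
        fclose(f);
        return 0;
}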
@@ -2446,9 +2535,18 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2446 return cnt; 2535 return cnt;
2447} 2536}
2448 2537
2538static int tracing_trace_options_open(struct inode *inode, struct file *file)
2539{
2540 if (tracing_disabled)
2541 return -ENODEV;
2542 return single_open(file, tracing_trace_options_show, NULL);
2543}
2544
2449static const struct file_operations tracing_iter_fops = { 2545static const struct file_operations tracing_iter_fops = {
2450 .open = tracing_open_generic, 2546 .open = tracing_trace_options_open,
2451 .read = tracing_trace_options_read, 2547 .read = seq_read,
2548 .llseek = seq_lseek,
2549 .release = single_release,
2452 .write = tracing_trace_options_write, 2550 .write = tracing_trace_options_write,
2453}; 2551};
2454 2552
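The trace_options file (and trace_clock further down) is converted from a hand-rolled read callback that sized and kmalloc'd its own buffer to the stock single_open() seq_file helpers, which take care of buffer sizing and partial reads. A minimal sketch of that pattern for a hypothetical debugfs file:

#include <linux/debugfs.h>
#include <linux/seq_file.h>

static int demo_state_show(struct seq_file *m, void *v)
{
        seq_printf(m, "state: %d\n", 42);
        return 0;
}

static int demo_state_open(struct inode *inode, struct file *file)
{
        return single_open(file, demo_state_show, inode->i_private);
}

static const struct file_operations demo_state_fops = {
        .open           = demo_state_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};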
@@ -2822,22 +2920,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2822 2920
2823 mutex_lock(&trace_types_lock); 2921 mutex_lock(&trace_types_lock);
2824 2922
2825 /* We only allow one reader per cpu */
2826 if (cpu_file == TRACE_PIPE_ALL_CPU) {
2827 if (!cpumask_empty(tracing_reader_cpumask)) {
2828 ret = -EBUSY;
2829 goto out;
2830 }
2831 cpumask_setall(tracing_reader_cpumask);
2832 } else {
2833 if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2834 cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2835 else {
2836 ret = -EBUSY;
2837 goto out;
2838 }
2839 }
2840
2841 /* create a buffer to store the information to pass to userspace */ 2923 /* create a buffer to store the information to pass to userspace */
2842 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 2924 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2843 if (!iter) { 2925 if (!iter) {
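The block deleted above was the old one-reader-per-cpu policy for trace_pipe, enforced through tracing_reader_cpumask; with it gone, any number of readers may hold trace_pipe or a per_cpu/cpuN/trace_pipe file open, and the new trace_access_lock()/trace_access_unlock() pairs added throughout the read paths serialize them inside the kernel instead. A small userspace reader for illustration, again assuming the debugfs mount at /sys/kernel/debug:

#include <stdio.h>

int main(void)
{
        char line[4096];
        FILE *f = fopen("/sys/kernel/debug/tracing/trace_pipe", "r");

        if (!f)
                return 1;
        /* blocks until trace data arrives; several such readers can now
         * run concurrently */
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);
        fclose(f);
        return 0;
}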
@@ -2893,10 +2975,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
2893 2975
2894 mutex_lock(&trace_types_lock); 2976 mutex_lock(&trace_types_lock);
2895 2977
2896 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) 2978 if (iter->trace->pipe_close)
2897 cpumask_clear(tracing_reader_cpumask); 2979 iter->trace->pipe_close(iter);
2898 else
2899 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2900 2980
2901 mutex_unlock(&trace_types_lock); 2981 mutex_unlock(&trace_types_lock);
2902 2982
@@ -3056,6 +3136,7 @@ waitagain:
3056 iter->pos = -1; 3136 iter->pos = -1;
3057 3137
3058 trace_event_read_lock(); 3138 trace_event_read_lock();
3139 trace_access_lock(iter->cpu_file);
3059 while (find_next_entry_inc(iter) != NULL) { 3140 while (find_next_entry_inc(iter) != NULL) {
3060 enum print_line_t ret; 3141 enum print_line_t ret;
3061 int len = iter->seq.len; 3142 int len = iter->seq.len;
@@ -3072,6 +3153,7 @@ waitagain:
3072 if (iter->seq.len >= cnt) 3153 if (iter->seq.len >= cnt)
3073 break; 3154 break;
3074 } 3155 }
3156 trace_access_unlock(iter->cpu_file);
3075 trace_event_read_unlock(); 3157 trace_event_read_unlock();
3076 3158
3077 /* Now copy what we have to the user */ 3159 /* Now copy what we have to the user */
@@ -3104,7 +3186,7 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3104 __free_page(spd->pages[idx]); 3186 __free_page(spd->pages[idx]);
3105} 3187}
3106 3188
3107static struct pipe_buf_operations tracing_pipe_buf_ops = { 3189static const struct pipe_buf_operations tracing_pipe_buf_ops = {
3108 .can_merge = 0, 3190 .can_merge = 0,
3109 .map = generic_pipe_buf_map, 3191 .map = generic_pipe_buf_map,
3110 .unmap = generic_pipe_buf_unmap, 3192 .unmap = generic_pipe_buf_unmap,
@@ -3197,6 +3279,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3197 } 3279 }
3198 3280
3199 trace_event_read_lock(); 3281 trace_event_read_lock();
3282 trace_access_lock(iter->cpu_file);
3200 3283
3201 /* Fill as many pages as possible. */ 3284 /* Fill as many pages as possible. */
3202 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { 3285 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
@@ -3220,6 +3303,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3220 trace_seq_init(&iter->seq); 3303 trace_seq_init(&iter->seq);
3221 } 3304 }
3222 3305
3306 trace_access_unlock(iter->cpu_file);
3223 trace_event_read_unlock(); 3307 trace_event_read_unlock();
3224 mutex_unlock(&iter->mutex); 3308 mutex_unlock(&iter->mutex);
3225 3309
@@ -3320,6 +3404,16 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3320 return cnt; 3404 return cnt;
3321} 3405}
3322 3406
3407static int mark_printk(const char *fmt, ...)
3408{
3409 int ret;
3410 va_list args;
3411 va_start(args, fmt);
3412 ret = trace_vprintk(0, fmt, args);
3413 va_end(args);
3414 return ret;
3415}
3416
3323static ssize_t 3417static ssize_t
3324tracing_mark_write(struct file *filp, const char __user *ubuf, 3418tracing_mark_write(struct file *filp, const char __user *ubuf,
3325 size_t cnt, loff_t *fpos) 3419 size_t cnt, loff_t *fpos)
@@ -3346,28 +3440,25 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3346 } else 3440 } else
3347 buf[cnt] = '\0'; 3441 buf[cnt] = '\0';
3348 3442
3349 cnt = trace_vprintk(0, buf, NULL); 3443 cnt = mark_printk("%s", buf);
3350 kfree(buf); 3444 kfree(buf);
3351 *fpos += cnt; 3445 *fpos += cnt;
3352 3446
3353 return cnt; 3447 return cnt;
3354} 3448}
3355 3449
3356static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf, 3450static int tracing_clock_show(struct seq_file *m, void *v)
3357 size_t cnt, loff_t *ppos)
3358{ 3451{
3359 char buf[64];
3360 int bufiter = 0;
3361 int i; 3452 int i;
3362 3453
3363 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 3454 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3364 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, 3455 seq_printf(m,
3365 "%s%s%s%s", i ? " " : "", 3456 "%s%s%s%s", i ? " " : "",
3366 i == trace_clock_id ? "[" : "", trace_clocks[i].name, 3457 i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3367 i == trace_clock_id ? "]" : ""); 3458 i == trace_clock_id ? "]" : "");
3368 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n"); 3459 seq_putc(m, '\n');
3369 3460
3370 return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter); 3461 return 0;
3371} 3462}
3372 3463
3373static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 3464static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
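The tracing_mark_write() change above wraps the user buffer in mark_printk("%s", buf) instead of handing it straight to trace_vprintk() with a NULL va_list, which is what allowed the args == NULL special case to be dropped from trace_array_vprintk() earlier in this patch; '%' characters written by user space continue to be stored literally rather than being parsed as conversion specifiers. A userspace sketch of writing an annotation, with the usual debugfs mount assumption:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/kernel/debug/tracing/trace_marker", "w");

        if (!f)
                return 1;
        /* the literal '%' below is recorded as-is in the trace */
        fprintf(f, "resync 42%% complete\n");
        fclose(f);
        return 0;
}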
@@ -3409,6 +3500,13 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3409 return cnt; 3500 return cnt;
3410} 3501}
3411 3502
3503static int tracing_clock_open(struct inode *inode, struct file *file)
3504{
3505 if (tracing_disabled)
3506 return -ENODEV;
3507 return single_open(file, tracing_clock_show, NULL);
3508}
3509
3412static const struct file_operations tracing_max_lat_fops = { 3510static const struct file_operations tracing_max_lat_fops = {
3413 .open = tracing_open_generic, 3511 .open = tracing_open_generic,
3414 .read = tracing_max_lat_read, 3512 .read = tracing_max_lat_read,
@@ -3447,8 +3545,10 @@ static const struct file_operations tracing_mark_fops = {
3447}; 3545};
3448 3546
3449static const struct file_operations trace_clock_fops = { 3547static const struct file_operations trace_clock_fops = {
3450 .open = tracing_open_generic, 3548 .open = tracing_clock_open,
3451 .read = tracing_clock_read, 3549 .read = seq_read,
3550 .llseek = seq_lseek,
3551 .release = single_release,
3452 .write = tracing_clock_write, 3552 .write = tracing_clock_write,
3453}; 3553};
3454 3554
@@ -3505,10 +3605,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3505 3605
3506 info->read = 0; 3606 info->read = 0;
3507 3607
3608 trace_access_lock(info->cpu);
3508 ret = ring_buffer_read_page(info->tr->buffer, 3609 ret = ring_buffer_read_page(info->tr->buffer,
3509 &info->spare, 3610 &info->spare,
3510 count, 3611 count,
3511 info->cpu, 0); 3612 info->cpu, 0);
3613 trace_access_unlock(info->cpu);
3512 if (ret < 0) 3614 if (ret < 0)
3513 return 0; 3615 return 0;
3514 3616
@@ -3578,7 +3680,7 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3578} 3680}
3579 3681
3580/* Pipe buffer operations for a buffer. */ 3682/* Pipe buffer operations for a buffer. */
3581static struct pipe_buf_operations buffer_pipe_buf_ops = { 3683static const struct pipe_buf_operations buffer_pipe_buf_ops = {
3582 .can_merge = 0, 3684 .can_merge = 0,
3583 .map = generic_pipe_buf_map, 3685 .map = generic_pipe_buf_map,
3584 .unmap = generic_pipe_buf_unmap, 3686 .unmap = generic_pipe_buf_unmap,
@@ -3636,6 +3738,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3636 len &= PAGE_MASK; 3738 len &= PAGE_MASK;
3637 } 3739 }
3638 3740
3741 trace_access_lock(info->cpu);
3639 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3742 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3640 3743
3641 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { 3744 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
@@ -3683,6 +3786,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3683 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3786 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3684 } 3787 }
3685 3788
3789 trace_access_unlock(info->cpu);
3686 spd.nr_pages = i; 3790 spd.nr_pages = i;
3687 3791
3688 /* did we read anything? */ 3792 /* did we read anything? */
@@ -3909,39 +4013,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3909 if (ret < 0) 4013 if (ret < 0)
3910 return ret; 4014 return ret;
3911 4015
3912 ret = 0; 4016 if (val != 0 && val != 1)
3913 switch (val) { 4017 return -EINVAL;
3914 case 0:
3915 /* do nothing if already cleared */
3916 if (!(topt->flags->val & topt->opt->bit))
3917 break;
3918
3919 mutex_lock(&trace_types_lock);
3920 if (current_trace->set_flag)
3921 ret = current_trace->set_flag(topt->flags->val,
3922 topt->opt->bit, 0);
3923 mutex_unlock(&trace_types_lock);
3924 if (ret)
3925 return ret;
3926 topt->flags->val &= ~topt->opt->bit;
3927 break;
3928 case 1:
3929 /* do nothing if already set */
3930 if (topt->flags->val & topt->opt->bit)
3931 break;
3932 4018
4019 if (!!(topt->flags->val & topt->opt->bit) != val) {
3933 mutex_lock(&trace_types_lock); 4020 mutex_lock(&trace_types_lock);
3934 if (current_trace->set_flag) 4021 ret = __set_tracer_option(current_trace, topt->flags,
3935 ret = current_trace->set_flag(topt->flags->val, 4022 topt->opt, !val);
3936 topt->opt->bit, 1);
3937 mutex_unlock(&trace_types_lock); 4023 mutex_unlock(&trace_types_lock);
3938 if (ret) 4024 if (ret)
3939 return ret; 4025 return ret;
3940 topt->flags->val |= topt->opt->bit;
3941 break;
3942
3943 default:
3944 return -EINVAL;
3945 } 4026 }
3946 4027
3947 *ppos += cnt; 4028 *ppos += cnt;
@@ -4142,6 +4223,8 @@ static __init int tracer_init_debugfs(void)
4142 struct dentry *d_tracer; 4223 struct dentry *d_tracer;
4143 int cpu; 4224 int cpu;
4144 4225
4226 trace_access_lock_init();
4227
4145 d_tracer = tracing_init_dentry(); 4228 d_tracer = tracing_init_dentry();
4146 4229
4147 trace_create_file("tracing_enabled", 0644, d_tracer, 4230 trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4268,8 +4351,8 @@ trace_printk_seq(struct trace_seq *s)
4268 4351
4269static void __ftrace_dump(bool disable_tracing) 4352static void __ftrace_dump(bool disable_tracing)
4270{ 4353{
4271 static raw_spinlock_t ftrace_dump_lock = 4354 static arch_spinlock_t ftrace_dump_lock =
4272 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 4355 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
4273 /* use static because iter can be a bit big for the stack */ 4356 /* use static because iter can be a bit big for the stack */
4274 static struct trace_iterator iter; 4357 static struct trace_iterator iter;
4275 unsigned int old_userobj; 4358 unsigned int old_userobj;
@@ -4279,7 +4362,7 @@ static void __ftrace_dump(bool disable_tracing)
4279 4362
4280 /* only one dump */ 4363 /* only one dump */
4281 local_irq_save(flags); 4364 local_irq_save(flags);
4282 __raw_spin_lock(&ftrace_dump_lock); 4365 arch_spin_lock(&ftrace_dump_lock);
4283 if (dump_ran) 4366 if (dump_ran)
4284 goto out; 4367 goto out;
4285 4368
@@ -4354,7 +4437,7 @@ static void __ftrace_dump(bool disable_tracing)
4354 } 4437 }
4355 4438
4356 out: 4439 out:
4357 __raw_spin_unlock(&ftrace_dump_lock); 4440 arch_spin_unlock(&ftrace_dump_lock);
4358 local_irq_restore(flags); 4441 local_irq_restore(flags);
4359} 4442}
4360 4443
@@ -4376,9 +4459,6 @@ __init static int tracer_alloc_buffers(void)
4376 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) 4459 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
4377 goto out_free_buffer_mask; 4460 goto out_free_buffer_mask;
4378 4461
4379 if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
4380 goto out_free_tracing_cpumask;
4381
4382 /* To save memory, keep the ring buffer size to its minimum */ 4462 /* To save memory, keep the ring buffer size to its minimum */
4383 if (ring_buffer_expanded) 4463 if (ring_buffer_expanded)
4384 ring_buf_size = trace_buf_size; 4464 ring_buf_size = trace_buf_size;
@@ -4415,7 +4495,7 @@ __init static int tracer_alloc_buffers(void)
4415 /* Allocate the first page for all buffers */ 4495 /* Allocate the first page for all buffers */
4416 for_each_tracing_cpu(i) { 4496 for_each_tracing_cpu(i) {
4417 global_trace.data[i] = &per_cpu(global_trace_cpu, i); 4497 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4418 max_tr.data[i] = &per_cpu(max_data, i); 4498 max_tr.data[i] = &per_cpu(max_tr_data, i);
4419 } 4499 }
4420 4500
4421 trace_init_cmdlines(); 4501 trace_init_cmdlines();
@@ -4436,8 +4516,6 @@ __init static int tracer_alloc_buffers(void)
4436 return 0; 4516 return 0;
4437 4517
4438out_free_cpumask: 4518out_free_cpumask:
4439 free_cpumask_var(tracing_reader_cpumask);
4440out_free_tracing_cpumask:
4441 free_cpumask_var(tracing_cpumask); 4519 free_cpumask_var(tracing_cpumask);
4442out_free_buffer_mask: 4520out_free_buffer_mask:
4443 free_cpumask_var(tracing_buffer_mask); 4521 free_cpumask_var(tracing_buffer_mask);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1d7f4830a80d..fd05bcaf91b0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -272,6 +272,7 @@ struct tracer_flags {
272 * @pipe_open: called when the trace_pipe file is opened 272 * @pipe_open: called when the trace_pipe file is opened
273 * @wait_pipe: override how the user waits for traces on trace_pipe 273 * @wait_pipe: override how the user waits for traces on trace_pipe
274 * @close: called when the trace file is released 274 * @close: called when the trace file is released
275 * @pipe_close: called when the trace_pipe file is released
275 * @read: override the default read callback on trace_pipe 276 * @read: override the default read callback on trace_pipe
276 * @splice_read: override the default splice_read callback on trace_pipe 277 * @splice_read: override the default splice_read callback on trace_pipe
277 * @selftest: selftest to run on boot (see trace_selftest.c) 278 * @selftest: selftest to run on boot (see trace_selftest.c)
@@ -290,6 +291,7 @@ struct tracer {
290 void (*pipe_open)(struct trace_iterator *iter); 291 void (*pipe_open)(struct trace_iterator *iter);
291 void (*wait_pipe)(struct trace_iterator *iter); 292 void (*wait_pipe)(struct trace_iterator *iter);
292 void (*close)(struct trace_iterator *iter); 293 void (*close)(struct trace_iterator *iter);
294 void (*pipe_close)(struct trace_iterator *iter);
293 ssize_t (*read)(struct trace_iterator *iter, 295 ssize_t (*read)(struct trace_iterator *iter,
294 struct file *filp, char __user *ubuf, 296 struct file *filp, char __user *ubuf,
295 size_t cnt, loff_t *ppos); 297 size_t cnt, loff_t *ppos);
@@ -441,7 +443,7 @@ extern int DYN_FTRACE_TEST_NAME(void);
441 443
442extern int ring_buffer_expanded; 444extern int ring_buffer_expanded;
443extern bool tracing_selftest_disabled; 445extern bool tracing_selftest_disabled;
444DECLARE_PER_CPU(local_t, ftrace_cpu_disabled); 446DECLARE_PER_CPU(int, ftrace_cpu_disabled);
445 447
446#ifdef CONFIG_FTRACE_STARTUP_TEST 448#ifdef CONFIG_FTRACE_STARTUP_TEST
447extern int trace_selftest_startup_function(struct tracer *trace, 449extern int trace_selftest_startup_function(struct tracer *trace,
@@ -495,6 +497,7 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
495#ifdef CONFIG_DYNAMIC_FTRACE 497#ifdef CONFIG_DYNAMIC_FTRACE
496/* TODO: make this variable */ 498/* TODO: make this variable */
497#define FTRACE_GRAPH_MAX_FUNCS 32 499#define FTRACE_GRAPH_MAX_FUNCS 32
500extern int ftrace_graph_filter_enabled;
498extern int ftrace_graph_count; 501extern int ftrace_graph_count;
499extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; 502extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
500 503
@@ -502,7 +505,7 @@ static inline int ftrace_graph_addr(unsigned long addr)
502{ 505{
503 int i; 506 int i;
504 507
505 if (!ftrace_graph_count || test_tsk_trace_graph(current)) 508 if (!ftrace_graph_filter_enabled)
506 return 1; 509 return 1;
507 510
508 for (i = 0; i < ftrace_graph_count; i++) { 511 for (i = 0; i < ftrace_graph_count; i++) {
@@ -595,18 +598,17 @@ enum trace_iterator_flags {
595 TRACE_ITER_BIN = 0x40, 598 TRACE_ITER_BIN = 0x40,
596 TRACE_ITER_BLOCK = 0x80, 599 TRACE_ITER_BLOCK = 0x80,
597 TRACE_ITER_STACKTRACE = 0x100, 600 TRACE_ITER_STACKTRACE = 0x100,
598 TRACE_ITER_SCHED_TREE = 0x200, 601 TRACE_ITER_PRINTK = 0x200,
599 TRACE_ITER_PRINTK = 0x400, 602 TRACE_ITER_PREEMPTONLY = 0x400,
600 TRACE_ITER_PREEMPTONLY = 0x800, 603 TRACE_ITER_BRANCH = 0x800,
601 TRACE_ITER_BRANCH = 0x1000, 604 TRACE_ITER_ANNOTATE = 0x1000,
602 TRACE_ITER_ANNOTATE = 0x2000, 605 TRACE_ITER_USERSTACKTRACE = 0x2000,
603 TRACE_ITER_USERSTACKTRACE = 0x4000, 606 TRACE_ITER_SYM_USEROBJ = 0x4000,
604 TRACE_ITER_SYM_USEROBJ = 0x8000, 607 TRACE_ITER_PRINTK_MSGONLY = 0x8000,
605 TRACE_ITER_PRINTK_MSGONLY = 0x10000, 608 TRACE_ITER_CONTEXT_INFO = 0x10000, /* Print pid/cpu/time */
606 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ 609 TRACE_ITER_LATENCY_FMT = 0x20000,
607 TRACE_ITER_LATENCY_FMT = 0x40000, 610 TRACE_ITER_SLEEP_TIME = 0x40000,
608 TRACE_ITER_SLEEP_TIME = 0x80000, 611 TRACE_ITER_GRAPH_TIME = 0x80000,
609 TRACE_ITER_GRAPH_TIME = 0x100000,
610}; 612};
611 613
612/* 614/*
@@ -790,7 +792,8 @@ extern const char *__stop___trace_bprintk_fmt[];
790 792
791#undef FTRACE_ENTRY 793#undef FTRACE_ENTRY
792#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ 794#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
793 extern struct ftrace_event_call event_##call; 795 extern struct ftrace_event_call \
796 __attribute__((__aligned__(4))) event_##call;
794#undef FTRACE_ENTRY_DUP 797#undef FTRACE_ENTRY_DUP
795#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ 798#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
796 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 799 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 4a194f08f88c..b9bc4d470177 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -307,8 +307,23 @@ static int annotated_branch_stat_cmp(void *p1, void *p2)
307 return -1; 307 return -1;
308 if (percent_a > percent_b) 308 if (percent_a > percent_b)
309 return 1; 309 return 1;
310 else 310
311 return 0; 311 if (a->incorrect < b->incorrect)
312 return -1;
313 if (a->incorrect > b->incorrect)
314 return 1;
315
316 /*
317 * Since the above shows worse (incorrect) cases
318 * first, we continue that by showing best (correct)
319 * cases last.
320 */
321 if (a->correct > b->correct)
322 return -1;
323 if (a->correct < b->correct)
324 return 1;
325
326 return 0;
312} 327}
313 328
314static struct tracer_stat annotated_branch_stats = { 329static struct tracer_stat annotated_branch_stats = {
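The extended comparator above makes the annotated-branch statistics sort fully deterministic: as its comment says, the worse (more often incorrect) predictions keep sorting toward the front, ties are broken by the raw incorrect count, and the best (most often correct) predictions end up last. The same ordering written as a stand-alone comparator over a hypothetical stat struct, purely for reference:

struct demo_branch_stat {
        unsigned long   correct;
        unsigned long   incorrect;
        int             percent;        /* precomputed miss ratio */
};

static int demo_branch_cmp(const struct demo_branch_stat *a,
                           const struct demo_branch_stat *b)
{
        if (a->percent != b->percent)
                return a->percent < b->percent ? -1 : 1;
        if (a->incorrect != b->incorrect)
                return a->incorrect < b->incorrect ? -1 : 1;
        /* most-often-correct entries sort last */
        if (a->correct != b->correct)
                return a->correct > b->correct ? -1 : 1;
        return 0;
}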
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 878c03f386ba..84a3a7ba072a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -71,10 +71,10 @@ u64 notrace trace_clock(void)
71/* keep prev_time and lock in the same cacheline. */ 71/* keep prev_time and lock in the same cacheline. */
72static struct { 72static struct {
73 u64 prev_time; 73 u64 prev_time;
74 raw_spinlock_t lock; 74 arch_spinlock_t lock;
75} trace_clock_struct ____cacheline_aligned_in_smp = 75} trace_clock_struct ____cacheline_aligned_in_smp =
76 { 76 {
77 .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED, 77 .lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
78 }; 78 };
79 79
80u64 notrace trace_clock_global(void) 80u64 notrace trace_clock_global(void)
@@ -94,7 +94,7 @@ u64 notrace trace_clock_global(void)
94 if (unlikely(in_nmi())) 94 if (unlikely(in_nmi()))
95 goto out; 95 goto out;
96 96
97 __raw_spin_lock(&trace_clock_struct.lock); 97 arch_spin_lock(&trace_clock_struct.lock);
98 98
99 /* 99 /*
100 * TODO: if this happens often then maybe we should reset 100 * TODO: if this happens often then maybe we should reset
@@ -106,7 +106,7 @@ u64 notrace trace_clock_global(void)
106 106
107 trace_clock_struct.prev_time = now; 107 trace_clock_struct.prev_time = now;
108 108
109 __raw_spin_unlock(&trace_clock_struct.lock); 109 arch_spin_unlock(&trace_clock_struct.lock);
110 110
111 out: 111 out:
112 raw_local_irq_restore(flags); 112 raw_local_irq_restore(flags);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index d9c60f80aa0d..f0d693005075 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -6,14 +6,12 @@
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/kprobes.h>
9#include "trace.h" 10#include "trace.h"
10 11
11 12
12char *perf_trace_buf; 13static char *perf_trace_buf;
13EXPORT_SYMBOL_GPL(perf_trace_buf); 14static char *perf_trace_buf_nmi;
14
15char *perf_trace_buf_nmi;
16EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
17 15
18typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; 16typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
19 17
@@ -25,7 +23,7 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
25 char *buf; 23 char *buf;
26 int ret = -ENOMEM; 24 int ret = -ENOMEM;
27 25
28 if (atomic_inc_return(&event->profile_count)) 26 if (event->profile_count++ > 0)
29 return 0; 27 return 0;
30 28
31 if (!total_profile_count) { 29 if (!total_profile_count) {
@@ -56,7 +54,7 @@ fail_buf_nmi:
56 perf_trace_buf = NULL; 54 perf_trace_buf = NULL;
57 } 55 }
58fail_buf: 56fail_buf:
59 atomic_dec(&event->profile_count); 57 event->profile_count--;
60 58
61 return ret; 59 return ret;
62} 60}
@@ -83,7 +81,7 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
83{ 81{
84 char *buf, *nmi_buf; 82 char *buf, *nmi_buf;
85 83
86 if (!atomic_add_negative(-1, &event->profile_count)) 84 if (--event->profile_count > 0)
87 return; 85 return;
88 86
89 event->profile_disable(event); 87 event->profile_disable(event);
@@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id)
120 } 118 }
121 mutex_unlock(&event_mutex); 119 mutex_unlock(&event_mutex);
122} 120}
121
122__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
123 int *rctxp, unsigned long *irq_flags)
124{
125 struct trace_entry *entry;
126 char *trace_buf, *raw_data;
127 int pc, cpu;
128
129 pc = preempt_count();
130
131 /* Protect the per cpu buffer, begin the rcu read side */
132 local_irq_save(*irq_flags);
133
134 *rctxp = perf_swevent_get_recursion_context();
135 if (*rctxp < 0)
136 goto err_recursion;
137
138 cpu = smp_processor_id();
139
140 if (in_nmi())
141 trace_buf = rcu_dereference(perf_trace_buf_nmi);
142 else
143 trace_buf = rcu_dereference(perf_trace_buf);
144
145 if (!trace_buf)
146 goto err;
147
148 raw_data = per_cpu_ptr(trace_buf, cpu);
149
150 /* zero the dead bytes from align to not leak stack to user */
151 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
152
153 entry = (struct trace_entry *)raw_data;
154 tracing_generic_entry_update(entry, *irq_flags, pc);
155 entry->type = type;
156
157 return raw_data;
158err:
159 perf_swevent_put_recursion_context(*rctxp);
160err_recursion:
161 local_irq_restore(*irq_flags);
162 return NULL;
163}
164EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare);
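ftrace_perf_buf_prepare(), added above, centralizes what every event's profile handler used to open-code: disable interrupts, take a perf recursion context, pick the normal or NMI per-cpu buffer, zero the u64 alignment padding and fill in the common trace_entry header. A hedged usage sketch; the record layout, event id and function below are hypothetical, the declaration is assumed to come in via linux/ftrace_event.h, and because the paired submit helper lives outside this hunk the hand-off to perf is left as a comment and the teardown mirrors the error paths above:

#include <linux/kernel.h>
#include <linux/ftrace_event.h>
#include <linux/perf_event.h>
#include <linux/irqflags.h>

struct demo_record {
        struct trace_entry      ent;
        unsigned long           value;
};

static void demo_perf_emit(unsigned short demo_event_id, unsigned long value)
{
        struct demo_record *rec;
        unsigned long irq_flags;
        int rctx;
        int size = ALIGN(sizeof(*rec), sizeof(u64));

        rec = ftrace_perf_buf_prepare(size, demo_event_id,
                                      &rctx, &irq_flags);
        if (!rec)
                return;

        rec->value = value;

        /* ... hand rec/size to perf here ... */

        /* undo exactly what the prepare step set up */
        perf_swevent_put_recursion_context(rctx);
        local_irq_restore(irq_flags);
}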
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1d18315dc836..3f972ad98d04 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -60,10 +60,8 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
60 return 0; 60 return 0;
61 61
62err: 62err:
63 if (field) { 63 if (field)
64 kfree(field->name); 64 kfree(field->name);
65 kfree(field->type);
66 }
67 kfree(field); 65 kfree(field);
68 66
69 return -ENOMEM; 67 return -ENOMEM;
@@ -78,7 +76,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
78 if (ret) \ 76 if (ret) \
79 return ret; 77 return ret;
80 78
81int trace_define_common_fields(struct ftrace_event_call *call) 79static int trace_define_common_fields(struct ftrace_event_call *call)
82{ 80{
83 int ret; 81 int ret;
84 struct trace_entry ent; 82 struct trace_entry ent;
@@ -91,7 +89,6 @@ int trace_define_common_fields(struct ftrace_event_call *call)
91 89
92 return ret; 90 return ret;
93} 91}
94EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 92
96void trace_destroy_fields(struct ftrace_event_call *call) 93void trace_destroy_fields(struct ftrace_event_call *call)
97{ 94{
@@ -105,9 +102,25 @@ void trace_destroy_fields(struct ftrace_event_call *call)
105 } 102 }
106} 103}
107 104
108static void ftrace_event_enable_disable(struct ftrace_event_call *call, 105int trace_event_raw_init(struct ftrace_event_call *call)
106{
107 int id;
108
109 id = register_ftrace_event(call->event);
110 if (!id)
111 return -ENODEV;
112 call->id = id;
113 INIT_LIST_HEAD(&call->fields);
114
115 return 0;
116}
117EXPORT_SYMBOL_GPL(trace_event_raw_init);
118
119static int ftrace_event_enable_disable(struct ftrace_event_call *call,
109 int enable) 120 int enable)
110{ 121{
122 int ret = 0;
123
111 switch (enable) { 124 switch (enable) {
112 case 0: 125 case 0:
113 if (call->enabled) { 126 if (call->enabled) {
@@ -118,12 +131,20 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
118 break; 131 break;
119 case 1: 132 case 1:
120 if (!call->enabled) { 133 if (!call->enabled) {
121 call->enabled = 1;
122 tracing_start_cmdline_record(); 134 tracing_start_cmdline_record();
123 call->regfunc(call); 135 ret = call->regfunc(call);
136 if (ret) {
137 tracing_stop_cmdline_record();
138 pr_info("event trace: Could not enable event "
139 "%s\n", call->name);
140 break;
141 }
142 call->enabled = 1;
124 } 143 }
125 break; 144 break;
126 } 145 }
146
147 return ret;
127} 148}
128 149
129static void ftrace_clear_events(void) 150static void ftrace_clear_events(void)
@@ -402,7 +423,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
402 case 0: 423 case 0:
403 case 1: 424 case 1:
404 mutex_lock(&event_mutex); 425 mutex_lock(&event_mutex);
405 ftrace_event_enable_disable(call, val); 426 ret = ftrace_event_enable_disable(call, val);
406 mutex_unlock(&event_mutex); 427 mutex_unlock(&event_mutex);
407 break; 428 break;
408 429
@@ -412,7 +433,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
412 433
413 *ppos += cnt; 434 *ppos += cnt;
414 435
415 return cnt; 436 return ret ? ret : cnt;
416} 437}
417 438
418static ssize_t 439static ssize_t
@@ -497,41 +518,16 @@ out:
497 return ret; 518 return ret;
498} 519}
499 520
500extern char *__bad_type_size(void);
501
502#undef FIELD
503#define FIELD(type, name) \
504 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
505 #type, "common_" #name, offsetof(typeof(field), name), \
506 sizeof(field.name), is_signed_type(type)
507
508static int trace_write_header(struct trace_seq *s)
509{
510 struct trace_entry field;
511
512 /* struct trace_entry */
513 return trace_seq_printf(s,
514 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
515 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
516 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
517 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
518 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
519 "\n",
520 FIELD(unsigned short, type),
521 FIELD(unsigned char, flags),
522 FIELD(unsigned char, preempt_count),
523 FIELD(int, pid),
524 FIELD(int, lock_depth));
525}
526
527static ssize_t 521static ssize_t
528event_format_read(struct file *filp, char __user *ubuf, size_t cnt, 522event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
529 loff_t *ppos) 523 loff_t *ppos)
530{ 524{
531 struct ftrace_event_call *call = filp->private_data; 525 struct ftrace_event_call *call = filp->private_data;
526 struct ftrace_event_field *field;
532 struct trace_seq *s; 527 struct trace_seq *s;
528 int common_field_count = 5;
533 char *buf; 529 char *buf;
534 int r; 530 int r = 0;
535 531
536 if (*ppos) 532 if (*ppos)
537 return 0; 533 return 0;
@@ -542,14 +538,48 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
542 538
543 trace_seq_init(s); 539 trace_seq_init(s);
544 540
545 /* If any of the first writes fail, so will the show_format. */
546
547 trace_seq_printf(s, "name: %s\n", call->name); 541 trace_seq_printf(s, "name: %s\n", call->name);
548 trace_seq_printf(s, "ID: %d\n", call->id); 542 trace_seq_printf(s, "ID: %d\n", call->id);
549 trace_seq_printf(s, "format:\n"); 543 trace_seq_printf(s, "format:\n");
550 trace_write_header(s);
551 544
552 r = call->show_format(call, s); 545 list_for_each_entry_reverse(field, &call->fields, link) {
546 /*
547	 * Show the array type in a readable form (except for dynamic arrays).
548 * Normal:
549 * field:TYPE VAR
550 * If TYPE := TYPE[LEN], it is shown:
551 * field:TYPE VAR[LEN]
552 */
553 const char *array_descriptor = strchr(field->type, '[');
554
555 if (!strncmp(field->type, "__data_loc", 10))
556 array_descriptor = NULL;
557
558 if (!array_descriptor) {
559 r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
560 "\tsize:%u;\tsigned:%d;\n",
561 field->type, field->name, field->offset,
562 field->size, !!field->is_signed);
563 } else {
564 r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
565 "\tsize:%u;\tsigned:%d;\n",
566 (int)(array_descriptor - field->type),
567 field->type, field->name,
568 array_descriptor, field->offset,
569 field->size, !!field->is_signed);
570 }
571
572 if (--common_field_count == 0)
573 r = trace_seq_printf(s, "\n");
574
575 if (!r)
576 break;
577 }
578
579 if (r)
580 r = trace_seq_printf(s, "\nprint fmt: %s\n",
581 call->print_fmt);
582
553 if (!r) { 583 if (!r) {
554 /* 584 /*
555 * ug! The format output is bigger than a PAGE!! 585 * ug! The format output is bigger than a PAGE!!
@@ -913,7 +943,9 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
913 id); 943 id);
914 944
915 if (call->define_fields) { 945 if (call->define_fields) {
916 ret = call->define_fields(call); 946 ret = trace_define_common_fields(call);
947 if (!ret)
948 ret = call->define_fields(call);
917 if (ret < 0) { 949 if (ret < 0) {
918 pr_warning("Could not initialize trace point" 950 pr_warning("Could not initialize trace point"
919 " events/%s\n", call->name); 951 " events/%s\n", call->name);
@@ -923,10 +955,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
923 filter); 955 filter);
924 } 956 }
925 957
926 /* A trace may not want to export its format */
927 if (!call->show_format)
928 return 0;
929
930 trace_create_file("format", 0444, call->dir, call, 958 trace_create_file("format", 0444, call->dir, call,
931 format); 959 format);
932 960
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 50504cb228de..4615f62a04f1 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -211,8 +211,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
211{ 211{
212 char **addr = (char **)(event + pred->offset); 212 char **addr = (char **)(event + pred->offset);
213 int cmp, match; 213 int cmp, match;
214 int len = strlen(*addr) + 1; /* including tailing '\0' */
214 215
215 cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len); 216 cmp = pred->regex.match(*addr, &pred->regex, len);
216 217
217 match = cmp ^ pred->not; 218 match = cmp ^ pred->not;
218 219
@@ -251,7 +252,18 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
251 return 0; 252 return 0;
252} 253}
253 254
254/* Basic regex callbacks */ 255/*
256 * regex_match_foo - Basic regex callbacks
257 *
258 * @str: the string to be searched
259 * @r: the regex structure containing the pattern string
260 * @len: the length of the string to be searched (including '\0')
261 *
262 * Note:
263 * - @str might not be NULL-terminated if it's of type DYN_STRING
264 * or STATIC_STRING
265 */
266
255static int regex_match_full(char *str, struct regex *r, int len) 267static int regex_match_full(char *str, struct regex *r, int len)
256{ 268{
257 if (strncmp(str, r->pattern, len) == 0) 269 if (strncmp(str, r->pattern, len) == 0)
@@ -261,23 +273,24 @@ static int regex_match_full(char *str, struct regex *r, int len)
261 273
262static int regex_match_front(char *str, struct regex *r, int len) 274static int regex_match_front(char *str, struct regex *r, int len)
263{ 275{
264 if (strncmp(str, r->pattern, len) == 0) 276 if (strncmp(str, r->pattern, r->len) == 0)
265 return 1; 277 return 1;
266 return 0; 278 return 0;
267} 279}
268 280
269static int regex_match_middle(char *str, struct regex *r, int len) 281static int regex_match_middle(char *str, struct regex *r, int len)
270{ 282{
271 if (strstr(str, r->pattern)) 283 if (strnstr(str, r->pattern, len))
272 return 1; 284 return 1;
273 return 0; 285 return 0;
274} 286}
275 287
276static int regex_match_end(char *str, struct regex *r, int len) 288static int regex_match_end(char *str, struct regex *r, int len)
277{ 289{
278 char *ptr = strstr(str, r->pattern); 290 int strlen = len - 1;
279 291
280 if (ptr && (ptr[r->len] == 0)) 292 if (strlen >= r->len &&
293 memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
281 return 1; 294 return 1;
282 return 0; 295 return 0;
283} 296}
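The string predicates above now receive the true length of the value being tested, including the trailing '\0' slot as the new kernel-doc comment spells out, so the front match can use the pattern length, the middle match can stay inside the buffer with strnstr(), and the end match becomes a bounded memcmp() against the tail instead of a strstr() scan. The suffix test in isolation, as plain C:

#include <string.h>

/* len counts the trailing '\0' slot, matching the filter convention */
static int demo_ends_with(const char *str, int len,
                          const char *pattern, int plen)
{
        int slen = len - 1;

        return slen >= plen &&
               memcmp(str + slen - plen, pattern, plen) == 0;
}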
@@ -781,10 +794,8 @@ static int filter_add_pred(struct filter_parse_state *ps,
781 pred->regex.field_len = field->size; 794 pred->regex.field_len = field->size;
782 } else if (field->filter_type == FILTER_DYN_STRING) 795 } else if (field->filter_type == FILTER_DYN_STRING)
783 fn = filter_pred_strloc; 796 fn = filter_pred_strloc;
784 else { 797 else
785 fn = filter_pred_pchar; 798 fn = filter_pred_pchar;
786 pred->regex.field_len = strlen(pred->regex.pattern);
787 }
788 } else { 799 } else {
789 if (field->is_signed) 800 if (field->is_signed)
790 ret = strict_strtoll(pred->regex.pattern, 0, &val); 801 ret = strict_strtoll(pred->regex.pattern, 0, &val);
@@ -1360,7 +1371,7 @@ out_unlock:
1360 return err; 1371 return err;
1361} 1372}
1362 1373
1363#ifdef CONFIG_EVENT_PROFILE 1374#ifdef CONFIG_PERF_EVENTS
1364 1375
1365void ftrace_profile_free_filter(struct perf_event *event) 1376void ftrace_profile_free_filter(struct perf_event *event)
1366{ 1377{
@@ -1428,5 +1439,5 @@ out_unlock:
1428 return err; 1439 return err;
1429} 1440}
1430 1441
1431#endif /* CONFIG_EVENT_PROFILE */ 1442#endif /* CONFIG_PERF_EVENTS */
1432 1443
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index dff8c84ddf17..e091f64ba6ce 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -62,78 +62,6 @@ static void __always_unused ____ftrace_check_##name(void) \
62 62
63#include "trace_entries.h" 63#include "trace_entries.h"
64 64
65
66#undef __field
67#define __field(type, item) \
68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
69 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
70 offsetof(typeof(field), item), \
71 sizeof(field.item), is_signed_type(type)); \
72 if (!ret) \
73 return 0;
74
75#undef __field_desc
76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item), \
81 is_signed_type(type)); \
82 if (!ret) \
83 return 0;
84
85#undef __array
86#define __array(type, item, len) \
87 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
88 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
89 offsetof(typeof(field), item), \
90 sizeof(field.item), is_signed_type(type)); \
91 if (!ret) \
92 return 0;
93
94#undef __array_desc
95#define __array_desc(type, container, item, len) \
96 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
97 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
98 offsetof(typeof(field), container.item), \
99 sizeof(field.container.item), \
100 is_signed_type(type)); \
101 if (!ret) \
102 return 0;
103
104#undef __dynamic_array
105#define __dynamic_array(type, item) \
106 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
107 "offset:%zu;\tsize:0;\tsigned:%u;\n", \
108 offsetof(typeof(field), item), \
109 is_signed_type(type)); \
110 if (!ret) \
111 return 0;
112
113#undef F_printk
114#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
115
116#undef __entry
117#define __entry REC
118
119#undef FTRACE_ENTRY
120#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
121static int \
122ftrace_format_##name(struct ftrace_event_call *unused, \
123 struct trace_seq *s) \
124{ \
125 struct struct_name field __attribute__((unused)); \
126 int ret = 0; \
127 \
128 tstruct; \
129 \
130 trace_seq_printf(s, "\nprint fmt: " print); \
131 \
132 return ret; \
133}
134
135#include "trace_entries.h"
136
137#undef __field 65#undef __field
138#define __field(type, item) \ 66#define __field(type, item) \
139 ret = trace_define_field(event_call, #type, #item, \ 67 ret = trace_define_field(event_call, #type, #item, \
@@ -158,7 +86,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
158 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 86 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
159 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 87 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
160 offsetof(typeof(field), item), \ 88 offsetof(typeof(field), item), \
161 sizeof(field.item), 0, FILTER_OTHER); \ 89 sizeof(field.item), \
90 is_signed_type(type), FILTER_OTHER); \
162 if (ret) \ 91 if (ret) \
163 return ret; 92 return ret;
164 93
@@ -168,13 +97,18 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
168 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 97 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
169 offsetof(typeof(field), \ 98 offsetof(typeof(field), \
170 container.item), \ 99 container.item), \
171 sizeof(field.container.item), 0, \ 100 sizeof(field.container.item), \
172 FILTER_OTHER); \ 101 is_signed_type(type), FILTER_OTHER); \
173 if (ret) \ 102 if (ret) \
174 return ret; 103 return ret;
175 104
176#undef __dynamic_array 105#undef __dynamic_array
177#define __dynamic_array(type, item) 106#define __dynamic_array(type, item) \
107 ret = trace_define_field(event_call, #type, #item, \
108 offsetof(typeof(field), item), \
109 0, is_signed_type(type), FILTER_OTHER);\
110 if (ret) \
111 return ret;
178 112
179#undef FTRACE_ENTRY 113#undef FTRACE_ENTRY
180#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ 114#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
@@ -184,10 +118,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
184 struct struct_name field; \ 118 struct struct_name field; \
185 int ret; \ 119 int ret; \
186 \ 120 \
187 ret = trace_define_common_fields(event_call); \
188 if (ret) \
189 return ret; \
190 \
191 tstruct; \ 121 tstruct; \
192 \ 122 \
193 return ret; \ 123 return ret; \
@@ -201,6 +131,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
201 return 0; 131 return 0;
202} 132}
203 133
134#undef __entry
135#define __entry REC
136
204#undef __field 137#undef __field
205#define __field(type, item) 138#define __field(type, item)
206 139
@@ -216,6 +149,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
216#undef __dynamic_array 149#undef __dynamic_array
217#define __dynamic_array(type, item) 150#define __dynamic_array(type, item)
218 151
152#undef F_printk
153#define F_printk(fmt, args...) #fmt ", " __stringify(args)
154
219#undef FTRACE_ENTRY 155#undef FTRACE_ENTRY
220#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 156#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
221 \ 157 \
@@ -226,7 +162,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
226 .id = type, \ 162 .id = type, \
227 .system = __stringify(TRACE_SYSTEM), \ 163 .system = __stringify(TRACE_SYSTEM), \
228 .raw_init = ftrace_raw_init_event, \ 164 .raw_init = ftrace_raw_init_event, \
229 .show_format = ftrace_format_##call, \ 165 .print_fmt = print, \
230 .define_fields = ftrace_define_fields_##call, \ 166 .define_fields = ftrace_define_fields_##call, \
231}; \ 167}; \
232 168
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 45e6c01b2e4d..e998a824e9db 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,9 +14,21 @@
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h" 15#include "trace_output.h"
16 16
17struct fgraph_data { 17struct fgraph_cpu_data {
18 pid_t last_pid; 18 pid_t last_pid;
19 int depth; 19 int depth;
20 int ignore;
21 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
22};
23
24struct fgraph_data {
25 struct fgraph_cpu_data *cpu_data;
26
27 /* Place to preserve last processed entry. */
28 struct ftrace_graph_ent_entry ent;
29 struct ftrace_graph_ret_entry ret;
30 int failed;
31 int cpu;
20}; 32};
21 33
22#define TRACE_GRAPH_INDENT 2 34#define TRACE_GRAPH_INDENT 2
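The graph tracer's iterator data is reshaped above: the per-cpu bookkeeping moves behind a cpu_data pointer and the structure gains room to park the last entry/return pair when a line does not fit in the seq_file, the same leftover trick used in trace.c. A hedged sketch of how such a structure would be allocated, assuming it sits next to the definitions above; the function name is illustrative and the real setup happens in graph_trace_open(), outside this hunk:

#include <linux/percpu.h>
#include <linux/slab.h>

static struct fgraph_data *demo_alloc_fgraph_data(void)
{
        struct fgraph_data *data = kzalloc(sizeof(*data), GFP_KERNEL);

        if (!data)
                return NULL;

        data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
        if (!data->cpu_data) {
                kfree(data);
                return NULL;
        }
        return data;
}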
@@ -176,7 +188,7 @@ static int __trace_graph_entry(struct trace_array *tr,
176 struct ring_buffer *buffer = tr->buffer; 188 struct ring_buffer *buffer = tr->buffer;
177 struct ftrace_graph_ent_entry *entry; 189 struct ftrace_graph_ent_entry *entry;
178 190
179 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 191 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
180 return 0; 192 return 0;
181 193
182 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, 194 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -201,13 +213,11 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
201 int cpu; 213 int cpu;
202 int pc; 214 int pc;
203 215
204 if (unlikely(!tr))
205 return 0;
206
207 if (!ftrace_trace_task(current)) 216 if (!ftrace_trace_task(current))
208 return 0; 217 return 0;
209 218
210 if (!ftrace_graph_addr(trace->func)) 219 /* trace it when it is-nested-in or is a function enabled. */
220 if (!(trace->depth || ftrace_graph_addr(trace->func)))
211 return 0; 221 return 0;
212 222
213 local_irq_save(flags); 223 local_irq_save(flags);
@@ -220,9 +230,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
220 } else { 230 } else {
221 ret = 0; 231 ret = 0;
222 } 232 }
223 /* Only do the atomic if it is not already set */
224 if (!test_tsk_trace_graph(current))
225 set_tsk_trace_graph(current);
226 233
227 atomic_dec(&data->disabled); 234 atomic_dec(&data->disabled);
228 local_irq_restore(flags); 235 local_irq_restore(flags);
@@ -240,7 +247,7 @@ static void __trace_graph_return(struct trace_array *tr,
240 struct ring_buffer *buffer = tr->buffer; 247 struct ring_buffer *buffer = tr->buffer;
241 struct ftrace_graph_ret_entry *entry; 248 struct ftrace_graph_ret_entry *entry;
242 249
243 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 250 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
244 return; 251 return;
245 252
246 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, 253 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@ -270,17 +277,24 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
270 pc = preempt_count(); 277 pc = preempt_count();
271 __trace_graph_return(tr, trace, flags, pc); 278 __trace_graph_return(tr, trace, flags, pc);
272 } 279 }
273 if (!trace->depth)
274 clear_tsk_trace_graph(current);
275 atomic_dec(&data->disabled); 280 atomic_dec(&data->disabled);
276 local_irq_restore(flags); 281 local_irq_restore(flags);
277} 282}
278 283
284void set_graph_array(struct trace_array *tr)
285{
286 graph_array = tr;
287
288 /* Make graph_array visible before we start tracing */
289
290 smp_mb();
291}
292
279static int graph_trace_init(struct trace_array *tr) 293static int graph_trace_init(struct trace_array *tr)
280{ 294{
281 int ret; 295 int ret;
282 296
283 graph_array = tr; 297 set_graph_array(tr);
284 ret = register_ftrace_graph(&trace_graph_return, 298 ret = register_ftrace_graph(&trace_graph_return,
285 &trace_graph_entry); 299 &trace_graph_entry);
286 if (ret) 300 if (ret)
@@ -290,11 +304,6 @@ static int graph_trace_init(struct trace_array *tr)
290 return 0; 304 return 0;
291} 305}
292 306
293void set_graph_array(struct trace_array *tr)
294{
295 graph_array = tr;
296}
297
298static void graph_trace_reset(struct trace_array *tr) 307static void graph_trace_reset(struct trace_array *tr)
299{ 308{
300 tracing_stop_cmdline_record(); 309 tracing_stop_cmdline_record();
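
The smp_mb() added to set_graph_array() above orders the store of graph_array against the tracing that follows. A user-space analogue with C11 atomics, release on publish paired with acquire on use; the names are illustrative, not the kernel's API:

	#include <stdatomic.h>
	#include <stdio.h>

	struct demo_array { const char *name; };

	static _Atomic(struct demo_array *) demo_graph_array;

	/* Publisher: make the array fully visible before readers may use it. */
	static void demo_set_graph_array(struct demo_array *tr)
	{
		atomic_store_explicit(&demo_graph_array, tr, memory_order_release);
	}

	/* Reader side, e.g. an entry callback: acquire pairs with the release. */
	static int demo_graph_entry(void)
	{
		struct demo_array *tr =
			atomic_load_explicit(&demo_graph_array, memory_order_acquire);

		if (!tr)
			return 0;	/* not set up yet */
		printf("tracing into %s\n", tr->name);
		return 1;
	}

	int main(void)
	{
		static struct demo_array global_trace = { .name = "global" };

		demo_graph_entry();			/* nothing published yet */
		demo_set_graph_array(&global_trace);
		return demo_graph_entry() ? 0 : 1;
	}
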
@@ -384,7 +393,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
384 if (!data) 393 if (!data)
385 return TRACE_TYPE_HANDLED; 394 return TRACE_TYPE_HANDLED;
386 395
387 last_pid = &(per_cpu_ptr(data, cpu)->last_pid); 396 last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
388 397
389 if (*last_pid == pid) 398 if (*last_pid == pid)
390 return TRACE_TYPE_HANDLED; 399 return TRACE_TYPE_HANDLED;
@@ -435,26 +444,49 @@ static struct ftrace_graph_ret_entry *
435get_return_for_leaf(struct trace_iterator *iter, 444get_return_for_leaf(struct trace_iterator *iter,
436 struct ftrace_graph_ent_entry *curr) 445 struct ftrace_graph_ent_entry *curr)
437{ 446{
438 struct ring_buffer_iter *ring_iter; 447 struct fgraph_data *data = iter->private;
448 struct ring_buffer_iter *ring_iter = NULL;
439 struct ring_buffer_event *event; 449 struct ring_buffer_event *event;
440 struct ftrace_graph_ret_entry *next; 450 struct ftrace_graph_ret_entry *next;
441 451
442 ring_iter = iter->buffer_iter[iter->cpu]; 452 /*
453 * If the previous output failed to write to the seq buffer,
454 * then we just reuse the data from before.
455 */
456 if (data && data->failed) {
457 curr = &data->ent;
458 next = &data->ret;
459 } else {
443 460
444 /* First peek to compare current entry and the next one */ 461 ring_iter = iter->buffer_iter[iter->cpu];
445 if (ring_iter) 462
446 event = ring_buffer_iter_peek(ring_iter, NULL); 463 /* First peek to compare current entry and the next one */
447 else { 464 if (ring_iter)
448 /* We need to consume the current entry to see the next one */ 465 event = ring_buffer_iter_peek(ring_iter, NULL);
449 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); 466 else {
450 event = ring_buffer_peek(iter->tr->buffer, iter->cpu, 467 /*
451 NULL); 468 * We need to consume the current entry to see
452 } 469 * the next one.
470 */
471 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
472 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
473 NULL);
474 }
453 475
454 if (!event) 476 if (!event)
455 return NULL; 477 return NULL;
456 478
457 next = ring_buffer_event_data(event); 479 next = ring_buffer_event_data(event);
480
481 if (data) {
482 /*
483 * Save current and next entries for later reference
484 * if the output fails.
485 */
486 data->ent = *curr;
487 data->ret = *next;
488 }
489 }
458 490
459 if (next->ent.type != TRACE_GRAPH_RET) 491 if (next->ent.type != TRACE_GRAPH_RET)
460 return NULL; 492 return NULL;
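
get_return_for_leaf() now caches the consumed entry/return pair so a failed seq write can be retried without losing the record. A simplified user-space model of that peek-or-reuse flow, with a toy array standing in for the ring buffer (names and records are made up):

	#include <stdio.h>

	struct demo_rec { int type; long val; };

	#define DEMO_ENT 1
	#define DEMO_RET 2

	static struct demo_rec ring[] = {
		{ DEMO_ENT, 100 }, { DEMO_RET, 100 }, { DEMO_ENT, 200 },
	};
	static int ring_pos;

	struct demo_iter {
		struct demo_rec	ent;	/* preserved last pair ... */
		struct demo_rec	ret;
		int		failed;	/* ... reused when the last output failed */
	};

	static struct demo_rec *demo_next_for_leaf(struct demo_iter *it,
						   struct demo_rec **curr)
	{
		struct demo_rec *next;

		if (it->failed) {
			/* Previous output failed: reuse the saved pair. */
			*curr = &it->ent;
			return &it->ret;
		}

		/* Consume the current record to peek at the next one. */
		*curr = &ring[ring_pos++];
		if (ring_pos >= (int)(sizeof(ring) / sizeof(ring[0])))
			return NULL;
		next = &ring[ring_pos];

		/* Save both in case writing the output fails later. */
		it->ent = **curr;
		it->ret = *next;

		return next->type == DEMO_RET ? next : NULL;
	}

	int main(void)
	{
		struct demo_iter it = { .failed = 0 };
		struct demo_rec *curr, *ret;

		ret = demo_next_for_leaf(&it, &curr);
		printf("first pass: leaf=%s\n", ret ? "yes" : "no");

		it.failed = 1;			/* pretend the seq buffer was full */
		ret = demo_next_for_leaf(&it, &curr);
		printf("retry: reused entry %ld -> %s\n", curr->val,
		       ret && ret->type == DEMO_RET ? "matched return" : "no return");
		return 0;
	}
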
@@ -639,15 +671,21 @@ print_graph_entry_leaf(struct trace_iterator *iter,
639 duration = graph_ret->rettime - graph_ret->calltime; 671 duration = graph_ret->rettime - graph_ret->calltime;
640 672
641 if (data) { 673 if (data) {
674 struct fgraph_cpu_data *cpu_data;
642 int cpu = iter->cpu; 675 int cpu = iter->cpu;
643 int *depth = &(per_cpu_ptr(data, cpu)->depth); 676
677 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
644 678
645 /* 679 /*
646 * Comments display at + 1 to depth. Since 680 * Comments display at + 1 to depth. Since
647 * this is a leaf function, keep the comments 681 * this is a leaf function, keep the comments
648 * equal to this depth. 682 * equal to this depth.
649 */ 683 */
650 *depth = call->depth - 1; 684 cpu_data->depth = call->depth - 1;
685
686 /* No need to keep this function around for this depth */
687 if (call->depth < FTRACE_RETFUNC_DEPTH)
688 cpu_data->enter_funcs[call->depth] = 0;
651 } 689 }
652 690
653 /* Overhead */ 691 /* Overhead */
@@ -687,10 +725,15 @@ print_graph_entry_nested(struct trace_iterator *iter,
687 int i; 725 int i;
688 726
689 if (data) { 727 if (data) {
728 struct fgraph_cpu_data *cpu_data;
690 int cpu = iter->cpu; 729 int cpu = iter->cpu;
691 int *depth = &(per_cpu_ptr(data, cpu)->depth);
692 730
693 *depth = call->depth; 731 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
732 cpu_data->depth = call->depth;
733
734 /* Save this function pointer to see if the exit matches */
735 if (call->depth < FTRACE_RETFUNC_DEPTH)
736 cpu_data->enter_funcs[call->depth] = call->func;
694 } 737 }
695 738
696 /* No overhead */ 739 /* No overhead */
@@ -782,19 +825,34 @@ static enum print_line_t
782print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 825print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
783 struct trace_iterator *iter) 826 struct trace_iterator *iter)
784{ 827{
785 int cpu = iter->cpu; 828 struct fgraph_data *data = iter->private;
786 struct ftrace_graph_ent *call = &field->graph_ent; 829 struct ftrace_graph_ent *call = &field->graph_ent;
787 struct ftrace_graph_ret_entry *leaf_ret; 830 struct ftrace_graph_ret_entry *leaf_ret;
831 static enum print_line_t ret;
832 int cpu = iter->cpu;
788 833
789 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) 834 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
790 return TRACE_TYPE_PARTIAL_LINE; 835 return TRACE_TYPE_PARTIAL_LINE;
791 836
792 leaf_ret = get_return_for_leaf(iter, field); 837 leaf_ret = get_return_for_leaf(iter, field);
793 if (leaf_ret) 838 if (leaf_ret)
794 return print_graph_entry_leaf(iter, field, leaf_ret, s); 839 ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
795 else 840 else
796 return print_graph_entry_nested(iter, field, s, cpu); 841 ret = print_graph_entry_nested(iter, field, s, cpu);
842
843 if (data) {
844 /*
845 * If we failed to write our output, then we need to make
846 * note of it. Because we already consumed our entry.
847 */
848 if (s->full) {
849 data->failed = 1;
850 data->cpu = cpu;
851 } else
852 data->failed = 0;
853 }
797 854
855 return ret;
798} 856}
799 857
800static enum print_line_t 858static enum print_line_t
@@ -805,19 +863,28 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
805 struct fgraph_data *data = iter->private; 863 struct fgraph_data *data = iter->private;
806 pid_t pid = ent->pid; 864 pid_t pid = ent->pid;
807 int cpu = iter->cpu; 865 int cpu = iter->cpu;
866 int func_match = 1;
808 int ret; 867 int ret;
809 int i; 868 int i;
810 869
811 if (data) { 870 if (data) {
871 struct fgraph_cpu_data *cpu_data;
812 int cpu = iter->cpu; 872 int cpu = iter->cpu;
813 int *depth = &(per_cpu_ptr(data, cpu)->depth); 873
874 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
814 875
815 /* 876 /*
816 * Comments display at + 1 to depth. This is the 877 * Comments display at + 1 to depth. This is the
817 * return from a function, we now want the comments 878 * return from a function, we now want the comments
818 * to display at the same level of the bracket. 879 * to display at the same level of the bracket.
819 */ 880 */
820 *depth = trace->depth - 1; 881 cpu_data->depth = trace->depth - 1;
882
883 if (trace->depth < FTRACE_RETFUNC_DEPTH) {
884 if (cpu_data->enter_funcs[trace->depth] != trace->func)
885 func_match = 0;
886 cpu_data->enter_funcs[trace->depth] = 0;
887 }
821 } 888 }
822 889
823 if (print_graph_prologue(iter, s, 0, 0)) 890 if (print_graph_prologue(iter, s, 0, 0))
@@ -842,9 +909,21 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
842 return TRACE_TYPE_PARTIAL_LINE; 909 return TRACE_TYPE_PARTIAL_LINE;
843 } 910 }
844 911
845 ret = trace_seq_printf(s, "}\n"); 912 /*
846 if (!ret) 913 * If the return function does not have a matching entry,
847 return TRACE_TYPE_PARTIAL_LINE; 914 * then the entry was lost. Instead of just printing
915 * the '}' and letting the user guess what function this
916 * belongs to, write out the function name.
917 */
918 if (func_match) {
919 ret = trace_seq_printf(s, "}\n");
920 if (!ret)
921 return TRACE_TYPE_PARTIAL_LINE;
922 } else {
923 ret = trace_seq_printf(s, "} (%ps)\n", (void *)trace->func);
924 if (!ret)
925 return TRACE_TYPE_PARTIAL_LINE;
926 }
848 927
849 /* Overrun */ 928 /* Overrun */
850 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { 929 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
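
The enter_funcs[] bookkeeping above records which function was seen at each depth so that a lone '}' can be annotated when its entry record was lost. A compact user-space sketch of that match-or-annotate logic; the depth limit and function values are illustrative:

	#include <stdio.h>

	#define DEMO_DEPTH 16

	static unsigned long enter_funcs[DEMO_DEPTH];

	static void demo_entry(int depth, unsigned long func)
	{
		if (depth < DEMO_DEPTH)
			enter_funcs[depth] = func;
		printf("%*sfunc_%lx() {\n", depth * 2, "", func);
	}

	static void demo_return(int depth, unsigned long func)
	{
		int func_match = 1;

		if (depth < DEMO_DEPTH) {
			if (enter_funcs[depth] != func)
				func_match = 0;	/* entry record was lost */
			enter_funcs[depth] = 0;
		}

		if (func_match)
			printf("%*s}\n", depth * 2, "");
		else
			printf("%*s} (func_%lx)\n", depth * 2, "", func);
	}

	int main(void)
	{
		demo_entry(0, 0x100);
		demo_return(0, 0x100);	/* matching entry: prints a bare "}" */
		demo_return(0, 0x200);	/* lost entry: prints "} (func_200)" */
		return 0;
	}
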
@@ -873,7 +952,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
873 int i; 952 int i;
874 953
875 if (data) 954 if (data)
876 depth = per_cpu_ptr(data, iter->cpu)->depth; 955 depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
877 956
878 if (print_graph_prologue(iter, s, 0, 0)) 957 if (print_graph_prologue(iter, s, 0, 0))
879 return TRACE_TYPE_PARTIAL_LINE; 958 return TRACE_TYPE_PARTIAL_LINE;
@@ -941,8 +1020,33 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
941enum print_line_t 1020enum print_line_t
942print_graph_function(struct trace_iterator *iter) 1021print_graph_function(struct trace_iterator *iter)
943{ 1022{
1023 struct ftrace_graph_ent_entry *field;
1024 struct fgraph_data *data = iter->private;
944 struct trace_entry *entry = iter->ent; 1025 struct trace_entry *entry = iter->ent;
945 struct trace_seq *s = &iter->seq; 1026 struct trace_seq *s = &iter->seq;
1027 int cpu = iter->cpu;
1028 int ret;
1029
1030 if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
1031 per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
1032 return TRACE_TYPE_HANDLED;
1033 }
1034
1035 /*
1036 * If the last output failed, there's a possibility we need
1037 * to print out the missing entry which would never go out.
1038 */
1039 if (data && data->failed) {
1040 field = &data->ent;
1041 iter->cpu = data->cpu;
1042 ret = print_graph_entry(field, s, iter);
1043 if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
1044 per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
1045 ret = TRACE_TYPE_NO_CONSUME;
1046 }
1047 iter->cpu = cpu;
1048 return ret;
1049 }
946 1050
947 switch (entry->type) { 1051 switch (entry->type) {
948 case TRACE_GRAPH_ENT: { 1052 case TRACE_GRAPH_ENT: {
@@ -952,7 +1056,7 @@ print_graph_function(struct trace_iterator *iter)
952 * sizeof(struct ftrace_graph_ent_entry) is very small, 1056 * sizeof(struct ftrace_graph_ent_entry) is very small,
953 * it can be safely saved at the stack. 1057 * it can be safely saved at the stack.
954 */ 1058 */
955 struct ftrace_graph_ent_entry *field, saved; 1059 struct ftrace_graph_ent_entry saved;
956 trace_assign_type(field, entry); 1060 trace_assign_type(field, entry);
957 saved = *field; 1061 saved = *field;
958 return print_graph_entry(&saved, s, iter); 1062 return print_graph_entry(&saved, s, iter);
@@ -1030,31 +1134,54 @@ static void print_graph_headers(struct seq_file *s)
1030static void graph_trace_open(struct trace_iterator *iter) 1134static void graph_trace_open(struct trace_iterator *iter)
1031{ 1135{
1032 /* pid and depth on the last trace processed */ 1136 /* pid and depth on the last trace processed */
1033 struct fgraph_data *data = alloc_percpu(struct fgraph_data); 1137 struct fgraph_data *data;
1034 int cpu; 1138 int cpu;
1035 1139
1140 iter->private = NULL;
1141
1142 data = kzalloc(sizeof(*data), GFP_KERNEL);
1036 if (!data) 1143 if (!data)
1037 pr_warning("function graph tracer: not enough memory\n"); 1144 goto out_err;
1038 else 1145
1039 for_each_possible_cpu(cpu) { 1146 data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
1040 pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid); 1147 if (!data->cpu_data)
1041 int *depth = &(per_cpu_ptr(data, cpu)->depth); 1148 goto out_err_free;
1042 *pid = -1; 1149
1043 *depth = 0; 1150 for_each_possible_cpu(cpu) {
1044 } 1151 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1152 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1153 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1154 *pid = -1;
1155 *depth = 0;
1156 *ignore = 0;
1157 }
1045 1158
1046 iter->private = data; 1159 iter->private = data;
1160
1161 return;
1162
1163 out_err_free:
1164 kfree(data);
1165 out_err:
1166 pr_warning("function graph tracer: not enough memory\n");
1047} 1167}
1048 1168
1049static void graph_trace_close(struct trace_iterator *iter) 1169static void graph_trace_close(struct trace_iterator *iter)
1050{ 1170{
1051 free_percpu(iter->private); 1171 struct fgraph_data *data = iter->private;
1172
1173 if (data) {
1174 free_percpu(data->cpu_data);
1175 kfree(data);
1176 }
1052} 1177}
1053 1178
1054static struct tracer graph_trace __read_mostly = { 1179static struct tracer graph_trace __read_mostly = {
1055 .name = "function_graph", 1180 .name = "function_graph",
1056 .open = graph_trace_open, 1181 .open = graph_trace_open,
1182 .pipe_open = graph_trace_open,
1057 .close = graph_trace_close, 1183 .close = graph_trace_close,
1184 .pipe_close = graph_trace_close,
1058 .wait_pipe = poll_wait_pipe, 1185 .wait_pipe = poll_wait_pipe,
1059 .init = graph_trace_init, 1186 .init = graph_trace_init,
1060 .reset = graph_trace_reset, 1187 .reset = graph_trace_reset,
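
graph_trace_open() now unwinds with labeled exits so a partial allocation never leaks, and graph_trace_close() frees in reverse order. A user-space sketch of the same open/close pairing, with the per-CPU allocation replaced by a plain calloc():

	#include <stdio.h>
	#include <stdlib.h>

	struct demo_cpu_state { int depth; };

	struct demo_data {
		struct demo_cpu_state *cpu_state;
	};

	static struct demo_data *demo_open(int ncpus)
	{
		struct demo_data *data;

		data = calloc(1, sizeof(*data));
		if (!data)
			goto out_err;

		data->cpu_state = calloc(ncpus, sizeof(*data->cpu_state));
		if (!data->cpu_state)
			goto out_err_free;

		return data;

	 out_err_free:
		free(data);
	 out_err:
		fprintf(stderr, "demo tracer: not enough memory\n");
		return NULL;
	}

	static void demo_close(struct demo_data *data)
	{
		if (data) {
			free(data->cpu_state);
			free(data);
		}
	}

	int main(void)
	{
		struct demo_data *data = demo_open(4);
		int ok = data != NULL;

		demo_close(data);	/* safe even when demo_open() failed */
		return ok ? 0 : 1;
	}
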
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 69543a905cd5..7b97000745f5 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -20,10 +20,10 @@
20 20
21#define BTS_BUFFER_SIZE (1 << 13) 21#define BTS_BUFFER_SIZE (1 << 13)
22 22
23static DEFINE_PER_CPU(struct bts_tracer *, tracer); 23static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer);
24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); 24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer);
25 25
26#define this_tracer per_cpu(tracer, smp_processor_id()) 26#define this_tracer per_cpu(hwb_tracer, smp_processor_id())
27 27
28static int trace_hw_branches_enabled __read_mostly; 28static int trace_hw_branches_enabled __read_mostly;
29static int trace_hw_branches_suspended __read_mostly; 29static int trace_hw_branches_suspended __read_mostly;
@@ -32,12 +32,13 @@ static struct trace_array *hw_branch_trace __read_mostly;
32 32
33static void bts_trace_init_cpu(int cpu) 33static void bts_trace_init_cpu(int cpu)
34{ 34{
35 per_cpu(tracer, cpu) = 35 per_cpu(hwb_tracer, cpu) =
36 ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE, 36 ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu),
37 NULL, (size_t)-1, BTS_KERNEL); 37 BTS_BUFFER_SIZE, NULL, (size_t)-1,
38 BTS_KERNEL);
38 39
39 if (IS_ERR(per_cpu(tracer, cpu))) 40 if (IS_ERR(per_cpu(hwb_tracer, cpu)))
40 per_cpu(tracer, cpu) = NULL; 41 per_cpu(hwb_tracer, cpu) = NULL;
41} 42}
42 43
43static int bts_trace_init(struct trace_array *tr) 44static int bts_trace_init(struct trace_array *tr)
@@ -51,7 +52,7 @@ static int bts_trace_init(struct trace_array *tr)
51 for_each_online_cpu(cpu) { 52 for_each_online_cpu(cpu) {
52 bts_trace_init_cpu(cpu); 53 bts_trace_init_cpu(cpu);
53 54
54 if (likely(per_cpu(tracer, cpu))) 55 if (likely(per_cpu(hwb_tracer, cpu)))
55 trace_hw_branches_enabled = 1; 56 trace_hw_branches_enabled = 1;
56 } 57 }
57 trace_hw_branches_suspended = 0; 58 trace_hw_branches_suspended = 0;
@@ -67,9 +68,9 @@ static void bts_trace_reset(struct trace_array *tr)
67 68
68 get_online_cpus(); 69 get_online_cpus();
69 for_each_online_cpu(cpu) { 70 for_each_online_cpu(cpu) {
70 if (likely(per_cpu(tracer, cpu))) { 71 if (likely(per_cpu(hwb_tracer, cpu))) {
71 ds_release_bts(per_cpu(tracer, cpu)); 72 ds_release_bts(per_cpu(hwb_tracer, cpu));
72 per_cpu(tracer, cpu) = NULL; 73 per_cpu(hwb_tracer, cpu) = NULL;
73 } 74 }
74 } 75 }
75 trace_hw_branches_enabled = 0; 76 trace_hw_branches_enabled = 0;
@@ -83,8 +84,8 @@ static void bts_trace_start(struct trace_array *tr)
83 84
84 get_online_cpus(); 85 get_online_cpus();
85 for_each_online_cpu(cpu) 86 for_each_online_cpu(cpu)
86 if (likely(per_cpu(tracer, cpu))) 87 if (likely(per_cpu(hwb_tracer, cpu)))
87 ds_resume_bts(per_cpu(tracer, cpu)); 88 ds_resume_bts(per_cpu(hwb_tracer, cpu));
88 trace_hw_branches_suspended = 0; 89 trace_hw_branches_suspended = 0;
89 put_online_cpus(); 90 put_online_cpus();
90} 91}
@@ -95,8 +96,8 @@ static void bts_trace_stop(struct trace_array *tr)
95 96
96 get_online_cpus(); 97 get_online_cpus();
97 for_each_online_cpu(cpu) 98 for_each_online_cpu(cpu)
98 if (likely(per_cpu(tracer, cpu))) 99 if (likely(per_cpu(hwb_tracer, cpu)))
99 ds_suspend_bts(per_cpu(tracer, cpu)); 100 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
100 trace_hw_branches_suspended = 1; 101 trace_hw_branches_suspended = 1;
101 put_online_cpus(); 102 put_online_cpus();
102} 103}
@@ -114,16 +115,16 @@ static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
114 bts_trace_init_cpu(cpu); 115 bts_trace_init_cpu(cpu);
115 116
116 if (trace_hw_branches_suspended && 117 if (trace_hw_branches_suspended &&
117 likely(per_cpu(tracer, cpu))) 118 likely(per_cpu(hwb_tracer, cpu)))
118 ds_suspend_bts(per_cpu(tracer, cpu)); 119 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
119 } 120 }
120 break; 121 break;
121 122
122 case CPU_DOWN_PREPARE: 123 case CPU_DOWN_PREPARE:
123 /* The notification is sent with interrupts enabled. */ 124 /* The notification is sent with interrupts enabled. */
124 if (likely(per_cpu(tracer, cpu))) { 125 if (likely(per_cpu(hwb_tracer, cpu))) {
125 ds_release_bts(per_cpu(tracer, cpu)); 126 ds_release_bts(per_cpu(hwb_tracer, cpu));
126 per_cpu(tracer, cpu) = NULL; 127 per_cpu(hwb_tracer, cpu) = NULL;
127 } 128 }
128 } 129 }
129 130
@@ -258,8 +259,8 @@ static void trace_bts_prepare(struct trace_iterator *iter)
258 259
259 get_online_cpus(); 260 get_online_cpus();
260 for_each_online_cpu(cpu) 261 for_each_online_cpu(cpu)
261 if (likely(per_cpu(tracer, cpu))) 262 if (likely(per_cpu(hwb_tracer, cpu)))
262 ds_suspend_bts(per_cpu(tracer, cpu)); 263 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
263 /* 264 /*
264 * We need to collect the trace on the respective cpu since ftrace 265 * We need to collect the trace on the respective cpu since ftrace
265 * implicitly adds the record for the current cpu. 266 * implicitly adds the record for the current cpu.
@@ -268,8 +269,8 @@ static void trace_bts_prepare(struct trace_iterator *iter)
268 on_each_cpu(trace_bts_cpu, iter->tr, 1); 269 on_each_cpu(trace_bts_cpu, iter->tr, 1);
269 270
270 for_each_online_cpu(cpu) 271 for_each_online_cpu(cpu)
271 if (likely(per_cpu(tracer, cpu))) 272 if (likely(per_cpu(hwb_tracer, cpu)))
272 ds_resume_bts(per_cpu(tracer, cpu)); 273 ds_resume_bts(per_cpu(hwb_tracer, cpu));
273 put_online_cpus(); 274 put_online_cpus();
274} 275}
275 276
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 3aa7eaa2114c..2974bc7538c7 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -151,6 +151,8 @@ check_critical_timing(struct trace_array *tr,
151 goto out_unlock; 151 goto out_unlock;
152 152
153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
154 /* Skip 5 functions to get to the irq/preempt enable function */
155 __trace_stack(tr, flags, 5, pc);
154 156
155 if (data->critical_sequence != max_sequence) 157 if (data->critical_sequence != max_sequence)
156 goto out_unlock; 158 goto out_unlock;
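
The new __trace_stack() call above passes a skip count so the saved stack starts at the function that actually re-enabled irqs/preemption rather than at the tracer's own helpers. A rough user-space analogue using glibc's backtrace(); the skip value of 2 is just an assumption for this demo:

	#include <execinfo.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define DEMO_SKIP 2	/* skip this helper and its immediate caller */

	static void demo_dump_stack(void)
	{
		void *frames[32];
		int n = backtrace(frames, 32);
		char **names = backtrace_symbols(frames, n);
		int i;

		if (!names)
			return;
		for (i = DEMO_SKIP; i < n; i++)
			printf("  %s\n", names[i]);
		free(names);
	}

	static void middle(void)
	{
		demo_dump_stack();
	}

	int main(void)
	{
		middle();	/* report starts at main(), the caller of interest */
		return 0;
	}
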
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index aff5f80b59b8..505c92273b1a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
91 return retval; 91 return retval;
92} 92}
93 93
94static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
95{
96 return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
97}
98
99static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, 94static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
100 void *dummy) 95 void *dummy)
101{ 96{
@@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
231{ 226{
232 int ret = -EINVAL; 227 int ret = -EINVAL;
233 228
234 if (ff->func == fetch_argument) 229 if (ff->func == fetch_register) {
235 ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
236 else if (ff->func == fetch_register) {
237 const char *name; 230 const char *name;
238 name = regs_query_register_name((unsigned int)((long)ff->data)); 231 name = regs_query_register_name((unsigned int)((long)ff->data));
239 ret = snprintf(buf, n, "%%%s", name); 232 ret = snprintf(buf, n, "%%%s", name);
@@ -282,6 +275,18 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
282static int kretprobe_dispatcher(struct kretprobe_instance *ri, 275static int kretprobe_dispatcher(struct kretprobe_instance *ri,
283 struct pt_regs *regs); 276 struct pt_regs *regs);
284 277
278/* Check the name is good for event/group */
279static int check_event_name(const char *name)
280{
281 if (!isalpha(*name) && *name != '_')
282 return 0;
283 while (*++name != '\0') {
284 if (!isalpha(*name) && !isdigit(*name) && *name != '_')
285 return 0;
286 }
287 return 1;
288}
289
285/* 290/*
286 * Allocate new trace_probe and initialize it (including kprobes). 291 * Allocate new trace_probe and initialize it (including kprobes).
287 */ 292 */
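
check_event_name() above enforces C-identifier-like names for events and groups, which is also why the auto-generated names later in this patch switch from '@' and '+' to '_'. The same check as a stand-alone program (the sample names are invented):

	#include <ctype.h>
	#include <stdio.h>

	/* Same rule as above: first char alpha or '_', rest alnum or '_'. */
	static int check_event_name(const char *name)
	{
		if (!isalpha((unsigned char)*name) && *name != '_')
			return 0;
		while (*++name != '\0') {
			if (!isalpha((unsigned char)*name) &&
			    !isdigit((unsigned char)*name) && *name != '_')
				return 0;
		}
		return 1;
	}

	int main(void)
	{
		const char *names[] = { "p_do_fork_0", "r@vfs_read+8", "_event1" };
		int i;

		for (i = 0; i < 3; i++)
			printf("%-14s -> %s\n", names[i],
			       check_event_name(names[i]) ? "ok" : "rejected");
		return 0;
	}
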
@@ -293,10 +298,11 @@ static struct trace_probe *alloc_trace_probe(const char *group,
293 int nargs, int is_return) 298 int nargs, int is_return)
294{ 299{
295 struct trace_probe *tp; 300 struct trace_probe *tp;
301 int ret = -ENOMEM;
296 302
297 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL); 303 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
298 if (!tp) 304 if (!tp)
299 return ERR_PTR(-ENOMEM); 305 return ERR_PTR(ret);
300 306
301 if (symbol) { 307 if (symbol) {
302 tp->symbol = kstrdup(symbol, GFP_KERNEL); 308 tp->symbol = kstrdup(symbol, GFP_KERNEL);
@@ -312,14 +318,20 @@ static struct trace_probe *alloc_trace_probe(const char *group,
312 else 318 else
313 tp->rp.kp.pre_handler = kprobe_dispatcher; 319 tp->rp.kp.pre_handler = kprobe_dispatcher;
314 320
315 if (!event) 321 if (!event || !check_event_name(event)) {
322 ret = -EINVAL;
316 goto error; 323 goto error;
324 }
325
317 tp->call.name = kstrdup(event, GFP_KERNEL); 326 tp->call.name = kstrdup(event, GFP_KERNEL);
318 if (!tp->call.name) 327 if (!tp->call.name)
319 goto error; 328 goto error;
320 329
321 if (!group) 330 if (!group || !check_event_name(group)) {
331 ret = -EINVAL;
322 goto error; 332 goto error;
333 }
334
323 tp->call.system = kstrdup(group, GFP_KERNEL); 335 tp->call.system = kstrdup(group, GFP_KERNEL);
324 if (!tp->call.system) 336 if (!tp->call.system)
325 goto error; 337 goto error;
@@ -330,7 +342,7 @@ error:
330 kfree(tp->call.name); 342 kfree(tp->call.name);
331 kfree(tp->symbol); 343 kfree(tp->symbol);
332 kfree(tp); 344 kfree(tp);
333 return ERR_PTR(-ENOMEM); 345 return ERR_PTR(ret);
334} 346}
335 347
336static void free_probe_arg(struct probe_arg *arg) 348static void free_probe_arg(struct probe_arg *arg)
@@ -470,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
470 } 482 }
471 } else 483 } else
472 ret = -EINVAL; 484 ret = -EINVAL;
473 } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
474 ret = strict_strtoul(arg + 3, 10, &param);
475 if (ret || param > PARAM_MAX_ARGS)
476 ret = -EINVAL;
477 else {
478 ff->func = fetch_argument;
479 ff->data = (void *)param;
480 }
481 } else 485 } else
482 ret = -EINVAL; 486 ret = -EINVAL;
483 return ret; 487 return ret;
@@ -592,7 +596,6 @@ static int create_trace_probe(int argc, char **argv)
592 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] 596 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
593 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] 597 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
594 * Fetch args: 598 * Fetch args:
595 * $argN : fetch Nth of function argument. (N:0-)
596 * $retval : fetch return value 599 * $retval : fetch return value
597 * $stack : fetch stack address 600 * $stack : fetch stack address
598 * $stackN : fetch Nth of stack (N:0-) 601 * $stackN : fetch Nth of stack (N:0-)
@@ -606,23 +609,22 @@ static int create_trace_probe(int argc, char **argv)
606 */ 609 */
607 struct trace_probe *tp; 610 struct trace_probe *tp;
608 int i, ret = 0; 611 int i, ret = 0;
609 int is_return = 0; 612 int is_return = 0, is_delete = 0;
610 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; 613 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
611 unsigned long offset = 0; 614 unsigned long offset = 0;
612 void *addr = NULL; 615 void *addr = NULL;
613 char buf[MAX_EVENT_NAME_LEN]; 616 char buf[MAX_EVENT_NAME_LEN];
614 617
615 if (argc < 2) { 618 /* argc must be >= 1 */
616 pr_info("Probe point is not specified.\n");
617 return -EINVAL;
618 }
619
620 if (argv[0][0] == 'p') 619 if (argv[0][0] == 'p')
621 is_return = 0; 620 is_return = 0;
622 else if (argv[0][0] == 'r') 621 else if (argv[0][0] == 'r')
623 is_return = 1; 622 is_return = 1;
623 else if (argv[0][0] == '-')
624 is_delete = 1;
624 else { 625 else {
625 pr_info("Probe definition must be started with 'p' or 'r'.\n"); 626 pr_info("Probe definition must be started with 'p', 'r' or"
627 " '-'.\n");
626 return -EINVAL; 628 return -EINVAL;
627 } 629 }
628 630
@@ -633,23 +635,45 @@ static int create_trace_probe(int argc, char **argv)
633 event = strchr(group, '/') + 1; 635 event = strchr(group, '/') + 1;
634 event[-1] = '\0'; 636 event[-1] = '\0';
635 if (strlen(group) == 0) { 637 if (strlen(group) == 0) {
636 pr_info("Group name is not specifiled\n"); 638 pr_info("Group name is not specified\n");
637 return -EINVAL; 639 return -EINVAL;
638 } 640 }
639 } 641 }
640 if (strlen(event) == 0) { 642 if (strlen(event) == 0) {
641 pr_info("Event name is not specifiled\n"); 643 pr_info("Event name is not specified\n");
642 return -EINVAL; 644 return -EINVAL;
643 } 645 }
644 } 646 }
647 if (!group)
648 group = KPROBE_EVENT_SYSTEM;
645 649
650 if (is_delete) {
651 if (!event) {
652 pr_info("Delete command needs an event name.\n");
653 return -EINVAL;
654 }
655 tp = find_probe_event(event, group);
656 if (!tp) {
657 pr_info("Event %s/%s doesn't exist.\n", group, event);
658 return -ENOENT;
659 }
660 /* delete an event */
661 unregister_trace_probe(tp);
662 free_trace_probe(tp);
663 return 0;
664 }
665
666 if (argc < 2) {
667 pr_info("Probe point is not specified.\n");
668 return -EINVAL;
669 }
646 if (isdigit(argv[1][0])) { 670 if (isdigit(argv[1][0])) {
647 if (is_return) { 671 if (is_return) {
648 pr_info("Return probe point must be a symbol.\n"); 672 pr_info("Return probe point must be a symbol.\n");
649 return -EINVAL; 673 return -EINVAL;
650 } 674 }
651 /* an address specified */ 675 /* an address specified */
652 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); 676 ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
653 if (ret) { 677 if (ret) {
654 pr_info("Failed to parse address.\n"); 678 pr_info("Failed to parse address.\n");
655 return ret; 679 return ret;
@@ -671,15 +695,13 @@ static int create_trace_probe(int argc, char **argv)
671 argc -= 2; argv += 2; 695 argc -= 2; argv += 2;
672 696
673 /* setup a probe */ 697 /* setup a probe */
674 if (!group)
675 group = KPROBE_EVENT_SYSTEM;
676 if (!event) { 698 if (!event) {
677 /* Make a new event name */ 699 /* Make a new event name */
678 if (symbol) 700 if (symbol)
679 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld", 701 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
680 is_return ? 'r' : 'p', symbol, offset); 702 is_return ? 'r' : 'p', symbol, offset);
681 else 703 else
682 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p", 704 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
683 is_return ? 'r' : 'p', addr); 705 is_return ? 'r' : 'p', addr);
684 event = buf; 706 event = buf;
685 } 707 }
@@ -920,7 +942,7 @@ static const struct file_operations kprobe_profile_ops = {
920}; 942};
921 943
922/* Kprobe handler */ 944/* Kprobe handler */
923static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) 945static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
924{ 946{
925 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 947 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
926 struct kprobe_trace_entry *entry; 948 struct kprobe_trace_entry *entry;
@@ -940,7 +962,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
940 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 962 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
941 irq_flags, pc); 963 irq_flags, pc);
942 if (!event) 964 if (!event)
943 return 0; 965 return;
944 966
945 entry = ring_buffer_event_data(event); 967 entry = ring_buffer_event_data(event);
946 entry->nargs = tp->nr_args; 968 entry->nargs = tp->nr_args;
@@ -950,11 +972,10 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
950 972
951 if (!filter_current_check_discard(buffer, call, entry, event)) 973 if (!filter_current_check_discard(buffer, call, entry, event))
952 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 974 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
953 return 0;
954} 975}
955 976
956/* Kretprobe handler */ 977/* Kretprobe handler */
957static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, 978static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
958 struct pt_regs *regs) 979 struct pt_regs *regs)
959{ 980{
960 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 981 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@@ -973,7 +994,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
973 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 994 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
974 irq_flags, pc); 995 irq_flags, pc);
975 if (!event) 996 if (!event)
976 return 0; 997 return;
977 998
978 entry = ring_buffer_event_data(event); 999 entry = ring_buffer_event_data(event);
979 entry->nargs = tp->nr_args; 1000 entry->nargs = tp->nr_args;
@@ -984,8 +1005,6 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
984 1005
985 if (!filter_current_check_discard(buffer, call, entry, event)) 1006 if (!filter_current_check_discard(buffer, call, entry, event))
986 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1007 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
987
988 return 0;
989} 1008}
990 1009
991/* Event entry printers */ 1010/* Event entry printers */
@@ -1113,10 +1132,6 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1113 struct kprobe_trace_entry field; 1132 struct kprobe_trace_entry field;
1114 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1133 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1115 1134
1116 ret = trace_define_common_fields(event_call);
1117 if (!ret)
1118 return ret;
1119
1120 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 1135 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1121 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); 1136 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1122 /* Set argument names as fields */ 1137 /* Set argument names as fields */
@@ -1131,10 +1146,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1131 struct kretprobe_trace_entry field; 1146 struct kretprobe_trace_entry field;
1132 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1147 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1133 1148
1134 ret = trace_define_common_fields(event_call);
1135 if (!ret)
1136 return ret;
1137
1138 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); 1149 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1139 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); 1150 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1140 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); 1151 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
@@ -1144,212 +1155,123 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1144 return 0; 1155 return 0;
1145} 1156}
1146 1157
1147static int __probe_event_show_format(struct trace_seq *s, 1158static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1148 struct trace_probe *tp, const char *fmt,
1149 const char *arg)
1150{ 1159{
1151 int i; 1160 int i;
1161 int pos = 0;
1152 1162
1153 /* Show format */ 1163 const char *fmt, *arg;
1154 if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1155 return 0;
1156 1164
1157 for (i = 0; i < tp->nr_args; i++) 1165 if (!probe_is_return(tp)) {
1158 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name)) 1166 fmt = "(%lx)";
1159 return 0; 1167 arg = "REC->" FIELD_STRING_IP;
1168 } else {
1169 fmt = "(%lx <- %lx)";
1170 arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
1171 }
1160 1172
1161 if (!trace_seq_printf(s, "\", %s", arg)) 1173 /* When len=0, we just calculate the needed length */
1162 return 0; 1174#define LEN_OR_ZERO (len ? len - pos : 0)
1163 1175
1164 for (i = 0; i < tp->nr_args; i++) 1176 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
1165 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1166 return 0;
1167 1177
1168 return trace_seq_puts(s, "\n"); 1178 for (i = 0; i < tp->nr_args; i++) {
1169} 1179 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx",
1170 1180 tp->args[i].name);
1171#undef SHOW_FIELD 1181 }
1172#define SHOW_FIELD(type, item, name) \
1173 do { \
1174 ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \
1175 "offset:%u;\tsize:%u;\n", name, \
1176 (unsigned int)offsetof(typeof(field), item),\
1177 (unsigned int)sizeof(type)); \
1178 if (!ret) \
1179 return 0; \
1180 } while (0)
1181 1182
1182static int kprobe_event_show_format(struct ftrace_event_call *call, 1183 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
1183 struct trace_seq *s)
1184{
1185 struct kprobe_trace_entry field __attribute__((unused));
1186 int ret, i;
1187 struct trace_probe *tp = (struct trace_probe *)call->data;
1188 1184
1189 SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP); 1185 for (i = 0; i < tp->nr_args; i++) {
1190 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); 1186 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
1187 tp->args[i].name);
1188 }
1191 1189
1192 /* Show fields */ 1190#undef LEN_OR_ZERO
1193 for (i = 0; i < tp->nr_args; i++)
1194 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1195 trace_seq_puts(s, "\n");
1196 1191
1197 return __probe_event_show_format(s, tp, "(%lx)", 1192 /* return the length of print_fmt */
1198 "REC->" FIELD_STRING_IP); 1193 return pos;
1199} 1194}
1200 1195
1201static int kretprobe_event_show_format(struct ftrace_event_call *call, 1196static int set_print_fmt(struct trace_probe *tp)
1202 struct trace_seq *s)
1203{ 1197{
1204 struct kretprobe_trace_entry field __attribute__((unused)); 1198 int len;
1205 int ret, i; 1199 char *print_fmt;
1206 struct trace_probe *tp = (struct trace_probe *)call->data;
1207 1200
1208 SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC); 1201 /* First: called with 0 length to calculate the needed length */
1209 SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP); 1202 len = __set_print_fmt(tp, NULL, 0);
1210 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); 1203 print_fmt = kmalloc(len + 1, GFP_KERNEL);
1204 if (!print_fmt)
1205 return -ENOMEM;
1211 1206
1212 /* Show fields */ 1207 /* Second: actually write the @print_fmt */
1213 for (i = 0; i < tp->nr_args; i++) 1208 __set_print_fmt(tp, print_fmt, len + 1);
1214 SHOW_FIELD(unsigned long, args[i], tp->args[i].name); 1209 tp->call.print_fmt = print_fmt;
1215 trace_seq_puts(s, "\n");
1216 1210
1217 return __probe_event_show_format(s, tp, "(%lx <- %lx)", 1211 return 0;
1218 "REC->" FIELD_STRING_FUNC
1219 ", REC->" FIELD_STRING_RETIP);
1220} 1212}
1221 1213
1222#ifdef CONFIG_EVENT_PROFILE 1214#ifdef CONFIG_PERF_EVENTS
1223 1215
1224/* Kprobe profile handler */ 1216/* Kprobe profile handler */
1225static __kprobes int kprobe_profile_func(struct kprobe *kp, 1217static __kprobes void kprobe_profile_func(struct kprobe *kp,
1226 struct pt_regs *regs) 1218 struct pt_regs *regs)
1227{ 1219{
1228 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1220 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1229 struct ftrace_event_call *call = &tp->call; 1221 struct ftrace_event_call *call = &tp->call;
1230 struct kprobe_trace_entry *entry; 1222 struct kprobe_trace_entry *entry;
1231 struct trace_entry *ent; 1223 int size, __size, i;
1232 int size, __size, i, pc, __cpu;
1233 unsigned long irq_flags; 1224 unsigned long irq_flags;
1234 char *trace_buf;
1235 char *raw_data;
1236 int rctx; 1225 int rctx;
1237 1226
1238 pc = preempt_count();
1239 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); 1227 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1240 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1228 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1241 size -= sizeof(u32); 1229 size -= sizeof(u32);
1242 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1230 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1243 "profile buffer not large enough")) 1231 "profile buffer not large enough"))
1244 return 0; 1232 return;
1245
1246 /*
1247 * Protect the non nmi buffer
1248 * This also protects the rcu read side
1249 */
1250 local_irq_save(irq_flags);
1251
1252 rctx = perf_swevent_get_recursion_context();
1253 if (rctx < 0)
1254 goto end_recursion;
1255 1233
1256 __cpu = smp_processor_id(); 1234 entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
1257 1235 if (!entry)
1258 if (in_nmi()) 1236 return;
1259 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1260 else
1261 trace_buf = rcu_dereference(perf_trace_buf);
1262
1263 if (!trace_buf)
1264 goto end;
1265
1266 raw_data = per_cpu_ptr(trace_buf, __cpu);
1267
1268 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1269 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1270 entry = (struct kprobe_trace_entry *)raw_data;
1271 ent = &entry->ent;
1272 1237
1273 tracing_generic_entry_update(ent, irq_flags, pc);
1274 ent->type = call->id;
1275 entry->nargs = tp->nr_args; 1238 entry->nargs = tp->nr_args;
1276 entry->ip = (unsigned long)kp->addr; 1239 entry->ip = (unsigned long)kp->addr;
1277 for (i = 0; i < tp->nr_args; i++) 1240 for (i = 0; i < tp->nr_args; i++)
1278 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1241 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1279 perf_tp_event(call->id, entry->ip, 1, entry, size);
1280
1281end:
1282 perf_swevent_put_recursion_context(rctx);
1283end_recursion:
1284 local_irq_restore(irq_flags);
1285 1242
1286 return 0; 1243 ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
1287} 1244}
1288 1245
1289/* Kretprobe profile handler */ 1246/* Kretprobe profile handler */
1290static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, 1247static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
1291 struct pt_regs *regs) 1248 struct pt_regs *regs)
1292{ 1249{
1293 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1250 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1294 struct ftrace_event_call *call = &tp->call; 1251 struct ftrace_event_call *call = &tp->call;
1295 struct kretprobe_trace_entry *entry; 1252 struct kretprobe_trace_entry *entry;
1296 struct trace_entry *ent; 1253 int size, __size, i;
1297 int size, __size, i, pc, __cpu;
1298 unsigned long irq_flags; 1254 unsigned long irq_flags;
1299 char *trace_buf;
1300 char *raw_data;
1301 int rctx; 1255 int rctx;
1302 1256
1303 pc = preempt_count();
1304 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); 1257 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1305 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1258 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1306 size -= sizeof(u32); 1259 size -= sizeof(u32);
1307 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1260 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1308 "profile buffer not large enough")) 1261 "profile buffer not large enough"))
1309 return 0; 1262 return;
1310
1311 /*
1312 * Protect the non nmi buffer
1313 * This also protects the rcu read side
1314 */
1315 local_irq_save(irq_flags);
1316
1317 rctx = perf_swevent_get_recursion_context();
1318 if (rctx < 0)
1319 goto end_recursion;
1320 1263
1321 __cpu = smp_processor_id(); 1264 entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
1322 1265 if (!entry)
1323 if (in_nmi()) 1266 return;
1324 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1325 else
1326 trace_buf = rcu_dereference(perf_trace_buf);
1327
1328 if (!trace_buf)
1329 goto end;
1330
1331 raw_data = per_cpu_ptr(trace_buf, __cpu);
1332
1333 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1334 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1335 entry = (struct kretprobe_trace_entry *)raw_data;
1336 ent = &entry->ent;
1337 1267
1338 tracing_generic_entry_update(ent, irq_flags, pc);
1339 ent->type = call->id;
1340 entry->nargs = tp->nr_args; 1268 entry->nargs = tp->nr_args;
1341 entry->func = (unsigned long)tp->rp.kp.addr; 1269 entry->func = (unsigned long)tp->rp.kp.addr;
1342 entry->ret_ip = (unsigned long)ri->ret_addr; 1270 entry->ret_ip = (unsigned long)ri->ret_addr;
1343 for (i = 0; i < tp->nr_args; i++) 1271 for (i = 0; i < tp->nr_args; i++)
1344 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1272 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1345 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1346
1347end:
1348 perf_swevent_put_recursion_context(rctx);
1349end_recursion:
1350 local_irq_restore(irq_flags);
1351 1273
1352 return 0; 1274 ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags);
1353} 1275}
1354 1276
1355static int probe_profile_enable(struct ftrace_event_call *call) 1277static int probe_profile_enable(struct ftrace_event_call *call)
@@ -1377,7 +1299,7 @@ static void probe_profile_disable(struct ftrace_event_call *call)
1377 disable_kprobe(&tp->rp.kp); 1299 disable_kprobe(&tp->rp.kp);
1378 } 1300 }
1379} 1301}
1380#endif /* CONFIG_EVENT_PROFILE */ 1302#endif /* CONFIG_PERF_EVENTS */
1381 1303
1382 1304
1383static __kprobes 1305static __kprobes
@@ -1387,10 +1309,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1387 1309
1388 if (tp->flags & TP_FLAG_TRACE) 1310 if (tp->flags & TP_FLAG_TRACE)
1389 kprobe_trace_func(kp, regs); 1311 kprobe_trace_func(kp, regs);
1390#ifdef CONFIG_EVENT_PROFILE 1312#ifdef CONFIG_PERF_EVENTS
1391 if (tp->flags & TP_FLAG_PROFILE) 1313 if (tp->flags & TP_FLAG_PROFILE)
1392 kprobe_profile_func(kp, regs); 1314 kprobe_profile_func(kp, regs);
1393#endif /* CONFIG_EVENT_PROFILE */ 1315#endif
1394 return 0; /* We don't tweek kernel, so just return 0 */ 1316 return 0; /* We don't tweek kernel, so just return 0 */
1395} 1317}
1396 1318
@@ -1401,10 +1323,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1401 1323
1402 if (tp->flags & TP_FLAG_TRACE) 1324 if (tp->flags & TP_FLAG_TRACE)
1403 kretprobe_trace_func(ri, regs); 1325 kretprobe_trace_func(ri, regs);
1404#ifdef CONFIG_EVENT_PROFILE 1326#ifdef CONFIG_PERF_EVENTS
1405 if (tp->flags & TP_FLAG_PROFILE) 1327 if (tp->flags & TP_FLAG_PROFILE)
1406 kretprobe_profile_func(ri, regs); 1328 kretprobe_profile_func(ri, regs);
1407#endif /* CONFIG_EVENT_PROFILE */ 1329#endif
1408 return 0; /* We don't tweek kernel, so just return 0 */ 1330 return 0; /* We don't tweek kernel, so just return 0 */
1409} 1331}
1410 1332
@@ -1417,24 +1339,25 @@ static int register_probe_event(struct trace_probe *tp)
1417 if (probe_is_return(tp)) { 1339 if (probe_is_return(tp)) {
1418 tp->event.trace = print_kretprobe_event; 1340 tp->event.trace = print_kretprobe_event;
1419 call->raw_init = probe_event_raw_init; 1341 call->raw_init = probe_event_raw_init;
1420 call->show_format = kretprobe_event_show_format;
1421 call->define_fields = kretprobe_event_define_fields; 1342 call->define_fields = kretprobe_event_define_fields;
1422 } else { 1343 } else {
1423 tp->event.trace = print_kprobe_event; 1344 tp->event.trace = print_kprobe_event;
1424 call->raw_init = probe_event_raw_init; 1345 call->raw_init = probe_event_raw_init;
1425 call->show_format = kprobe_event_show_format;
1426 call->define_fields = kprobe_event_define_fields; 1346 call->define_fields = kprobe_event_define_fields;
1427 } 1347 }
1348 if (set_print_fmt(tp) < 0)
1349 return -ENOMEM;
1428 call->event = &tp->event; 1350 call->event = &tp->event;
1429 call->id = register_ftrace_event(&tp->event); 1351 call->id = register_ftrace_event(&tp->event);
1430 if (!call->id) 1352 if (!call->id) {
1353 kfree(call->print_fmt);
1431 return -ENODEV; 1354 return -ENODEV;
1355 }
1432 call->enabled = 0; 1356 call->enabled = 0;
1433 call->regfunc = probe_event_enable; 1357 call->regfunc = probe_event_enable;
1434 call->unregfunc = probe_event_disable; 1358 call->unregfunc = probe_event_disable;
1435 1359
1436#ifdef CONFIG_EVENT_PROFILE 1360#ifdef CONFIG_PERF_EVENTS
1437 atomic_set(&call->profile_count, -1);
1438 call->profile_enable = probe_profile_enable; 1361 call->profile_enable = probe_profile_enable;
1439 call->profile_disable = probe_profile_disable; 1362 call->profile_disable = probe_profile_disable;
1440#endif 1363#endif
@@ -1442,6 +1365,7 @@ static int register_probe_event(struct trace_probe *tp)
1442 ret = trace_add_event_call(call); 1365 ret = trace_add_event_call(call);
1443 if (ret) { 1366 if (ret) {
1444 pr_info("Failed to register kprobe event: %s\n", call->name); 1367 pr_info("Failed to register kprobe event: %s\n", call->name);
1368 kfree(call->print_fmt);
1445 unregister_ftrace_event(&tp->event); 1369 unregister_ftrace_event(&tp->event);
1446 } 1370 }
1447 return ret; 1371 return ret;
@@ -1451,6 +1375,7 @@ static void unregister_probe_event(struct trace_probe *tp)
1451{ 1375{
1452 /* tp->event is unregistered in trace_remove_event_call() */ 1376 /* tp->event is unregistered in trace_remove_event_call() */
1453 trace_remove_event_call(&tp->call); 1377 trace_remove_event_call(&tp->call);
1378 kfree(tp->call.print_fmt);
1454} 1379}
1455 1380
1456/* Make a debugfs interface for controling probe points */ 1381/* Make a debugfs interface for controling probe points */
@@ -1493,28 +1418,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1493 1418
1494static __init int kprobe_trace_self_tests_init(void) 1419static __init int kprobe_trace_self_tests_init(void)
1495{ 1420{
1496 int ret; 1421 int ret, warn = 0;
1497 int (*target)(int, int, int, int, int, int); 1422 int (*target)(int, int, int, int, int, int);
1423 struct trace_probe *tp;
1498 1424
1499 target = kprobe_trace_selftest_target; 1425 target = kprobe_trace_selftest_target;
1500 1426
1501 pr_info("Testing kprobe tracing: "); 1427 pr_info("Testing kprobe tracing: ");
1502 1428
1503 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " 1429 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1504 "$arg1 $arg2 $arg3 $arg4 $stack $stack0"); 1430 "$stack $stack0 +0($stack)");
1505 if (WARN_ON_ONCE(ret)) 1431 if (WARN_ON_ONCE(ret)) {
1506 pr_warning("error enabling function entry\n"); 1432 pr_warning("error on probing function entry.\n");
1433 warn++;
1434 } else {
1435 /* Enable trace point */
1436 tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
1437 if (WARN_ON_ONCE(tp == NULL)) {
1438 pr_warning("error on getting new probe.\n");
1439 warn++;
1440 } else
1441 probe_event_enable(&tp->call);
1442 }
1507 1443
1508 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " 1444 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1509 "$retval"); 1445 "$retval");
1510 if (WARN_ON_ONCE(ret)) 1446 if (WARN_ON_ONCE(ret)) {
1511 pr_warning("error enabling function return\n"); 1447 pr_warning("error on probing function return.\n");
1448 warn++;
1449 } else {
1450 /* Enable trace point */
1451 tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
1452 if (WARN_ON_ONCE(tp == NULL)) {
1453 pr_warning("error on getting new probe.\n");
1454 warn++;
1455 } else
1456 probe_event_enable(&tp->call);
1457 }
1458
1459 if (warn)
1460 goto end;
1512 1461
1513 ret = target(1, 2, 3, 4, 5, 6); 1462 ret = target(1, 2, 3, 4, 5, 6);
1514 1463
1515 cleanup_all_probes(); 1464 ret = command_trace_probe("-:testprobe");
1465 if (WARN_ON_ONCE(ret)) {
1466 pr_warning("error on deleting a probe.\n");
1467 warn++;
1468 }
1469
1470 ret = command_trace_probe("-:testprobe2");
1471 if (WARN_ON_ONCE(ret)) {
1472 pr_warning("error on deleting a probe.\n");
1473 warn++;
1474 }
1516 1475
1517 pr_cont("OK\n"); 1476end:
1477 cleanup_all_probes();
1478 if (warn)
1479 pr_cont("NG: Some tests are failed. Please check them.\n");
1480 else
1481 pr_cont("OK\n");
1518 return 0; 1482 return 0;
1519} 1483}
1520 1484
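
The reworked self test above keeps going after a failure, counts warnings, and only then reports OK or NG. That control flow, reduced to a user-space skeleton with dummy steps (the step names and the forced failure are invented for the demo):

	#include <stdio.h>

	static int setup_probe(const char *cmd)
	{
		/* Pretend the return-probe command fails, to exercise the warn path. */
		return cmd[0] == 'r' ? -1 : 0;
	}

	int main(void)
	{
		int ret, warn = 0;

		printf("Testing demo probes: ");

		ret = setup_probe("p:testprobe target");
		if (ret) {
			fprintf(stderr, "error on probing function entry.\n");
			warn++;
		}

		ret = setup_probe("r:testprobe2 target");
		if (ret) {
			fprintf(stderr, "error on probing function return.\n");
			warn++;
		}

		if (warn)
			goto end;

		/* ... exercise the target and delete the probes here ... */

	 end:
		/* cleanup of whatever was set up would go here */
		if (warn)
			printf("NG: %d test step(s) failed.\n", warn);
		else
			printf("OK\n");
		return 0;
	}
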
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index ddfa0fd43bc0..94103cdcf9d8 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -26,12 +26,13 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27 27
28#include "trace_output.h" 28#include "trace_output.h"
29#include "trace_stat.h"
30#include "trace.h" 29#include "trace.h"
31 30
32#include <linux/hw_breakpoint.h> 31#include <linux/hw_breakpoint.h>
33#include <asm/hw_breakpoint.h> 32#include <asm/hw_breakpoint.h>
34 33
34#include <asm/atomic.h>
35
35/* 36/*
36 * For now, let us restrict the no. of symbols traced simultaneously to number 37 * For now, let us restrict the no. of symbols traced simultaneously to number
37 * of available hardware breakpoint registers. 38 * of available hardware breakpoint registers.
@@ -44,7 +45,7 @@ struct trace_ksym {
44 struct perf_event **ksym_hbp; 45 struct perf_event **ksym_hbp;
45 struct perf_event_attr attr; 46 struct perf_event_attr attr;
46#ifdef CONFIG_PROFILE_KSYM_TRACER 47#ifdef CONFIG_PROFILE_KSYM_TRACER
47 unsigned long counter; 48 atomic64_t counter;
48#endif 49#endif
49 struct hlist_node ksym_hlist; 50 struct hlist_node ksym_hlist;
50}; 51};
@@ -69,9 +70,8 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
69 70
70 rcu_read_lock(); 71 rcu_read_lock();
71 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { 72 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
72 if ((entry->attr.bp_addr == hbp_hit_addr) && 73 if (entry->attr.bp_addr == hbp_hit_addr) {
73 (entry->counter <= MAX_UL_INT)) { 74 atomic64_inc(&entry->counter);
74 entry->counter++;
75 break; 75 break;
76 } 76 }
77 } 77 }
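
The hit counter above moves from a bounds-checked unsigned long to atomic64_t because the breakpoint handler can fire concurrently on several CPUs. A user-space illustration with C11 atomics and two threads; the thread count and iteration count are arbitrary:

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_ulong hits;	/* plays the role of entry->counter */

	static void *hbp_handler(void *arg)
	{
		int i;

		(void)arg;
		for (i = 0; i < 100000; i++)
			atomic_fetch_add(&hits, 1);	/* like atomic64_inc() */
		return NULL;
	}

	int main(void)
	{
		pthread_t a, b;

		pthread_create(&a, NULL, hbp_handler, NULL);
		pthread_create(&b, NULL, hbp_handler, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);

		/* Always 200000; a plain ++ from two CPUs could lose increments. */
		printf("hits = %lu\n", atomic_load(&hits));
		return 0;
	}
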
@@ -79,11 +79,12 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
79} 79}
80#endif /* CONFIG_PROFILE_KSYM_TRACER */ 80#endif /* CONFIG_PROFILE_KSYM_TRACER */
81 81
82void ksym_hbp_handler(struct perf_event *hbp, void *data) 82void ksym_hbp_handler(struct perf_event *hbp, int nmi,
83 struct perf_sample_data *data,
84 struct pt_regs *regs)
83{ 85{
84 struct ring_buffer_event *event; 86 struct ring_buffer_event *event;
85 struct ksym_trace_entry *entry; 87 struct ksym_trace_entry *entry;
86 struct pt_regs *regs = data;
87 struct ring_buffer *buffer; 88 struct ring_buffer *buffer;
88 int pc; 89 int pc;
89 90
@@ -196,7 +197,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
196 entry->attr.bp_addr = addr; 197 entry->attr.bp_addr = addr;
197 entry->attr.bp_len = HW_BREAKPOINT_LEN_4; 198 entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
198 199
199 ret = -EAGAIN;
200 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, 200 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
201 ksym_hbp_handler); 201 ksym_hbp_handler);
202 202
@@ -235,7 +235,8 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
235 mutex_lock(&ksym_tracer_mutex); 235 mutex_lock(&ksym_tracer_mutex);
236 236
237 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { 237 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
238 ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr); 238 ret = trace_seq_printf(s, "%pS:",
239 (void *)(unsigned long)entry->attr.bp_addr);
239 if (entry->attr.bp_type == HW_BREAKPOINT_R) 240 if (entry->attr.bp_type == HW_BREAKPOINT_R)
240 ret = trace_seq_puts(s, "r--\n"); 241 ret = trace_seq_puts(s, "r--\n");
241 else if (entry->attr.bp_type == HW_BREAKPOINT_W) 242 else if (entry->attr.bp_type == HW_BREAKPOINT_W)
@@ -277,21 +278,20 @@ static ssize_t ksym_trace_filter_write(struct file *file,
277{ 278{
278 struct trace_ksym *entry; 279 struct trace_ksym *entry;
279 struct hlist_node *node; 280 struct hlist_node *node;
280 char *input_string, *ksymname = NULL; 281 char *buf, *input_string, *ksymname = NULL;
281 unsigned long ksym_addr = 0; 282 unsigned long ksym_addr = 0;
282 int ret, op, changed = 0; 283 int ret, op, changed = 0;
283 284
284 input_string = kzalloc(count + 1, GFP_KERNEL); 285 buf = kzalloc(count + 1, GFP_KERNEL);
285 if (!input_string) 286 if (!buf)
286 return -ENOMEM; 287 return -ENOMEM;
287 288
288 if (copy_from_user(input_string, buffer, count)) { 289 ret = -EFAULT;
289 kfree(input_string); 290 if (copy_from_user(buf, buffer, count))
290 return -EFAULT; 291 goto out;
291 }
292 input_string[count] = '\0';
293 292
294 strstrip(input_string); 293 buf[count] = '\0';
294 input_string = strstrip(buf);
295 295
296 /* 296 /*
297 * Clear all breakpoints if: 297 * Clear all breakpoints if:
@@ -302,15 +302,13 @@ static ssize_t ksym_trace_filter_write(struct file *file,
302 if (!input_string[0] || !strcmp(input_string, "0") || 302 if (!input_string[0] || !strcmp(input_string, "0") ||
303 !strcmp(input_string, "*:---")) { 303 !strcmp(input_string, "*:---")) {
304 __ksym_trace_reset(); 304 __ksym_trace_reset();
305 kfree(input_string); 305 ret = 0;
306 return count; 306 goto out;
307 } 307 }
308 308
309 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); 309 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
310 if (ret < 0) { 310 if (ret < 0)
311 kfree(input_string); 311 goto out;
312 return ret;
313 }
314 312
315 mutex_lock(&ksym_tracer_mutex); 313 mutex_lock(&ksym_tracer_mutex);
316 314
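
The filter-write path above now keeps the pointer returned by strstrip() instead of discarding it, since strstrip() trims trailing blanks in place and returns a pointer past the leading ones. A small user-space trim with the same contract (this is only an analogue, not the kernel's strstrip()):

	#include <ctype.h>
	#include <stdio.h>
	#include <string.h>

	/* Trim trailing blanks in place, return a pointer past leading blanks. */
	static char *demo_strstrip(char *s)
	{
		size_t len = strlen(s);

		while (len && isspace((unsigned char)s[len - 1]))
			s[--len] = '\0';
		while (*s && isspace((unsigned char)*s))
			s++;
		return s;
	}

	int main(void)
	{
		char buf[] = "   do_fork:rw-   \n";
		char *input = demo_strstrip(buf);	/* keep the returned pointer */

		printf("[%s]\n", input);	/* prints [do_fork:rw-] */
		return 0;
	}
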
@@ -321,7 +319,7 @@ static ssize_t ksym_trace_filter_write(struct file *file,
321 if (entry->attr.bp_type != op) 319 if (entry->attr.bp_type != op)
322 changed = 1; 320 changed = 1;
323 else 321 else
324 goto out; 322 goto out_unlock;
325 break; 323 break;
326 } 324 }
327 } 325 }
@@ -336,28 +334,24 @@ static ssize_t ksym_trace_filter_write(struct file *file,
336 if (IS_ERR(entry->ksym_hbp)) 334 if (IS_ERR(entry->ksym_hbp))
337 ret = PTR_ERR(entry->ksym_hbp); 335 ret = PTR_ERR(entry->ksym_hbp);
338 else 336 else
339 goto out; 337 goto out_unlock;
340 } 338 }
341 /* Error or "symbol:---" case: drop it */ 339 /* Error or "symbol:---" case: drop it */
342 ksym_filter_entry_count--; 340 ksym_filter_entry_count--;
343 hlist_del_rcu(&(entry->ksym_hlist)); 341 hlist_del_rcu(&(entry->ksym_hlist));
344 synchronize_rcu(); 342 synchronize_rcu();
345 kfree(entry); 343 kfree(entry);
346 goto out; 344 goto out_unlock;
347 } else { 345 } else {
348 /* Check for malformed request: (4) */ 346 /* Check for malformed request: (4) */
349 if (op == 0) 347 if (op)
350 goto out; 348 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
351 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
352 } 349 }
353out: 350out_unlock:
354 mutex_unlock(&ksym_tracer_mutex); 351 mutex_unlock(&ksym_tracer_mutex);
355 352out:
356 kfree(input_string); 353 kfree(buf);
357 354 return !ret ? count : ret;
358 if (!ret)
359 ret = count;
360 return ret;
361} 355}
362 356
363static const struct file_operations ksym_tracing_fops = { 357static const struct file_operations ksym_tracing_fops = {
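The ksym_trace_filter_write() rewrite above consolidates the scattered kfree()/return pairs into the usual kernel single-exit idiom: an out_unlock label drops the mutex, an out label frees the buffer, and ret carries the final status so the function ends with a single "return !ret ? count : ret". A minimal sketch of the same idiom under assumed names (example_mutex, example_parse() and example_apply() are placeholders, not tracer code):

#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

static DEFINE_MUTEX(example_mutex);		/* hypothetical lock */

int example_parse(const char *s);		/* hypothetical, 0 on success */
int example_apply(const char *s);		/* hypothetical, 0 on success */

static ssize_t example_filter_write(struct file *file, const char __user *ubuf,
				    size_t count, loff_t *ppos)
{
	char *buf;
	int ret;

	buf = kzalloc(count + 1, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = -EFAULT;
	if (copy_from_user(buf, ubuf, count))
		goto out;			/* single free path below */
	buf[count] = '\0';

	mutex_lock(&example_mutex);
	ret = example_parse(buf);
	if (ret)
		goto out_unlock;
	ret = example_apply(buf);
out_unlock:
	mutex_unlock(&example_mutex);
out:
	kfree(buf);
	return ret ? ret : count;		/* report count on success, like the patch */
}
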
@@ -449,102 +443,77 @@ struct tracer ksym_tracer __read_mostly =
449 .print_line = ksym_trace_output 443 .print_line = ksym_trace_output
450}; 444};
451 445
452__init static int init_ksym_trace(void)
453{
454 struct dentry *d_tracer;
455 struct dentry *entry;
456
457 d_tracer = tracing_init_dentry();
458 ksym_filter_entry_count = 0;
459
460 entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
461 NULL, &ksym_tracing_fops);
462 if (!entry)
463 pr_warning("Could not create debugfs "
464 "'ksym_trace_filter' file\n");
465
466 return register_tracer(&ksym_tracer);
467}
468device_initcall(init_ksym_trace);
469
470
471#ifdef CONFIG_PROFILE_KSYM_TRACER 446#ifdef CONFIG_PROFILE_KSYM_TRACER
472static int ksym_tracer_stat_headers(struct seq_file *m) 447static int ksym_profile_show(struct seq_file *m, void *v)
473{ 448{
449 struct hlist_node *node;
450 struct trace_ksym *entry;
451 int access_type = 0;
452 char fn_name[KSYM_NAME_LEN];
453
474 seq_puts(m, " Access Type "); 454 seq_puts(m, " Access Type ");
475 seq_puts(m, " Symbol Counter\n"); 455 seq_puts(m, " Symbol Counter\n");
476 seq_puts(m, " ----------- "); 456 seq_puts(m, " ----------- ");
477 seq_puts(m, " ------ -------\n"); 457 seq_puts(m, " ------ -------\n");
478 return 0;
479}
480 458
481static int ksym_tracer_stat_show(struct seq_file *m, void *v) 459 rcu_read_lock();
482{ 460 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
483 struct hlist_node *stat = v;
484 struct trace_ksym *entry;
485 int access_type = 0;
486 char fn_name[KSYM_NAME_LEN];
487 461
488 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); 462 access_type = entry->attr.bp_type;
489 463
490 access_type = entry->attr.bp_type; 464 switch (access_type) {
465 case HW_BREAKPOINT_R:
466 seq_puts(m, " R ");
467 break;
468 case HW_BREAKPOINT_W:
469 seq_puts(m, " W ");
470 break;
471 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
472 seq_puts(m, " RW ");
473 break;
474 default:
475 seq_puts(m, " NA ");
476 }
491 477
492 switch (access_type) { 478 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
493 case HW_BREAKPOINT_R: 479 seq_printf(m, " %-36s", fn_name);
494 seq_puts(m, " R "); 480 else
495 break; 481 seq_printf(m, " %-36s", "<NA>");
496 case HW_BREAKPOINT_W: 482 seq_printf(m, " %15llu\n",
497 seq_puts(m, " W "); 483 (unsigned long long)atomic64_read(&entry->counter));
498 break;
499 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
500 seq_puts(m, " RW ");
501 break;
502 default:
503 seq_puts(m, " NA ");
504 } 484 }
505 485 rcu_read_unlock();
506 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
507 seq_printf(m, " %-36s", fn_name);
508 else
509 seq_printf(m, " %-36s", "<NA>");
510 seq_printf(m, " %15lu\n", entry->counter);
511 486
512 return 0; 487 return 0;
513} 488}
514 489
515static void *ksym_tracer_stat_start(struct tracer_stat *trace) 490static int ksym_profile_open(struct inode *node, struct file *file)
516{ 491{
517 return ksym_filter_head.first; 492 return single_open(file, ksym_profile_show, NULL);
518} 493}
519 494
520static void * 495static const struct file_operations ksym_profile_fops = {
521ksym_tracer_stat_next(void *v, int idx) 496 .open = ksym_profile_open,
522{ 497 .read = seq_read,
523 struct hlist_node *stat = v; 498 .llseek = seq_lseek,
524 499 .release = single_release,
525 return stat->next;
526}
527
528static struct tracer_stat ksym_tracer_stats = {
529 .name = "ksym_tracer",
530 .stat_start = ksym_tracer_stat_start,
531 .stat_next = ksym_tracer_stat_next,
532 .stat_headers = ksym_tracer_stat_headers,
533 .stat_show = ksym_tracer_stat_show
534}; 500};
501#endif /* CONFIG_PROFILE_KSYM_TRACER */
535 502
536__init static int ksym_tracer_stat_init(void) 503__init static int init_ksym_trace(void)
537{ 504{
538 int ret; 505 struct dentry *d_tracer;
539 506
540 ret = register_stat_tracer(&ksym_tracer_stats); 507 d_tracer = tracing_init_dentry();
541 if (ret) {
542 printk(KERN_WARNING "Warning: could not register "
543 "ksym tracer stats\n");
544 return 1;
545 }
546 508
547 return 0; 509 trace_create_file("ksym_trace_filter", 0644, d_tracer,
510 NULL, &ksym_tracing_fops);
511
512#ifdef CONFIG_PROFILE_KSYM_TRACER
513 trace_create_file("ksym_profile", 0444, d_tracer,
514 NULL, &ksym_profile_fops);
515#endif
516
517 return register_tracer(&ksym_tracer);
548} 518}
549fs_initcall(ksym_tracer_stat_init); 519device_initcall(init_ksym_trace);
550#endif /* CONFIG_PROFILE_KSYM_TRACER */
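The replacement ksym_profile file above is a textbook single_open() user: one show callback prints the whole report, and seq_read/seq_lseek/single_release do the rest. A small, self-contained sketch of that recipe as a debugfs module; the file name and report contents here are invented:

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>

static int example_show(struct seq_file *m, void *v)
{
	/* Emit the whole report in one go; seq_file handles the buffering. */
	seq_puts(m, "  Access Type   Symbol     Counter\n");
	seq_printf(m, "  %-12s  %-8s  %9d\n", "RW", "<none>", 0);
	return 0;
}

static int example_open(struct inode *inode, struct file *file)
{
	return single_open(file, example_show, NULL);
}

static const struct file_operations example_fops = {
	.open    = example_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static struct dentry *example_file;

static int __init example_init(void)
{
	/* NULL parent places the file at the debugfs root. */
	example_file = debugfs_create_file("example_profile", 0444, NULL,
					   NULL, &example_fops);
	return example_file ? 0 : -ENOMEM;
}

static void __exit example_exit(void)
{
	debugfs_remove(example_file);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
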
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b6c12c6a1bcd..8e46b3323cdc 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -23,13 +23,21 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
23 23
24static int next_event_type = __TRACE_LAST_TYPE + 1; 24static int next_event_type = __TRACE_LAST_TYPE + 1;
25 25
26void trace_print_seq(struct seq_file *m, struct trace_seq *s) 26int trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29 int ret;
30
31 ret = seq_write(m, s->buffer, len);
29 32
30 seq_write(m, s->buffer, len); 33 /*
34 * Only reset this buffer if we successfully wrote to the
35 * seq_file buffer.
36 */
37 if (!ret)
38 trace_seq_init(s);
31 39
32 trace_seq_init(s); 40 return ret;
33} 41}
34 42
35enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) 43enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
@@ -85,7 +93,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
85 va_list ap; 93 va_list ap;
86 int ret; 94 int ret;
87 95
88 if (!len) 96 if (s->full || !len)
89 return 0; 97 return 0;
90 98
91 va_start(ap, fmt); 99 va_start(ap, fmt);
@@ -93,8 +101,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
93 va_end(ap); 101 va_end(ap);
94 102
95 /* If we can't write it all, don't bother writing anything */ 103 /* If we can't write it all, don't bother writing anything */
96 if (ret >= len) 104 if (ret >= len) {
105 s->full = 1;
97 return 0; 106 return 0;
107 }
98 108
99 s->len += ret; 109 s->len += ret;
100 110
@@ -119,14 +129,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
119 int len = (PAGE_SIZE - 1) - s->len; 129 int len = (PAGE_SIZE - 1) - s->len;
120 int ret; 130 int ret;
121 131
122 if (!len) 132 if (s->full || !len)
123 return 0; 133 return 0;
124 134
125 ret = vsnprintf(s->buffer + s->len, len, fmt, args); 135 ret = vsnprintf(s->buffer + s->len, len, fmt, args);
126 136
127 /* If we can't write it all, don't bother writing anything */ 137 /* If we can't write it all, don't bother writing anything */
128 if (ret >= len) 138 if (ret >= len) {
139 s->full = 1;
129 return 0; 140 return 0;
141 }
130 142
131 s->len += ret; 143 s->len += ret;
132 144
@@ -139,14 +151,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
139 int len = (PAGE_SIZE - 1) - s->len; 151 int len = (PAGE_SIZE - 1) - s->len;
140 int ret; 152 int ret;
141 153
142 if (!len) 154 if (s->full || !len)
143 return 0; 155 return 0;
144 156
145 ret = bstr_printf(s->buffer + s->len, len, fmt, binary); 157 ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
146 158
147 /* If we can't write it all, don't bother writing anything */ 159 /* If we can't write it all, don't bother writing anything */
148 if (ret >= len) 160 if (ret >= len) {
161 s->full = 1;
149 return 0; 162 return 0;
163 }
150 164
151 s->len += ret; 165 s->len += ret;
152 166
@@ -167,8 +181,13 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
167{ 181{
168 int len = strlen(str); 182 int len = strlen(str);
169 183
170 if (len > ((PAGE_SIZE - 1) - s->len)) 184 if (s->full)
185 return 0;
186
187 if (len > ((PAGE_SIZE - 1) - s->len)) {
188 s->full = 1;
171 return 0; 189 return 0;
190 }
172 191
173 memcpy(s->buffer + s->len, str, len); 192 memcpy(s->buffer + s->len, str, len);
174 s->len += len; 193 s->len += len;
@@ -178,9 +197,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
178 197
179int trace_seq_putc(struct trace_seq *s, unsigned char c) 198int trace_seq_putc(struct trace_seq *s, unsigned char c)
180{ 199{
181 if (s->len >= (PAGE_SIZE - 1)) 200 if (s->full)
182 return 0; 201 return 0;
183 202
203 if (s->len >= (PAGE_SIZE - 1)) {
204 s->full = 1;
205 return 0;
206 }
207
184 s->buffer[s->len++] = c; 208 s->buffer[s->len++] = c;
185 209
186 return 1; 210 return 1;
@@ -188,9 +212,14 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
188 212
189int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) 213int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
190{ 214{
191 if (len > ((PAGE_SIZE - 1) - s->len)) 215 if (s->full)
192 return 0; 216 return 0;
193 217
218 if (len > ((PAGE_SIZE - 1) - s->len)) {
219 s->full = 1;
220 return 0;
221 }
222
194 memcpy(s->buffer + s->len, mem, len); 223 memcpy(s->buffer + s->len, mem, len);
195 s->len += len; 224 s->len += len;
196 225
@@ -203,6 +232,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
203 const unsigned char *data = mem; 232 const unsigned char *data = mem;
204 int i, j; 233 int i, j;
205 234
235 if (s->full)
236 return 0;
237
206#ifdef __BIG_ENDIAN 238#ifdef __BIG_ENDIAN
207 for (i = 0, j = 0; i < len; i++) { 239 for (i = 0, j = 0; i < len; i++) {
208#else 240#else
@@ -220,8 +252,13 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
220{ 252{
221 void *ret; 253 void *ret;
222 254
223 if (len > ((PAGE_SIZE - 1) - s->len)) 255 if (s->full)
256 return 0;
257
258 if (len > ((PAGE_SIZE - 1) - s->len)) {
259 s->full = 1;
224 return NULL; 260 return NULL;
261 }
225 262
226 ret = s->buffer + s->len; 263 ret = s->buffer + s->len;
227 s->len += len; 264 s->len += len;
@@ -233,8 +270,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
233{ 270{
234 unsigned char *p; 271 unsigned char *p;
235 272
236 if (s->len >= (PAGE_SIZE - 1)) 273 if (s->full)
274 return 0;
275
276 if (s->len >= (PAGE_SIZE - 1)) {
277 s->full = 1;
237 return 0; 278 return 0;
279 }
280
238 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); 281 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
239 if (!IS_ERR(p)) { 282 if (!IS_ERR(p)) {
240 p = mangle_path(s->buffer + s->len, p, "\n"); 283 p = mangle_path(s->buffer + s->len, p, "\n");
@@ -247,6 +290,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
247 return 1; 290 return 1;
248 } 291 }
249 292
293 s->full = 1;
250 return 0; 294 return 0;
251} 295}
252 296
@@ -373,6 +417,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
373 unsigned long vmstart = 0; 417 unsigned long vmstart = 0;
374 int ret = 1; 418 int ret = 1;
375 419
420 if (s->full)
421 return 0;
422
376 if (mm) { 423 if (mm) {
377 const struct vm_area_struct *vma; 424 const struct vm_area_struct *vma;
378 425
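The trace_output.c changes all add the same overflow latch: once a write does not fit in the page-sized trace_seq buffer, s->full is set and every later write becomes a no-op, so the output stops cleanly at the last record that fit instead of emitting partial lines. The same idea in plain userspace C (the struct name and the 64-byte size are illustrative, not the kernel's):

#include <stdarg.h>
#include <stdio.h>

#define BUF_SIZE 64			/* illustrative; the kernel uses PAGE_SIZE */

struct bounded_seq {
	char buf[BUF_SIZE];
	size_t len;
	int full;			/* latched once any write overflows */
};

static int seq_append(struct bounded_seq *s, const char *fmt, ...)
{
	va_list ap;
	int ret;
	size_t room = (BUF_SIZE - 1) - s->len;

	if (s->full || !room)
		return 0;

	va_start(ap, fmt);
	ret = vsnprintf(s->buf + s->len, room, fmt, ap);
	va_end(ap);

	/* If it does not fit completely, write nothing and latch the flag. */
	if (ret < 0 || (size_t)ret >= room) {
		s->full = 1;
		return 0;
	}

	s->len += ret;
	return 1;
}

int main(void)
{
	struct bounded_seq s = { .len = 0, .full = 0 };

	for (int i = 0; i < 20; i++)
		seq_append(&s, "field%d=%d ", i, i * i);

	/* Output stops at the last record that fit in full. */
	printf("%s\n%s\n", s.buf, s.full ? "(truncated)" : "(complete)");
	return 0;
}
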
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 26185d727676..0271742abb8d 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -28,8 +28,8 @@ static int wakeup_current_cpu;
28static unsigned wakeup_prio = -1; 28static unsigned wakeup_prio = -1;
29static int wakeup_rt; 29static int wakeup_rt;
30 30
31static raw_spinlock_t wakeup_lock = 31static arch_spinlock_t wakeup_lock =
32 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 33
34static void __wakeup_reset(struct trace_array *tr); 34static void __wakeup_reset(struct trace_array *tr);
35 35
@@ -143,7 +143,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
143 goto out; 143 goto out;
144 144
145 local_irq_save(flags); 145 local_irq_save(flags);
146 __raw_spin_lock(&wakeup_lock); 146 arch_spin_lock(&wakeup_lock);
147 147
148 /* We could race with grabbing wakeup_lock */ 148 /* We could race with grabbing wakeup_lock */
149 if (unlikely(!tracer_enabled || next != wakeup_task)) 149 if (unlikely(!tracer_enabled || next != wakeup_task))
@@ -169,7 +169,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
169 169
170out_unlock: 170out_unlock:
171 __wakeup_reset(wakeup_trace); 171 __wakeup_reset(wakeup_trace);
172 __raw_spin_unlock(&wakeup_lock); 172 arch_spin_unlock(&wakeup_lock);
173 local_irq_restore(flags); 173 local_irq_restore(flags);
174out: 174out:
175 atomic_dec(&wakeup_trace->data[cpu]->disabled); 175 atomic_dec(&wakeup_trace->data[cpu]->disabled);
@@ -193,9 +193,9 @@ static void wakeup_reset(struct trace_array *tr)
193 tracing_reset_online_cpus(tr); 193 tracing_reset_online_cpus(tr);
194 194
195 local_irq_save(flags); 195 local_irq_save(flags);
196 __raw_spin_lock(&wakeup_lock); 196 arch_spin_lock(&wakeup_lock);
197 __wakeup_reset(tr); 197 __wakeup_reset(tr);
198 __raw_spin_unlock(&wakeup_lock); 198 arch_spin_unlock(&wakeup_lock);
199 local_irq_restore(flags); 199 local_irq_restore(flags);
200} 200}
201 201
@@ -225,7 +225,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
225 goto out; 225 goto out;
226 226
227 /* interrupts should be off from try_to_wake_up */ 227 /* interrupts should be off from try_to_wake_up */
228 __raw_spin_lock(&wakeup_lock); 228 arch_spin_lock(&wakeup_lock);
229 229
230 /* check for races. */ 230 /* check for races. */
231 if (!tracer_enabled || p->prio >= wakeup_prio) 231 if (!tracer_enabled || p->prio >= wakeup_prio)
@@ -255,7 +255,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
256 256
257out_locked: 257out_locked:
258 __raw_spin_unlock(&wakeup_lock); 258 arch_spin_unlock(&wakeup_lock);
259out: 259out:
260 atomic_dec(&wakeup_trace->data[cpu]->disabled); 260 atomic_dec(&wakeup_trace->data[cpu]->disabled);
261} 261}
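The wakeup-tracer hunks above (and the selftest and stack-tracer hunks below) are part of the tree-wide rename of the low-level lock type: raw_spinlock_t/__raw_spin_lock() become arch_spinlock_t/arch_spin_lock(), freeing the raw_spinlock_t name for lockdep-aware raw spinlocks. The usage pattern is unchanged and worth spelling out, since arch spinlocks do not disable interrupts or preemption on their own; a brief sketch:

#include <linux/irqflags.h>
#include <linux/spinlock.h>

/* Statically initialized, exactly as in the hunks above. */
static arch_spinlock_t example_lock =
	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

static void example_critical_section(void)
{
	unsigned long flags;

	/* The caller must mask interrupts itself; arch_spin_lock() will not. */
	local_irq_save(flags);
	arch_spin_lock(&example_lock);

	/* ... touch the data the lock protects ... */

	arch_spin_unlock(&example_lock);
	local_irq_restore(flags);
}
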
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index dc98309e839a..280fea470d67 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -67,7 +67,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
67 67
68 /* Don't allow flipping of max traces now */ 68 /* Don't allow flipping of max traces now */
69 local_irq_save(flags); 69 local_irq_save(flags);
70 __raw_spin_lock(&ftrace_max_lock); 70 arch_spin_lock(&ftrace_max_lock);
71 71
72 cnt = ring_buffer_entries(tr->buffer); 72 cnt = ring_buffer_entries(tr->buffer);
73 73
@@ -85,7 +85,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
85 break; 85 break;
86 } 86 }
87 tracing_on(); 87 tracing_on();
88 __raw_spin_unlock(&ftrace_max_lock); 88 arch_spin_unlock(&ftrace_max_lock);
89 local_irq_restore(flags); 89 local_irq_restore(flags);
90 90
91 if (count) 91 if (count)
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 8504ac71e4e8..f4bc9b27de5f 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -27,8 +27,8 @@ static struct stack_trace max_stack_trace = {
27}; 27};
28 28
29static unsigned long max_stack_size; 29static unsigned long max_stack_size;
30static raw_spinlock_t max_stack_lock = 30static arch_spinlock_t max_stack_lock =
31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 31 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
32 32
33static int stack_trace_disabled __read_mostly; 33static int stack_trace_disabled __read_mostly;
34static DEFINE_PER_CPU(int, trace_active); 34static DEFINE_PER_CPU(int, trace_active);
@@ -54,7 +54,7 @@ static inline void check_stack(void)
54 return; 54 return;
55 55
56 local_irq_save(flags); 56 local_irq_save(flags);
57 __raw_spin_lock(&max_stack_lock); 57 arch_spin_lock(&max_stack_lock);
58 58
59 /* a race could have already updated it */ 59 /* a race could have already updated it */
60 if (this_size <= max_stack_size) 60 if (this_size <= max_stack_size)
@@ -103,7 +103,7 @@ static inline void check_stack(void)
103 } 103 }
104 104
105 out: 105 out:
106 __raw_spin_unlock(&max_stack_lock); 106 arch_spin_unlock(&max_stack_lock);
107 local_irq_restore(flags); 107 local_irq_restore(flags);
108} 108}
109 109
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
157 unsigned long val, flags; 157 unsigned long val, flags;
158 char buf[64]; 158 char buf[64];
159 int ret; 159 int ret;
160 int cpu;
160 161
161 if (count >= sizeof(buf)) 162 if (count >= sizeof(buf))
162 return -EINVAL; 163 return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 172 return ret;
172 173
173 local_irq_save(flags); 174 local_irq_save(flags);
174 __raw_spin_lock(&max_stack_lock); 175
176 /*
177 * In case we trace inside arch_spin_lock() or after (NMI),
 178	 * we would cause a circular lock (deadlock), so we also need to
 179	 * increase the per-cpu trace_active here.
180 */
181 cpu = smp_processor_id();
182 per_cpu(trace_active, cpu)++;
183
184 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 185 *ptr = val;
176 __raw_spin_unlock(&max_stack_lock); 186 arch_spin_unlock(&max_stack_lock);
187
188 per_cpu(trace_active, cpu)--;
177 local_irq_restore(flags); 189 local_irq_restore(flags);
178 190
179 return count; 191 return count;
@@ -206,8 +218,14 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
206 218
207static void *t_start(struct seq_file *m, loff_t *pos) 219static void *t_start(struct seq_file *m, loff_t *pos)
208{ 220{
221 int cpu;
222
209 local_irq_disable(); 223 local_irq_disable();
210 __raw_spin_lock(&max_stack_lock); 224
225 cpu = smp_processor_id();
226 per_cpu(trace_active, cpu)++;
227
228 arch_spin_lock(&max_stack_lock);
211 229
212 if (*pos == 0) 230 if (*pos == 0)
213 return SEQ_START_TOKEN; 231 return SEQ_START_TOKEN;
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
217 235
218static void t_stop(struct seq_file *m, void *p) 236static void t_stop(struct seq_file *m, void *p)
219{ 237{
220 __raw_spin_unlock(&max_stack_lock); 238 int cpu;
239
240 arch_spin_unlock(&max_stack_lock);
241
242 cpu = smp_processor_id();
243 per_cpu(trace_active, cpu)--;
244
221 local_irq_enable(); 245 local_irq_enable();
222} 246}
223 247
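The extra bookkeeping added to stack_max_size_write(), t_start() and t_stop() is a recursion guard: the stack tracer's callback already skips its work while the per-cpu trace_active count is raised, so bumping it around max_stack_lock keeps the tracer from re-entering arch_spin_lock() (for example from an NMI) and deadlocking on a lock it already holds. A condensed sketch of the pattern with illustrative names:

#include <linux/irqflags.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/spinlock.h>

static DEFINE_PER_CPU(int, example_active);
static arch_spinlock_t example_lock =
	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

/* Tracing hot path (assumed to run with preemption disabled): bail out
 * whenever the guard is raised on this CPU. */
static void example_trace_callback(void)
{
	if (per_cpu(example_active, smp_processor_id()))
		return;
	/* ... normal tracing work ... */
}

static void example_update_max(unsigned long val, unsigned long *ptr)
{
	unsigned long flags;
	int cpu;

	local_irq_save(flags);

	/* Keep the callback out while we hold the arch spinlock. */
	cpu = smp_processor_id();
	per_cpu(example_active, cpu)++;

	arch_spin_lock(&example_lock);
	*ptr = val;
	arch_spin_unlock(&example_lock);

	per_cpu(example_active, cpu)--;
	local_irq_restore(flags);
}
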
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 57501d90096a..cba47d7935cc 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -143,70 +143,65 @@ extern char *__bad_type_size(void);
143 #type, #name, offsetof(typeof(trace), name), \ 143 #type, #name, offsetof(typeof(trace), name), \
144 sizeof(trace.name), is_signed_type(type) 144 sizeof(trace.name), is_signed_type(type)
145 145
146int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 146static
147int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
147{ 148{
148 int i; 149 int i;
149 int ret; 150 int pos = 0;
150 struct syscall_metadata *entry = call->data;
151 struct syscall_trace_enter trace;
152 int offset = offsetof(struct syscall_trace_enter, args);
153 151
154 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 152 /* When len=0, we just calculate the needed length */
155 "\tsigned:%u;\n", 153#define LEN_OR_ZERO (len ? len - pos : 0)
156 SYSCALL_FIELD(int, nr));
157 if (!ret)
158 return 0;
159 154
155 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
160 for (i = 0; i < entry->nb_args; i++) { 156 for (i = 0; i < entry->nb_args; i++) {
161 ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], 157 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
162 entry->args[i]); 158 entry->args[i], sizeof(unsigned long),
163 if (!ret) 159 i == entry->nb_args - 1 ? "" : ", ");
164 return 0;
165 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
166 "\tsigned:%u;\n", offset,
167 sizeof(unsigned long),
168 is_signed_type(unsigned long));
169 if (!ret)
170 return 0;
171 offset += sizeof(unsigned long);
172 } 160 }
161 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
173 162
174 trace_seq_puts(s, "\nprint fmt: \"");
175 for (i = 0; i < entry->nb_args; i++) { 163 for (i = 0; i < entry->nb_args; i++) {
176 ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i], 164 pos += snprintf(buf + pos, LEN_OR_ZERO,
177 sizeof(unsigned long), 165 ", ((unsigned long)(REC->%s))", entry->args[i]);
178 i == entry->nb_args - 1 ? "" : ", ");
179 if (!ret)
180 return 0;
181 } 166 }
182 trace_seq_putc(s, '"');
183 167
184 for (i = 0; i < entry->nb_args; i++) { 168#undef LEN_OR_ZERO
185 ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
186 entry->args[i]);
187 if (!ret)
188 return 0;
189 }
190 169
191 return trace_seq_putc(s, '\n'); 170 /* return the length of print_fmt */
171 return pos;
192} 172}
193 173
194int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) 174static int set_syscall_print_fmt(struct ftrace_event_call *call)
195{ 175{
196 int ret; 176 char *print_fmt;
197 struct syscall_trace_exit trace; 177 int len;
178 struct syscall_metadata *entry = call->data;
198 179
199 ret = trace_seq_printf(s, 180 if (entry->enter_event != call) {
200 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 181 call->print_fmt = "\"0x%lx\", REC->ret";
201 "\tsigned:%u;\n"
202 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
203 "\tsigned:%u;\n",
204 SYSCALL_FIELD(int, nr),
205 SYSCALL_FIELD(long, ret));
206 if (!ret)
207 return 0; 182 return 0;
183 }
208 184
209 return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); 185 /* First: called with 0 length to calculate the needed length */
186 len = __set_enter_print_fmt(entry, NULL, 0);
187
188 print_fmt = kmalloc(len + 1, GFP_KERNEL);
189 if (!print_fmt)
190 return -ENOMEM;
191
192 /* Second: actually write the @print_fmt */
193 __set_enter_print_fmt(entry, print_fmt, len + 1);
194 call->print_fmt = print_fmt;
195
196 return 0;
197}
198
199static void free_syscall_print_fmt(struct ftrace_event_call *call)
200{
201 struct syscall_metadata *entry = call->data;
202
203 if (entry->enter_event == call)
204 kfree(call->print_fmt);
210} 205}
211 206
212int syscall_enter_define_fields(struct ftrace_event_call *call) 207int syscall_enter_define_fields(struct ftrace_event_call *call)
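__set_enter_print_fmt() above relies on the standard two-pass snprintf() sizing trick: called with a zero length it only reports how many bytes the format string needs, so the caller can measure, allocate, and then fill in a second pass, with LEN_OR_ZERO keeping both passes in one function. A plain C illustration of the same trick (the argument names are invented):

#include <stdio.h>
#include <stdlib.h>

/* When len == 0 we only measure; snprintf() still returns the needed length. */
#define LEN_OR_ZERO (len ? len - pos : 0)

static int build_print_fmt(char *buf, int len, char **args, int nb_args)
{
	int pos = 0;

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
	for (int i = 0; i < nb_args; i++)
		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
				args[i], sizeof(unsigned long),
				i == nb_args - 1 ? "" : ", ");
	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");

	for (int i = 0; i < nb_args; i++)
		pos += snprintf(buf + pos, LEN_OR_ZERO,
				", ((unsigned long)(REC->%s))", args[i]);

	return pos;		/* length excluding the trailing NUL */
}

int main(void)
{
	char *args[] = { "fd", "buf", "count" };
	int len = build_print_fmt(NULL, 0, args, 3);	/* first pass: measure */
	char *fmt = malloc(len + 1);

	if (!fmt)
		return 1;
	build_print_fmt(fmt, len + 1, args, 3);		/* second pass: fill */
	puts(fmt);
	free(fmt);
	return 0;
}
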
@@ -217,10 +212,6 @@ int syscall_enter_define_fields(struct ftrace_event_call *call)
217 int i; 212 int i;
218 int offset = offsetof(typeof(trace), args); 213 int offset = offsetof(typeof(trace), args);
219 214
220 ret = trace_define_common_fields(call);
221 if (ret)
222 return ret;
223
224 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 215 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
225 if (ret) 216 if (ret)
226 return ret; 217 return ret;
@@ -241,10 +232,6 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
241 struct syscall_trace_exit trace; 232 struct syscall_trace_exit trace;
242 int ret; 233 int ret;
243 234
244 ret = trace_define_common_fields(call);
245 if (ret)
246 return ret;
247
248 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 235 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
249 if (ret) 236 if (ret)
250 return ret; 237 return ret;
@@ -333,10 +320,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
333 mutex_lock(&syscall_trace_lock); 320 mutex_lock(&syscall_trace_lock);
334 if (!sys_refcount_enter) 321 if (!sys_refcount_enter)
335 ret = register_trace_sys_enter(ftrace_syscall_enter); 322 ret = register_trace_sys_enter(ftrace_syscall_enter);
336 if (ret) { 323 if (!ret) {
337 pr_info("event trace: Could not activate"
338 "syscall entry trace point");
339 } else {
340 set_bit(num, enabled_enter_syscalls); 324 set_bit(num, enabled_enter_syscalls);
341 sys_refcount_enter++; 325 sys_refcount_enter++;
342 } 326 }
@@ -370,10 +354,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
370 mutex_lock(&syscall_trace_lock); 354 mutex_lock(&syscall_trace_lock);
371 if (!sys_refcount_exit) 355 if (!sys_refcount_exit)
372 ret = register_trace_sys_exit(ftrace_syscall_exit); 356 ret = register_trace_sys_exit(ftrace_syscall_exit);
373 if (ret) { 357 if (!ret) {
374 pr_info("event trace: Could not activate"
375 "syscall exit trace point");
376 } else {
377 set_bit(num, enabled_exit_syscalls); 358 set_bit(num, enabled_exit_syscalls);
378 sys_refcount_exit++; 359 sys_refcount_exit++;
379 } 360 }
@@ -400,12 +381,22 @@ int init_syscall_trace(struct ftrace_event_call *call)
400{ 381{
401 int id; 382 int id;
402 383
403 id = register_ftrace_event(call->event); 384 if (set_syscall_print_fmt(call) < 0)
404 if (!id) 385 return -ENOMEM;
405 return -ENODEV; 386
406 call->id = id; 387 id = trace_event_raw_init(call);
407 INIT_LIST_HEAD(&call->fields); 388
408 return 0; 389 if (id < 0) {
390 free_syscall_print_fmt(call);
391 return id;
392 }
393
394 return id;
395}
396
397unsigned long __init arch_syscall_addr(int nr)
398{
399 return (unsigned long)sys_call_table[nr];
409} 400}
410 401
411int __init init_ftrace_syscalls(void) 402int __init init_ftrace_syscalls(void)
@@ -435,7 +426,7 @@ int __init init_ftrace_syscalls(void)
435} 426}
436core_initcall(init_ftrace_syscalls); 427core_initcall(init_ftrace_syscalls);
437 428
438#ifdef CONFIG_EVENT_PROFILE 429#ifdef CONFIG_PERF_EVENTS
439 430
440static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 431static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
441static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); 432static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
@@ -447,12 +438,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
447 struct syscall_metadata *sys_data; 438 struct syscall_metadata *sys_data;
448 struct syscall_trace_enter *rec; 439 struct syscall_trace_enter *rec;
449 unsigned long flags; 440 unsigned long flags;
450 char *trace_buf;
451 char *raw_data;
452 int syscall_nr; 441 int syscall_nr;
453 int rctx; 442 int rctx;
454 int size; 443 int size;
455 int cpu;
456 444
457 syscall_nr = syscall_get_nr(current, regs); 445 syscall_nr = syscall_get_nr(current, regs);
458 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 446 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -471,37 +459,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
471 "profile buffer not large enough")) 459 "profile buffer not large enough"))
472 return; 460 return;
473 461
474 /* Protect the per cpu buffer, begin the rcu read side */ 462 rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
475 local_irq_save(flags); 463 sys_data->enter_event->id, &rctx, &flags);
476 464 if (!rec)
477 rctx = perf_swevent_get_recursion_context(); 465 return;
478 if (rctx < 0)
479 goto end_recursion;
480
481 cpu = smp_processor_id();
482
483 trace_buf = rcu_dereference(perf_trace_buf);
484
485 if (!trace_buf)
486 goto end;
487
488 raw_data = per_cpu_ptr(trace_buf, cpu);
489
490 /* zero the dead bytes from align to not leak stack to user */
491 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
492 466
493 rec = (struct syscall_trace_enter *) raw_data;
494 tracing_generic_entry_update(&rec->ent, 0, 0);
495 rec->ent.type = sys_data->enter_event->id;
496 rec->nr = syscall_nr; 467 rec->nr = syscall_nr;
497 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 468 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
498 (unsigned long *)&rec->args); 469 (unsigned long *)&rec->args);
499 perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); 470 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
500
501end:
502 perf_swevent_put_recursion_context(rctx);
503end_recursion:
504 local_irq_restore(flags);
505} 471}
506 472
507int prof_sysenter_enable(struct ftrace_event_call *call) 473int prof_sysenter_enable(struct ftrace_event_call *call)
@@ -545,11 +511,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
545 struct syscall_trace_exit *rec; 511 struct syscall_trace_exit *rec;
546 unsigned long flags; 512 unsigned long flags;
547 int syscall_nr; 513 int syscall_nr;
548 char *trace_buf;
549 char *raw_data;
550 int rctx; 514 int rctx;
551 int size; 515 int size;
552 int cpu;
553 516
554 syscall_nr = syscall_get_nr(current, regs); 517 syscall_nr = syscall_get_nr(current, regs);
555 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 518 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -571,38 +534,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
571 "exit event has grown above profile buffer size")) 534 "exit event has grown above profile buffer size"))
572 return; 535 return;
573 536
574 /* Protect the per cpu buffer, begin the rcu read side */ 537 rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size,
575 local_irq_save(flags); 538 sys_data->exit_event->id, &rctx, &flags);
576 539 if (!rec)
577 rctx = perf_swevent_get_recursion_context(); 540 return;
578 if (rctx < 0)
579 goto end_recursion;
580
581 cpu = smp_processor_id();
582
583 trace_buf = rcu_dereference(perf_trace_buf);
584
585 if (!trace_buf)
586 goto end;
587
588 raw_data = per_cpu_ptr(trace_buf, cpu);
589
590 /* zero the dead bytes from align to not leak stack to user */
591 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
592
593 rec = (struct syscall_trace_exit *)raw_data;
594 541
595 tracing_generic_entry_update(&rec->ent, 0, 0);
596 rec->ent.type = sys_data->exit_event->id;
597 rec->nr = syscall_nr; 542 rec->nr = syscall_nr;
598 rec->ret = syscall_get_return_value(current, regs); 543 rec->ret = syscall_get_return_value(current, regs);
599 544
600 perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); 545 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
601
602end:
603 perf_swevent_put_recursion_context(rctx);
604end_recursion:
605 local_irq_restore(flags);
606} 546}
607 547
608int prof_sysexit_enable(struct ftrace_event_call *call) 548int prof_sysexit_enable(struct ftrace_event_call *call)
@@ -617,7 +557,7 @@ int prof_sysexit_enable(struct ftrace_event_call *call)
617 ret = register_trace_sys_exit(prof_syscall_exit); 557 ret = register_trace_sys_exit(prof_syscall_exit);
618 if (ret) { 558 if (ret) {
619 pr_info("event trace: Could not activate" 559 pr_info("event trace: Could not activate"
620 "syscall entry trace point"); 560 "syscall exit trace point");
621 } else { 561 } else {
622 set_bit(num, enabled_prof_exit_syscalls); 562 set_bit(num, enabled_prof_exit_syscalls);
623 sys_prof_refcount_exit++; 563 sys_prof_refcount_exit++;
@@ -640,6 +580,5 @@ void prof_sysexit_disable(struct ftrace_event_call *call)
640 mutex_unlock(&syscall_trace_lock); 580 mutex_unlock(&syscall_trace_lock);
641} 581}
642 582
643#endif 583#endif /* CONFIG_PERF_EVENTS */
644
645 584
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index f6693969287d..a7974a552ca9 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -93,6 +93,7 @@ static const struct stacktrace_ops backtrace_ops = {
93 .warning_symbol = backtrace_warning_symbol, 93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack, 94 .stack = backtrace_stack,
95 .address = backtrace_address, 95 .address = backtrace_address,
96 .walk_stack = print_context_stack,
96}; 97};
97 98
98static int 99static int